\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage{setspace}
\usepackage{parskip}
\usepackage{titlesec}
\usepackage[section]{placeins}
\usepackage{xcolor}
\usepackage{breakcites}
\usepackage{lineno}
\usepackage{hyphenat}
\PassOptionsToPackage{hyphens}{url}
\usepackage[colorlinks = true,
linkcolor = blue,
urlcolor = blue,
citecolor = blue,
anchorcolor = blue]{hyperref}
\usepackage{etoolbox}
\makeatletter
\patchcmd\@combinedblfloats{\box\@outputbox}{\unvbox\@outputbox}{}{%
\errmessage{\noexpand\@combinedblfloats could not be patched}%
}%
\makeatother
\usepackage{natbib}
\renewenvironment{abstract}
{{\bfseries\noindent{\abstractname}\par\nobreak}\footnotesize}
{\bigskip}
\titlespacing{\section}{0pt}{*3}{*1}
\titlespacing{\subsection}{0pt}{*2}{*0.5}
\titlespacing{\subsubsection}{0pt}{*1.5}{0pt}
\usepackage{authblk}
\usepackage{graphicx}
\usepackage[space]{grffile}
\usepackage{latexsym}
\usepackage{textcomp}
\usepackage{longtable}
\usepackage{tabulary}
\usepackage{booktabs,array,multirow}
\usepackage{amsfonts,amsmath,amssymb}
\providecommand\citet{\cite}
\providecommand\citep{\cite}
\providecommand\citealt{\cite}
% You can conditionalize code for latexml or normal latex using this.
\newif\iflatexml\latexmlfalse
\providecommand{\tightlist}{\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}%
\AtBeginDocument{\DeclareGraphicsExtensions{.pdf,.PDF,.eps,.EPS,.png,.PNG,.tif,.TIF,.jpg,.JPG,.jpeg,.JPEG}}
\usepackage[utf8]{inputenc}
\usepackage[T2A]{fontenc}
\usepackage[ngerman,greek,polish,english]{babel}
% Use this header.tex file for:
% 1. frontmatter/preamble LaTeX definitions
% - Example: \usepackage{xspace}
% 2. global macros available in all document blocks
% - Example: \def\example{This is an example macro.}
%
% You should ONLY add such definitions in this header.tex space,
% and treat the main article content as the body/mainmatter of your document
% Preamble-only macros such as \documentclass and \usepackage are
% NOT allowed in the main document, and definitions will be local to the current block.
\begin{document}
\title{OCR-Stats: Robust estimation of mitochondrial respiration activities
using Seahorse XF Analyzer}
\author[1]{Vicente A. Yépez M.}%
\affil[1]{Affiliation not available}%
\vspace{-1em}
\date{\today}
\begingroup
\let\center\flushleft
\let\endcenter\endflushleft
\maketitle
\endgroup
\sloppy
\section*{Abstract}
{\label{994700}}
Accurate quantification of cellular and mitochondrial bioenergetic
activity is of great interest in many medical and biological areas.
Mitochondrial stress experiments performed with Seahorse Bioscience XF
Analyzers allow estimating 6 bioenergetics measures: basal respiration,
ATP production, proton leak, maximal respiration, spare capacity, and
non-mitochondrial respiration by monitoring oxygen consumption rates
(OCR) of living cells in multi-well plates. However, detailed
statistical analyses of OCR measurements from XF Analyzers have been
lacking so far. Here, we performed 126 mitochondrial stress experiments
involving 206 fibroblast cell lines. We show that the noise of OCR is
multiplicative and that outlier data points can concern individual
measurements or all measurements of a well. Based on these insights, we
developed a novel statistical method, OCR-Stats, that: i) models
multiplicative noise, ii) takes into account replicates both within and
between plates, and iii) automatically identifies outlier data points
and outlier wells. This led to a significant reduction of the
coefficient of variation across experiments of basal respiration, by
36\% (\emph{P} = 0.004), and of maximal respiration, by 32\% (\emph{P} =
0.023). After analyzing the inter and intra-plate variances, we suggest
a minimum number of well and plate replicates needed in order to obtain
confident results. We proposed two different approaches to compare the
OCR levels of two samples, which depend on the experimental design. In
this context, OCR-Stats largely increases the sensitivity over the
state-of-the-art method. OCR-Stats can easily be applied for analyzing
the extracellular acidification rates (ECAR) assessed by the glycolysis
stress test also provided by the XF Analyzer.
\subsection*{Keywords:}
{\label{392271}}
Oxygen Consumption Rate (OCR), mitochondrial respiration, bioenergetics,
statistical testing, outlier detection.
\section*{Introduction}
{\label{183388}}
Mitochondria are double membrane enclosed, ubiquitous, maternally
inherited, cytoplasmic organelles present in most eukaryotic
organisms~\hyperref[csl:1]{(Gorman et al., 2016)}al.,. 2016). They are the bioenergetics
factories of the cell~\hyperref[csl:2]{(Bhola and Letai, 2016}; \hyperref[csl:3]{Sun et al., 2016)}, and are also involved in
regulating reactive oxygen species~\hyperref[csl:4]{(Wallace, 2007)},
apoptosis~\hyperref[csl:2]{(Bhola and Letai, 2016)}, amino acid synthesis~\hyperref[csl:5]{(Birsoy et al., 2015}; \hyperref[csl:6]{Sullivan et al., 2015)},
cell proliferation~\hyperref[csl:6]{(Sullivan et al., 2015)}, cell signaling
\hyperref[csl:7]{(Zong et al., 2016)},and in the regulation of innate and adaptive
immunity~\hyperref[csl:8]{(Weinberg et al., 2015)}(Weinberg.~ 2015). It follows that a decline
in mitochondrial function, reflected by a diminished electron transport
chain activity, is implicated in many human diseases that range from
common disorders such as cancer \hyperref[csl:9]{(Wallace, 2012}; \hyperref[csl:7]{Zong et al., 2016)} 6), diabetes
\hyperref[csl:10]{(Dunham-Snary et al., 2014)}, Dunham-Snaryet al., 2014), neurodegeneration
\hyperref[csl:11]{(Yao et al., 2009)}~and aging \hyperref[csl:3]{(Sun et al., 2016)}, to rare genetic
disorders \hyperref[csl:12]{(Titov et al., 2016)}. One of the most informative assessments
of mitochondrial function is the quantification of cellular respiration,
as it directly reflects electron transport chain dysfunction
\hyperref[csl:12]{(Titov et al., 2016)}~and depends on many sequential reactions from
glycolysis to oxidative phosphorylation \hyperref[csl:13]{(Koopman et al., 2016)}. Estimations
of oxygen consumption rates (OCR) expressed in pmol/min, which are
mainly driven by mitochondrial respiration through oxidative
phosphorylation, and extracellular acidification rates (ECAR) expressed
in mpH/min, which reflect glycolysis \hyperref[csl:14]{(Divakaruni et al., 2014}; \hyperref[csl:15]{Ferrick et al., 2008}; \hyperref[csl:13]{Koopman et al., 2016)}, are more
conclusive for the ability to synthesize ATP and mitochondrial function
than measurements of intermediates (such as ATP or NADH) and potentials
\hyperref[csl:16]{(Brand and Nicholls, 2011}; \hyperref[csl:17]{Dmitriev and Papkovsky, 2012)}.
OCR was classically measured using a Clark-type electrode, which
required a substantial amount of purified mitochondria, was time
consuming, and did not allow automated injection of compounds
\hyperref[csl:18]{(Wu et al., 2007)}. Also, experimentation with isolated mitochondria is
ineffective because cellular regulation of mitochondrial function is
removed during isolation \hyperref[csl:19]{(Hill et al., 2012)}. In the last few years, a
new technology using fluorescent oxygen sensors \hyperref[csl:20]{(Gerencser et al., 2009)}~in a
microplate assay format has been developed by the company Seahorse
Bioscience (now part of Agilent Technologies) \selectlanguage{ngerman}\hyperref[csl:21]{(Ribeiro et al., 2015)}. It
allows simultaneous, real-time measurements of both OCR and ECAR in
multiple cell lines and conditions, reducing the sample material
required and increasing the throughput \hyperref[csl:14]{(Divakaruni et al., 2014}; \hyperref[csl:21]{Ribeiro et al., 2015)}. However,
high throughput assays demand for robust statistical methods to combine
their results.
Typically, OCR and ECAR levels are measured using the Seahorse XF
Analyzer in 96 (or 24) well-plates at multiple time steps under three
consecutive treatments, in a procedure known as mitochondrial stress
test~\hyperref[csl:22]{({Seahorse Bioscience}, n.d.)}. These conditions are obtained by
systematically injecting Oligomycin, FCCP and Rotenone (Fig. 1B). Under
basal conditions, complexes I-IV exploit energy derived from electron
transport to pump protons across the inner mitochondrial membrane. The
thereby generated proton gradient is subsequently harnessed by complex V
to generate ATP. Blockage of the proton translocation through complex V
by Oligomycin represses ATP production and prevents the electron
transport throughout complexes I-IV due to the unexploited gradient.
Administration of FCCP, an ionophor, subsequently dissipates the
gradient uncoupling electron transport from complex V activity and
increasing oxygen consumption. Finally, mitochondrial respiration is
completely halted using the complex I inhibitor Rotenone whereby the
residual non-mitochondrial respiration can be assessed. This approach is
label-free and non-destructive, so the cells can be retained and used
for further assays~\hyperref[csl:15]{(Ferrick et al., 2008)}. The assay enables the estimation
of six different bioenergetic measures: basal respiration, proton leak,
non-mitochondrial respiration, ATP production, spare respiratory
capacity, and maximal respiration, which can be deduced from differences
of OCRs between conditions~\hyperref[csl:16]{(Brand and Nicholls, 2011}; \hyperref[csl:14]{Divakaruni et al., 2014)}. Increase in proton leak
and decrease in maximum respiratory capacity are indicators of
mitochondrial dysfunction~\hyperref[csl:16]{(Brand and Nicholls, 2011)}. ATP production, basal
respiration, and spare capacity alter in response to ATP demand, which
is not necessarily mitochondrion-related as it may be the consequence of
deregulation of any cellular process altering general cellular energy
demand. Analogously, different treatments can be injected to modify ECAR
levels and obtain key parameters of glycolytic function: glycolysis,
glycolytic capacity, glycolytic reserve and non-glycolytic acidification
(Fig. S1), in an assay known as glycolysis stress
test~\hyperref[csl:23]{({Seahorse Bioscience}, 2012)}.
Current literature describing the Seahorse technology addressed
experimental aspects regarding sample preparation~\hyperref[csl:24]{(Dranka et al., 2010}; \hyperref[csl:25]{Zhang et al., 2013)},
the amount of cells to seed~\hyperref[csl:25]{(Zhang et al., 2013}; \hyperref[csl:26]{Zhou et al., 2012)}, and compound
concentration in different organisms~\hyperref[csl:27]{(Dranka et al., 2013}; \hyperref[csl:13]{Koopman et al., 2016}; \hyperref[csl:28]{Shah-Simpson et al., 2016)}. However,
studies suggesting statistical best practices for Seahorse flux analyzer
data are lacking. The goal of our study was to precisely provide such
best practices by analyzing a large-scale dataset of 126 mitochondrial
stress experiments (in 96-well plate format) involving 206 different
fibroblast cell lines. We obtained robust and accurate oxygen
consumption rates for each well, which translates into robust summarized
values of the multiple replicates inside one plate and across
experiments. Our statistical procedure includes normalization of raw
data and outlier identification and led to an increase in the accuracy
over state-of-the-art methods. Then, we focused on how to statistically
compare the results of 2 different samples. First, we noticed that the
variation across experiments is dominant with respect to the variation
inside, suggesting that it is necessary.
\section*{Results}
{\label{507127}}
\subsection*{Experimental design and raw
data}
{\label{124538}}
We derived OCR, ECAR, and cell number for 205 dermal fibroblast cultures
derived from patients suffering from rare mitochondrial diseases, and
control cells from healthy donors (normal human dermal fibroblast -
NHDF, Methods). These were assayed in 126 experiments (or plates), all
using the same protocol ({\ref{625784}}). We grew 28
cell lines multiple times and placed them in more than one plate, and we
will refer to these replicates as different samples. The NHDF cell line
was seeded in all experiments for batch correction. All four corners of
each plate were left as blank, i.e. filled with media but no cells to
control for changes in temperature~\hyperref[csl:24]{(Dranka et al., 2010)}. The typical
layout of a plate is depicted in Fig. 1C, showing how each sample is
present in many well replicates. We seeded between 3 and 7 samples per
experiment (median = 4). This variation reflects typical set-ups of
experiments in a lab gathered over multiple months or years.
We used the standard cellular respiration treatments (Fig. 1A) leading
to four intervals with three time points each and denoted by Int1
(resting cells), Int2 (after Oligomycin), Int3 (after FCCP) and Int4
(after Rotenone). Wells for which the median OCR level did not follow
the expected trend, namely, median(OCR(Int3)) \textgreater{}
median(OCR(Int1)) \textgreater{} median(OCR(Int2)) \textgreater{}
median(OCR(Int4)), were discarded (977 wells, 10.47\%). We also excluded
from the analysis wells in which the cells got detached and contaminated
wells (461 wells, 4.94\%,~{\ref{625784}}).
\subsection*{Random and systematic variations between well
replicates}
{\label{800773}}
Typical replicate time series are shown in Fig. 1C, with data from 8
wells for a single sample in a single plate. It shows the typical kinds
of variations that we observed.
First, outlier data points occurred frequently. We distinguished two
different types of outliers: entire series for a well (e.g., well G5 in
Fig. 1C) and individual data points (e.g., well B6 at time point 6 in
Fig. 1C). In the latter case, eliminating the entire series for well B6
would be too restrictive, and would result in losing valuable data from
the other 11 valid time points. Therefore, we propose a method to find
outliers considering these two possibilities independently.
Second, we noticed that the higher the OCR value, the higher the
variance between replicates, suggesting that the error is
multiplicative. Unequal variance, or heteroscedasticity, can strongly
affect the validity of statistical tests and the robustness of
estimations. We therefore considered modeling OCR on a logarithmic
scale, where the dependency between variance and mean disappears (Figs.
2A, 2B).
Third, we observed systematic biases in OCR between wells (e.g., OCR
values of well C6 are among the highest while OCR values of well B5 are
among the lowest at all time points). Variations in cell number, initial
conditions, treatment concentrations, and fluorophore sleeve calibration
can lead to systematic differences between wells, which we refer to as
well biases. To investigate whether well biases could be corrected using
cell number as suggested in~\hyperref[csl:24]{(Dranka et al., 2010)}, we counted the number
of cells after the experiments using Cyquant
({\ref{625784}}). As expected, median OCR for each
interval grows linearly with cell number measured at the end of the
experiment (Spearman rho between 0.32 and 0.47,~\emph{P} \textless{}
2.2e-16, Fig. S2A). However, the relation is not perfect reflecting
important additional sources of variations but also possible noise in
measuring cell number. Strikingly, dividing OCR by cell count led to a
higher coefficient of variation (standard deviation divided by the mean)
between replicate wells than without that correction (Fig. S2B). This
analysis showed that normalization for cell number should not be done
simply by a blunt division by raw cell counts and motivated us to derive
another method to capture well biases.
\subsection*{A statistical model of
OCR}
{\label{930292}}
For a given biological sample in one plate, we modeled the logarithm of
OCR ~of well \emph{w} at time point \emph{t} as a sum of well bias,
interval effects and noise, i.e.,:~
\begin{equation}
\label{eqn:y_a_b}
y_{w,t} = \alpha_{i(t)} + \beta_w + \epsilon_{w,t} .
\end{equation}
The term ~is the effect of the interval~\emph{i(t)~}of time
point~\emph{t}. The term~\selectlanguage{greek}\emph{β}\selectlanguage{english}\textsubscript{\emph{w}} is the
relative bias of well~\emph{w} compared to a reference well, which is
set arbitrarily and corresponds to the first well in alphabetical order.
The term \selectlanguage{greek}\emph{ε}\selectlanguage{english}\textsubscript{\emph{w,t}} is the error.
We defined the OCR levels the values (\selectlanguage{greek}\emph{θ}\selectlanguage{english}\textsubscript{\emph{i}})
as the expected log OCR per interval, averaged over all wells:
\begin{equation}
\label{eqn:th_a_b}
\theta_i = \alpha_{i(t)} + \frac{\sum_w \beta_w}{n} ,
\end{equation}
where \emph{n} is the number of wells.
Note that the well bias is modeled independently for each plate, i.e.,
the bias of a certain well in one plate is different from the bias of
the well at the same location in another plate.
\subsection*{Robust Fitting}
{\label{839184}}
To handle outliers, we devised a method based on iteratively fitting the
model and removing well and single data-point outliers until no outlier
is found. Specifically, we fit the linear
model~{\ref{eqn:y_a_b}}~using the least-squares
method, which consists in minimizing~\(\Sigma_w\Sigma_t\left(y_{w,t}-\alpha_{i\left(t\right)}+\beta_w\right)^2\), thus obtaining
the
coefficients~\selectlanguage{greek}\emph{α}\selectlanguage{english}\textsubscript{\emph{i}},~\selectlanguage{greek}\emph{β\textsubscript{w}}\selectlanguage{english},
and~\selectlanguage{greek}\emph{θ\textsubscript{i}}\selectlanguage{english}
using~{\ref{eqn:th_a_b}}. Then, for each time
point~\emph{t} in interval~\emph{i} and well~\emph{w}, we define the OCR
residual:~\(\epsilon_{\left\{w,t\right\}}=y_{w,t}-\theta_{i\left(t\right)}\), which will be used to identify well level
outliers. For each sample~\emph{s} and well~\emph{w}, we compute the
mean across time points of its squared residuals:~\(r_w:=\mathrm{mean}_t\left(\epsilon_{w,t}^2\right)\),
thus obtaining a distribution~\textbf{\emph{r}}. We identify as outliers
the wells whose \(r_w>\mathrm{median}\left(\textbf{r}\right)+5\cdot \mathrm{mad}\left(\textbf{r}\right)\), where mad, median absolute
deviation, is a robust estimation of the standard deviation (Fig. S3A).
We found that deviations by 5 mad from the median were selective enough
in practice. We compute the vector of estimates \selectlanguage{greek}\textbf{\emph{θ}} \selectlanguage{english}using
the remaining wells and iterate this procedure until no more well is
identified as outlier. We required 8 iterations until convergence and
found around 16.5\% of all the wells to be outliers (Fig. S3B).
We identify single point outliers in a similar way. After discarding the
wells that were found to be outliers in the previous step, we identify
as outliers single data points whose \(\epsilon_{w,t}^2>\mathrm{median}_t\left(\epsilon_{w,t}^2\right)+5\cdot \mathrm{mad}_t\left(\epsilon_{w,t}^2\right)\)~(Fig. S3C). We
again iterate until we obtain no more outliers. We required 19
iterations until convergence and found approximately 6.1\% of single
points to be outliers (Fig. S3D).
As we modeled in logarithmic scale, we change back to natural scale in
order to compute the bioenergetic measures as defined in Fig. 1A (e.g.:
Basal respiration = \(e^{\theta_1} - e^{\theta_3}\), Maximal respiration =
\(e^{\theta_3} - e^{\theta_4}\), etc.). Note that this method can easily be extended
to ECAR levels estimation obtained after performing the glycolysis
stress test using the same XF Analyzer.
As hypothesized, estimated well biases correlated with cell number (Fig.
S4).
\subsection*{Plate effect}
{\label{272328}}
The next step of our model is to compare the OCRs from replicates across
plates. Fig. 3A shows a similar increase in OCR on the interval 1 on a
similar way for both samples on experiment 20140430 with respect to
experiment 20140428, suggesting a plate effect. To test if this tendency
holds across every repeated sample, we compared all replicate pairings
with their respective controls. For example, if sample~\emph{s} and
control~\emph{c} were both seeded in plates~\emph{p} and~\emph{q}, we
compare for every interval~\emph{i}: \(\hat{\theta}_{i,s,p}-\hat{\theta}_{i,s,q}\) vs.
~\(\hat{\theta}_{i,c,p}-\hat{\theta}_{i,c,q}\). Fig. 3B shows a positive correlation, confirming
that there exists a plate effect. These systematic differences can come
from changes in temperature or the use of different sensor
cartridges~\hyperref[csl:13]{(Koopman et al., 2016)}.
In an attempt to correct for the plate effect, we investigated a linear
model where the levels ~depended on interval \emph{i}, samples \emph{s}
and plate \emph{p}:~\(\theta'_{i,s,p}=\alpha_{i,s}+\beta_{i,p}+\epsilon_{i,s,p}\), thus obtaining one coefficient
~for each plate-interval combination. Finally, we added these plate
effects to our previous estimates using:\(\hat{\theta}\ _{i,s,p}^f=\hat{\theta}_{i,s}-\beta_{i,p}\), obtaining
the final estimates . As before, we used linear regression for
estimation. Here we did not need outlier removal as they were already
detected at a single plate level.
\subsection*{Benchmark}
{\label{853723}}
In order to benchmark our model, we compute the coefficient of variation
(standard deviation divided by mean) of the six bioenergetic measures of
all repeated samples across experiments. The lower the coefficient of
variation among replicates, the better the method. We cannot test using
the final estimates \(\hat{\theta}^f\) , because we would fall into
circularity as correcting using \selectlanguage{greek}\emph{β}\selectlanguage{english}\textsubscript{\emph{i,p}}~
forces replicates to have a more closer value. Therefore, just for
benchmarking purposes, we corrected for plate effect using the data from
the NHDF cell line controls \emph{c} of each plate only, namely:
\begin{equation}
\label{eqn:plate_ef_nh}
y^c_{i,p}=\beta_0^c + \beta_i^c +\beta_p^c +\beta_{i,p} .
\end{equation}
We used the effects~\selectlanguage{greek}\emph{β\textsuperscript{c}\textsubscript{p}}\selectlanguage{english}~as
offset to our model (1), and recompute ~values accordingly. We scale
back to natural scale to calculate the bioenergetic measures and the
coefficient of variation of all repeated samples (except the control to
avoid circularity) using: i) the default Extreme Differences (ED)
method~{\ref{625784}} provided by the vendor, ii) the
log linear (LL) model previously described, iii) the same LL model but
trimmed (TLL) (after removing outliers), and iv) TLL after correcting
for plate effect (TLLPE) using the control as offset. Each step
contributed to lowering the coefficient of variation, obtaining a final
significant reduction of 36\% and 32\% in basal and maximal respiration,
respectively, from TLLPE with respect to ED (Fig. 4)
(\emph{P~}\textless{} 0.03, one-sided Wilcoxon test).
\subsection*{Determining optimal number of
wells}
{\label{728156}}
Having set up a more robust estimation procedure, we next were
interested in determining the minimum amount of wells needed to get good
estimates~\(\hat{\theta}\). Using only the controls NHDF, we computed
the standard deviation~\(\sigma_{i,p}^w\) of OCR across all wells for
each plate~\emph{p} and interval~\emph{i}. We take the median across
plates, thus obtaining one value per interval~\(\sigma_i^w\). As we
worked in the log scale, the error in the natural scale becomes
multiplicative and relative.~The standard error of the estimates can be
written as~\(\sigma_{\hat{\theta_i}}=\frac{\sigma_i^w}{\sqrt{n_w}}\), where~\emph{n\textsubscript{w}}~is the
number of wells. From there, we get a formula for the number of wells:
\begin{equation}
\label{eqn:nw}
n_w = \Big(\frac{\sigma_i^w}{\sigma_{\hat{\theta_i}}}\Big)^2
\end{equation}
The highest value of~\(\sigma_i^w\) was 0.16. So, in order to get a
relative error of 5\%, cells should be seeded in~\(n_w=\left(\frac{0.16}{0.05}\right)^2\approx10\)
wells.~Note that this number is after removing outliers, so considering
that around 16.5\% of wells were found to be outliers, then the initial
number would be~\(10/(1-0.165)\approx12\) wells. This simply means seeding
cells in one and half columns of the plate.
\subsection*{Analysis of variance within and between
plates}
{\label{112485}}
After analyzing the variation among wells inside a same plate, we set up
to study the variation across multiple plates. We use the controls NHDF
and determine the variance between and within using ANOVA. The results
on Table {\ref{336535}}~are conclusive indicating that
the variance between plates is dominant with respect to within (Fig.
S5A).\selectlanguage{english}
\begin{table}[h!]
\centering
\normalsize\begin{tabulary}{1.0\textwidth}{CCCCC}
& S\textsuperscript{\textbf{\emph{2}}}\textsubscript{\textbf{\emph{W}}}
& S\textsuperscript{\textbf{\emph{2}}}\textsubscript{\textbf{\emph{B}}}
& F & P value \\
Interval 1 & 0.01 & 3.52 & 254.9 & <2e-16 \\
Interval 2 & 0.02 & 3.24 & 148 & <2e-16 \\
Interval 3 & 0.02 & 3.78 & 214.7 & <2e-16 \\
Interval 4 & 0.04 & 3.81 & 106.2 & <2e-16 \\
\end{tabulary}
\caption{{ANOVA of OCR between and within plates using controls, for each time
interval.
{\label{336535}}%
}}
\end{table}These findings suggest that in order to compare the bioenergetic
measures of 2 samples, multiple experiments containing the same cell
cultures should be performed (also proposed by \hyperref[csl:13]{(Koopman et al., 2016)}).
\subsubsection*{Case 1: Seed the same samples in multiple plates and
compare them using a t
test}
{\label{917873}}
One first approach to compare the OCR levels of different samples is to
seed them in multiple plates, obtain one value per sample, plate and
interval (or bioenergetic measure) (Fig. 5A) and compare them using a
paired Wilcoxon test, where the pairing is done plate-wisely. Using both
OCR-Stats and the ED method, we computed the bioenergetic measures of
all 8 samples that were seeded in 3 or more plates. Then, we compared
the results with their respective NHDFs using a paired and unpaired
Wilcoxon test. As the samples correspond to patients with mitochondrial
disorders, we expect to find significant different cellular respiration
with respect to the controls. As hypothesized, we obtained more
sensitivity when using a paired test (Fig. 5B). Morover, OCR-Stats
proved to be more sensitive than ED (Fig. 5C). Nevertheless, due to the
very few number of plates, none of the results were significant (in
order to achieve significance (\emph{P} \textless{} 0.05), using a
two-sided, paired Wilcoxon test, we need at least 5 experiments).
Therefore, we suggest using a t test (paired, with unequal variances) as
it has more sensitivity than Wilcoxon test (Fig. 5D), after determining
that the estimates~\(\hat{\theta}\)~are indeed normally distributed
across plates for the control NHDF (Fig. S5B, S5C).
\subsubsection*{Case 2: Learn the plate
variation}
{\label{652619}}
Assume that for a sample~\emph{s} in a plate~\emph{j}, the
estimates~\(\hat{\theta}\)~are normally distributed:
~\(p(\hat{\theta}_{s,j})=N(\hat{\theta}_{s,j}\ |\ \theta_s^T+\gamma_j,\ \sigma^2)\), where~\(\theta^T_s\)is the true estimate of
sample~\emph{s} and~\(\gamma_j\)~is the systematic effect of
plate~\emph{j}. We define~\(x_j:=\hat{\theta}_{f,j}-\hat{\theta}_{NHDF,j}\), where~\(\hat{\theta}_{f,j}\)
is the estimate of another fibroblast~\emph{f} located on the same
plate~\emph{j}~as the control. Then, it follows that~\(p(x_j)=N(x_j\ |\ \theta_s^T+\gamma_j-\theta_{NHDF}^T-\gamma_j,\ 2\sigma^2)\),
and for any two plates seeded with the same samples:~\(p\left(x_2-x_1\right)=N\left(x_2-x_1\ |\ 0,\ 4\sigma^2\right)\),
which allows us to estimate the intra-plate variance~\(4\sigma_p^2\)~
(Fig. 6A). The mean \(\sigma_p\)~across intervals is 0.22, so in
order to get a relative error of 5\%, we need to learn the variance
from~\(n_p=\left(\frac{0.22}{0.05}\right)^2\approx20\) plates.~
As we are interested in comparing the true~\(\hat{\theta}\)~levels of
a fibroblast~\emph{f} with respect to the control NHDF, the null
hypothesis is:~\(H_0: \theta^T_f = \theta^T_{NHDF}\), which has a
distribution~\(p(\hat{\theta}_{f,j}-\hat{\theta}_{NHDF,j})=N(\hat{\theta}_{f,j}-\hat{\theta}_{NHDF,j}\ |\ 0,\ 2\sigma_p^2)\). Note that we can estimate the variance
of this distribution using the intra-plate variance from the previous
step. We can then compare the differences\(\hat{\theta}_{f,j}-\hat{\theta}_{NHDF,j}\)to the null
distribution (Fig. 6B) and obtain corresponding p-values by inserting
them in the normal cumulative distribution function
with~\(\mu=0\) and~\(sd=\sqrt{2}\sigma_p\). As hypothesized,
the~\(\hat{\theta}_{f,j}-\hat{\theta}_{NHDF,j}\)~is skewed towards the left with respect to the
null distribution (Fig. 6B) because the patients are believed to have a
respiration defect. This holds for time intervals 1 to 3, but not on 4
because that one corresponds to non-mitochondrial respiration which
should not be impaired in our patients.
\par\null\par\null
After correcting for plate effect, replicates across plates should also
not be significantly different from one another. However, we observed
that this is not the case (Fig. 5D), suggesting that variations between
plates (and thus between runs of the machine) are not well captured by
our log-linear assumption, consistent with earlier report of important
between-plates variations \hyperref[csl:29]{(Invernizzi et al., 2012)}.
\subsection*{Analysis of samples on multiple
plates}
{\label{689224}}
To enable inter-plate comparisons, the Multi-File XF Cell Energy
Phenotype Report Generator by default performs averaging of measures
across plates {\ref{625784}}. Multi-plate averaging not
only neglects the variation within each experiment, but also provides
only one value per plate per sample, losing significance when comparing
against other samples (Fig. 5F).
\par\null
\section*{Discussion and conclusion}
{\label{932338}}
As mitochondrial studies using extracellular fluxes (specifically the XF
Analyzer from Seahorse) are gaining popularity, it is of paramount
importance to have a proper statistical method to estimate the OCR
levels from the raw data. In this paper, we have developed such a model,
which includes approaches to control for well biases and automatic
outlier identification. By doing so, we were able to significantly
reduce the coefficient of variation of replicates across experiments.
Then, we proposed a solution to compare the bioenergetic measures of two
samples from the same plate using statistical testing, and across plates
using Stouffer's weighted method. We show empirically that both tests
are controlled for type I error.
We found that dividing cellular OCR by cell number was introducing more
noise than was seen for uncorrected data. However, we have seeded here
always the same number of cells. Hence, the variations we observed in
cell number at the end of the experiments are largely overestimated by
noise in measurements. In other experimental settings, in which
different numbers of cells are seeded, we suggest to include an offset
term to the model~{\ref{eqn:y_a_b}} equal to the
logarithm of the seeded cell number to control for this variation by
design. Also, the Seahorse XF Analyzer can be used on isolated
mitochondria and on isolated enzymes, where a normalization approach is
to divide OCR by mitochondrial proteins or enzyme
concentration~\hyperref[csl:30]{({Seahorse Bioscience}, n.d.)}. However, as described here for
cellular assays, robust normalization procedures require careful
analysis.
To use XF Seahorse Analyzers for large-scale experiments, one would need
to be able to compare samples measured on different plates. Our
investigation showed that there is roughly multiplicative bias between
plates that can be controlled to some extent by including control
samples across plates, as we did here with NHDF. We propose an extension
of our intra-plate robust linear regression approach to multiple plates
that can handle model this plate bias. However, we also noticed that the
assumption of a multiplicative plate bias is not sufficient when it
comes to statistical testing. Therefore, for comparing two biological
samples statistically, samples need to be placed on the same plate, and
ideally repeated on multiple plates. It will be important in the future
to understand the source of variations between plates to truly enable
large-scale experiments to be performed.
\section*{Methods}
{\label{625784}}
\subsection*{Biological material}
{\label{358668}}
All samples come from primary fibroblast cell lines of humans suffering
from rare mitochondrial diseases, established in the framework of
mitoNet and GENOMIT. The controls used are primary patient fibroblast
cell lines, normal human dermal fibroblasts (NHDF) from neonatal tissue,
commercially available from Lonza, Basel, Switzerland.
\subsection*{Measure of extracellular fluxes using Seahorse
XF96}
{\label{385290}}
We seeded 20,000 fibroblasts cells in each well of a XF 96-well cell
culture microplate in 80 ml of culture media, and incubated overnight at
37\selectlanguage{ngerman}°C in 5\% CO2. The four corners were left only with medium for
background correction. Culture medium is replaced with 180 ml of
bicarbonate-free DMEM and cells are incubated at~37°C for 30 min before
measurement. Oxygen consumption rates (OCR) were measured using a XF96
Extracellular Flux Analyser (Seahorse Bioscience). OCR was determined at
four levels: with no additions, and after adding: oligomycin (1 \selectlanguage{greek}μ\selectlanguage{english}M);
carbonyl cyanide 4-(trifluoromethoxy) phenylhydrazone (FCCP, 0.4 \selectlanguage{greek}μ\selectlanguage{english}M);
~and rotenone (2 \selectlanguage{greek}μ\selectlanguage{english}M) (additives purchased from Sigma at highest
quality).~After each assay, manual inspection was performed on all wells
using a conventionally light microscope.
\subsection*{Extreme Differences (ED) Method to compute bioenergetic
measures}
{\label{620383}}
This corresponds to the default method provided by the vendor. On every
plate independently, for each well, on interval 1 take the OCR
corresponding to the last measurement, on intervals 2 and 4 take the
minimum and on interval 3 the maximum~\hyperref[csl:31]{(Divakaruni et al., 2014)}. Then, do the
corresponding differences to estimate the bioenergetic measures. Report
the results per patient as the mean across wells plus standard deviation
or standard error, separately for each plate.
\subsection*{Multi-plate averaging
method}
{\label{577744}}
In case of inter-plate comparisons, the multi-plate averaging methods
takes the average and standard error of the bioenergetics measures
obtained using the ED method of all repeated samples across
plates~\hyperref[csl:32]{({Agilent Technologies}, n.d.)}.
\subsection*{Bootstrapping}
{\label{345489}}
We permute with replacement all the wells of each sample (after
discarding outliers)~\emph{B} = 9,999 times in order to get, for
example,~\emph{B} values for maximal respiration (MR) for any sample,
thus creating a distribution. For a sample whose original MR has a value
of~\emph{t} and the~\emph{B} simulated MR have sorted
values~\emph{t*\textsubscript{1,\ldots{},B}}, the 1 -- 2\selectlanguage{greek}α \selectlanguage{english}confidence
interval of this distribution has limits:~\(2t-t_{\left(B+1\right)\left(1-\alpha\right)}^{\ast},\ 2t-t_{\left(B+1\right)\left(\alpha\right)}^{\ast}\)~(Davison,
Hinkley, 2008).
Then, we are interested in testing whether MR of sample 1 (s1) differs
significantly from MR of sample 2 (s2). The corresponding hypotheses of
this test are: H1: MR(s1) \textless{}\textgreater{} MR(s2), H0: MR(s1)
\textgreater{} MR(s2) or MR(s1) \textless{} MR(s2).
We obtain the two-sided p-value for the test using:
\begin{equation}
\label{eqn:pv}
\hat{p} = \frac{r+1}{B+1} ,
\end{equation}
where~\(r=2\min\left(\#\left\{MR\left(s_1\right)-MR\left(s_2\right)\ge0\right\}, \#\left\{MR\left(s_1\right)-MR\left(s_2\right)\le0\right\}\right)\).
Alternatively, if we have prior knowledge that one sample should have
diminished MR with respect to another, e.g., a patient vs. a control, we
can opt for a one sided test. The corresponding hypotheses are: H1:
MR(control) \textgreater{} MR(patient), H0: MR(patient) -- MR(control) [?]
0.
The p-value is obtained using {\ref{eqn:pv}}~with
\(r=\#\left\{MR\left(patient\right)-MR\left(control\right)\ge0\right\}\) . Significance is obtained when \(\hat{p}<0.05\).
\subsection*{Stouffer's Weighted sum z method to combine
p-values}
{\label{763395}}
To compare the significance across \emph{k} experiments, each comparing
a sample \emph{s} and control \emph{c} group with a resulting p-value
\emph{p\textsubscript{i}}, define the statistic:
\par\null
\begin{equation}
\label{eqn:z}
Z=\frac{\Sigma_{i=1}^kw_iz\left(p_i\right)}{\sqrt{\Sigma_{i=1}^kw_i^2}} ,
\end{equation}
where~\emph{w\textsubscript{i}} correspond to the weight of
plate~\emph{i} and is the harmonic mean of the number of wells of the
sample and control groups:~\(w_i=(n_i^s\times n_i^c)/(n_i^s+n_i^c)\) ; and~\(z(p_i)=-\Phi^{-1}(p_i)\)
is the z score corresponding
to~\emph{p\textsubscript{i}~}(with~\(\Phi^{-1}\)being the quantile
function of the normal distribution)~\hyperref[csl:33]{(Hedges et al., 1992)}.
Under~\emph{H\textsubscript{0}}, the p-values follow a uniform
distribution from 0 to 1; therefore, the corresponding z-scores follow a
standard normal distribution. Using the probability density function of
the standard normal distribution, we can obtain the p-value of~\emph{Z}
and compare it against a significance level.
\section*{Acknowledgements}
{\label{778711}}
We would like to thank \selectlanguage{polish}Ž\selectlanguage{english}iga Avsec, Daniel Bader, Jun Cheng and Robert
Kopajtich for valuable discussions and manuscript revision. This study
was supported by the German Bundesministerium f\selectlanguage{ngerman}ür Bildung und Forschung
(BMBF) through the E-Rare project GENOMIT (01GM1603 and 01GM1207, H.P.
and T.M.), through the Juniorverbund in der Systemmedizin `mitOmics'
(FKZ 01ZX1405A J.G., L.W. and V.A.Y.M.) and the DZHK (German Centre for
Cardiovascular Research, L.S.K.). A Fellowship through the Graduate
School of Quantitative Biosciences Munich (QBM) supports V.A.Y.M.. H.P.
is supported by EU FP7 Mitochondrial European Educational Training
Project (317433). J.G. and H.P. are supported by EU Horizon2020
Collaborative Research Project SOUND (633974). We thank the Cell lines
and DNA Bank of Paediatric Movement Disorders and Mitochondrial Diseases
of the Telethon Genetic Biobank Network (GTB09003).
\selectlanguage{english}
\FloatBarrier
\section*{References}\sloppy
\phantomsection
\label{csl:2}Bhola, P.D., Letai, A., 2016. {Mitochondria-Judges and Executioners of Cell Death Sentences}. Molecular Cell 61, 695–704. \url{https://doi.org/10.1016/j.molcel.2016.02.019}
\phantomsection
\label{csl:5}Birsoy, K., Wang, T., Chen, W.W., Freinkman, E., Abu-Remaileh, M., Sabatini, D.M., 2015. {An Essential Role of the Mitochondrial Electron Transport Chain in Cell Proliferation Is to Enable Aspartate Synthesis}. Cell 162, 540–551. \url{https://doi.org/10.1016/j.cell.2015.07.016}
\phantomsection
\label{csl:16}Brand, M.D., Nicholls, D.G., 2011. {Assessing mitochondrial dysfunction in cells.}. The Biochemical journal 435, 297–312. \url{https://doi.org/10.1042/BJ20110162}
\phantomsection
\label{csl:14}Divakaruni, A.S., Paradyse, A., Ferrick, D.A., Murphy, A.N., Jastroch, M., 2014. {Analysis and interpretation of microplate-based oxygen consumption and pH data.}. Methods in enzymology 547, 309–54. \url{https://doi.org/10.1016/B978-0-12-801415-8.00016-3}
\phantomsection
\label{csl:31}Divakaruni, A.S., Paradyse, A., Ferrick, D.A., Murphy, A.N., Jastroch, M., 2014. {Analysis and interpretation of microplate-based oxygen consumption and pH data}, Methods in Enzymology. \url{https://doi.org/10.1016/B978-0-12-801415-8.00016-3}
\phantomsection
\label{csl:17}Dmitriev, R.I., Papkovsky, D.B., 2012. {Optical probes and techniques for O 2 measurement in live cells and tissue} 2025–2039. \url{https://doi.org/10.1007/s00018-011-0914-0}
\phantomsection
\label{csl:27}Dranka, B.P., Benavides, G.A., Diers, A.R., Giordano, S., Blake, R., Reily, C., Zou, L., Chatham, J.C., Hill, B.G., Landar, A., Darley-usmar, V.M., 2013. {Assessing bioenergetic function in response to oxidative stress by metabolic profiling} 51, 1621–1635. \url{https://doi.org/10.1016/j.freeradbiomed.2011.08.005.Assessing}
\phantomsection
\label{csl:24}Dranka, B.P., Hill, B.G., Darley-usmar, V.M., 2010. {Free Radical Biology \& Medicine Mitochondrial reserve capacity in endothelial cells : The impact of nitric oxide and reactive oxygen species}. Free Radical Biology and Medicine 48, 905–914. \url{https://doi.org/10.1016/j.freeradbiomed.2010.01.015}
\phantomsection
\label{csl:10}Dunham-Snary, K.J., Sandel, M.W., Westbrook, D.G., Ballinger, S.W., 2014. {Redox Biology A method for assessing mitochondrial bioenergetics in whole white adipose tissues}. Elsevier 2, 656–660. \url{https://doi.org/10.1016/j.redox.2014.04.005}
\phantomsection
\label{csl:15}Ferrick, D.A., Neilson, A., Beeson, C., 2008. {Advances in measuring cellular bioenergetics using extracellular flux} 13. \url{https://doi.org/10.1016/j.drudis.2007.12.008}
\phantomsection
\label{csl:20}Gerencser, A.A., Neilson, A., Choi, S.W., Edman, U., Yadava, N., Oh, R.J., Ferrick, D.A., Nicholls, D.G., Brand, M.D., 2009. {Quantitative Microplate-Based Respirometry with Correction for Oxygen Diffusion} 81, 6868–6878.
\phantomsection
\label{csl:1}Gorman, G.S., Chinnery, P.F., DiMauro, S., Hirano, M., Koga, Y., McFarland, R., Suomalainen, A., Thorburn, D.R., Zeviani, M., Turnbull, D.M., 2016. {Mitochondrial diseases}. Nature Reviews Disease Primers 2, 16080. \url{https://doi.org/10.1038/nrdp.2016.80}
\phantomsection
\label{csl:33}Hedges, L.V., Cooper, H., Bushman, B.J., 1992. {Testing the null hypothesis in meta-analysis: A comparison of combined probability and confidence interval procedures.}. Psychological Bulletin 111, 188–194. \url{https://doi.org/10.1037/0033-2909.111.1.188}
\phantomsection
\label{csl:19}Hill, B.G., Benavides, G.A., Jr, J.R.L., Ballinger, S., Italia, L.D., 2012. {Integration of cellular bioenergetics with mitochondrial quality control and autophagy} 393, 1485–1512. \url{https://doi.org/10.1515/hsz-2012-0198}
\phantomsection
\label{csl:29}Invernizzi, F., D'Amato, I., Jensen, P.B., Ravaglia, S., Zeviani, M., Tiranti, V., 2012. {Microscale oxygraphy reveals OXPHOS impairment in MRC mutant cells}. Mitochondrion 12, 328–335. \url{https://doi.org/10.1016/j.mito.2012.01.001}
\phantomsection
\label{csl:13}Koopman, M., Michels, H., Dancy, B.M., Kamble, R., Mouchiroud, L., Auwerx, J., Nollen, E.A.A., Houtkooper, R.H., 2016. {A screening-based platform for the assessment of cellular respiration in Caenorhabditis elegans}. Nature protocols 11, 1798–1816. \url{https://doi.org/10.1038/nprot.2016.106}
\phantomsection
\label{csl:21}Ribeiro, S.M., Giménez-cassina, A., Danial, N.N., 2015. {Chapter 6 Measurement of Mitochondrial Oxygen Consumption Rates in Mouse Primary Neurons and Astrocytes} 1241, 59–69. \url{https://doi.org/10.1007/978-1-4939-1875-1}
\phantomsection
\label{csl:28}Shah-Simpson, S., Pereira, C.F.A., Dumoulin, P.C., Caradonna, K.L., Burleigh, B.A., 2016. {Bioenergetic profiling of Trypanosoma cruzi life stages using Seahorse extracellular flux technology}. Molecular and Biochemical Parasitology 208, 91–95. \url{https://doi.org/10.1016/j.molbiopara.2016.07.001}
\phantomsection
\label{csl:6}Sullivan, L.B., Gui, D.Y., Hosios, A.M., Bush, L.N., Freinkman, E., {Vander Heiden}, M.G., 2015. {Supporting Aspartate Biosynthesis Is an Essential Function of Respiration in Proliferating Cells}. Cell 162, 552–563. \url{https://doi.org/10.1016/j.cell.2015.07.017}
\phantomsection
\label{csl:3}Sun, N., Youle, R.J., Finkel, T., 2016. {The Mitochondrial Basis of Aging}. Molecular Cell 61, 654–666. \url{https://doi.org/10.1016/j.molcel.2016.01.028}
\phantomsection
\label{csl:12}Titov, D.V., Cracan, V., Goodman, R.P., Peng, J., Grabarek, Z., Mootha, V.K., 2016. {Complementation of mitochondrial electron transport chain by manipulation of the NAD+/NADH ratio.}. Science (New York, N.Y.) 352, 231–235. \url{https://doi.org/10.1126/science.aad4017}
\phantomsection
\label{csl:4}Wallace, D.C., 2007. {Why do we still have a maternally inherited mitochondrial DNA? Insights from evolutionary medicine}. Annual Review of Biochemistry 76, 781–821. \url{https://doi.org/doi:10.1146/annurev.biochem.76.081205.150955}
\phantomsection
\label{csl:9}Wallace, D.C., 2012. {Mitochondria and cancer}. Nature Reviews Cancer 12, 685–698. \url{https://doi.org/10.1038/nrc3365}
\phantomsection
\label{csl:8}Weinberg, S.E., Sena, L.A., Chandel, N.S., 2015. {Mitochondria in the regulation of innate and adaptive immunity}. Immunity 42, 406–417. \url{https://doi.org/10.1016/j.immuni.2015.02.002}
\phantomsection
\label{csl:18}Wu, M., Neilson, A., Swift, A.L., Moran, R., Tamagnine, J., Parslow, D., Armistead, S., Lemire, K., Orrell, J., Teich, J., Chomicz, S., Ferrick, D.A., Wu, M., Neilson, A., Al, S., Moran, R., Tamagnine, J., Armistead, S., Lemire, K., Orrell, J., Teich, J., Chomicz, S., 2007. {Multiparameter metabolic analysis reveals a close link between attenuated mitochondrial bioenergetic function and enhanced glycolysis dependency in human tumor cells} 01862, 125–136. \url{https://doi.org/10.1152/ajpcell.00247.2006.}
\phantomsection
\label{csl:11}Yao, J., Irwin, R.W., Zhao, L., Nilsen, J., Hamilton, R.T., Brinton, R.D., 2009. {Mitochondrial bioenergetic deficit precedes Alzheimer ’ s pathology in female mouse model of Alzheimer ’ s disease}.
\phantomsection
\label{csl:25}Zhang, J., Nuebel, E., Wisidagama, D.R.R., Setoguchi, K., Hong, J.S., 2013. {Measuring energy metabolism in cultured cells, including human pluripotent stem cells and differentiated cells}. Nat Protocol 7. \url{https://doi.org/10.1038/nprot.2012.048.Measuring}
\phantomsection
\label{csl:26}Zhou, W., Choi, M., Margineantu, D., Margaretha, L., Hesson, J., Cavanaugh, C., Blau, C.A., Horwitz, M.S., Hockenbery, D., Ware, C., Ruohola-Baker, H., 2012. {HIF1$\alpha$ induced switch from bivalent to exclusively glycolytic metabolism during ESC-to-EpiSC/hESC transition.}. The EMBO journal 31, 2103–16. \url{https://doi.org/10.1038/emboj.2012.71}
\phantomsection
\label{csl:7}Zong, W.-X., Rabinowitz, J.D., White, E., 2016. {Mitochondria and Cancer}. Cell 166, 555–566. \url{https://doi.org/10.1016/j.cell.2016.07.002}
\phantomsection
\label{csl:32}{Agilent Technologies}, n.d. {Multi-File XF Report Generator User Guide}.
\phantomsection
\label{csl:22}{Seahorse Bioscience}, n.d. {XF Cell Mito Stress Test Kit}.
\phantomsection
\label{csl:23}{Seahorse Bioscience}, 2012. {XF Glycolysis Stress Test Kit - User Manual}.
\phantomsection
\label{csl:30}{Seahorse Bioscience}, n.d. {Application Note - Normalizing XF metabolic data to cellular or mitochondrial parameters} 1–4.
\end{document}