package gdsc.smlm.fitting;
import java.util.Arrays;
import org.apache.commons.math3.analysis.MultivariateFunction;
import org.apache.commons.math3.analysis.MultivariateMatrixFunction;
import org.apache.commons.math3.analysis.MultivariateVectorFunction;
import org.apache.commons.math3.exception.ConvergenceException;
import org.apache.commons.math3.exception.TooManyEvaluationsException;
import org.apache.commons.math3.exception.TooManyIterationsException;
import org.apache.commons.math3.fitting.leastsquares.LeastSquaresBuilder;
import org.apache.commons.math3.fitting.leastsquares.LeastSquaresOptimizer.Optimum;
import org.apache.commons.math3.fitting.leastsquares.LeastSquaresProblem;
import org.apache.commons.math3.fitting.leastsquares.LevenbergMarquardtOptimizer;
import org.apache.commons.math3.linear.DiagonalMatrix;
import org.apache.commons.math3.optim.ConvergenceChecker;
import org.apache.commons.math3.optim.InitialGuess;
import org.apache.commons.math3.optim.MaxEval;
import org.apache.commons.math3.optim.OptimizationData;
import org.apache.commons.math3.optim.PointValuePair;
import org.apache.commons.math3.optim.SimpleBounds;
import org.apache.commons.math3.optim.SimpleValueChecker;
import org.apache.commons.math3.optim.nonlinear.scalar.GoalType;
import org.apache.commons.math3.optim.nonlinear.scalar.ObjectiveFunction;
import org.apache.commons.math3.optim.nonlinear.scalar.noderiv.CMAESOptimizer;
import org.apache.commons.math3.optim.nonlinear.scalar.noderiv.CustomPowellOptimizer;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.Well19937c;
import org.apache.commons.math3.util.FastMath;
import gdsc.core.logging.Logger;
import gdsc.core.logging.NullLogger;
/*-----------------------------------------------------------------------------
* GDSC SMLM Software
*
* Copyright (C) 2013 Alex Herbert
* Genome Damage and Stability Centre
* University of Sussex, UK
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*---------------------------------------------------------------------------*/
import gdsc.core.utils.Maths;
import gdsc.core.utils.Sort;
/**
* Perform curve fitting on a cumulative histogram of the mean-squared displacement (MSD) per second to calculate the
* diffusion coefficient of molecules (in um^2/s). The MSD is also known as the Jump Distance, i.e. how far a molecule
* moves when being tracked.
* <p>
* Based on the paper: Weimann, L., Ganzinger, K.A., McColl, J., Irvine, K.L., Davis, S.J., Gay, N.J., Bryant, C.E.,
* Klenerman, D. (2013) A Quantitative Comparison of Single-Dye Tracking Analysis Tools Using Monte Carlo Simulations.
* PLoS One 8, Issue 5, e64287
*/
public class JumpDistanceAnalysis
{
private final boolean DEBUG_OPTIMISER = false;
final static double THIRD = 1.0 / 3.0;
private double s2 = 0;
private boolean msdCorrection = false;
private int n = 0;
private double deltaT = 0;
private boolean calibrated = false;
public interface CurveLogger
{
/**
* Get the number of points to use for the curve between the minimum and maximum exclusive. The size of the
* curve arrays will be this value plus 1 (to include the maximum).
*
* @return The number of points to use for the curve between the minimum and maximum exclusive
*/
int getNumberOfCurvePoints();
/**
* Called with the best fit curve using a single population
*
* @param curve
*/
void saveSinglePopulationCurve(double[][] curve);
/**
* Called with the best fit curve using a mixed population
*
* @param curve
*/
void saveMixedPopulationCurve(double[][] curve);
}
private final Logger logger;
private CurveLogger curveLogger;
private int fitRestarts = 3;
private double minFraction = 0.1;
private double minDifference = 2;
private double minD = 1e-6;
private int minN = 1, maxN = 10;
// Set by the last call to the doFit functions
private double ss, ll, ic;
// Set by any public fit call
private double lastIC;
public JumpDistanceAnalysis()
{
this(null);
}
/**
* @param logger
* Used to write status messages on the fitting
*/
public JumpDistanceAnalysis(Logger logger)
{
if (logger == null)
logger = new NullLogger();
this.logger = logger;
resetFitResult();
}
private void resetFitResult()
{
lastIC = Double.NaN;
}
/**
* Fit the jump distances using a fit to a cumulative histogram.
* <p>
* The histogram is fit repeatedly using a mixed population model with increasing number of different molecules.
* Results are sorted by the diffusion coefficient ascending. This process is stopped when: the information
* criterion does not improve; the fraction of one of the populations is below the min fraction; the difference
* between two consecutive diffusion coefficients is below the min difference.
* <p>
* The number of populations must be obtained from the size of the D/fractions arrays.
*
* @param jumpDistances
* The jump distances (in um^2)
* @return Array containing: { D (um^2), Fractions }. Can be null if no fit was made.
*/
public double[][] fitJumpDistances(double... jumpDistances)
{
resetFitResult();
if (jumpDistances == null || jumpDistances.length == 0)
return null;
final double meanJumpDistance = Maths.sum(jumpDistances) / jumpDistances.length;
if (meanJumpDistance == 0)
return null;
double[][] jdHistogram = cumulativeHistogram(jumpDistances);
return fitJumpDistanceHistogram(meanJumpDistance, jdHistogram);
}
/**
* Fit the jump distance histogram using a cumulative sum.
* <p>
* The histogram is fit repeatedly using a mixed population model with increasing number of different molecules.
* Results are sorted by the diffusion coefficient ascending. This process is stopped when: the information
* criterion does not improve; the fraction of one of the populations is below the min fraction; the difference
* between two consecutive diffusion coefficients is below the min difference.
* <p>
* The number of populations must be obtained from the size of the D/fractions arrays.
*
* @param jumpDistances
* The mean jump distance (in um^2)
* @param jdHistogram
* The cumulative jump distance histogram. X-axis is um^2, Y-axis is cumulative probability. Must be
* monototic ascending.
* @return Array containing: { D (um^2), Fractions }. Can be null if no fit was made.
*/
public double[][] fitJumpDistanceHistogram(double meanJumpDistance, double[][] jdHistogram)
{
resetFitResult();
// Guess the D
final double estimatedD = meanJumpDistance / 4;
if (meanJumpDistance == 0)
return null;
logger.info("Estimated D = %s um^2", Maths.rounded(estimatedD, 4));
double[] ic = new double[maxN];
Arrays.fill(ic, Double.POSITIVE_INFINITY);
double[][] coefficients = new double[maxN][];
double[][] fractions = new double[maxN][];
double bestIC = Double.POSITIVE_INFINITY;
int best = -1;
if (minN == 1)
{
double[][] fit = doFitJumpDistanceHistogram(jdHistogram, estimatedD, 1);
if (fit != null)
{
coefficients[0] = fit[0];
fractions[0] = fit[1];
ic[0] = this.ic;
saveFitCurve(fit, jdHistogram);
bestIC = ic[0];
best = 0;
}
}
// Fit using a mixed population model.
// Vary n from 2 to N. Stop when the fit fails or the fit is worse.
int bestMulti = -1;
double bestMultiIC = Double.POSITIVE_INFINITY;
for (int n = Math.max(1, minN - 1); n < maxN; n++)
{
double[][] fit = doFitJumpDistanceHistogram(jdHistogram, estimatedD, n + 1);
if (fit == null)
break;
coefficients[n] = fit[0];
fractions[n] = fit[1];
ic[n] = this.ic;
// Store the best model
if (bestIC > ic[n])
{
bestIC = ic[n];
best = n;
}
// Stop if not improving
if (bestMultiIC < ic[n])
break;
bestMultiIC = ic[n];
bestMulti = n;
}
// Add the best fit to the plot
if (bestMulti > -1)
saveFitCurve(new double[][] { coefficients[bestMulti], fractions[bestMulti] }, jdHistogram);
if (best > -1)
{
logger.info("Best fit achieved using %d population%s: %s, Fractions = %s", best + 1, (best == 0) ? "" : "s",
formatD(coefficients[best]), format(fractions[best]));
lastIC = bestIC;
return new double[][] { coefficients[best], fractions[best] };
}
return null;
}
/**
* Fit the jump distances using a fit to a cumulative histogram with the given number of species.
* <p>
* Results are sorted by the diffusion coefficient ascending.
*
* @param jumpDistances
* The jump distances (in um^2)
* @param n
* The number of species in the mixed population
* @return Array containing: { D (um^2), Fractions }. Can be null if no fit was made.
*/
public double[][] fitJumpDistances(double[] jumpDistances, int n)
{
resetFitResult();
if (jumpDistances == null || jumpDistances.length == 0)
return null;
final double meanJumpDistance = Maths.sum(jumpDistances) / jumpDistances.length;
if (meanJumpDistance == 0)
return null;
double[][] jdHistogram = cumulativeHistogram(jumpDistances);
return fitJumpDistanceHistogram(meanJumpDistance, jdHistogram, n);
}
/**
* Fit the jump distance histogram using a cumulative sum with the given number of species.
* <p>
* Results are sorted by the diffusion coefficient ascending.
*
* @param jumpDistances
* The mean jump distance (in um^2)
* @param jdHistogram
* The cumulative jump distance histogram. X-axis is um^2, Y-axis is cumulative probability. Must be
* monototic ascending.
* @param n
* The number of species in the mixed population
* @return Array containing: { D (um^2), Fractions }. Can be null if no fit was made.
*/
public double[][] fitJumpDistanceHistogram(double meanJumpDistance, double[][] jdHistogram, int n)
{
resetFitResult();
// Guess the D
final double estimatedD = meanJumpDistance / 4;
if (meanJumpDistance == 0)
return null;
logger.info("Estimated D = %s um^2", Maths.rounded(estimatedD, 4));
double[][] fit = doFitJumpDistanceHistogram(jdHistogram, estimatedD, n);
if (fit != null)
saveFitCurve(fit, jdHistogram);
return fit;
}
/**
* Fit the jump distance histogram using a cumulative sum with the given number of species.
* <p>
* Results are sorted by the diffusion coefficient ascending.
*
* @param jdHistogram
* The cumulative jump distance histogram. X-axis is um^2, Y-axis is cumulative probability. Must be
* monototic ascending.
* @param estimatedD
* The estimated diffusion coefficient
* @param n
* The number of species in the mixed population
* @return Array containing: { D (um^2), Fractions }. Can be null if no fit was made.
*/
private double[][] doFitJumpDistanceHistogram(double[][] jdHistogram, double estimatedD, int n)
{
calibrated = isCalibrated();
if (n == 1)
{
// Fit using a single population model
LevenbergMarquardtOptimizer lvmOptimizer = new LevenbergMarquardtOptimizer();
try
{
final JumpDistanceCumulFunction function = new JumpDistanceCumulFunction(jdHistogram[0], jdHistogram[1],
estimatedD);
//@formatter:off
LeastSquaresProblem problem = new LeastSquaresBuilder()
.maxEvaluations(Integer.MAX_VALUE)
.maxIterations(3000)
.start(function.guess())
.target(function.getY())
.weight(new DiagonalMatrix(function.getWeights()))
.model(function, new MultivariateMatrixFunction() {
public double[][] value(double[] point) throws IllegalArgumentException
{
return function.jacobian(point);
}} )
.build();
//@formatter:on
Optimum lvmSolution = lvmOptimizer.optimize(problem);
double[] fitParams = lvmSolution.getPoint().toArray();
// True for an unweighted fit
ss = lvmSolution.getResiduals().dotProduct(lvmSolution.getResiduals());
//ss = calculateSumOfSquares(function.getY(), function.value(fitParams));
lastIC = ic = Maths.getAkaikeInformationCriterionFromResiduals(ss, function.x.length, 1);
double[] coefficients = fitParams;
double[] fractions = new double[] { 1 };
logger.info("Fit Jump distance (N=1) : %s, SS = %s, IC = %s (%d evaluations)", formatD(fitParams[0]),
Maths.rounded(ss, 4), Maths.rounded(ic, 4), lvmSolution.getEvaluations());
return new double[][] { coefficients, fractions };
}
catch (TooManyIterationsException e)
{
logger.info("LVM optimiser failed to fit (N=1) : Too many iterations : %s", e.getMessage());
}
catch (ConvergenceException e)
{
logger.info("LVM optimiser failed to fit (N=1) : %s", e.getMessage());
}
}
// Uses a weighted sum of n exponential functions, each function models a fraction of the particles.
// An LVM fit cannot restrict the parameters so the fractions do not go below zero.
// Use the CustomPowell/CMEASOptimizer which supports bounded fitting.
MixedJumpDistanceCumulFunctionMultivariate function = new MixedJumpDistanceCumulFunctionMultivariate(
jdHistogram[0], jdHistogram[1], estimatedD, n);
double[] lB = function.getLowerBounds();
int evaluations = 0;
PointValuePair constrainedSolution = null;
MaxEval maxEval = new MaxEval(20000);
CustomPowellOptimizer powellOptimizer = createCustomPowellOptimizer();
try
{
// The Powell algorithm can use more general bounds: 0 - Infinity
constrainedSolution = powellOptimizer.optimize(maxEval, new ObjectiveFunction(function),
new InitialGuess(function.guess()),
new SimpleBounds(lB, function.getUpperBounds(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY)),
new CustomPowellOptimizer.BasisStep(function.step()), GoalType.MINIMIZE);
evaluations = powellOptimizer.getEvaluations();
logger.debug("Powell optimiser fit (N=%d) : SS = %f (%d evaluations)", n, constrainedSolution.getValue(),
evaluations);
}
catch (TooManyEvaluationsException e)
{
logger.info("Powell optimiser failed to fit (N=%d) : Too many evaluations (%d)", n,
powellOptimizer.getEvaluations());
}
catch (TooManyIterationsException e)
{
logger.info("Powell optimiser failed to fit (N=%d) : Too many iterations (%d)", n,
powellOptimizer.getIterations());
}
catch (ConvergenceException e)
{
logger.info("Powell optimiser failed to fit (N=%d) : %s", n, e.getMessage());
}
if (constrainedSolution == null)
{
logger.info("Trying CMAES optimiser with restarts ...");
double[] uB = function.getUpperBounds();
SimpleBounds bounds = new SimpleBounds(lB, uB);
// The sigma determines the search range for the variables. It should be 1/3 of the initial search region.
double[] s = new double[lB.length];
for (int i = 0; i < s.length; i++)
s[i] = (uB[i] - lB[i]) / 3;
OptimizationData sigma = new CMAESOptimizer.Sigma(s);
OptimizationData popSize = new CMAESOptimizer.PopulationSize(
(int) (4 + Math.floor(3 * Math.log(function.x.length))));
// Iterate this for stability in the initial guess
CMAESOptimizer cmaesOptimizer = createCMAESOptimizer();
for (int i = 0; i <= fitRestarts; i++)
{
// Try from the initial guess
try
{
PointValuePair solution = cmaesOptimizer.optimize(new InitialGuess(function.guess()),
new ObjectiveFunction(function), GoalType.MINIMIZE, bounds, sigma, popSize, maxEval);
if (constrainedSolution == null || solution.getValue() < constrainedSolution.getValue())
{
evaluations = cmaesOptimizer.getEvaluations();
constrainedSolution = solution;
logger.debug("CMAES optimiser [%da] fit (N=%d) : SS = %f (%d evaluations)", i, n,
solution.getValue(), evaluations);
}
}
catch (TooManyEvaluationsException e)
{
}
if (constrainedSolution == null)
continue;
// Try from the current optimum
try
{
PointValuePair solution = cmaesOptimizer.optimize(
new InitialGuess(constrainedSolution.getPointRef()), new ObjectiveFunction(function),
GoalType.MINIMIZE, bounds, sigma, popSize, maxEval);
if (solution.getValue() < constrainedSolution.getValue())
{
evaluations = cmaesOptimizer.getEvaluations();
constrainedSolution = solution;
logger.debug("CMAES optimiser [%db] fit (N=%d) : SS = %f (%d evaluations)", i, n,
solution.getValue(), evaluations);
}
}
catch (TooManyEvaluationsException e)
{
}
}
if (constrainedSolution != null)
{
// Re-optimise with Powell?
try
{
PointValuePair solution = powellOptimizer.optimize(maxEval, new ObjectiveFunction(function),
new InitialGuess(constrainedSolution.getPointRef()),
new SimpleBounds(lB,
function.getUpperBounds(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY)),
new CustomPowellOptimizer.BasisStep(function.step()), GoalType.MINIMIZE);
if (solution.getValue() < constrainedSolution.getValue())
{
evaluations = cmaesOptimizer.getEvaluations();
constrainedSolution = solution;
logger.info("Powell optimiser re-fit (N=%d) : SS = %f (%d evaluations)", n,
constrainedSolution.getValue(), evaluations);
}
}
catch (TooManyEvaluationsException e)
{
}
catch (TooManyIterationsException e)
{
}
catch (ConvergenceException e)
{
}
}
}
if (constrainedSolution == null)
{
logger.info("Failed to fit N=%d", n);
return null;
}
double[] fitParams = constrainedSolution.getPointRef();
ss = constrainedSolution.getValue();
// TODO - Try a bounded BFGS optimiser
// Try and improve using a LVM fit
final MixedJumpDistanceCumulFunctionGradient functionGradient = new MixedJumpDistanceCumulFunctionGradient(
jdHistogram[0], jdHistogram[1], estimatedD, n);
Optimum lvmSolution;
LevenbergMarquardtOptimizer lvmOptimizer = new LevenbergMarquardtOptimizer();
try
{
//@formatter:off
LeastSquaresProblem problem = new LeastSquaresBuilder()
.maxEvaluations(Integer.MAX_VALUE)
.maxIterations(3000)
.start(fitParams)
.target(functionGradient.getY())
.weight(new DiagonalMatrix(functionGradient.getWeights()))
.model(functionGradient, new MultivariateMatrixFunction() {
public double[][] value(double[] point) throws IllegalArgumentException
{
return functionGradient.jacobian(point);
}} )
.build();
//@formatter:on
lvmSolution = lvmOptimizer.optimize(problem);
// True for an unweighted fit
double ss = lvmSolution.getResiduals().dotProduct(lvmSolution.getResiduals());
//double ss = calculateSumOfSquares(functionGradient.getY(), functionGradient.value(lvmSolution.getPoint().toArray()));
// All fitted parameters must be above zero
if (ss < this.ss && Maths.min(lvmSolution.getPoint().toArray()) > 0)
{
logger.info(" Re-fitting improved the SS from %s to %s (-%s%%)", Maths.rounded(this.ss, 4),
Maths.rounded(ss, 4), Maths.rounded(100 * (this.ss - ss) / this.ss, 4));
fitParams = lvmSolution.getPoint().toArray();
this.ss = ss;
evaluations += lvmSolution.getEvaluations();
}
}
catch (TooManyIterationsException e)
{
logger.error("Failed to re-fit : Too many iterations : %s", e.getMessage());
}
catch (ConvergenceException e)
{
logger.error("Failed to re-fit : %s", e.getMessage());
}
// Since the fractions must sum to one we subtract 1 degree of freedom from the number of parameters
ic = Maths.getAkaikeInformationCriterionFromResiduals(ss, function.x.length, fitParams.length - 1);
double[] d = new double[n];
double[] f = new double[n];
double sum = 0;
for (int i = 0; i < d.length; i++)
{
f[i] = fitParams[i * 2];
sum += f[i];
d[i] = fitParams[i * 2 + 1];
}
for (int i = 0; i < f.length; i++)
f[i] /= sum;
// Sort by coefficient size
sort(d, f);
double[] coefficients = d;
double[] fractions = f;
logger.info("Fit Jump distance (N=%d) : %s (%s), SS = %s, IC = %s (%d evaluations)", n, formatD(d), format(f),
Maths.rounded(ss, 4), Maths.rounded(ic, 4), evaluations);
if (isValid(d, f))
{
lastIC = ic;
return new double[][] { coefficients, fractions };
}
return null;
}
private boolean isValid(double[] d, double[] f)
{
for (int i = 0; i < f.length; i++)
{
// Check the fractions and coefficients exist
if (f[i] <= 0)
{
logger.debug("Fraction is zero");
return false;
}
if (d[i] <= 0)
{
logger.debug("Coefficient is zero");
return false;
}
// Check the fit has fractions above the minimum fraction
if (f[i] < minFraction)
{
logger.debug("Fraction is less than the minimum fraction: %s < %s", Maths.rounded(f[i]),
Maths.rounded(minFraction));
return false;
}
// Check the coefficients are different
if (i + 1 < f.length && d[i] / d[i + 1] < minDifference)
{
logger.debug("Coefficients are not different: %s / %s = %s < %s", Maths.rounded(d[i]),
Maths.rounded(d[i + 1]), Maths.rounded(d[i] / d[i + 1]), Maths.rounded(minDifference));
return false;
}
}
return true;
}
/**
* Fit the jump distances using a maximum likelihood estimation.
* <p>
* The data is fit repeatedly using a mixed population model with increasing number of different molecules. Results
* are sorted by the diffusion coefficient ascending. This process is stopped when: the likelihood does not improve;
* the fraction of one of the populations is below the min fraction; the difference between two consecutive
* diffusion coefficients is below the min difference.
* <p>
* The number of populations must be obtained from the size of the D/fractions arrays.
*
* @param jumpDistances
* The jump distances (in um^2)
* @return Array containing: { D (um^2), Fractions }. Can be null if no fit was made.
*/
public double[][] fitJumpDistancesMLE(double[] jumpDistances)
{
return fitJumpDistancesMLE(jumpDistances, null);
}
/**
* Fit the jump distances using a maximum likelihood estimation.
* <p>
* The data is fit repeatedly using a mixed population model with increasing number of different molecules. Results
* are sorted by the diffusion coefficient ascending. This process is stopped when: the likelihood does not improve;
* the fraction of one of the populations is below the min fraction; the difference between two consecutive
* diffusion coefficients is below the min difference.
* <p>
* The number of populations must be obtained from the size of the D/fractions arrays.
*
* @param jumpDistances
* The jump distances (in um^2)
* @param jdHistogram
* The jump distance histogram for the given distances. If null will be computed using
* {@link #cumulativeHistogram(double[])}. Only used if the CurveLogger is not null.
* @return Array containing: { D (um^2), Fractions }. Can be null if no fit was made.
*/
public double[][] fitJumpDistancesMLE(double[] jumpDistances, double[][] jdHistogram)
{
resetFitResult();
if (jumpDistances == null || jumpDistances.length == 0)
return null;
final double meanJumpDistance = Maths.sum(jumpDistances) / jumpDistances.length;
if (meanJumpDistance == 0)
return null;
// Guess the D
final double estimatedD = meanJumpDistance / 4;
logger.info("Estimated D = %s um^2", Maths.rounded(estimatedD, 4));
// Used for saving fitted the curve
if (curveLogger != null && jdHistogram == null)
jdHistogram = cumulativeHistogram(jumpDistances);
double[] ic = new double[maxN];
Arrays.fill(ic, Double.POSITIVE_INFINITY);
double[][] coefficients = new double[maxN][];
double[][] fractions = new double[maxN][];
double bestIC = Double.POSITIVE_INFINITY;
int best = -1;
if (minN == 1)
{
double[][] fit = doFitJumpDistancesMLE(jumpDistances, estimatedD, 1);
if (fit != null)
{
coefficients[0] = fit[0];
fractions[0] = fit[1];
ic[0] = this.ic;
saveFitCurve(fit, jdHistogram);
bestIC = ic[0];
best = 0;
}
}
// Fit using a mixed population model.
// Vary n from 2 to N. Stop when the fit fails or the fit is worse.
int bestMulti = -1;
double bestMultiIC = Double.POSITIVE_INFINITY;
for (int n = Math.max(1, minN - 1); n < maxN; n++)
{
double[][] fit = doFitJumpDistancesMLE(jumpDistances, estimatedD, n + 1);
if (fit == null)
break;
coefficients[n] = fit[0];
fractions[n] = fit[1];
ic[n] = this.ic;
// Store the best model
if (bestIC > ic[n])
{
bestIC = ic[n];
best = n;
}
// Stop if not improving
if (bestMultiIC < ic[n])
break;
bestMultiIC = ic[n];
bestMulti = n;
}
// Add the best fit to the plot
if (bestMulti > -1)
saveFitCurve(new double[][] { coefficients[bestMulti], fractions[bestMulti] }, jdHistogram);
if (best > -1)
{
logger.info("Best fit achieved using %d population%s: %s, Fractions = %s", best + 1, (best == 0) ? "" : "s",
formatD(coefficients[best]), format(fractions[best]));
lastIC = bestIC;
return new double[][] { coefficients[best], fractions[best] };
}
return null;
}
/**
* Fit the jump distances using a maximum likelihood estimation with the given number of species.
* <p>
* Results are sorted by the diffusion coefficient ascending.
*
* @param jumpDistances
* The jump distances (in um^2)
* @param n
* The number of species in the mixed population
* @return Array containing: { D (um^2), Fractions }. Can be null if no fit was made.
*/
public double[][] fitJumpDistancesMLE(double[] jumpDistances, int n)
{
return fitJumpDistancesMLE(jumpDistances, null, n);
}
/**
* Fit the jump distances using a maximum likelihood estimation with the given number of species.
* <p>
* Results are sorted by the diffusion coefficient ascending.
*
* @param jumpDistances
* The jump distances (in um^2)
* @param jdHistogram
* The jump distance histogram for the given distances. If null will be computed using
* {@link #cumulativeHistogram(double[])}. Only used if the CurveLogger is not null.
* @param n
* The number of species in the mixed population
* @return Array containing: { D (um^2), Fractions }. Can be null if no fit was made.
*/
public double[][] fitJumpDistancesMLE(double[] jumpDistances, double[][] jdHistogram, int n)
{
resetFitResult();
if (jumpDistances == null || jumpDistances.length == 0)
return null;
final double meanJumpDistance = Maths.sum(jumpDistances) / jumpDistances.length;
if (meanJumpDistance == 0)
return null;
// Guess the D
final double estimatedD = meanJumpDistance / 4;
logger.info("Estimated D = %s um^2", Maths.rounded(estimatedD, 4));
// Used for saving fitted the curve
if (curveLogger != null && jdHistogram == null)
jdHistogram = cumulativeHistogram(jumpDistances);
double[][] fit = doFitJumpDistancesMLE(jumpDistances, estimatedD, n);
if (fit != null)
saveFitCurve(fit, jdHistogram);
return fit;
}
/**
* Fit the jump distances using a maximum likelihood estimation with the given number of species.
* | *
* <p>
* Results are sorted by the diffusion coefficient ascending.
*
* @param jumpDistances
* The jump distances (in um^2)
* @param estimatedD
* The estimated diffusion coefficient
* @param n
* The number of species in the mixed population
* @return Array containing: { D (um^2), Fractions }. Can be null if no fit was made.
*/
private double[][] doFitJumpDistancesMLE(double[] jumpDistances, double estimatedD, int n)
{
MaxEval maxEval = new MaxEval(20000);
CustomPowellOptimizer powellOptimizer = createCustomPowellOptimizer();
calibrated = isCalibrated();
if (n == 1)
{
try
{
final JumpDistanceFunction function = new JumpDistanceFunction(jumpDistances, estimatedD);
// The Powell algorithm can use more general bounds: 0 - Infinity
SimpleBounds bounds = new SimpleBounds(function.getLowerBounds(),
function.getUpperBounds(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY));
PointValuePair solution = powellOptimizer.optimize(maxEval, new ObjectiveFunction(function),
new InitialGuess(function.guess()), bounds,
new CustomPowellOptimizer.BasisStep(function.step()), GoalType.MAXIMIZE);
double[] fitParams = solution.getPointRef();
ll = solution.getValue();
lastIC = ic = Maths.getAkaikeInformationCriterion(ll, jumpDistances.length, 1);
double[] coefficients = fitParams;
double[] fractions = new double[] { 1 };
logger.info("Fit Jump distance (N=1) : %s, MLE = %s, IC = %s (%d evaluations)", formatD(fitParams[0]),
Maths.rounded(ll, 4), Maths.rounded(ic, 4), powellOptimizer.getEvaluations());
return new double[][] { coefficients, fractions };
}
catch (TooManyEvaluationsException e)
{
logger.info("Powell optimiser failed to fit (N=1) : Too many evaluation (%d)",
powellOptimizer.getEvaluations());
}
catch (TooManyIterationsException e)
{
logger.info("Powell optimiser failed to fit (N=1) : Too many iterations (%d)",
powellOptimizer.getIterations());
}
catch (ConvergenceException e)
{
logger.info("Powell optimiser failed to fit (N=1) : %s", e.getMessage());
}
return null;
}
MixedJumpDistanceFunction function = new MixedJumpDistanceFunction(jumpDistances, estimatedD, n);
double[] lB = function.getLowerBounds();
int evaluations = 0;
PointValuePair constrainedSolution = null;
try
{
// The Powell algorithm can use more general bounds: 0 - Infinity
constrainedSolution = powellOptimizer.optimize(maxEval, new ObjectiveFunction(function),
new InitialGuess(function.guess()),
new SimpleBounds(lB, function.getUpperBounds(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY)),
new CustomPowellOptimizer.BasisStep(function.step()), GoalType.MAXIMIZE);
evaluations = powellOptimizer.getEvaluations();
logger.debug("Powell optimiser fit (N=%d) : MLE = %f (%d evaluations)", n, constrainedSolution.getValue(),
powellOptimizer.getEvaluations());
}
catch (TooManyEvaluationsException e)
{
logger.info("Powell optimiser failed to fit (N=%d) : Too many evaluation (%d)", n,
powellOptimizer.getEvaluations());
}
catch (TooManyIterationsException e)
{
logger.info("Powell optimiser failed to fit (N=%d) : Too many iterations (%d)", n,
powellOptimizer.getIterations());
}
catch (ConvergenceException e)
{
logger.info("Powell optimiser failed to fit (N=%d) : %s", n, e.getMessage());
}
if (constrainedSolution == null)
{
logger.info("Trying CMAES optimiser with restarts ...");
double[] uB = function.getUpperBounds();
SimpleBounds bounds = new SimpleBounds(lB, uB);
// Try a bounded CMAES optimiser since the Powell optimiser appears to be
// sensitive to the order of the parameters. It is not good when the fast particle
// is the minority fraction. Could this be due to too low an upper bound?
// The sigma determines the search range for the variables. It should be 1/3 of the initial search region.
double[] s = new double[lB.length];
for (int i = 0; i < s.length; i++)
s[i] = (uB[i] - lB[i]) / 3;
OptimizationData sigma = new CMAESOptimizer.Sigma(s);
OptimizationData popSize = new CMAESOptimizer.PopulationSize(
(int) (4 + Math.floor(3 * Math.log(function.x.length))));
// Iterate this for stability in the initial guess
CMAESOptimizer cmaesOptimizer = createCMAESOptimizer();
for (int i = 0; i <= fitRestarts; i++)
{
// Try from the initial guess
try
{
PointValuePair solution = cmaesOptimizer.optimize(new InitialGuess(function.guess()),
new ObjectiveFunction(function), GoalType.MAXIMIZE, bounds, sigma, popSize, maxEval);
if (constrainedSolution == null || solution.getValue() > constrainedSolution.getValue())
{
evaluations = cmaesOptimizer.getEvaluations();
constrainedSolution = solution;
logger.debug("CMAES optimiser [%da] fit (N=%d) : MLE = %f (%d evaluations)", i, n,
solution.getValue(), evaluations);
}
}
catch (TooManyEvaluationsException e)
{
}
if (constrainedSolution == null)
continue;
// Try from the current optimum
try
{
PointValuePair solution = cmaesOptimizer.optimize(
new InitialGuess(constrainedSolution.getPointRef()), new ObjectiveFunction(function),
GoalType.MAXIMIZE, bounds, sigma, popSize, maxEval);
if (solution.getValue() > constrainedSolution.getValue())
{
evaluations = cmaesOptimizer.getEvaluations();
constrainedSolution = solution;
logger.debug("CMAES optimiser [%db] fit (N=%d) : MLE = %f (%d evaluations)", i, n,
solution.getValue(), evaluations);
}
}
catch (TooManyEvaluationsException e)
{
}
}
if (constrainedSolution != null)
{
try
{
// Re-optimise with Powell?
PointValuePair solution = powellOptimizer.optimize(maxEval, new ObjectiveFunction(function),
new InitialGuess(constrainedSolution.getPointRef()),
new SimpleBounds(lB,
function.getUpperBounds(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY)),
new CustomPowellOptimizer.BasisStep(function.step()), GoalType.MAXIMIZE);
if (solution.getValue() > constrainedSolution.getValue())
{
evaluations = cmaesOptimizer.getEvaluations();
constrainedSolution = solution;
logger.info("Powell optimiser re-fit (N=%d) : MLE = %f (%d evaluations)", n,
constrainedSolution.getValue(), powellOptimizer.getEvaluations());
}
}
catch (TooManyEvaluationsException e)
{
}
catch (TooManyIterationsException e)
{
}
catch (ConvergenceException e)
{
}
}
}
if (constrainedSolution == null)
{
logger.info("Failed to fit N=%d", n);
return null;
}
double[] fitParams = constrainedSolution.getPointRef();
ll = constrainedSolution.getValue();
// Since the fractions must sum to one we subtract 1 degree of freedom from the number of parameters
ic = Maths.getAkaikeInformationCriterion(ll, jumpDistances.length, fitParams.length - 1);
double[] d = new double[n];
double[] f = new double[n];
double sum = 0;
for (int i = 0; i < d.length; i++)
{
f[i] = fitParams[i * 2];
sum += f[i];
d[i] = fitParams[i * 2 + 1];
}
for (int i = 0; i < f.length; i++)
f[i] /= sum;
// Sort by coefficient size
sort(d, f);
double[] coefficients = d;
double[] fractions = f;
logger.info("Fit Jump distance (N=%d) : %s (%s), MLE = %s, IC = %s (%d evaluations)", n, formatD(d), format(f),
Maths.rounded(ll, 4), Maths.rounded(ic, 4), evaluations);
if (isValid(d, f))
{
lastIC = ic;
return new double[][] { coefficients, fractions };
}
return null;
}
private CustomPowellOptimizer createCustomPowellOptimizer()
{
double rel = 1e-8;
double abs = 1e-10;
//double lineRel = rel;
//double lineAbs = abs;
ConvergenceChecker<PointValuePair> positionChecker = null;
// new org.apache.commons.math3.optim.PositionChecker(1e-3, 1e-10);
boolean basisConvergence = false;
//return new CustomPowellOptimizer(rel, abs, lineRel, lineAbs, checker, basisConvergence);
return new CustomPowellOptimizer(rel, abs, positionChecker, basisConvergence);
}
private CMAESOptimizer createCMAESOptimizer()
{
double rel = 1e-8;
double abs = 1e-10;
int maxIterations = 2000;
double stopFitness = 0; //Double.NEGATIVE_INFINITY;
boolean isActiveCMA = true;
int diagonalOnly = 20;
int checkFeasableCount = 1;
RandomGenerator random = new Well19937c();
boolean generateStatistics = false;
ConvergenceChecker<PointValuePair> checker = new SimpleValueChecker(rel, abs);
// Iterate this for stability in the initial guess
return new CMAESOptimizer(maxIterations, stopFitness, isActiveCMA, diagonalOnly, checkFeasableCount, random,
generateStatistics, checker);
}
/**
* Format the diffusion coefficients for reporting using the calibration if present
*
* @param jumpD
* @return The formatted D
*/
private String formatD(double... jumpD)
{
if (jumpD == null || jumpD.length == 0)
return "";
StringBuilder sb = new StringBuilder("D = ");
for (int i = 0; i < jumpD.length; i++)
{
if (i != 0)
sb.append(", ");
sb.append(Maths.rounded(jumpD[i], 4));
}
sb.append(" um^2");
if (calibrated)
{
jumpD = calculateApparentDiffusionCoefficient(jumpD);
sb.append(", D* = ");
for (int i = 0; i < jumpD.length; i++)
{
if (i != 0)
sb.append(", ");
sb.append(Maths.rounded(jumpD[i], 4));
}
sb.append(" um^2/s");
}
return sb.toString();
}
private String format(double[] data)
{
if (data == null || data.length == 0)
return "";
StringBuilder sb = new StringBuilder();
for (int i = 0; i < data.length; i++)
{
if (i != 0)
sb.append(", ");
sb.append(Maths.rounded(data[i], 4));
}
return sb.toString();
}
/**
* Sort the arrays by the size of the diffusion coefficient
*
* @param d
* The diffusion coefficient array
* @param f
* The fraction of the population array
*/
public static void sort(double[] d, double[] f)
{
// Sort by coefficient size
int[] indices = new int[f.length];
for (int i = 0; i < f.length; i++)
{
indices[i] = i;
}
Sort.sort(indices, d);
double[] d2 = Arrays.copyOf(d, d.length);
double[] f2 = Arrays.copyOf(f, f.length);
for (int i = 0; i < f.length; i++)
{
d[i] = d2[indices[i]];
f[i] = f2[indices[i]];
}
}
private void saveFitCurve(double[][] fit, double[][] jdHistogram)
{
if (fit[0].length == 1)
saveFitCurve(fit[0], jdHistogram);
else
{
double[] params = new double[fit[0].length * 2];
for (int i = 0; i < fit[0].length; i++)
{
params[i * 2] = fit[1][i];
params[i * 2 + 1] = fit[0][i];
}
saveFitCurve(params, jdHistogram);
}
}
private void saveFitCurve(double[] params, double[][] jdHistogram)
{
if (curveLogger == null)
return;
final int nPoints = curveLogger.getNumberOfCurvePoints();
if (nPoints <= 1)
return;
Function function;
if (params.length == 1)
function = new JumpDistanceCumulFunction(null, null, 0);
else
function = new MixedJumpDistanceCumulFunction(null, null, 0, params.length / 2);
final double max = jdHistogram[0][jdHistogram[0].length - 1];
final double interval = max / nPoints;
final double[] x = new double[nPoints + 1];
final double[] y = new double[nPoints + 1];
for (int i = 0; i < nPoints; i++)
{
x[i] = i * interval;
y[i] = function.evaluate(x[i], params);
}
x[nPoints] = max;
y[nPoints] = function.evaluate(max, params);
if (params.length == 1)
curveLogger.saveSinglePopulationCurve(new double[][] { x, y });
else
curveLogger.saveMixedPopulationCurve(new double[][] { x, y });
}
@SuppressWarnings("unused")
private double calculateSumOfSquares(double[] obs, double[] exp)
{
double ss = 0;
for (int i = 0; i < obs.length; i++)
ss += (obs[i] - exp[i]) * (obs[i] - exp[i]);
return ss;
}
/**
* Function used for least-squares fitting of cumulative histogram of jump distances.
*/
public abstract class Function
{
double[] x, y;
double estimatedD;
int n;
public Function(double[] x, double[] y, double estimatedD, int n)
{
this.x = x;
this.y = y;
this.estimatedD = estimatedD;
this.n = n;
}
/**
* @return An estimate for the parameters
*/
public double[] guess()
{
if (n == 1)
return new double[] { estimatedD };
double[] guess = new double[n * 2];
double d = estimatedD;
for (int i = 0; i < n; i++)
{
// Fraction are all equal
guess[i * 2] = 1;
// Diffusion coefficient gets smaller for each fraction
guess[i * 2 + 1] = d;
d *= 0.1;
}
return guess;
}
/**
* @return An estimate for the initial search step for the parameters
*/
public double[] step()
{
if (n == 1)
return new double[] { estimatedD * 0.5 };
double[] step = new double[n * 2];
double d = estimatedD;
for (int i = 0; i < n; i++)
{
// Fraction are all equal
step[i * 2] = 0.1;
// Diffusion coefficient gets smaller for each fraction
step[i * 2 + 1] = d * 0.5;
d *= 0.1;
}
return step;
}
public double[] getUpperBounds()
{
// Fraction guess is 1 so set the upper limit as 10
// Diffusion coefficient could be 10x the estimated
return getUpperBounds(10, estimatedD * 10);
}
public double[] getUpperBounds(double fractionLimit, double dLimit)
{
if (n == 1)
return new double[] { dLimit };
double[] bounds = new double[n * 2];
for (int i = 0; i < n; i++)
{
bounds[i * 2] = fractionLimit;
bounds[i * 2 + 1] = dLimit;
}
return bounds;
}
public double[] getLowerBounds()
{
return getLowerBounds(0, 0);
}
public double[] getLowerBounds(double fractionLimit, double dLimit)
{
// Diffusion coefficient could be 0 but this is not practical for
// testing a mixed population so set to a small value where the optimiser
// will not be successfully anyway
fractionLimit = Math.max(fractionLimit, 1e-5);
dLimit = Math.max(dLimit, 1e-16);
if (n == 1)
return new double[] { dLimit };
double[] bounds = new double[n * 2];
for (int i = 0; i < n; i++)
{
bounds[i * 2] = fractionLimit;
bounds[i * 2 + 1] = dLimit;
}
return bounds;
}
public double[] getWeights()
{
double[] w = new double[x.length];
Arrays.fill(w, 1);
return w;
}
public double[] getX()
{
return x;
}
public double[] getY()
{
return y;
}
public abstract double evaluate(double x, double[] parameters);
public double[][] jacobian(double[] variables)
{
double[][] jacobian = new double[x.length][variables.length];
final double delta = 0.001;
double[] d = new double[variables.length];
double[][] variables2 = new double[variables.length][];
for (int i = 0; i < variables.length; i++)
{
d[i] = delta * Math.abs(variables[i]); // Should the delta be changed for each parameter ?
variables2[i] = Arrays.copyOf(variables, variables.length);
variables2[i][i] += d[i];
}
for (int i = 0; i < jacobian.length; ++i)
{
double value = evaluate(x[i], variables);
for (int j = 0; j < variables.length; j++)
{
double value2 = evaluate(x[i], variables2[j]);
jacobian[i][j] = (value2 - value) / d[j];
}
}
return jacobian;
}
}
/**
* Compute the probability of mean-squared distance x given a diffusion coefficient.
* <p>
* Function used for maximum likelihood fitting.
*/
public class JumpDistanceFunction extends Function implements MultivariateFunction
{
public JumpDistanceFunction(double[] x, double estimatedD)
{
super(x, null, estimatedD, 1);
}
// Adapted from http://commons.apache.org/proper/commons-math/userguide/optimization.html
public double evaluate(double x, double[] params)
{
// Compute the probability:
// p = 1/4D * exp(-x/4D)
//final double fourD = 4 * getD(params[0]);
final double fourD = 4 * params[0];
return FastMath.exp(-x / fourD) / fourD;
}
public double[] evaluateAll(double[] params)
{
// Compute the probability:
// p = 1/4D * exp(-x/4D)
final double[] values = new double[x.length];
//final double one_fourD = 1 / (4 * getD(params[0]));
final double one_fourD = 1 / (4 * params[0]);
for (int i = 0; i < values.length; i++)
{
values[i] = one_fourD * FastMath.exp(-x[i] * one_fourD);
}
return values;
}
/*
* (non-Javadoc)
*
* @see org.apache.commons.math3.analysis.MultivariateFunction#value(double[])
*/
public double value(double[] variables)
{
// Compute the log-likelihood:
// log(p) = log(1/4D * exp(-x/4D))
// = log(1/4D) + log(exp(-x/4D))
// = log(1/4D) + -x/4D
double l = 0;
//final double one_fourD = 1 / (4 * getD(variables[0]));
final double one_fourD = 1 / (4 * variables[0]);
for (int i = 0; i < x.length; i++)
{
l += -x[i] * one_fourD;
}
l += Math.log(one_fourD) * x.length;
// Debug the call from the optimiser
if (DEBUG_OPTIMISER)
{
System.out.printf("[1] : [%f] = %f\n", variables[0], l);
}
return l;
}
// This has not been tested. It could be used for LVM fitting of the p-values. However MLE
// is less sensitive to outliers of p-values.
// /*
// * (non-Javadoc)
// *
// * @see gdsc.smlm.fitting.JumpDistanceAnalysis.Function#jacobian(double[])
// */
// public double[][] jacobian(double[] variables)
// {
// // Compute the gradients using calculus differentiation:
// // y = 1/4D * exp(-x/4D)
// // y = aa * a
// // aa = 1/4D
// // a = exp(b)
// // b = -x / 4D
// //
// // y' = aa' * a + aa * a'
// // aa' = -1/4D^2
// // a' = exp(b) * b'
// // b' = -1 * -x / 4D^2 = x / 4D^2
// // y' = -1/4D^2 * exp(-x/4D) + 1/4D * exp(-x/4D) * x / 4D^2
// // = 1/4D * exp(-x/4D) * (-1/D + x / 4D^2)
//
// final double d = variables[0];
// final double fourD = 4 * d;
// final double aa = 1 / fourD;
// final double cc = aa / d;
// final double c = -1 / d;
// double[][] jacobian = new double[x.length][variables.length];
//
// for (int i = 0; i < jacobian.length; ++i)
// {
// jacobian[i][0] = aa * FastMath.exp(-x[i] * aa) * (c + x[i] * cc);
// }
//
// //// Check numerically ...
// //double[][] jacobian2 = super.jacobian(variables);
// //for (int i = 0; i < jacobian.length; i++)
// //{
// // System.out.printf("dD = %g : %g = %g\n", jacobian[i][0], jacobian2[i][0],
// // DoubleEquality.relativeError(jacobian[i][0], jacobian2[i][0]));
// //}
//
// return jacobian;
// }
}
/**
* Compute the probability of mean-squared distance being within x given a diffusion coefficient.
* <p>
* Function used for least-squares fitting of cumulative histogram of jump distances.
*/
public class JumpDistanceCumulFunction extends Function implements MultivariateVectorFunction
{
public JumpDistanceCumulFunction(double[] x, double[] y, double estimatedD)
{
super(x, y, estimatedD, 1);
}
// Adapted from http://commons.apache.org/proper/commons-math/userguide/optimization.html
public double evaluate(double x, double[] params)
{
return 1 - FastMath.exp(-x / (4 * params[0]));
}
/*
* (non-Javadoc)
*
* @see org.apache.commons.math3.analysis.MultivariateVectorFunction#value(double[])
*/
public double[] value(double[] variables)
{
final double[] values = new double[x.length];
final double fourD = 4 * variables[0];
for (int i = 0; i < values.length; i++)
{
values[i] = 1 - FastMath.exp(-x[i] / fourD);
}
return values;
}
/*
* (non-Javadoc)
*
* @see gdsc.smlm.fitting.JumpDistanceAnalysis.Function#jacobian(double[])
*/
public double[][] jacobian(double[] variables)
{
// Compute the gradients using calculus differentiation:
// y = 1 - a
// a = exp(b)
// b = -x / 4D
//
// y' = -a'
// a' = exp(b) * b'
// b' = -1 * -x / 4D^2 = x / 4D^2
// y' = -exp(b) * x / 4D^2
// = -a * -b / D
// = a * b / D
// = exp(b) * b / D
final double d = variables[0];
final double fourD = 4 * d;
final double[][] jacobian = new double[x.length][variables.length];
for (int i = 0; i < jacobian.length; ++i)
{
final double b = -x[i] / fourD;
jacobian[i][0] = FastMath.exp(b) * b / d;
}
//// Check numerically ...
//double[][] jacobian2 = super.jacobian(variables);
//for (int i = 0; i < jacobian.length; i++)
//{
// System.out.printf("dD = %g : %g = %g\n", jacobian[i][0], jacobian2[i][0],
// DoubleEquality.relativeError(jacobian[i][0], jacobian2[i][0]));
//}
return jacobian;
}
}
/**
* Compute the probability of mean-squared distance x given a mixed population with set fractions and
* diffusion coefficients.
* <p>
* Function used for maximum likelihood fitting.
*/
public class MixedJumpDistanceFunction extends Function implements MultivariateFunction
{
public MixedJumpDistanceFunction(double[] x, double estimatedD, int n)
{
super(x, null, estimatedD, n);
}
public double evaluate(double x, double[] params)
{
// Compute the probability:
// p = sum [ Fj/4Dj * exp(-x/4Dj) ]
double sum = 0;
double total = 0;
for (int i = 0; i < n; i++)
{
//final double f = getF(params[i * 2]);
//final double fourD = 4 * getD(params[i * 2 + 1]);
final double f = params[i * 2];
final double fourD = 4 * params[i * 2 + 1];
sum += (f / fourD) * FastMath.exp(-x / fourD);
total += f;
}
return sum / total;
}
public double[] evaluateAll(double[] params)
{
double total = 0;
final double[] f_d = new double[n];
for (int i = 0; i < n; i++)
{
//f_d[i] = getF(params[i * 2]);
f_d[i] = params[i * 2];
total += f_d[i];
}
final double[] fourD = new double[n];
for (int i = 0; i < n; i++)
{
//fourD[i] = 4 * getD(params[i * 2 + 1]);
fourD[i] = 4 * params[i * 2 + 1];
f_d[i] = (f_d[i] / total) / fourD[i];
}
// Compute the probability:
// p = sum [ Fj/4Dj * exp(-x/4Dj) ]
final double[] values = new double[x.length];
for (int i = 0; i < x.length; i++)
{
double sum = 0;
for (int j = 0; j < n; j++)
{
sum += f_d[j] * FastMath.exp(-x[i] / fourD[j]);
}
values[i] = sum;
}
return values;
}
/*
* (non-Javadoc)
*
* @see org.apache.commons.math3.analysis.MultivariateFunction#value(double[])
*/
public double value(double[] params)
{
// Compute the log-likelihood
double l = 0;
for (final double p : evaluateAll(params))
{
l += Math.log(p);
}
// Debug the call from the optimiser
if (DEBUG_OPTIMISER)
{
double[] F = new double[n];
double[] D = new double[n];
for (int i = 0; i < n; i++)
{
F[i] = params[i * 2];
D[i] = params[i * 2 + 1];
}
System.out.printf("%s : %s = %f\n", Arrays.toString(F), Arrays.toString(D), l);
}
return l;
}
}
/**
* Compute the probability of mean-squared distance being within x given a mixed population with set fractions and
* diffusion coefficients.
* <p>
* Function used for least-squares fitting of cumulative histogram of jump distances.
*/
public class MixedJumpDistanceCumulFunction extends Function
{
public MixedJumpDistanceCumulFunction(double[] x, double[] y, double estimatedD, int n)
{
super(x, y, estimatedD, n);
}
public double evaluate(double x, double[] params)
{
double sum = 0;
double total = 0;
for (int i = 0; i < n; i++)
{
final double f = params[i * 2];
sum += f * FastMath.exp(-x / (4 * params[i * 2 + 1]));
total += f;
}
return 1 - sum / total;
}
public double[] getValue(double[] variables)
{
double total = 0;
for (int i = 0; i < n; i++)
{
total += variables[i * 2];
}
final double[] fourD = new double[n];
final double[] f = new double[n];
for (int i = 0; i < n; i++)
{
f[i] = variables[i * 2] / total;
fourD[i] = 4 * variables[i * 2 + 1];
}
final double[] values = new double[x.length];
for (int i = 0; i < values.length; i++)
{
double sum = 0;
for (int j = 0; j < n; j++)
{
sum += f[j] * FastMath.exp(-x[i] / fourD[j]);
}
values[i] = 1 - sum;
}
return values;
}
}
/**
* Compute the probability of mean-squared distance being within x given a mixed population with set fractions and
* diffusion coefficients.
* <p>
* Function used for least-squares fitting of cumulative histogram of jump distances.
*/
public class MixedJumpDistanceCumulFunctionGradient extends MixedJumpDistanceCumulFunction
implements MultivariateVectorFunction
{
public MixedJumpDistanceCumulFunctionGradient(double[] x, double[] y, double estimatedD, int n)
{
super(x, y, estimatedD, n);
}
/*
* (non-Javadoc)
*
* @see org.apache.commons.math3.analysis.MultivariateVectorFunction#value(double[])
*/
public double[] value(double[] point) throws IllegalArgumentException
{
return getValue(point);
}
/*
* (non-Javadoc)
*
* @see gdsc.smlm.fitting.JumpDistanceAnalysis.Function#jacobian(double[])
*/
public double[][] jacobian(double[] variables)
{
// Compute the gradients using calculus differentiation:
// y = 1 - sum(a)
// The sum is over n components of the following function
// a = f * exp(b)
// b = -x / 4D
// Each function contributes a fraction f:
// f = fj / sum_j(f)
// The gradient is the sum of the individual gradients. The diffusion coefficient is only
// used per component. The fraction is used in all, either with the fraction as the
// numerator (A) or part of the denominator (B)
// E.G.
// f(A) = A / (A+B+C)
// Quotient rule: f = g / h => f' = (g'h - gh') / h^2
// f'(A) = ((A+B+C) - A) / (A+B+C)^2
// = (B+C) / (A+B+C)^2
// = (sum(f) - f) / sum(f)^2
// f'(B) = -A / (A+B+C)^2
// = -f / sum(f)^2
// Differentiate with respect to D is easier:
// y' = -a'
// a' = f * exp(b) * b'
// b' = -1 * -x / 4D^2 = x / 4D^2
// y' = f * -exp(b) * x / 4D^2
// = f * -a * -b / D
// = f * a * b / D
// = f * exp(b) * b / D
final double[] fourD = new double[n];
final double[] f = new double[n];
double total = 0;
for (int i = 0; i < n; i++)
{
f[i] = variables[i * 2];
fourD[i] = 4 * variables[i * 2 + 1];
total += f[i];
}
final double[] fraction = new double[n];
final double[] total_f = new double[n];
final double[] f_total = new double[n];
for (int i = 0; i < n; i++)
{
fraction[i] = f[i] / total;
// Because we use y = 1 - sum(a) all coefficients are inverted
total_f[i] = -1 * (total - f[i]) / (total * total);
f_total[i] = -1 * -f[i] / (total * total);
}
final double[][] jacobian = new double[x.length][variables.length];
double[] b = new double[n];
for (int i = 0; i < x.length; ++i)
{
for (int j = 0; j < n; j++)
b[j] = -x[i] / fourD[j];
for (int j = 0; j < n; j++)
{
// Gradient for the diffusion coefficient
jacobian[i][j * 2 + 1] = fraction[j] * FastMath.exp(b[j]) * b[j] / variables[j * 2 + 1];
// Gradient for the fraction f
jacobian[i][j * 2] = total_f[j] * FastMath.exp(b[j]);
for (int k = 0; k < n; k++)
{
if (j == k)
continue;
jacobian[i][j * 2] += f_total[k] * FastMath.exp(b[k]);
}
}
}
//// Check numerically ...
//double[][] jacobian2 = super.jacobian(variables);
//for (int i = 0; i < jacobian.length; i++)
//{
// StringBuilder sb = new StringBuilder();
// for (int j = 0; j < jacobian[i].length; j++)
// {
// sb.append(" d").append(j).append(" = ").append(jacobian[i][j]).append(" : ")
// .append(jacobian2[i][j]).append(" = ")
// .append(DoubleEquality.relativeError(jacobian[i][j], jacobian2[i][j]));
// }
// System.out.println(sb.toString());
//}
return jacobian;
}
}
/**
* Compute the probability of mean-squared distance being within x given a mixed population with set fractions and
* diffusion coefficients.
* <p>
* Function used for least-squares fitting of cumulative histogram of jump distances.
*/
public class MixedJumpDistanceCumulFunctionMultivariate extends MixedJumpDistanceCumulFunction
implements MultivariateFunction
{
public MixedJumpDistanceCumulFunctionMultivariate(double[] x, double[] y, double estimatedD, int n)
{
super(x, y, estimatedD, n);
}
/*
* (non-Javadoc)
*
* @see org.apache.commons.math3.analysis.MultivariateFunction#value(double[])
*/
public double value(double[] parameters)
{
final double[] obs = getValue(parameters);
// Optimise the sum of squares
double ss = 0;
for (int i = x.length; i-- > 0;)
{
final double dx = y[i] - obs[i];
ss += dx * dx;
}
// Debug the call from the optimiser
if (DEBUG_OPTIMISER)
{
double[] F = new double[n];
double[] D = new double[n];
for (int i = 0; i < n; i++)
{
F[i] = parameters[i * 2];
D[i] = parameters[i * 2 + 1];
}
System.out.printf("%s : %s = %f\n", Arrays.toString(F), Arrays.toString(D), ss);
}
return ss;
}
}
/**
* @return The number restarts for fitting
*/
public int getFitRestarts()
{
return fitRestarts;
}
/**
* @param fitRestarts
* The number restarts for fitting
*/
public void setFitRestarts(int fitRestarts)
{
this.fitRestarts = fitRestarts;
}
/**
* @return the min fraction for each population in a mixed population
*/
public double getMinFraction()
{
return minFraction;
}
/**
* @param minFraction
* the min fraction for each population in a mixed population
*/
public void setMinFraction(double minFraction)
{
this.minFraction = minFraction;
}
/**
* @return the min difference between diffusion coefficients in a mixed population
*/
public double getMinDifference()
{
return minDifference;
}
/**
* @param minDifference
* the min difference between diffusion coefficients in a mixed population
*/
public void setMinDifference(double minDifference)
{
this.minDifference = minDifference;
}
/**
* @return the minimum number of different molecules to fit in a mixed population model
*/
public int getMinN()
{
return minN;
}
/**
* @param n
* the minimum number of different molecules to fit in a mixed population model
*/
public void setMinN(int n)
{
if (n < 1)
n = 1;
minN = n;
}
/**
* @return the maximum number of different molecules to fit in a mixed population model
*/
public int getMaxN()
{
return maxN;
}
/**
* @param n
* the maximum number of different molecules to fit in a mixed population model
*/
public void setMaxN(int n)
{
if (n < 1)
n = 1;
maxN = n;
}
/**
* @param the
* curve logger used to store the best fit curves
*/
public void setCurveLogger(CurveLogger curveLogger)
{
this.curveLogger = curveLogger;
}
/**
* Get the cumulative jump distance histogram given a set of jump distance values
*
* @param values
* The jump distances
* @return The JD cumulative histogram as two arrays: { MSD, CumulativeProbability }
*/
public static double[][] cumulativeHistogram(double[] values)
{
return Maths.cumulativeHistogram(values, true);
}
/**
* @param d
* @return Return d or minD whichever is larger
*/
public double getD(double d)
{
return (d < minD) ? minD : d;
}
/**
* @param f
* @return Return f or minFraction whichever is larger
*/
public double getF(double f)
{
return (f < minFraction) ? minFraction : f;
}
/**
* @return the minimum diffusion coefficient
*/
public double getMinD()
{
return minD;
}
/**
* @param minD
* the minimum diffusion coefficient
*/
public void setMinD(double minD)
{
this.minD = minD;
}
/**
* Get the conversion factor to convert an observed mean-squared distance (MSD) between n frames into the actual
* MSD.
* <p>
* Note that diffusion of a molecule within a frame means that the position of the molecule is an average within the
* frame. This leads to condensation of the observed distance travelled by the particle between two frames. The
* start and end frame locations have condensed diffusion within the frame to a single point. This condensation has
* the effect of reducing the effective time that diffusion occured in the start and end frame. The observed MSD can
* be converted to the corrected MSD by applying a factor:
*
* <pre>
* observed = actual * (n - 1/3) / n
* actual = observed * n / (n - 1/3)
* </pre>
*
* Note this is only valid for n>=1
*
* @param n
* @return
*/
public static double getConversionfactor(int n)
{
return (double) n / (n - THIRD);
}
/**
* Get the conversion factor to convert an observed mean-squared distance (MSD) between n frames into the actual
* MSD.
* <p>
* Note that diffusion of a molecule within a frame means that the position of the molecule is an average within the
* frame. This leads to condensation of the observed distance travelled by the particle between two frames. The
* start and end frame locations have condensed diffusion within the frame to a single point. This condensation has
* the effect of reducing the effective time that diffusion occured in the start and end frame and the total number
* of frames should be reduced by 1/3.
* <p>
* In the event that n is less than 1 the two frames overlap. Consequently there is interference where some of the
* same molecule positions have been used to compute the average start and end location within the frames. This must
* be modelled using a different formula.
* <p>
* Simulations using multiple simulation steps within each frame were used to compute the MSD at different frame
* separation intervals. These curves were compared to the expected MSD for the simulated diffusion coefficient to
* produce a correction factor curve. This was fitted for n>=1 and n<1. The observed MSD can be converted to the
* corrected MSD by applying a factor:
*
* <pre>
* n>=1:
* observed = actual * (n - 1/3) / n
* actual = observed * n / (n - 1/3)
*
* n<1:
* observed = actual * (n - n*n / 3)
* actual = observed / (n - n*n / 3)
* </pre>
*
* Note this is valid for n>=0
*
* @param n
* @return
*/
public static double getConversionfactor(double n)
{
if (n > 1)
return n / (n - THIRD);
if (n > 0)
return 1 / (n - n * n / 3.0);
return 0;
}
/**
* Get the corrected time between n frames for an observed mean-squared distance (MSD).
* <p>
* Note that diffusion of a molecule within a frame means that the position of the molecule is an average within the
* frame. This leads to condensation of the observed distance travelled by the particle between two frames. The
* start and end frame locations have condensed diffusion within the frame to a single point. This condensation has
* the effect of reducing the effective time that diffusion occured in the start and end frame and the total number
* of frames should be reduced by 1/3.
*
* <pre>
* corrected frames = n - 1/3
* </pre>
*
* Note this is only valid for n>=1
*
* @param n
* @return
*/
public static double getCorrectedTime(int n)
{
return n - THIRD;
}
/**
* Convert an observed mean-squared distance (MSD) between n frames into the actual MSD.
* <p>
* Note that diffusion of a molecule within a frame means that the position of the molecule is an average within the
* frame. This leads to condensation of the observed distance travelled by the particle between two frames. The
* start and end frame locations have condensed diffusion within the frame to a single point. This condensation has
* the effect of reducing the effective time that diffusion occured in the start and end frame and the total number
* of frames should be reduced by 1/3. The observed MSD can be converted to the corrected MSD by applying a factor:
*
* <pre>
* observed = actual * (n - 1/3) / n
* actual = observed * n / (n - 1/3)
* </pre>
*
* Note this is only valid for n>=1
*
* @param msd
* The observed MSD
* @param n
* The number of frames separating the start and end points for the MSD
* @return The actual MSD
*/
public static double convertObservedToActual(double msd, int n)
{
return msd * n / (n - THIRD);
}
/**
* Convert an actual mean-squared distance (MSD) between n frames into the observed MSD.
* <p>
* Note that diffusion of a molecule within a frame means that the position of the molecule is an average within the
* frame. This leads to condensation of the observed distance travelled by the particle between two frames. The
* start and end frame locations have condensed diffusion within the frame to a single point. This condensation has
* the effect of reducing the effective time that diffusion occured in the start and end frame and the total number
* of frames should be reduced by 1/3. The observed MSD can be converted to the corrected MSD by applying a factor:
*
* <pre>
* observed = actual * (n - 1/3) / n
* actual = observed * n / (n - 1/3)
* </pre>
*
* Note this is only valid for n>=1
*
* @param msd
* The actual MSD
* @param n
* The number of frames separating the start and end points for the MSD
* @return The actual MSD
*/
public static double convertActualToObserved(double msd, int n)
{
return msd * (n - THIRD) / n;
}
/**
* @return The localisation error (s) of the start and end coordinates of the jump (in um)
*/
public double getError()
{
return Math.sqrt(s2);
}
/**
* Set the localisation error (s) of the start and end coordinates of the jump. The error is used to compute the
* apparent diffusion coefficient: D* = D - s^2
*
* @param error
* The error
* @param nm
* True if the error is in nm (default is um)
*/
public void setError(double error, boolean nm)
{
if (nm)
error /= 1000;
this.s2 = error * error;
}
/**
* @return True if correcting MSD between frames
*/
public boolean isMsdCorrection()
{
return msdCorrection;
}
/**
* Set to true to correct the diffusion coefficient to the apparent diffusion coefficient by compensating for the
* averaging of diffusion with a time frame into a single location.
*
* @param msdCorrection
* True if correcting MSD between frames
*/
public void setMsdCorrection(boolean msdCorrection)
{
this.msdCorrection = msdCorrection;
}
/**
* @return The number of frames between the start and end coordinates of the jump
*/
public int getN()
{
return n;
}
/**
* Set the number of frames between the start and end coordinates of the jump
*
* @param n
* The number of frames (must be strictly positive)
*/
public void setN(int n)
{
this.n = n;
}
/**
* @return The time difference between each frame
*/
public double getDeltaT()
{
return deltaT;
}
/**
* Set the time difference between each frame. The total time is {@link #getDeltaT()} * {@link #getN()}
*
* @param deltaT
* The time difference
*/
public void setDeltaT(double deltaT)
{
this.deltaT = deltaT;
}
/**
* @return True if the number of frames and time delta have been set
*/
public boolean isCalibrated()
{
return deltaT > 0 && n > 0;
}
/**
* Convert the diffusion coefficients (um^2/jump) into apparent diffusion coefficients (um^2/s) if the time and
* frames are calibrated:
*
* <pre>
* D* = factor * (D - s2) / (n * deltaT)
* </pre>
*
* where factor is the conversion factor to increase the MSD to correct for diffusion within the frame
*
* @param d
* The fitted diffusion coefficients D (in um^2)
* @return The apparent diffusion coefficients D* (in um^2/s)
*/
public double[] calculateApparentDiffusionCoefficient(double... d)
{
if (isCalibrated())
{
d = d.clone();
final double f = ((msdCorrection) ? getConversionfactor(n) : 1) / (n * deltaT);
for (int i = 0; i < d.length; i++)
{
d[i] = Math.max(0, d[i] - s2) * f;
}
}
return d;
}
/**
* @return The information criterion from the last successful fit
*/
public double getInformationCriterion()
{
return lastIC;
}
}