package gdsc.smlm.fitting.nonlinear.gradient;
import org.apache.commons.math3.special.Gamma;
import org.apache.commons.math3.util.FastMath;
import gdsc.smlm.function.NonLinearFunction;
/*-----------------------------------------------------------------------------
* GDSC SMLM Software
*
* Copyright (C) 2016 Alex Herbert
* Genome Damage and Stability Centre
* University of Sussex, UK
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*---------------------------------------------------------------------------*/
/**
* Calculates the Hessian matrix (the square matrix of second-order partial derivatives of a function)
* and the gradient vector of the function's partial first derivatives with respect to the parameters.
* This is used within the Levenberg-Marquardt method to fit a nonlinear model with coefficients (a) for a
* set of data points (x, y).
* <p>
 * This calculator computes a modified Chi-squared expression to perform Maximum Likelihood Estimation assuming a
 * Poisson model. See Laurence &amp; Chromy (2010) Efficient maximum likelihood estimator fitting of histograms.
 * Nature Methods 7, 338-339. The input data must be Poisson distributed for this to be relevant.
*/
public class MLEGradientCalculator extends GradientCalculator
{
    /**
     * ln(Double.MIN_VALUE). Used to penalise the chi-squared value when the function evaluates to
     * zero or below, by pretending the function produced the smallest representable positive value.
     */
    private static final double LOG_FOR_MIN = Math.log(Double.MIN_VALUE);

    /**
     * @param nparams
     *            The number of gradient parameters
     */
    public MLEGradientCalculator(final int nparams)
    {
        super(nparams);
    }

    /**
     * Compute the contribution of a single observation to the (half) MLE chi-squared.
     * <p>
     * The code provided in Laurence &amp; Chromy (2010) Nature Methods 7, 338-339, SI effectively
     * ignores any function value below zero. That could allow the best chi-squared to be achieved by
     * forcing the function to output 0 everywhere, so a non-positive function value is heavily
     * penalised by assuming it was actually Double.MIN_VALUE. Optimally the function should be
     * bounded to always produce a positive number.
     *
     * @param fi
     *            the function value at index i
     * @param xi
     *            the data value at index i (assumed to be positive)
     * @return the chi-squared contribution (before the final factor of 2)
     */
    private static double chiSquaredDelta(final double fi, final double xi)
    {
        if (fi <= 0)
        {
            // Penalise the chi-squared value by assuming fi is a very small positive value.
            // No penalty is possible when xi is zero (the xi*log(fi) term vanishes).
            return (xi != 0) ? -xi - xi * LOG_FOR_MIN : 0;
        }
        if (xi == 0)
            return fi;
        return fi - xi - xi * Math.log(fi / xi);
    }

    /**
     * Note: if the function returns a negative value then it is set to zero
     *
     * @param y
     *            Data to fit (must be strictly positive Poisson data)
     * @return The MLE chi-squared value
     *
     * @see gdsc.smlm.fitting.nonlinear.gradient.GradientCalculator#findLinearised(int[], double[], double[],
     *      double[][], double[], gdsc.smlm.function.NonLinearFunction)
     */
    public double findLinearised(final int[] x, final double[] y, final double[] a, final double[][] alpha,
            final double[] beta, final NonLinearFunction func)
    {
        double chisq = 0;
        final double[] dfi_da = new double[nparams];

        zero(alpha, beta);
        func.initialise(a);

        for (int i = 0; i < x.length; i++)
        {
            // Function must produce a positive output. We assume y[i] is positive.
            final double xi = y[i];
            final double fi = func.eval(i, dfi_da);

            chisq += chiSquaredDelta(fi, xi);

            if (fi > 0)
            {
                if (xi == 0)
                    compute0(beta, dfi_da, fi);
                else
                    compute(alpha, beta, dfi_da, fi, xi);
            }
            // else: we ignore this contribution to the gradient for stability
        }

        symmetric(alpha);

        // Move the factor of 2 to the end
        return checkGradients(alpha, beta, nparams, chisq * 2);
    }

    /**
     * Note: if the function returns a negative value then it is set to zero
     *
     * @param y
     *            Data to fit (must be strictly positive Poisson data)
     * @return The MLE chi-squared value
     *
     * @see gdsc.smlm.fitting.nonlinear.gradient.GradientCalculator#findLinearised(int[], double[], double[],
     *      double[][], double[], gdsc.smlm.function.NonLinearFunction, boolean[])
     */
    public double findLinearised(final int[] x, final double[] y, final double[] a, final double[][] alpha,
            final double[] beta, final NonLinearFunction func, boolean[] ignore)
    {
        double chisq = 0;
        final double[] dfi_da = new double[nparams];

        zero(alpha, beta);
        func.initialise(a);

        // Compact the indices of the parameters that are not ignored
        final int[] indices = new int[nparams];
        int nnparams = 0;
        for (int j = 0; j < nparams; j++)
        {
            if (ignore[j])
                continue;
            indices[nnparams++] = j;
        }

        for (int i = 0; i < x.length; i++)
        {
            // Function must produce a positive output. We assume y[i] is positive.
            final double xi = y[i];
            final double fi = func.eval(i, dfi_da);

            chisq += chiSquaredDelta(fi, xi);

            if (fi > 0)
                compute(alpha, beta, dfi_da, fi, xi, indices, nnparams);
            // else: we ignore this contribution to the gradient for stability
        }

        symmetric(alpha);

        // Move the factor of 2 to the end
        return checkGradients(alpha, beta, nparams, chisq * 2);
    }

    /**
     * @param y
     *            Data to fit (must be strictly positive Poisson data)
     * @return The MLE chi-squared value
     *
     * @see gdsc.smlm.fitting.nonlinear.gradient.GradientCalculator#findLinearised(int[], double[], double[], double[],
     *      gdsc.smlm.function.NonLinearFunction)
     */
    public double findLinearised(final int[] x, final double[] y, double[] y_fit, final double[] a,
            final NonLinearFunction func)
    {
        double chisq = 0;

        func.initialise(a);

        // Only store the function values if the output array can hold them
        final boolean store = (y_fit != null && y_fit.length >= x.length);

        for (int i = 0; i < x.length; i++)
        {
            // Function must produce a positive output. We assume y[i] is positive.
            final double fi = func.eval(i);
            if (store)
                y_fit[i] = fi;
            chisq += chiSquaredDelta(fi, y[i]);
        }

        // Move the factor of 2 to the end
        return chisq * 2;
    }

    /**
     * @param y
     *            Data to fit (must be strictly positive Poisson data)
     * @return The MLE chi-squared value
     *
     * @see gdsc.smlm.fitting.nonlinear.gradient.GradientCalculator#findLinearised(int, double[], double[], double[][],
     *      double[], gdsc.smlm.function.NonLinearFunction)
     */
    public double findLinearised(final int n, final double[] y, final double[] a, final double[][] alpha,
            final double[] beta, final NonLinearFunction func)
    {
        double chisq = 0;
        final double[] dfi_da = new double[nparams];

        zero(alpha, beta);
        func.initialise(a);

        for (int i = 0; i < n; i++)
        {
            // Function must produce a positive output. We assume y[i] is positive.
            final double xi = y[i];
            final double fi = func.eval(i, dfi_da);

            chisq += chiSquaredDelta(fi, xi);

            if (fi > 0)
            {
                if (xi == 0)
                    compute0(beta, dfi_da, fi);
                else
                    compute(alpha, beta, dfi_da, fi, xi);
            }
            // else: we ignore this contribution to the gradient for stability
        }

        symmetric(alpha);

        // Move the factor of 2 to the end
        return checkGradients(alpha, beta, nparams, chisq * 2);
    }

    /**
     * @param y
     *            Data to fit (must be strictly positive Poisson data)
     * @return The MLE chi-squared value
     *
     * @see gdsc.smlm.fitting.nonlinear.gradient.GradientCalculator#findLinearised(int, double[], double[], double[][],
     *      double[], gdsc.smlm.function.NonLinearFunction, boolean[])
     */
    public double findLinearised(final int n, final double[] y, final double[] a, final double[][] alpha,
            final double[] beta, final NonLinearFunction func, boolean[] ignore)
    {
        double chisq = 0;
        final double[] dfi_da = new double[nparams];

        zero(alpha, beta);
        func.initialise(a);

        // Compact the indices of the parameters that are not ignored
        final int[] indices = new int[nparams];
        int nnparams = 0;
        for (int j = 0; j < nparams; j++)
        {
            if (ignore[j])
                continue;
            indices[nnparams++] = j;
        }

        for (int i = 0; i < n; i++)
        {
            // Function must produce a positive output. We assume y[i] is positive.
            final double xi = y[i];
            final double fi = func.eval(i, dfi_da);

            chisq += chiSquaredDelta(fi, xi);

            if (fi > 0)
                compute(alpha, beta, dfi_da, fi, xi, indices, nnparams);
            // else: we ignore this contribution to the gradient for stability
        }

        symmetric(alpha);

        // Move the factor of 2 to the end
        return checkGradients(alpha, beta, nparams, chisq * 2);
    }

    /**
     * Find linearised.
     *
     * @param n
     *            the number of data points
     * @param y
     *            Data to fit (must be strictly positive Poisson data)
     * @param y_fit
     *            the fitted function values (can be null)
     * @param a
     *            the function coefficients
     * @param func
     *            the function
     * @return The MLE chi-squared value
     *
     * @see gdsc.smlm.fitting.nonlinear.gradient.GradientCalculator#findLinearised(int, double[], double[], double[],
     *      gdsc.smlm.function.NonLinearFunction)
     */
    public double findLinearised(final int n, final double[] y, double[] y_fit, final double[] a,
            final NonLinearFunction func)
    {
        double chisq = 0;

        func.initialise(a);

        // Only store the function values if the output array can hold them
        final boolean store = (y_fit != null && y_fit.length >= n);

        for (int i = 0; i < n; i++)
        {
            // Function must produce a positive output. We assume y[i] is positive.
            final double fi = func.eval(i);
            if (store)
                y_fit[i] = fi;
            chisq += chiSquaredDelta(fi, y[i]);
        }

        // Move the factor of 2 to the end
        return chisq * 2;
    }

    /**
     * Zero the working region of the input matrix alpha and vector beta
     *
     * @param alpha
     *            the alpha
     * @param beta
     *            the beta
     */
    protected void zero(final double[][] alpha, final double[] beta)
    {
        for (int i = 0; i < nparams; i++)
        {
            beta[i] = 0;
            // Only the lower triangle is accumulated; symmetric(double[][]) fills the rest
            for (int j = 0; j <= i; j++)
                alpha[i][j] = 0;
        }
    }

    /**
     * Compute the matrix alpha and vector beta.
     *
     * @param alpha
     *            the scaled Hessian curvature matrix (size nparams*nparams)
     * @param beta
     *            the scaled gradient vector of the function's partial first derivatives with respect to the parameters
     *            (size nparams)
     * @param dfi_da
     *            the gradient of the function with respect to each parameter a
     * @param fi
     *            the function value at index i (must be strictly positive)
     * @param xi
     *            the data value at index i
     */
    protected void compute(final double[][] alpha, final double[] beta, final double[] dfi_da, final double fi,
            final double xi)
    {
        final double xi_fi = xi / fi;
        final double xi_fi2 = xi_fi / fi;
        final double e = 1 - (xi_fi);
        // For + summation:
        //final double e = (xi_fi) - 1;

        // Compute:
        // Laurence & Chromy (2010) Nature Methods 7, 338-339, SI
        // alpha - the Hessian matrix (the square matrix of second-order partial derivatives of a function;
        //         that is, it describes the local curvature of a function of many variables.)
        // beta  - the gradient vector of the function's partial first derivatives with respect to the parameters
        for (int k = 0; k < nparams; k++)
        {
            final double w = dfi_da[k] * xi_fi2;

            for (int l = 0; l <= k; l++)
                // This is the non-optimised version:
                //alpha[k][l] += dfi_da[k] * dfi_da[l] * xi / (fi * fi);
                alpha[k][l] += w * dfi_da[l];

            // This is the non-optimised version:
            //beta[k] -= (1 - xi / fi) * dfi_da[k];
            beta[k] -= e * dfi_da[k];
            // + summation:
            //beta[k] += e * dfi_da[k];
        }
    }

    /**
     * Compute the matrix alpha and vector beta when the data value is zero.
     *
     * @param beta
     *            the beta
     * @param dfi_da
     *            the gradient of the function with respect to each parameter a
     * @param fi
     *            the function value at index i
     */
    protected void compute0(final double[] beta, final double[] dfi_da, final double fi)
    {
        // Assume xi is zero: e = 1 - 0/fi = 1 and the alpha term xi/fi^2 is zero.
        // This removes most of the computation.
        for (int k = 0; k < nparams; k++)
        {
            beta[k] -= dfi_da[k];
        }
    }

    /**
     * Compute the matrix alpha and vector beta using only a subset of the gradient indices.
     * The results are stored compacted into the first nnparams positions of alpha and beta.
     *
     * @param alpha
     *            the alpha
     * @param beta
     *            the beta
     * @param dfi_da
     *            the gradient of the function with respect to each parameter a
     * @param fi
     *            the function value at index i (must be strictly positive)
     * @param xi
     *            the data value at index i
     * @param indices
     *            the gradient indices to use (those not ignored)
     * @param nnparams
     *            the number of gradient indices to use
     */
    protected void compute(final double[][] alpha, final double[] beta, final double[] dfi_da, final double fi,
            final double xi, int[] indices, int nnparams)
    {
        final double xi_fi = xi / fi;
        final double xi_fi2 = xi_fi / fi;
        final double e = 1 - (xi_fi);

        // Compute:
        // Laurence & Chromy (2010) Nature Methods 7, 338-339, SI
        // alpha - the Hessian matrix (the square matrix of second-order partial derivatives of a function;
        //         that is, it describes the local curvature of a function of many variables.)
        // beta  - the gradient vector of the function's partial first derivatives with respect to the parameters
        for (int k = 0; k < nnparams; k++)
        {
            final double w = dfi_da[indices[k]] * xi_fi2;

            for (int l = 0; l <= k; l++)
                alpha[k][l] += w * dfi_da[indices[l]];

            // BUG FIX: the gradient must be read through the compacted index mapping,
            // otherwise beta uses the wrong parameter gradient when any parameter is ignored.
            beta[k] -= e * dfi_da[indices[k]];
        }
    }

    /**
     * Generate a symmetric matrix alpha by mirroring the lower triangle into the upper triangle.
     *
     * @param alpha
     *            the alpha
     */
    protected void symmetric(final double[][] alpha)
    {
        for (int i = 0; i < nparams - 1; i++)
            for (int j = i + 1; j < nparams; j++)
                alpha[i][j] = alpha[j][i];
    }

    /**
     * Evaluate the function and compute the MLE chi-squared value and the gradient with respect to the model
     * parameters.
     * <p>
     * A call to {@link #isNaNGradients()} will indicate if the gradients were invalid.
     *
     * @param x
     *            n observations
     * @param y
     *            Data to fit
     * @param a
     *            Set of m coefficients
     * @param df_da
     *            the gradient vector of the function's partial first derivatives with respect to the parameters (size
     *            m)
     * @param func
     *            Non-linear fitting function
     * @return The MLE chi-squared value
     */
    public double evaluate(final int[] x, final double[] y, final double[] a, final double[] df_da,
            final NonLinearFunction func)
    {
        double chisq = 0;
        final double[] dfi_da = new double[nparams];

        zero(df_da);
        func.initialise(a);

        for (int i = 0; i < x.length; i++)
        {
            // Function must produce a positive output. We assume y[i] is positive.
            final double xi = y[i];
            final double fi = func.eval(i, dfi_da);

            chisq += chiSquaredDelta(fi, xi);

            if (fi > 0)
            {
                // Compute:
                // Laurence & Chromy (2010) Nature Methods 7, 338-339, SI, Equation 6
                // df_da - the gradient vector of the function's partial first derivatives with respect to the
                //         parameters
                final double e = 1 - (xi / fi);
                for (int k = 0; k < nparams; k++)
                {
                    df_da[k] += e * dfi_da[k];
                }
            }
            // else: we ignore this contribution to the gradient for stability
        }

        checkGradients(df_da, nparams);

        // Move the factor of 2 to the end
        for (int j = 0; j < nparams; j++)
            df_da[j] *= 2;

        return chisq * 2;
    }

    /**
     * Get the Poisson likelihood of value x given the mean. The mean must be strictly positive. x must be positive.
     *
     * @param u
     *            the mean
     * @param x
     *            the x
     * @return the likelihood
     */
    public static double likelihood(double u, double x)
    {
        if (x == 0)
            return FastMath.exp(-u);
        return Math.pow(u, x) * FastMath.exp(-u) / factorial(x);
    }

    /**
     * Compute the factorial using the Gamma function for non-integer support.
     *
     * @param k
     *            the value
     * @return k!
     */
    private static double factorial(double k)
    {
        if (k <= 1)
            return 1;
        return Gamma.gamma(k + 1);
    }

    /**
     * Get the Poisson log likelihood of value x given the mean. The mean must be strictly positive. x must be positive.
     *
     * @param u
     *            the mean
     * @param x
     *            the x
     * @return the log likelihood
     */
    public static double logLikelihood(double u, double x)
    {
        if (x == 0)
            return -u;
        return x * Math.log(u) - u - logFactorial(x);
    }

    /**
     * Compute the log factorial using the log Gamma function for non-integer support.
     *
     * @param k
     *            the value
     * @return ln(k!)
     */
    private static double logFactorial(double k)
    {
        if (k <= 1)
            return 0;
        return Gamma.logGamma(k + 1);
    }

    /**
     * Compute the Poisson log likelihood for the data.
     *
     * @param x
     *            the data
     * @param a
     *            the parameters for the function
     * @param func
     *            the function (must evaluate to strictly positive for all the data points)
     * @return the Poisson log likelihood
     */
    public double logLikelihood(final double[] x, final double[] a, final NonLinearFunction func)
    {
        double ll = 0;
        func.initialise(a);
        for (int i = 0; i < x.length; i++)
        {
            ll += logLikelihood(func.eval(i), x[i]);
        }
        return ll;
    }
}