package gdsc.smlm.fitting.nonlinear.gradient; import gdsc.smlm.function.NonLinearFunction; /*----------------------------------------------------------------------------- * GDSC SMLM Software * * Copyright (C) 2013 Alex Herbert * Genome Damage and Stability Centre * University of Sussex, UK * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. *---------------------------------------------------------------------------*/ /** * Calculates the Hessian matrix (the square matrix of second-order partial derivatives of a function) * and the scaled gradient vector of the function's partial first derivatives with respect to the parameters. * This is used within the Levenberg-Marquardt method to fit a nonlinear model with coefficients (a) for a * set of data points (x, y). * <p> * Note that the Hessian matrix is scaled by 1/2 and the gradient vector is scaled by -1/2 for convenience in solving * the non-linear model. See Numerical Recipes in C++, 2nd Ed. Equation 15.5.8 for Nonlinear Models. */ public class GradientCalculator { public final int nparams; private boolean badGradients; /** * @param nparams * The number of gradient parameters */ public GradientCalculator(final int nparams) { this.nparams = nparams; } /** * Evaluate the function and compute the sum-of-squares and the curvature matrix. * <p> * A call to {@link #isNaNGradients()} will indicate if the gradients were invalid. * * @param x * n observations * @param y * Data to fit * @param a * Set of m coefficients * @param alpha * the scaled Hessian curvature matrix (size m*m) * @param beta * the scaled gradient vector of the function's partial first derivatives with respect to the parameters * (size m) * @param func * Non-linear fitting function * @return The sum-of-squares value for the fit */ public double findLinearised(final int[] x, final double[] y, final double[] a, final double[][] alpha, final double[] beta, final NonLinearFunction func) { double ssx = 0; final double[] dy_da = new double[a.length]; for (int i = 0; i < nparams; i++) { beta[i] = 0; for (int j = 0; j <= i; j++) alpha[i][j] = 0; } func.initialise(a); if (func.canComputeWeights()) { final double[] w = new double[1]; for (int i = 0; i < x.length; i++) { final double dy = y[i] - func.eval(x[i], dy_da, w); final double weight = getWeight(w[0]); // Compute: // - the scaled Hessian matrix (the square matrix of second-order partial derivatives of a function; // that is, it describes the local curvature of a function of many variables.) // - the scaled gradient vector of the function's partial first derivatives with respect to the parameters for (int j = 0; j < nparams; j++) { final double wgt = dy_da[j] * weight; for (int k = 0; k <= j; k++) alpha[j][k] += wgt * dy_da[k]; beta[j] += wgt * dy; } ssx += dy * dy * weight; } } else { for (int i = 0; i < x.length; i++) { final double dy = y[i] - func.eval(x[i], dy_da); // Compute: // - the scaled Hessian matrix (the square matrix of second-order partial derivatives of a function; // that is, it describes the local curvature of a function of many variables.) // - the scaled gradient vector of the function's partial first derivatives with respect to the parameters for (int j = 0; j < nparams; j++) { final double wgt = dy_da[j]; for (int k = 0; k <= j; k++) alpha[j][k] += wgt * dy_da[k]; beta[j] += wgt * dy; } ssx += dy * dy; } } // Generate symmetric matrix for (int i = 0; i < nparams - 1; i++) for (int j = i + 1; j < nparams; j++) alpha[i][j] = alpha[j][i]; return checkGradients(alpha, beta, nparams, ssx); } /** * Evaluate the function and compute the sum-of-squares and the curvature matrix. * <p> * A call to {@link #isNaNGradients()} will indicate if the gradients were invalid. * <p> * Allows disabling the use of gradients. The output alpha and beta will be reduced in size by the number of indices * * @param x * n observations * @param y * Data to fit * @param a * Set of m coefficients * @param alpha * the scaled Hessian curvature matrix (size m*m) * @param beta * the scaled gradient vector of the function's partial first derivatives with respect to the parameters * (size m) * @param func * Non-linear fitting function * @param ignore * An array of size beta.length. Set the index to true to ignore the gradients. * @return The sum-of-squares value for the fit */ public double findLinearised(final int[] x, final double[] y, final double[] a, final double[][] alpha, final double[] beta, final NonLinearFunction func, boolean[] ignore) { double ssx = 0; final double[] dy_da = new double[a.length]; for (int i = 0; i < nparams; i++) { beta[i] = 0; for (int j = 0; j <= i; j++) alpha[i][j] = 0; } func.initialise(a); int[] indices = new int[nparams]; int nnparams = 0; for (int j = 0; j < nparams; j++) { if (ignore[j]) continue; indices[nnparams++] = j; } if (func.canComputeWeights()) { final double[] w = new double[1]; for (int i = 0; i < x.length; i++) { final double dy = y[i] - func.eval(x[i], dy_da, w); final double weight = getWeight(w[0]); // Compute: // - the scaled Hessian matrix (the square matrix of second-order partial derivatives of a function; // that is, it describes the local curvature of a function of many variables.) // - the scaled gradient vector of the function's partial first derivatives with respect to the parameters for (int j = 0; j < nnparams; j++) { final double wgt = dy_da[indices[j]] * weight; for (int k = 0; k <= j; k++) alpha[j][k] += wgt * dy_da[indices[k]]; beta[j] += wgt * dy; } ssx += dy * dy * weight; } } else { for (int i = 0; i < x.length; i++) { final double dy = y[i] - func.eval(x[i], dy_da); // Compute: // - the scaled Hessian matrix (the square matrix of second-order partial derivatives of a function; // that is, it describes the local curvature of a function of many variables.) // - the scaled gradient vector of the function's partial first derivatives with respect to the parameters for (int j = 0; j < nnparams; j++) { final double wgt = dy_da[indices[j]]; for (int k = 0; k <= j; k++) alpha[j][k] += wgt * dy_da[indices[k]]; beta[j] += wgt * dy; } ssx += dy * dy; } } // Generate symmetric matrix for (int i = 0; i < nparams - 1; i++) for (int j = i + 1; j < nparams; j++) alpha[i][j] = alpha[j][i]; return checkGradients(alpha, beta, nparams, ssx); } /** * Evaluate the function and compute the sum-of-squares * * @param x * n observations * @param y * The data * @param y_fit * The function data * @param a * Set of m coefficients * @param func * Non-linear fitting function * @return The sum-of-squares value for the fit */ public double findLinearised(final int[] x, final double[] y, double[] y_fit, final double[] a, final NonLinearFunction func) { double ssx = 0; func.initialise(a); if (y_fit == null || y_fit.length < x.length) { if (func.canComputeWeights()) { final double[] w = new double[1]; for (int i = 0; i < x.length; i++) { final double dy = y[i] - func.evalw(x[i], w); final double weight = getWeight(w[0]); ssx += dy * dy * weight; } } else { for (int i = 0; i < x.length; i++) { final double dy = y[i] - func.eval(x[i]); ssx += dy * dy; } } } else { if (func.canComputeWeights()) { final double[] w = new double[1]; for (int i = 0; i < x.length; i++) { y_fit[i] = func.evalw(x[i], w); final double dy = y[i] - y_fit[i]; final double weight = getWeight(w[0]); ssx += dy * dy * weight; } } else { for (int i = 0; i < x.length; i++) { y_fit[i] = func.eval(x[i]); final double dy = y[i] - y_fit[i]; ssx += dy * dy; } } } return ssx; } /** * Evaluate the function and compute the sum-of-squares and the curvature matrix. * Assumes the n observations (x) are sequential integers from 0. * <p> * If the function supports weights then these will be used to compute the SS and curvature matrix. * <p> * A call to {@link #isNaNGradients()} will indicate if the gradients were invalid. * * @param n * The number of data points * @param y * Data to fit * @param a * Set of m coefficients * @param alpha * the scaled Hessian curvature matrix (size m*m) * @param beta * the scaled gradient vector of the function's partial first derivatives with respect to the parameters * (size m) * @param func * Non-linear fitting function * @see {@link gdsc.smlm.function.NonLinearFunction#eval(int, double[])}, * @see {@link gdsc.smlm.function.NonLinearFunction#eval(int, double[], double[])}, * @see {@link gdsc.smlm.function.NonLinearFunction#canComputeWeights()} * @return The sum-of-squares value for the fit. */ public double findLinearised(final int n, final double[] y, final double[] a, final double[][] alpha, final double[] beta, final NonLinearFunction func) { double ssx = 0; final double[] dy_da = new double[a.length]; for (int i = 0; i < nparams; i++) { beta[i] = 0; for (int j = 0; j <= i; j++) alpha[i][j] = 0; } func.initialise(a); if (func.canComputeWeights()) { final double[] w = new double[1]; for (int i = 0; i < n; i++) { final double dy = y[i] - func.eval(i, dy_da, w); final double weight = getWeight(w[0]); // Compute: // - the scaled Hessian matrix (the square matrix of second-order partial derivatives of a function; // that is, it describes the local curvature of a function of many variables.) // - the scaled gradient vector of the function's partial first derivatives with respect to the parameters for (int j = 0; j < nparams; j++) { final double wgt = dy_da[j] * weight; for (int k = 0; k <= j; k++) alpha[j][k] += wgt * dy_da[k]; beta[j] += wgt * dy; } ssx += dy * dy * weight; } } else { for (int i = 0; i < n; i++) { final double dy = y[i] - func.eval(i, dy_da); // Compute: // - the scaled Hessian matrix (the square matrix of second-order partial derivatives of a function; // that is, it describes the local curvature of a function of many variables.) // - the scaled gradient vector of the function's partial first derivatives with respect to the parameters for (int j = 0; j < nparams; j++) { final double wgt = dy_da[j]; for (int k = 0; k <= j; k++) alpha[j][k] += wgt * dy_da[k]; beta[j] += wgt * dy; } ssx += dy * dy; } } // Generate symmetric matrix for (int i = 0; i < nparams - 1; i++) for (int j = i + 1; j < nparams; j++) alpha[i][j] = alpha[j][i]; return checkGradients(alpha, beta, nparams, ssx); } /** * Evaluate the function and compute the sum-of-squares and the curvature matrix. * Assumes the n observations (x) are sequential integers from 0. * <p> * If the function supports weights then these will be used to compute the SS and curvature matrix. * <p> * A call to {@link #isNaNGradients()} will indicate if the gradients were invalid. * <p> * Allows disabling the use of gradients. The output alpha and beta will be reduced in size by the number of indices * that were disabled. The remaining positions will be zero filled. * * @param n * The number of data points * @param y * Data to fit * @param a * Set of m coefficients * @param alpha * the scaled Hessian curvature matrix (size m*m) * @param beta * the scaled gradient vector of the function's partial first derivatives with respect to the parameters * (size m) * @param func * Non-linear fitting function * @param ignore * An array of size beta.length. Set the index to true to ignore the gradients. * @return The sum-of-squares value for the fit. * @see {@link gdsc.smlm.function.NonLinearFunction#eval(int, double[])}, * @see {@link gdsc.smlm.function.NonLinearFunction#eval(int, double[], double[])}, * @see {@link gdsc.smlm.function.NonLinearFunction#canComputeWeights()} */ public double findLinearised(final int n, final double[] y, final double[] a, final double[][] alpha, final double[] beta, final NonLinearFunction func, boolean[] ignore) { double ssx = 0; final double[] dy_da = new double[a.length]; for (int i = 0; i < nparams; i++) { beta[i] = 0; for (int j = 0; j <= i; j++) alpha[i][j] = 0; } func.initialise(a); int[] indices = new int[nparams]; int nnparams = 0; for (int j = 0; j < nparams; j++) { if (ignore[j]) continue; indices[nnparams++] = j; } if (func.canComputeWeights()) { final double[] w = new double[1]; for (int i = 0; i < n; i++) { final double dy = y[i] - func.eval(i, dy_da, w); final double weight = getWeight(w[0]); // Compute: // - the scaled Hessian matrix (the square matrix of second-order partial derivatives of a function; // that is, it describes the local curvature of a function of many variables.) // - the scaled gradient vector of the function's partial first derivatives with respect to the parameters for (int j = 0; j < nnparams; j++) { final double wgt = dy_da[indices[j]] * weight; for (int k = 0; k <= j; k++) alpha[j][k] += wgt * dy_da[indices[k]]; beta[j] += wgt * dy; } ssx += dy * dy * weight; } } else { for (int i = 0; i < n; i++) { final double dy = y[i] - func.eval(i, dy_da); // Compute: // - the scaled Hessian matrix (the square matrix of second-order partial derivatives of a function; // that is, it describes the local curvature of a function of many variables.) // - the scaled gradient vector of the function's partial first derivatives with respect to the parameters for (int j = 0; j < nnparams; j++) { final double wgt = dy_da[indices[j]]; for (int k = 0; k <= j; k++) alpha[j][k] += wgt * dy_da[indices[k]]; beta[j] += wgt * dy; } ssx += dy * dy; } } // Generate symmetric matrix for (int i = 0; i < nparams - 1; i++) for (int j = i + 1; j < nparams; j++) alpha[i][j] = alpha[j][i]; return checkGradients(alpha, beta, nparams, ssx); } /** * Evaluate the function and compute the sum-of-squares * * @param n * The number of data points * @param y * The data * @param y_fit * The function data * @param a * Set of m coefficients * @param func * Non-linear fitting function * @return The sum-of-squares value for the fit */ public double findLinearised(final int n, final double[] y, double[] y_fit, final double[] a, final NonLinearFunction func) { double ssx = 0; func.initialise(a); if (y_fit == null || y_fit.length < n) { if (func.canComputeWeights()) { final double[] w = new double[1]; for (int i = 0; i < n; i++) { final double dy = y[i] - func.evalw(i, w); final double weight = getWeight(w[0]); ssx += dy * dy * weight; } } else { for (int i = 0; i < n; i++) { final double dy = y[i] - func.eval(i); ssx += dy * dy; } } } else { if (func.canComputeWeights()) { final double[] w = new double[1]; for (int i = 0; i < n; i++) { y_fit[i] = func.evalw(i, w); final double dy = y[i] - y_fit[i]; final double weight = getWeight(w[0]); ssx += dy * dy * weight; } } else { for (int i = 0; i < n; i++) { y_fit[i] = func.eval(i); final double dy = y[i] - y_fit[i]; ssx += dy * dy; } } } return ssx; } /** * Get the weight factor using the computed weight * <p> * Check if the weight is below 1 and set to 1 to avoid excessive weights. * * @param w * The computed weight * @return The weight factor */ protected double getWeight(final double w) { // TODO - Check if there is a better way to smooth the weights rather than just truncating them at 1 return (w < 1) ? 1 : 1.0 / w; } protected double checkGradients(final double[][] alpha, final double[] beta, int nparams, final double ssx) { badGradients = checkIsNaN(alpha, beta, nparams); return ssx; } private boolean checkIsNaN(final double[][] alpha, final double[] beta, final int nparams) { for (int i = 0; i < nparams; i++) { if (Double.isNaN(beta[i])) return true; for (int j = 0; j <= i; j++) if (Double.isNaN(alpha[i][j])) return true; } return false; } protected void checkGradients(double[][] alpha, int nparams) { badGradients = checkIsNaN(alpha, nparams); } private boolean checkIsNaN(final double[][] alpha, final int nparams) { for (int i = 0; i < nparams; i++) { for (int j = 0; j <= i; j++) if (Double.isNaN(alpha[i][j])) return true; } return false; } protected void checkGradients(final double[] beta, final int nparams) { badGradients = checkIsNaN(beta, nparams); } private boolean checkIsNaN(final double[] beta, final int nparams) { for (int i = 0; i < nparams; i++) { if (Double.isNaN(beta[i])) return true; } return false; } /** * @return True if the last calculation produced gradients with NaN values */ public boolean isNaNGradients() { return badGradients; } /** * Compute the Fisher's Information Matrix (I) assuming a Poisson process. * * <pre> * Iab = E [ ( d ln(L(x|p)) / da ) * ( d ln(L(x|p)) / db ) ] * p = parameters * x = obsereved values * L(x|p) = likelihood of X given p * E = expected value * </pre> * * Note that this is only a true Fisher information diagonal if the function returns the expected value for a * Poisson process. In this case the equation reduces to: * * <pre> * Iaa = sum(i) (dYi da) * (dYi da) / Yi * </pre> * * See Smith et al, (2010). Fast, single-molecule localisation that achieves theoretically minimum uncertainty. * Nature Methods 7, 373-375 (supplementary note), Eq. 9. * * A call to {@link #isNaNGradients()} will indicate if the gradients were invalid. * * @param x * n observations * @param a * Set of m coefficients * @param func * Non-linear fitting function * @return I */ public double[][] fisherInformationMatrix(int[] x, final double[] a, final NonLinearFunction func) { return fisherInformationMatrix(x.length, a, func); } /** * Compute the Fisher's Information Matrix (I) assuming a Poisson process. * * <pre> * Iab = E [ ( d ln(L(x|p)) / da ) * ( d ln(L(x|p)) / db ) ] * p = parameters * x = obsereved values * L(x|p) = likelihood of X given p * E = expected value * </pre> * * Note that this is only a true Fisher information diagonal if the function returns the expected value for a * Poisson process. In this case the equation reduces to: * * <pre> * Iaa = sum(i) (dYi da) * (dYi da) / Yi * </pre> * * See Smith et al, (2010). Fast, single-molecule localisation that achieves theoretically minimum uncertainty. * Nature Methods 7, 373-375 (supplementary note), Eq. 9. * * A call to {@link #isNaNGradients()} will indicate if the gradients were invalid. * * @param n * The number of data points * @param a * Set of m coefficients * @param func * Non-linear fitting function * @return I */ public double[][] fisherInformationMatrix(final int n, final double[] a, final NonLinearFunction func) { final double[] dy_da = new double[a.length]; final double[][] alpha = new double[nparams][nparams]; func.initialise(a); for (int i = 0; i < n; i++) { final double yi = 1.0 / func.eval(i, dy_da); for (int j = 0; j < nparams; j++) { final double dy_db = dy_da[j] * yi; for (int k = 0; k <= j; k++) alpha[j][k] += dy_db * dy_da[k]; } } // Generate symmetric matrix for (int i = 0; i < nparams - 1; i++) for (int j = i + 1; j < nparams; j++) alpha[i][j] = alpha[j][i]; checkGradients(alpha, nparams); return alpha; } /** * Compute the central diagonal of Fisher's Information Matrix (I) assuming a Poisson process. * * <pre> * Iab = E [ ( d ln(L(x|p)) / da ) * ( d ln(L(x|p)) / db ) ] * p = parameters * x = obsereved values * L(x|p) = likelihood of X given p * E = expected value * </pre> * * Note that this is only a true Fisher information diagonal if the function returns the expected value for a * Poisson process. In this case the equation reduces to: * * <pre> * Iaa = sum(i) (dYi da) * (dYi da) / Yi * </pre> * * See Smith et al, (2010). Fast, single-molecule localisation that achieves theoretically minimum uncertainty. * Nature Methods 7, 373-375 (supplementary note), Eq. 9. * * A call to {@link #isNaNGradients()} will indicate if the gradients were invalid. * * @param x * n observations * @param a * Set of m coefficients * @param func * Non-linear fitting function * @return Iaa */ public double[] fisherInformationDiagonal(final int[] x, final double[] a, final NonLinearFunction func) { return fisherInformationDiagonal(x.length, a, func); } /** * Compute the central diagonal of Fisher's Information Matrix (I) assuming a Poisson process. * * <pre> * Iab = E [ ( d ln(L(x|p)) / da ) * ( d ln(L(x|p)) / db ) ] * p = parameters * x = obsereved values * L(x|p) = likelihood of X given p * E = expected value * </pre> * * Note that this is only a true Fisher information diagonal if the function returns the expected value for a * Poisson process. In this case the equation reduces to: * * <pre> * Iaa = sum(i) (dYi da) * (dYi da) / Yi * </pre> * * See Smith et al, (2010). Fast, single-molecule localisation that achieves theoretically minimum uncertainty. * Nature Methods 7, 373-375 (supplementary note), Eq. 9. * * A call to {@link #isNaNGradients()} will indicate if the gradients were invalid. * * @param n * The number of data points * @param a * Set of m coefficients * @param func * Non-linear fitting function * @return Iaa */ public double[] fisherInformationDiagonal(final int n, final double[] a, final NonLinearFunction func) { final double[] dy_da = new double[a.length]; final double[] alpha = new double[nparams]; func.initialise(a); for (int i = 0; i < n; i++) { final double yi = 1.0 / func.eval(i, dy_da); for (int j = 0; j < nparams; j++) { alpha[j] += dy_da[j] * dy_da[j] * yi; } } checkGradients(alpha, nparams); return alpha; } /** * Evaluate the function and compute the sum-of-squares and the gradient with respect to the model * parameters. * <p> * A call to {@link #isNaNGradients()} will indicate if the gradients were invalid. * * @param x * n observations * @param y * Data to fit * @param a * Set of m coefficients * @param df_da * the gradient vector of the function's partial first derivatives with respect to the parameters * (size m) * @param func * Non-linear fitting function * @return The sum-of-squares value for the fit */ public double evaluate(final int[] x, final double[] y, final double[] a, final double[] df_da, final NonLinearFunction func) { double ssx = 0; final double[] dy_da = new double[nparams]; zero(df_da); func.initialise(a); for (int i = 0; i < x.length; i++) { final double dy = y[i] - func.eval(x[i], dy_da); // Compute: // - the gradient vector of the function's partial first derivatives with respect to the parameters for (int j = 0; j < nparams; j++) { df_da[j] += dy_da[j] * dy; } ssx += dy * dy; } checkGradients(df_da, nparams); // Apply a factor of -2 to compute the actual gradients: // See Numerical Recipes in C++, 2nd Ed. Equation 15.5.6 for Nonlinear Models for (int j = 0; j < nparams; j++) df_da[j] *= -2; return ssx; } /** * Zero the working region of the input matrix alpha and vector beta * * @param beta * the beta */ protected void zero(final double[] beta) { for (int i = 0; i < nparams; i++) { beta[i] = 0; } } }