/** * Copyright (C) 2009 - present by OpenGamma Inc. and the OpenGamma group of companies * * Please see distribution for license. */ package com.opengamma.analytics.math.statistics.leastsquare; import org.apache.commons.lang.Validate; import com.opengamma.analytics.math.MathException; import com.opengamma.analytics.math.differentiation.VectorFieldFirstOrderDifferentiator; import com.opengamma.analytics.math.function.Function1D; import com.opengamma.analytics.math.linearalgebra.Decomposition; import com.opengamma.analytics.math.linearalgebra.DecompositionFactory; import com.opengamma.analytics.math.linearalgebra.DecompositionResult; import com.opengamma.analytics.math.matrix.DoubleMatrix1D; import com.opengamma.analytics.math.matrix.DoubleMatrix2D; import com.opengamma.analytics.math.matrix.DoubleMatrixUtils; import com.opengamma.analytics.math.matrix.MatrixAlgebra; import com.opengamma.analytics.math.matrix.MatrixAlgebraFactory; import com.opengamma.analytics.math.matrix.OGMatrixAlgebra; import com.opengamma.util.ArgumentChecker; /** * Modification to NonLinearLeastSquare to use a penalty function add to the normal chi^2 term of the form $a^TPa$ where * $a$ is the vector of model parameters sort and P is some matrix. The idea is to extend the p-spline concept to * non-linear models of the form $\hat{y}_j = H\left(\sum_{i=0}^{M-1} w_i b_i (x_j)\right)$ where $H(\cdot)$ is * some non-linear function, $b_i(\cdot)$ are a set of basis functions and $w_i$ are the weights (to be found). As with * (linear) p-splines, smoothness of the function is obtained by having a penalty on the nth order difference of the * weights. The modified chi-squared is written as * $\chi^2 = \sum_{i=0}^{N-1} \left(\frac{y_i-H\left(\sum_{k=0}^{M-1} w_k b_k (x_i)\right)}{\sigma_i} \right)^2 + * \sum_{i,j=0}^{M-1}P_{i,j}x_ix_j$ */ public class NonLinearLeastSquareWithPenalty { // private static final Logger LOGGER = LoggerFactory.getLogger(NonLinearLeastSquareWithPenalty.class); private static final int MAX_ATTEMPTS = 100000; // Review should we use Cholesky as default private static final Decomposition<?> DEFAULT_DECOMP = DecompositionFactory.SV_COLT; private static final OGMatrixAlgebra MA = new OGMatrixAlgebra(); private static final double EPS = 1e-8; // Default convergence tolerance on the relative change in chi2 /** * Unconstrained allowed function - always returns true */ public static final Function1D<DoubleMatrix1D, Boolean> UNCONSTRAINED = new Function1D<DoubleMatrix1D, Boolean>() { @Override public Boolean evaluate(final DoubleMatrix1D x) { return true; } }; private final double _eps; private final Decomposition<?> _decomposition; private final MatrixAlgebra _algebra; /** * Default constructor. This uses SVD (Colt), {@link OGMatrixAlgebra} and a convergence tolerance of 1e-8 */ public NonLinearLeastSquareWithPenalty() { this(DEFAULT_DECOMP, MA, EPS); } /** * Constructor allowing matrix decomposition to be set. This uses {@link OGMatrixAlgebra} and a convergence tolerance of 1e-8 * @param decomposition Matrix decomposition (see {@link DecompositionFactory} for list) */ public NonLinearLeastSquareWithPenalty(final Decomposition<?> decomposition) { this(decomposition, MA, EPS); } /** * Constructor allowing convergence tolerance to be set. This uses SVD (Colt) and {@link OGMatrixAlgebra} * @param eps Convergence tolerance */ public NonLinearLeastSquareWithPenalty(final double eps) { this(DEFAULT_DECOMP, MA, eps); } /** * Constructor allowing matrix decomposition and convergence tolerance to be set. This uses {@link OGMatrixAlgebra} * @param decomposition Matrix decomposition (see {@link DecompositionFactory} for list) * @param eps Convergence tolerance */ public NonLinearLeastSquareWithPenalty(final Decomposition<?> decomposition, final double eps) { this(decomposition, MA, eps); } /** * General constructor * @param decomposition Matrix decomposition (see {@link DecompositionFactory} for list) * @param algebra The matrix algebra (see {@link MatrixAlgebraFactory} for list) * @param eps Convergence tolerance */ public NonLinearLeastSquareWithPenalty(final Decomposition<?> decomposition, final MatrixAlgebra algebra, final double eps) { ArgumentChecker.notNull(decomposition, "decomposition"); ArgumentChecker.notNull(algebra, "algebra"); ArgumentChecker.isTrue(eps > 0, "must have positive eps"); _decomposition = decomposition; _algebra = algebra; _eps = eps; } /** * Use this when the model is given as a function of its parameters only (i.e. a function that takes a set of * parameters and return a set of model values, * so the measurement points are already known to the function), and analytic parameter sensitivity is not available * @param observedValues Set of measurement values * @param func The model as a function of its parameters only * @param startPos Initial value of the parameters * @param penalty Penalty matrix * @return value of the fitted parameters */ public LeastSquareWithPenaltyResults solve(final DoubleMatrix1D observedValues, final Function1D<DoubleMatrix1D, DoubleMatrix1D> func, final DoubleMatrix1D startPos, final DoubleMatrix2D penalty) { final int n = observedValues.getNumberOfElements(); final VectorFieldFirstOrderDifferentiator jac = new VectorFieldFirstOrderDifferentiator(); return solve(observedValues, new DoubleMatrix1D(n, 1.0), func, jac.differentiate(func), startPos, penalty); } /** * Use this when the model is given as a function of its parameters only (i.e. a function that takes a set of * parameters and return a set of model values, * so the measurement points are already known to the function), and analytic parameter sensitivity is not available * @param observedValues Set of measurement values * @param sigma Set of measurement errors * @param func The model as a function of its parameters only * @param startPos Initial value of the parameters * @param penalty Penalty matrix * @return value of the fitted parameters */ public LeastSquareWithPenaltyResults solve(final DoubleMatrix1D observedValues, final DoubleMatrix1D sigma, final Function1D<DoubleMatrix1D, DoubleMatrix1D> func, final DoubleMatrix1D startPos, final DoubleMatrix2D penalty) { final VectorFieldFirstOrderDifferentiator jac = new VectorFieldFirstOrderDifferentiator(); return solve(observedValues, sigma, func, jac.differentiate(func), startPos, penalty); } /** * Use this when the model is given as a function of its parameters only (i.e. a function that takes a set of * parameters and return a set of model values, * so the measurement points are already known to the function), and analytic parameter sensitivity is not available * @param observedValues Set of measurement values * @param sigma Set of measurement errors * @param func The model as a function of its parameters only * @param startPos Initial value of the parameters * @param penalty Penalty matrix * @param allowedValue a function which returned true if the new trial position is allowed by the model. An example * would be to enforce positive parameters * without resorting to a non-linear parameter transform. In some circumstances this approach will lead to slow * convergence. * @return value of the fitted parameters */ public LeastSquareWithPenaltyResults solve(final DoubleMatrix1D observedValues, final DoubleMatrix1D sigma, final Function1D<DoubleMatrix1D, DoubleMatrix1D> func, final DoubleMatrix1D startPos, final DoubleMatrix2D penalty, final Function1D<DoubleMatrix1D, Boolean> allowedValue) { final VectorFieldFirstOrderDifferentiator jac = new VectorFieldFirstOrderDifferentiator(); return solve(observedValues, sigma, func, jac.differentiate(func), startPos, penalty, allowedValue); } /** * Use this when the model is given as a function of its parameters only (i.e. a function that takes a set of * parameters and return a set of model values, * so the measurement points are already known to the function), and analytic parameter sensitivity is available * @param observedValues Set of measurement values * @param sigma Set of measurement errors * @param func The model as a function of its parameters only * @param jac The model sensitivity to its parameters (i.e. the Jacobian matrix) as a function of its parameters only * @param startPos Initial value of the parameters * @param penalty Penalty matrix * @return the least-square results */ public LeastSquareWithPenaltyResults solve(final DoubleMatrix1D observedValues, final DoubleMatrix1D sigma, final Function1D<DoubleMatrix1D, DoubleMatrix1D> func, final Function1D<DoubleMatrix1D, DoubleMatrix2D> jac, final DoubleMatrix1D startPos, final DoubleMatrix2D penalty) { return solve(observedValues, sigma, func, jac, startPos, penalty, UNCONSTRAINED); } /** * Use this when the model is given as a function of its parameters only (i.e. a function that takes a set of * parameters and return a set of model values, * so the measurement points are already known to the function), and analytic parameter sensitivity is available * @param observedValues Set of measurement values * @param sigma Set of measurement errors * @param func The model as a function of its parameters only * @param jac The model sensitivity to its parameters (i.e. the Jacobian matrix) as a function of its parameters only * @param startPos Initial value of the parameters * @param penalty Penalty matrix (must be positive semi-definite) * @param allowedValue a function which returned true if the new trial position is allowed by the model. An example * would be to enforce positive parameters * without resorting to a non-linear parameter transform. In some circumstances this approach will lead to slow * convergence. * @return the least-square results */ public LeastSquareWithPenaltyResults solve(final DoubleMatrix1D observedValues, final DoubleMatrix1D sigma, final Function1D<DoubleMatrix1D, DoubleMatrix1D> func, final Function1D<DoubleMatrix1D, DoubleMatrix2D> jac, final DoubleMatrix1D startPos, final DoubleMatrix2D penalty, final Function1D<DoubleMatrix1D, Boolean> allowedValue) { Validate.notNull(observedValues, "observedValues"); Validate.notNull(sigma, " sigma"); Validate.notNull(func, " func"); Validate.notNull(jac, " jac"); Validate.notNull(startPos, "startPos"); final int nObs = observedValues.getNumberOfElements(); Validate.isTrue(nObs == sigma.getNumberOfElements(), "observedValues and sigma must be same length"); ArgumentChecker.isTrue(allowedValue.evaluate(startPos), "The start position {} is not valid for this model. Please choose a valid start position", startPos); DoubleMatrix2D alpha; DecompositionResult decmp; DoubleMatrix1D theta = startPos; double lambda = 0.0; // TODO debug if the model is linear, it will be solved in 1 step double newChiSqr, oldChiSqr; DoubleMatrix1D error = getError(func, observedValues, sigma, theta); DoubleMatrix1D newError; DoubleMatrix2D jacobian = getJacobian(jac, sigma, theta); oldChiSqr = getChiSqr(error); double p = getANorm(penalty, theta); oldChiSqr += p; DoubleMatrix1D beta = getChiSqrGrad(error, jacobian); DoubleMatrix1D temp = (DoubleMatrix1D) _algebra.multiply(penalty, theta); beta = (DoubleMatrix1D) _algebra.subtract(beta, temp); for (int count = 0; count < MAX_ATTEMPTS; count++) { alpha = getModifiedCurvatureMatrix(jacobian, lambda, penalty); DoubleMatrix1D deltaTheta; try { decmp = _decomposition.evaluate(alpha); deltaTheta = decmp.solve(beta); } catch (final Exception e) { throw new MathException(e); } final DoubleMatrix1D trialTheta = (DoubleMatrix1D) _algebra.add(theta, deltaTheta); // if the new value of theta is not in the model domain keep increasing lambda until an acceptable step is found if (!allowedValue.evaluate(trialTheta)) { lambda = increaseLambda(lambda); continue; } newError = getError(func, observedValues, sigma, trialTheta); p = getANorm(penalty, trialTheta); newChiSqr = getChiSqr(newError); newChiSqr += p; // Check for convergence when no improvement in chiSqr occurs if (Math.abs(newChiSqr - oldChiSqr) / (1 + oldChiSqr) < _eps) { final DoubleMatrix2D alpha0 = lambda == 0.0 ? alpha : getModifiedCurvatureMatrix(jacobian, 0.0, penalty); if (lambda > 0.0) { decmp = _decomposition.evaluate(alpha0); } return finish(alpha0, decmp, newChiSqr - p, p, jacobian, trialTheta, sigma); } if (newChiSqr < oldChiSqr) { lambda = decreaseLambda(lambda); theta = trialTheta; error = newError; jacobian = getJacobian(jac, sigma, trialTheta); beta = getChiSqrGrad(error, jacobian); temp = (DoubleMatrix1D) _algebra.multiply(penalty, theta); beta = (DoubleMatrix1D) _algebra.subtract(beta, temp); oldChiSqr = newChiSqr; } else { lambda = increaseLambda(lambda); } } throw new MathException("Could not converge in " + MAX_ATTEMPTS + " attempts"); } private double decreaseLambda(final double lambda) { return lambda / 10; } private double increaseLambda(final double lambda) { if (lambda == 0.0) { // this will happen the first time a full quadratic step fails return 0.1; } return lambda * 10; } private LeastSquareWithPenaltyResults finish(final DoubleMatrix2D alpha, final DecompositionResult decmp, final double chiSqr, final double penalty, final DoubleMatrix2D jacobian, final DoubleMatrix1D newTheta, final DoubleMatrix1D sigma) { final DoubleMatrix2D covariance = decmp.solve(DoubleMatrixUtils.getIdentityMatrix2D(alpha.getNumberOfRows())); final DoubleMatrix2D bT = getBTranspose(jacobian, sigma); final DoubleMatrix2D inverseJacobian = decmp.solve(bT); return new LeastSquareWithPenaltyResults(chiSqr, penalty, newTheta, covariance, inverseJacobian); } private DoubleMatrix1D getError(final Function1D<DoubleMatrix1D, DoubleMatrix1D> func, final DoubleMatrix1D observedValues, final DoubleMatrix1D sigma, final DoubleMatrix1D theta) { final int n = observedValues.getNumberOfElements(); final DoubleMatrix1D modelValues = func.evaluate(theta); Validate.isTrue(n == modelValues.getNumberOfElements(), "Number of data points different between model (" + modelValues.getNumberOfElements() + ") and observed (" + n + ")"); final double[] res = new double[n]; for (int i = 0; i < n; i++) { res[i] = (observedValues.getEntry(i) - modelValues.getEntry(i)) / sigma.getEntry(i); } return new DoubleMatrix1D(res); } private DoubleMatrix2D getBTranspose(final DoubleMatrix2D jacobian, final DoubleMatrix1D sigma) { final int n = jacobian.getNumberOfRows(); final int m = jacobian.getNumberOfColumns(); DoubleMatrix2D res = new DoubleMatrix2D(m, n); double[][] data = res.getData(); double[][] jacData = jacobian.getData(); for (int i = 0; i < n; i++) { double sigmaInv = 1.0 / sigma.getEntry(i); for (int k = 0; k < m; k++) { data[k][i] = jacData[i][k] * sigmaInv; } } return res; } private DoubleMatrix2D getJacobian(final Function1D<DoubleMatrix1D, DoubleMatrix2D> jac, final DoubleMatrix1D sigma, final DoubleMatrix1D theta) { final DoubleMatrix2D res = jac.evaluate(theta); final double[][] data = res.getData(); final int n = res.getNumberOfRows(); final int m = res.getNumberOfColumns(); Validate.isTrue(theta.getNumberOfElements() == m, "Jacobian is wrong size"); Validate.isTrue(sigma.getNumberOfElements() == n, "Jacobian is wrong size"); for (int i = 0; i < n; i++) { double sigmaInv = 1.0 / sigma.getEntry(i); for (int j = 0; j < m; j++) { data[i][j] *= sigmaInv; } } return res; } private double getChiSqr(final DoubleMatrix1D error) { return _algebra.getInnerProduct(error, error); } private DoubleMatrix1D getChiSqrGrad(final DoubleMatrix1D error, final DoubleMatrix2D jacobian) { return (DoubleMatrix1D) _algebra.multiply(error, jacobian); } private DoubleMatrix2D getModifiedCurvatureMatrix(final DoubleMatrix2D jacobian, final double lambda, final DoubleMatrix2D penalty) { double onePLambda = 1.0 + lambda; final int m = jacobian.getNumberOfColumns(); DoubleMatrix2D alpha = (DoubleMatrix2D) MA.add(MA.matrixTransposeMultiplyMatrix(jacobian), penalty); // scale the diagonal double[][] data = alpha.getData(); for (int i = 0; i < m; i++) { data[i][i] *= onePLambda; } return alpha; } private double getANorm(final DoubleMatrix2D aM, final DoubleMatrix1D xV) { final double[][] a = aM.getData(); final double[] x = xV.getData(); final int n = x.length; double sum = 0.0; for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { sum += a[i][j] * x[i] * x[j]; } } return sum; } }