/*
 * File:                FletcherXuHybridEstimation.java
 * Authors:             Kevin R. Dixon
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 *
 * Copyright Jul 4, 2008, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government.
 * Export of this program may require a license from the United States
 * Government. See CopyrightHistory.txt for complete details.
 *
 */

package gov.sandia.cognition.learning.algorithm.regression;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.algorithm.minimization.FunctionMinimizerBFGS;
import gov.sandia.cognition.learning.algorithm.minimization.MinimizationStoppingCriterion;
import gov.sandia.cognition.learning.algorithm.minimization.line.DirectionalVectorToDifferentiableScalarFunction;
import gov.sandia.cognition.learning.algorithm.minimization.line.LineMinimizer;
import gov.sandia.cognition.learning.algorithm.minimization.line.LineMinimizerDerivativeFree;
import gov.sandia.cognition.learning.data.InputOutputPair;
import gov.sandia.cognition.learning.function.cost.SumSquaredErrorCostFunction;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.MatrixFactory;
import gov.sandia.cognition.math.matrix.Vector;
import gov.sandia.cognition.util.ObjectUtil;

/**
 * The Fletcher-Xu hybrid estimation algorithm for solving nonlinear
 * least-squares parameter-estimation problems. The FX method is a hybrid
 * between the BFGS Quasi-Newton method and the Gauss-Newton minimization
 * procedure. We have slightly modified the original algorithm to choose
 * between the BFGS and Levenberg-Marquardt (Tikhonov regularization, or
 * ridge regression) update formulae, as the LMA update is more stable than
 * Gauss-Newton and produces better results on my test battery.
 * <BR><BR>
 * The motivation behind the FX hybrid is that Gauss-Newton (and
 * Levenberg-Marquardt) has quadratic convergence near the optimum, but only
 * linear convergence when the parameters are far from optimal. BFGS always
 * demonstrates superlinear convergence, even on nonoptimal parameter sets.
 * The FX hybrid attempts to use BFGS when the solutions are far from
 * optimal, but switches to Gauss-Newton (Levenberg-Marquardt in our
 * implementation) when its quadratic convergence properties can be brought
 * to bear.
 * <BR><BR>
 * Generally speaking, the FX hybrid is the most efficient and effective
 * parameter-estimation procedure I know of. However, FX requires the
 * storage of an estimated Hessian inverse, which is O(N*N) space, where "N"
 * is the number of parameters to estimate.
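 * <BR><BR>
 * A minimal usage sketch, assuming the {@code setObjectToOptimize} and
 * {@code learn} methods inherited from the superclass; {@code f} and
 * {@code data} are hypothetical placeholders for a differentiable function
 * and a dataset:
 * <pre>{@code
 * // f is a GradientDescendable whose parameters will be estimated;
 * // data is a Collection<InputOutputPair<Vector, Double>> of samples.
 * FletcherXuHybridEstimation learner = new FletcherXuHybridEstimation();
 * learner.setObjectToOptimize( f );
 * GradientDescendable result = learner.learn( data );
 * }</pre>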
 *
 * @author Kevin R. Dixon
 * @since  2.1
 */
@PublicationReferences(
    references={
        @PublicationReference(
            author="R. Fletcher",
            title="Practical Methods of Optimization, Second Edition",
            type=PublicationType.Book,
            year=1987,
            pages={116,117},
            notes="Section 6.1 motivates the algorithm w.r.t. Gauss-Newton, BFGS, and Levenberg-Marquardt"
        ),
        @PublicationReference(
            author={
                "R. Fletcher",
                "C. Xu"
            },
            title="Hybrid Methods for Nonlinear Least Squares",
            type=PublicationType.Journal,
            year=1987,
            pages={371,389},
            publication="Institute of Mathematics and its Applications Journal of Numerical Analysis"
        )
    }
)
public class FletcherXuHybridEstimation
    extends LeastSquaresEstimator
{

    /**
     * Reduction test from Equation 6.1.16 in Fletcher PMOO; the value 0.2 is
     * given on page 117. Lower values result in more Gauss-Newton steps,
     * larger values (up to 1.0) result in more BFGS steps. Default is
     * {@value}. In my test battery, 0.2 also performs the best.
     */
    public static final double DEFAULT_REDUCTION_TEST = 0.2;

    /**
     * Divisor of the damping factor on a cost-reducing iteration; the
     * damping factor is multiplied by this amount on other iterations.
     * Default is {@value}.
     */
    public static final double DEFAULT_DAMPING_DIVISOR = 2.0;

    /**
     * Default line minimization algorithm, LineMinimizerDerivativeFree.
     */
    public static final LineMinimizer<?> DEFAULT_LINE_MINIMIZER =
        new LineMinimizerDerivativeFree();

    /**
     * Workhorse algorithm that finds the minimum along a particular direction.
     */
    private LineMinimizer<?> lineMinimizer;

    /**
     * Reduction test for switching between BFGS and Levenberg-Marquardt, must
     * be [0,1]. Lower values result in more Levenberg-Marquardt steps,
     * larger values result in more BFGS steps.
     */
    private double reductionTest;

    /**
     * Amount to modify the damping factor, typically 2.0 or 10.0.
     */
    private double dampingFactorDivisor;

    /**
     * Creates a new instance of FletcherXuHybridEstimation
     */
    public FletcherXuHybridEstimation()
    {
        this( ObjectUtil.cloneSafe( DEFAULT_LINE_MINIMIZER ) );
    }

    /**
     * Creates a new instance of FletcherXuHybridEstimation
     * @param lineMinimizer
     * Workhorse algorithm that finds the minimum along a particular direction
     */
    public FletcherXuHybridEstimation(
        LineMinimizer<?> lineMinimizer )
    {
        this( lineMinimizer, DEFAULT_REDUCTION_TEST );
    }

    /**
     * Creates a new instance of FletcherXuHybridEstimation
     *
     * @param lineMinimizer
     * Workhorse algorithm that finds the minimum along a particular direction
     * @param reductionTest
     * Reduction test for switching between BFGS and Levenberg-Marquardt, must
     * be [0,1]. Lower values result in more Levenberg-Marquardt steps,
     * larger values result in more BFGS steps.
     */
    public FletcherXuHybridEstimation(
        LineMinimizer<?> lineMinimizer,
        double reductionTest )
    {
        this( lineMinimizer, reductionTest, DEFAULT_DAMPING_DIVISOR );
    }

    /**
     * Creates a new instance of FletcherXuHybridEstimation
     *
     * @param lineMinimizer
     * Workhorse algorithm that finds the minimum along a particular direction
     * @param reductionTest
     * Reduction test for switching between BFGS and Levenberg-Marquardt, must
     * be [0,1]. Lower values result in more Levenberg-Marquardt steps,
     * larger values result in more BFGS steps.
     * @param dampingFactorDivisor
     * Amount to modify the damping factor, typically 2.0 or 10.0
     */
    public FletcherXuHybridEstimation(
        LineMinimizer<?> lineMinimizer,
        double reductionTest,
        double dampingFactorDivisor )
    {
        this( lineMinimizer, reductionTest, dampingFactorDivisor,
            DEFAULT_MAX_ITERATIONS, DEFAULT_TOLERANCE );
    }

    /**
     * Creates a new instance of FletcherXuHybridEstimation
     *
     * @param lineMinimizer
     * Workhorse algorithm that finds the minimum along a particular direction
     * @param reductionTest
     * Reduction test for switching between BFGS and Levenberg-Marquardt, must
     * be [0,1]. Lower values result in more Levenberg-Marquardt steps,
     * larger values result in more BFGS steps.
     * @param dampingFactorDivisor
     * Amount to modify the damping factor, typically 2.0 or 10.0
     * @param maxIterations
     * Maximum number of iterations before stopping
     * @param tolerance
     * Tolerance of the algorithm
     */
    public FletcherXuHybridEstimation(
        LineMinimizer<?> lineMinimizer,
        double reductionTest,
        double dampingFactorDivisor,
        int maxIterations,
        double tolerance )
    {
        super( maxIterations, tolerance );
        this.setLineMinimizer( lineMinimizer );
        this.setReductionTest( reductionTest );
        this.setDampingFactorDivisor( dampingFactorDivisor );
    }

    /**
     * Last value of the parameter cost
     */
    private SumSquaredErrorCostFunction.Cache lastCost;

    /**
     * Function that maps an Evaluator<Vector,Double> onto an
     * Evaluator<Double,Double> using a set point, direction, and scale factor
     */
    private DirectionalVectorToDifferentiableScalarFunction lineFunction;

    /**
     * Estimated inverse of the Hessian (second derivative)
     */
    private Matrix hessianInverse;
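    /**
     * Initializes the search: clones the object to optimize, computes the
     * initial cost cache, points the line search along the steepest-descent
     * direction, and seeds the inverse-Hessian estimate with a scaled
     * identity matrix.
     */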
    @Override
    protected boolean initializeAlgorithm()
    {
        this.setResult( this.getObjectToOptimize().clone() );
        this.getCostFunction().setCostParameters( this.getData() );
        this.dampingFactor = 1.0;

        this.lastCost = SumSquaredErrorCostFunction.Cache.compute(
            this.getResult(), this.getData() );

        ParameterDifferentiableCostMinimizer.ParameterCostEvaluatorDerivativeBased f =
            new ParameterDifferentiableCostMinimizer.ParameterCostEvaluatorDerivativeBased(
                this.getResult(), this.getCostFunction() );

        // Load up the line function with the current parameters and the
        // search direction, which is the negative gradient, in other words
        // the direction of steepest descent
        Vector parameters = this.getResult().convertToVector();
        int M = parameters.getDimensionality();
        this.lineFunction = new DirectionalVectorToDifferentiableScalarFunction(
            f, parameters, this.lastCost.Jte );

        this.hessianInverse =
            MatrixFactory.getDefault().createIdentity( M, M ).scale( 0.5 );
        return true;
    }

    /**
     * Damping factor for the Levenberg-Marquardt ridge regression
     */
    private double dampingFactor;
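    /**
     * Takes one iteration: minimizes along the current search direction,
     * then applies the reduction test (Equation 6.1.16 in Fletcher PMOO) to
     * choose the next direction. A sufficiently large relative cost
     * reduction selects a damped Levenberg-Marquardt (ridge-regression)
     * step; otherwise the inverse-Hessian estimate is updated and a BFGS
     * quasi-Newton step is taken.
     */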
    @Override
    protected boolean step()
    {
        InputOutputPair<Vector,Double> result =
            this.getLineMinimizer().minimizeAlongDirection( this.lineFunction,
                this.lastCost.parameterCost, this.lastCost.Jte );

        Vector lastParameters = this.lineFunction.getVectorOffset();
        Vector delta = result.getInput().minus( lastParameters );
        this.lineFunction.setVectorOffset( result.getInput() );
        this.getResult().convertFromVector( result.getInput() );

        // If the trial parameters reduce the cost, then accept them
        SumSquaredErrorCostFunction.Cache cache =
            SumSquaredErrorCostFunction.Cache.compute(
                this.getResult(), this.getData() );
        this.setResultCost( cache.parameterCost );

        // Equation 6.1.16 in Fletcher PMOO
        if( this.getReductionTest()*this.lastCost.parameterCost <=
            (this.lastCost.parameterCost - cache.parameterCost) )
        {
            // On my test battery, taking a Levenberg-Marquardt step performs
            // better here than a Gauss-Newton step. I've tried pretty much
            // every variant on these parameters, and this is what came out
            // the best. Among the variants I tried:
            // Gauss-Newton:     direction = cache.JtJ.solve(Jte);
            // Steepest descent: direction = Jte;
            Matrix JtJpI = cache.JtJ.scale(-1.0);
            Vector Jte = cache.Jte;
            int M = JtJpI.getNumRows();
            for( int i = 0; i < M; i++ )
            {
                // Again, the damping factor is subtracted, rather than added,
                // to compensate for the sign flip between the gradient and
                // the search direction
                double v = JtJpI.getElement(i, i);
                JtJpI.setElement(i, i, v - this.dampingFactor );
            }

            // This is the ridge-regression (Tikhonov regularization) step
            // to solve for the parameter change
            Vector direction = JtJpI.solve(Jte);
            this.dampingFactor /= this.getDampingFactorDivisor();

            double directionNorm = direction.norm2();
            if( directionNorm > GaussNewtonAlgorithm.STEP_MAX )
            {
                direction.scaleEquals(
                    GaussNewtonAlgorithm.STEP_MAX / directionNorm );
            }

            // Take a Levenberg-Marquardt step
            this.lineFunction.setDirection( direction );
        }
        else
        {
            // Suggested by Fletcher, PMOO p.117, Equation 6.1.15.
            // I've modified it slightly; the last term should be:
            // cache.J.transpose().minus( lastCost.J ).times( cache.error )
            // However, I don't cache the errors individually, so this is
            // as close as I can get... It seems to be very effective.
            Vector gamma = cache.JtJ.times(
                this.lineFunction.getDirection().scale( 2.0 ) ).plus(
                    cache.Jte.minus( lastCost.Jte ) );
            FunctionMinimizerBFGS.BFGSupdateRule(
                this.hessianInverse, delta, gamma, this.getTolerance() );

            // Take a BFGS step
            Vector direction = this.hessianInverse.times( cache.Jte );
            this.lineFunction.setDirection( direction );
            this.dampingFactor *= this.getDampingFactorDivisor();
        }

        this.lastCost = cache;
        return !MinimizationStoppingCriterion.convergence( result.getInput(),
            result.getOutput(), delta, cache.Jte, this.getTolerance() );
    }

    @Override
    protected void cleanupAlgorithm()
    {
    }

    /**
     * Getter for reductionTest
     * @return
     * Reduction test for switching between BFGS and Levenberg-Marquardt, must
     * be [0,1]. Lower values result in more Levenberg-Marquardt steps,
     * larger values result in more BFGS steps.
     */
    public double getReductionTest()
    {
        return this.reductionTest;
    }

    /**
     * Setter for reductionTest
     * @param reductionTest
     * Reduction test for switching between BFGS and Levenberg-Marquardt, must
     * be [0,1]. Lower values result in more Levenberg-Marquardt steps,
     * larger values result in more BFGS steps.
     */
    public void setReductionTest(
        double reductionTest )
    {
        if( (reductionTest < 0.0) || (reductionTest > 1.0) )
        {
            throw new IllegalArgumentException( "reductionTest must be [0,1]" );
        }
        this.reductionTest = reductionTest;
    }

    /**
     * Getter for dampingFactorDivisor
     * @return
     * Amount to modify the damping factor, typically 2.0 or 10.0
     */
    public double getDampingFactorDivisor()
    {
        return this.dampingFactorDivisor;
    }

    /**
     * Setter for dampingFactorDivisor
     * @param dampingFactorDivisor
     * Amount to modify the damping factor, typically 2.0 or 10.0
     */
    public void setDampingFactorDivisor(
        double dampingFactorDivisor )
    {
        this.dampingFactorDivisor = dampingFactorDivisor;
    }

    /**
     * Getter for lineMinimizer
     * @return
     * Workhorse algorithm that finds the minimum along a particular direction
     */
    public LineMinimizer<?> getLineMinimizer()
    {
        return this.lineMinimizer;
    }

    /**
     * Setter for lineMinimizer
     * @param lineMinimizer
     * Workhorse algorithm that finds the minimum along a particular direction
     */
    public void setLineMinimizer(
        LineMinimizer<?> lineMinimizer )
    {
        this.lineMinimizer = lineMinimizer;
    }

}