FunctionMinimizerQuasiNewton.java example

Explorer
Foundry-master
- Components
/*
 * File:                FunctionMinimizerQuasiNewton.java
 * Authors:             Kevin R. Dixon
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright Jun 20, 2008, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government. 
 * Export of this program may require a license from the United States
 * Government. See CopyrightHistory.txt for complete details.
 * 
 */

package gov.sandia.cognition.learning.algorithm.minimization;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.algorithm.minimization.line.DirectionalVectorToDifferentiableScalarFunction;
import gov.sandia.cognition.learning.algorithm.minimization.line.LineMinimizer;
import gov.sandia.cognition.learning.algorithm.minimization.line.LineMinimizerDerivativeBased;
import gov.sandia.cognition.learning.data.DefaultInputOutputPair;
import gov.sandia.cognition.math.DifferentiableEvaluator;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.MatrixFactory;
import gov.sandia.cognition.math.matrix.Vector;

/**
 * This is an abstract implementation of the Quasi-Newton minimization method,
 * sometimes called "Variable-Metric methods."
 * This family of minimization algorithms uses first-order gradient information
 * to find a locally minimum to a scalar function.  Quasi-Newton methods
 * perform this minimization by creating an estimate of the inverse
 * of the Hessian to compute a new search direction.  A line search yields
 * the next point from which to estimate the function curvature (that is,
 * the estimate of the Hessian inverse).  Please note: Quasi-Newton algorithms
 * estimate the INVERSE of the Hessian and never actually invert/solve for
 * a matrix, making them extremely fast.
 * <BR><BR>
 * It is generally agreed that Quasi-Newton methods are the fastest
 * minimization algorithms that use one first-order gradient information.  In
 * particular, the BFGS update formula to the Quasi-Newton algorithm
 * (FunctionMinimizerBFGS) is generally regarded as the fastest unconstrained
 * optimizer out there, and is my method of choice.  However, all Quasi-Newton
 * methods require storage of the inverse Hessian, which can become quite
 * large.  If you cannot store the inverse Hessian estimate in memory, then
 * I would recommend using the Liu-Storey Conjugate Gradient algorithm instead
 * (FunctionMinimizerLiuStorey).
 * <BR><BR>
 * Generally speaking, Quasi-Newton methods require first-order gradient
 * information.  If you do not have access to gradients, then I would recommend
 * using automatic finite-difference approximations over the derivative-free
 * minimization algorithms.
 *
 * @author Kevin R. Dixon
 * @since 2.1
 */
@PublicationReferences(
    references={
        @PublicationReference(
            author="R. Fletcher",
            title="Practical Methods of Optimization, Second Edition",
            type=PublicationType.Book,
            year=1987,
            pages={49, 57},
            notes="Section 3.2"
        ),
        @PublicationReference(
            author="Wikipedia",
            title="Quasi-Newton method",
            type=PublicationType.WebPage,
            year=2008,
            url="http://en.wikipedia.org/wiki/Quasi-Newton_methods"
        ),
        @PublicationReference(
            author={
                "William H. Press",
                "Saul A. Teukolsky",
                "William T. Vetterling",
                "Brian P. Flannery"
            },
            title="Numerical Recipes in C, Second Edition",
            type=PublicationType.Book,
            year=1992,
            pages={425,430},
            notes="Section 10.7",
            url="http://www.nrbook.com/a/bookcpdf.php"
       )
    }
)
public abstract class FunctionMinimizerQuasiNewton 
    extends AbstractAnytimeFunctionMinimizer<Vector, Double, DifferentiableEvaluator<? super Vector, Double, Vector>>
{
    
    /**
     * Default maximum number of iterations before stopping, {@value}
     */
    public static final int DEFAULT_MAX_ITERATIONS = 1000;

    /**
     * Default tolerance, {@value}
     */
    public static final double DEFAULT_TOLERANCE = 1e-5;

    /**
     * Default line minimization algorithm, LineMinimizerDerivativeBased
     */
    public static final LineMinimizer<?> DEFAULT_LINE_MINIMIZER =
//        new LineMinimizerBacktracking();
        new LineMinimizerDerivativeBased();
//        new LineMinimizerDerivativeFree();

    /**
     * Work-horse algorithm that minimizes the function along a direction
     */
    private LineMinimizer<?> lineMinimizer;    
    
    /**
     * Creates a new instance of FunctionMinimizerBFGS
     * 
     * @param initialGuess Initial guess about the minimum of the method
     * @param tolerance Tolerance of the minimization algorithm, must be >= 0.0, typically ~1e-10
     * @param lineMinimizer 
     * Work-horse algorithm that minimizes the function along a direction
     * @param maxIterations Maximum number of iterations, must be >0, typically ~100
     */
    public FunctionMinimizerQuasiNewton(
        LineMinimizer<?> lineMinimizer,
        Vector initialGuess,
        double tolerance,
        int maxIterations )
    {
        super( initialGuess, tolerance, maxIterations );
        this.setLineMinimizer( lineMinimizer );
    }
    
    /**
     * Function that maps a Evaluator<Vector,Double> onto a 
     * Evaluator<Double,Double> using a set point, direction and scale factor
     */
    private DirectionalVectorToDifferentiableScalarFunction lineFunction;

    /**
     * Estimated inverse of the Hessian (second derivative)
     */
    private Matrix hessianInverse;

    /**
     * Gradient at the current guess
     */
    private Vector gradient;

    /**
     * The dimensionality of the input space
     */
    private int dimensionality;    
    
    @Override
    protected boolean initializeAlgorithm()
    {
        // Due to bizarre inheretance, this.data is the function to minimize...
        DifferentiableEvaluator<? super Vector, Double, Vector> f = this.data;
        
        this.result = new DefaultInputOutputPair<Vector, Double>(
            this.initialGuess.clone(), f.evaluate( this.initialGuess ) );

        this.gradient = f.differentiate( this.initialGuess );

        // Load up the line function with the current direction and
        // the search direction, which is the negative gradient, in other words
        // the direction of steepest descent
        this.lineFunction = new DirectionalVectorToDifferentiableScalarFunction(
            f, this.result.getInput(), this.gradient.scale( -1.0 ) );
        
        this.dimensionality = this.initialGuess.getDimensionality();
        this.hessianInverse = MatrixFactory.getDefault().createIdentity(
            this.dimensionality, this.dimensionality )
            .scale( 0.5 )
//            .scale( Math.sqrt(this.getTolerance()) )
            ;

        return true;
    }

    @Override
    protected boolean step()
    {
        // Search along the gradient for the minimum value
        Vector xold = this.result.getInput();
        
        this.result = this.lineMinimizer.minimizeAlongDirection(
            this.lineFunction, this.result.getOutput(), this.gradient );

        Vector xnew = this.result.getInput();
        double fnew = this.result.getOutput();

        this.lineFunction.setVectorOffset( xnew );

        // Let's cache some values for speed
        Vector gradientOld = this.gradient;
        
        // See if I've already computed the gradient information
        // NOTE: It's possible that there's still an inefficiency here.
        // For example, we could have computed the gradient for "xnew"
        // previous to the last evaluation.  But this would require a
        // tremendous amount of bookkeeping and memory.
        if( (this.lineFunction.getLastGradient() != null) &&
            (this.lineFunction.getLastGradient().getInput().equals( xnew )) )
        {
            this.gradient = this.lineFunction.getLastGradient().getOutput();
        }
        else
        {
            this.gradient = this.data.differentiate( xnew );
        }

        // Start caching vectors and dot products for the BFGS update...
        // this notation is taken from Wikipedia
        Vector gamma = this.gradient.minus( gradientOld );
        Vector delta = xnew.minus( xold );
       
        // If we've converged on zero slope, then we're done!
        if( MinimizationStoppingCriterion.convergence(
            xnew, fnew, this.gradient, delta, this.getTolerance() ) )
        {
            return false;
        }
        
        // Call the particular Quasi-Newton update rule
        this.updateHessianInverse( this.hessianInverse, delta, gamma );
        this.lineFunction.setDirection(
            this.hessianInverse.times( this.gradient ).scale( -1.0 ) );

        return true;
    }

    /**
     * The step that makes BFGS/DFP/SR1 different from each other.  This
     * embodies the specific Quasi-Newton update step
     * @param hessianInverse 
     * Current estimate of the Hessian inverse.  Must be modified!
     * @param delta
     * Change in the search points (xnew-xold)
     * @param gamma
     * Change in the gradients (gnew-gold)
     * @return
     * True if Hessian was modified, false otherwise (due to numerical 
     * instability, etc.)
     */
    protected abstract boolean updateHessianInverse(
        Matrix hessianInverse,
        Vector delta,
        Vector gamma );
    
    @Override
    protected void cleanupAlgorithm()
    {
    }
    
    /**
     * Getter for lineMinimizer
     * @return
     * Work-horse algorithm that minimizes the function along a direction
     */
    public LineMinimizer<?> getLineMinimizer()
    {
        return this.lineMinimizer;
    }

    /**
     * Setter for lineMinimizer
     * @param lineMinimizer
     * Work-horse algorithm that minimizes the function along a direction
     */
    public void setLineMinimizer(
        LineMinimizer<?> lineMinimizer )
    {
        this.lineMinimizer = lineMinimizer;
    }

}