/*
 * File: FunctionMinimizerConjugateGradient.java
 * Authors: Kevin R. Dixon
 * Company: Sandia National Laboratories
 * Project: Cognitive Foundry
 *
 * Copyright November 7, 2007, Sandia Corporation. Under the terms of Contract
 * DE-AC04-94AL85000, there is a non-exclusive license for use of this work by
 * or on behalf of the U.S. Government. Export of this program may require a
 * license from the United States Government. See CopyrightHistory.txt for
 * complete details.
 *
 */

package gov.sandia.cognition.learning.algorithm.minimization;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.algorithm.minimization.line.DirectionalVectorToDifferentiableScalarFunction;
import gov.sandia.cognition.learning.algorithm.minimization.line.LineMinimizer;
import gov.sandia.cognition.learning.algorithm.minimization.line.LineMinimizerDerivativeBased;
import gov.sandia.cognition.learning.data.DefaultInputOutputPair;
import gov.sandia.cognition.math.DifferentiableEvaluator;
import gov.sandia.cognition.math.matrix.Vector;

/**
 * The conjugate gradient (CG) method is a class of algorithms for finding the
 * unconstrained local minimum of a nonlinear function. CG algorithms locate
 * the local minimum by performing a sequence of line searches along
 * "conjugate gradient" directions, using first-order (gradient) information.
 * The particular approaches vary only slightly, according to how the search
 * direction is updated; each concrete subclass supplies its update rule
 * through computeScaleFactor (an illustrative subclass sketch and usage
 * sketch appear at the bottom of this file). However, in my experience, the
 * Liu-Storey CG variant (FunctionMinimizerLiuStorey) performs the best.
 * <BR><BR>
 * All CG variants tend to require more function/gradient evaluations than
 * their Quasi-Newton cousins. However, CG methods only require O(N) storage,
 * whereas Quasi-Newton algorithms (FunctionMinimizerQuasiNewton) require
 * O(N*N) storage, where N is the size of the input space. So, if your input
 * space is large, then CG algorithms may be the method of choice. In any
 * case, the Liu-Storey CG variant tends to perform fairly well compared to
 * the best Quasi-Newton algorithms, BFGS in particular.
 *
 * @see FunctionMinimizerQuasiNewton
 *
 * @author Kevin R. Dixon
 * @since 2.1
 */
@PublicationReferences(
    references={
        @PublicationReference(
            author="R. Fletcher",
            title="Practical Methods of Optimization, Second Edition",
            type=PublicationType.Book,
            year=1987,
            pages={80,87},
            notes="Section 4.1"
        ),
        @PublicationReference(
            author="Wikipedia",
            title="Nonlinear conjugate gradient method",
            type=PublicationType.WebPage,
            url="http://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method",
            year=2008
        ),
        @PublicationReference(
            author={
                "William H. Press",
                "Saul A. Teukolsky",
                "William T. Vetterling",
                "Brian P. Flannery"
            },
            title="Numerical Recipes in C, Second Edition",
            type=PublicationType.Book,
            year=1992,
            pages={423,424},
            notes="Section 10.6",
            url="http://www.nrbook.com/a/bookcpdf.php"
        )
    }
)
public abstract class FunctionMinimizerConjugateGradient
    extends AbstractAnytimeFunctionMinimizer<Vector, Double,
        DifferentiableEvaluator<? super Vector, Double, Vector>>
{

    /**
     * Default maximum number of iterations before stopping, {@value}
     */
    public static final int DEFAULT_MAX_ITERATIONS = 1000;

    /**
     * Default tolerance, {@value}
     */
    public static final double DEFAULT_TOLERANCE = 1e-5;

    /**
     * Test for convergence on change in y, {@value}
     */
    private static final double TOLERANCE_DELTA_Y = 1e-10;

    /**
     * Default line minimization algorithm, LineMinimizerDerivativeBased
     */
    public static final LineMinimizer<?> DEFAULT_LINE_MINIMIZER =
        new LineMinimizerDerivativeBased();
        // new LineMinimizerDerivativeFree();

    /**
     * Work-horse algorithm that minimizes the function along a direction
     */
    private LineMinimizer<?> lineMinimizer;

    /**
     * Creates a new instance of FunctionMinimizerConjugateGradient
     *
     * @param lineMinimizer
     * Work-horse algorithm that minimizes the function along a direction
     * @param initialGuess
     * Initial guess about the location of the minimum
     * @param tolerance
     * Tolerance of the minimization algorithm, must be >= 0.0, typically ~1e-10
     * @param maxIterations
     * Maximum number of iterations, must be >0, typically ~100
     */
    public FunctionMinimizerConjugateGradient(
        LineMinimizer<?> lineMinimizer,
        Vector initialGuess,
        double tolerance,
        int maxIterations )
    {
        super( initialGuess, tolerance, maxIterations );
        this.setLineMinimizer( lineMinimizer );
    }

    /**
     * Function that maps an Evaluator<Vector,Double> onto an
     * Evaluator<Double,Double> using a set point, direction and scale factor
     */
    protected DirectionalVectorToDifferentiableScalarFunction lineFunction;

    /**
     * Gradient at the current guess
     */
    private Vector gradient;

    /**
     * Getter for lineMinimizer
     * @return
     * Work-horse algorithm that minimizes the function along a direction
     */
    public LineMinimizer<?> getLineMinimizer()
    {
        return this.lineMinimizer;
    }

    /**
     * Setter for lineMinimizer
     * @param lineMinimizer
     * Work-horse algorithm that minimizes the function along a direction
     */
    public void setLineMinimizer(
        LineMinimizer<?> lineMinimizer )
    {
        this.lineMinimizer = lineMinimizer;
    }

    @Override
    protected boolean initializeAlgorithm()
    {
        // Evaluate the function and its gradient at the initial guess, and
        // point the first line search along the direction of steepest
        // descent (the negative gradient).
        this.result = new DefaultInputOutputPair<Vector, Double>(
            this.initialGuess, this.data.evaluate( this.initialGuess ) );

        this.gradient = this.data.differentiate( this.initialGuess );
        this.lineFunction = new DirectionalVectorToDifferentiableScalarFunction(
            this.data, this.initialGuess, this.gradient.scale( -1.0 ) );

        return true;
    }

    @Override
    protected boolean step()
    {
        // Rename the function "this.data" to "f" to minimize confusion.
        DifferentiableEvaluator<? super Vector, ? extends Double, Vector> f =
            this.data;

        Vector xold = this.result.getInput();

        // Find the minimum along this search direction
        this.result = this.lineMinimizer.minimizeAlongDirection(
            this.lineFunction, this.result.getOutput(), this.gradient );

        Vector xnew = this.result.getInput();
        double fnew = this.result.getOutput();

        // Save off the previous gradient
        Vector gradientOld = this.gradient;

        // See if I've already computed the gradient information
        // NOTE: It's possible that there's still an inefficiency here.
        // For example, we could have computed the gradient for "xnew"
        // previous to the last evaluation. But this would require a
        // tremendous amount of bookkeeping and memory.
        if( (this.lineFunction.getLastGradient() != null) &&
            (this.lineFunction.getLastGradient().getInput().equals( xnew )) )
        {
            this.gradient = this.lineFunction.getLastGradient().getOutput();
        }
        else
        {
            this.gradient = f.differentiate( xnew );
        }

        // Test for almost zero-gradient convergence
        if( MinimizationStoppingCriterion.convergence(
            xnew, fnew, this.gradient, xnew.minus( xold ), this.getTolerance() ) )
        {
            return false;
        }

        double beta;

        // This is how often we will reset the search direction and
        // re-initialize it as the direction of steepest descent
        int resetPeriod = this.gradient.getDimensionality() * 2;
        if( ((this.getIteration() + 1) % resetPeriod) == 0 )
        {
            // Periodic restart: beta = 0.0 discards the old direction, so
            // the next direction is pure steepest descent.
            beta = 0.0;
        }
        else
        {
            beta = this.computeScaleFactor( this.gradient, gradientOld );
        }

        // Conjugate-gradient direction update:
        // direction_new = beta * direction_old - gradient
        Vector newDirection = this.lineFunction.getDirection().scale(
            beta ).minus( this.gradient );

        this.lineFunction.setDirection( newDirection );
        this.lineFunction.setVectorOffset( xnew );

        return true;
    }

    @Override
    protected void cleanupAlgorithm()
    {
    }

    /**
     * Computes the conjugate gradient parameter for the particular update
     * scheme.
     * @param gradientCurrent
     * Gradient at the current evaluation point
     * @param gradientPrevious
     * Gradient at the previous evaluation point
     * @return
     * "beta" scale factor
     */
    protected abstract double computeScaleFactor(
        Vector gradientCurrent,
        Vector gradientPrevious );

}
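
/*
 * Illustrative sketch, not part of the original class: a minimal concrete
 * subclass showing how a CG variant plugs its direction-update rule into
 * computeScaleFactor(). It uses the classic Fletcher-Reeves formula,
 *
 *     beta = (g_new . g_new) / (g_old . g_old),
 *
 * purely as an example of the technique; the Foundry's own shipped variants
 * (e.g., FunctionMinimizerLiuStorey) should be preferred in practice.
 */
class ExampleFletcherReevesMinimizer
    extends FunctionMinimizerConjugateGradient
{

    /**
     * Creates the example minimizer with a fresh default line minimizer and
     * the default tolerance and iteration limit.
     * @param initialGuess Initial guess about the location of the minimum
     */
    ExampleFletcherReevesMinimizer(
        Vector initialGuess )
    {
        super( new LineMinimizerDerivativeBased(), initialGuess,
            DEFAULT_TOLERANCE, DEFAULT_MAX_ITERATIONS );
    }

    @Override
    protected double computeScaleFactor(
        Vector gradientCurrent,
        Vector gradientPrevious )
    {
        // Fletcher-Reeves: ratio of squared gradient norms, guarding
        // against division by zero at a stationary point.
        double denominator = gradientPrevious.dotProduct( gradientPrevious );
        return (denominator != 0.0)
            ? gradientCurrent.dotProduct( gradientCurrent ) / denominator
            : 0.0;
    }

}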
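
/*
 * Usage sketch, an assumption for illustration rather than text from the
 * original file: like the other Foundry minimizers, this class is driven
 * through the BatchLearner interface, so something along these lines would
 * locate a local minimum of a DifferentiableEvaluator "f":
 *
 *     FunctionMinimizerConjugateGradient cg =
 *         new ExampleFletcherReevesMinimizer( initialGuess );
 *     InputOutputPair<Vector, Double> minimum = cg.learn( f );
 *
 * where minimum.getInput() is the located minimizer x and
 * minimum.getOutput() is f(x).
 */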