/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a>
*/
/**
Numerical Recipes in C: p.385. lnsrch. A simple backtracking line
search. No attempt at accurately finding the true minimum is
made. The goal is only to ensure that BackTrackLineSearch will
return a position of higher value.
*/
package cc.mallet.optimize;
import java.util.logging.*;
import java.util.Arrays;
import cc.mallet.fst.CRF;
import cc.mallet.optimize.LineOptimizer;
import cc.mallet.optimize.Optimizable;
import cc.mallet.types.MatrixOps;
// "Line Searches and Backtracking", p. 385, "Numerical Recipes in C"
/**
 * A simple backtracking line search for <em>maximizing</em> a function,
 * adapted from the <tt>lnsrch</tt> routine of "Numerical Recipes in C".
 *
 * Starting from the full step along the supplied direction, the step
 * fraction is shrunk (by quadratic, then cubic interpolation) until the
 * function value has increased sufficiently. No attempt is made to locate
 * the true maximum along the line.
 */
public class BackTrackLineSearch implements LineOptimizer.ByGradient
{
  private static final Logger logger = Logger.getLogger(BackTrackLineSearch.class.getName());

  /** The function being maximized. */
  Optimizable.ByGradientValue function;

  public BackTrackLineSearch (Optimizable.ByGradientValue optimizable) {
    this.function = optimizable;
  }

  /** Maximum number of backtracking iterations before giving up with an exception. */
  final int maxIterations = 100;
  /** Directions whose two-norm exceeds this are rescaled to this length. */
  final double stpmax = 100;
  final double EPS = 3.0e-12;

  // termination conditions: either
  //   a) abs(delta x/x) < relTolx for all coordinates
  //   b) abs(delta x) <= absTolx for all coordinates
  //   c) sufficient function increase (uses ALF)
  private double relTolx = 1e-7;
  private double absTolx = 1e-4; // tolerance on absolute value difference
  /** Fraction of the predicted (first-order) increase that must be achieved. */
  final double ALF = 1e-4;

  /**
   * Sets the tolerance on the relative change in the parameters.
   * The search gives up (returning 0.0) once the step is so small that
   * {@code abs(delta x) / max(abs(x), 1) < tolx} for all coordinates.
   */
  public void setRelTolx (double tolx) { relTolx = tolx; }

  /**
   * Sets the tolerance on the absolute change in the parameters.
   * The search gives up (returning 0.0) once no coordinate would move by
   * more than {@code tolx}.
   */
  public void setAbsTolx (double tolx) { absTolx = tolx; }

  /**
   * Searches along <tt>line</tt> for a step that sufficiently increases the
   * function value, leaving the function's parameters at the accepted point.
   *
   * @param line the search direction; it is rescaled <em>in place</em> when
   *   its two-norm exceeds <tt>stpmax</tt>
   * @param initialStep ignored. If the initial step is not 1.0 it sometimes
   *   confuses the backtracking (the jump gets larger on iteration 1), so
   *   the search always starts from the full step.
   * @return the accepted fraction of <tt>line</tt> (alam), or 0.0 if the
   *   step became too small; in that case the original parameters are restored
   * @throws InvalidOptimizableException if the directional derivative along
   *   <tt>line</tt> is negative or zero, i.e. <tt>line</tt> is not an ascent direction
   * @throws IllegalStateException if <tt>maxIterations</tt> is exhausted, or
   *   an accepted step has a value below the starting value
   */
  public double optimize (double[] line, double initialStep)
  {
    // Evaluate the log level once so that disabled FINE logging costs
    // neither string building nor the O(n) norm computations below.
    final boolean logFine = logger.isLoggable(Level.FINE);

    final int numParameters = function.getNumParameters();
    double[] g = new double[numParameters]; // gradient
    double[] x = new double[numParameters]; // parameters
    double[] oldParameters = new double[numParameters];

    function.getParameters (x);
    System.arraycopy (x, 0, oldParameters, 0, x.length);
    function.getValueGradient (g);

    double alam2 = 0.0;                      // previous step fraction
    final double fold = function.getValue(); // value at the starting point
    double f2 = fold;                        // value at the previous step
    double f;

    if (logFine) {
      logger.fine ("ENTERING BACKTRACK\n");
      logger.fine("Entering BackTrackLnSrch, value="+fold+",\ndirection.oneNorm:"
                  + MatrixOps.oneNorm(line) + " direction.infNorm:"+MatrixOps.infinityNorm(line));
    }
    assert (!MatrixOps.isNaN(g));

    // Cap the length of the attempted step.
    double sum = MatrixOps.twoNorm(line);
    if (sum > stpmax) {
      logger.warning("attempted step too big. scaling: sum="+sum+
                     ", stpmax="+stpmax);
      MatrixOps.timesEquals(line, stpmax/sum);
    }

    final double slope = MatrixOps.dotProduct (g, line);
    if (logFine)
      logger.fine("slope="+slope);
    if (slope<0)
      throw new InvalidOptimizableException ("Slope = " + slope + " is negative");
    if (slope == 0)
      throw new InvalidOptimizableException ("Slope = " + slope + " is zero");

    // Converge when (delta x) / x < relTolx for all coordinates. The
    // step fraction below which this holds is precomputed as alamin.
    double test = 0.0;
    for (int i = 0; i < oldParameters.length; i++) {
      double temp = Math.abs(line[i]) /
        Math.max(Math.abs(oldParameters[i]), 1.0);
      if (temp > test) test = temp;
    }
    final double alamin = relTolx/test;

    double alam = 1.0;    // initially take the full step
    double oldAlam = 0.0; // step fraction currently applied to x

    // look for step size in direction given by "line"
    for (int iteration = 0; iteration < maxIterations; iteration++) {
      if (logFine) {
        logger.fine("BackTrack loop iteration "+iteration+": alam="+
                    alam+" oldAlam="+oldAlam);
        logger.fine ("before step, x.1norm: " + MatrixOps.oneNorm(x) +
                     "\nalam: " + alam + "\noldAlam: " + oldAlam);
      }
      assert(alam != oldAlam) : "alam == oldAlam";

      // x = oldParameters + alam*line, applied incrementally
      MatrixOps.plusEquals(x, line, alam - oldAlam);
      if (logFine)
        logger.fine ("after step, x.1norm: " + MatrixOps.oneNorm(x));

      // convergence on delta x: the step is too small to be useful
      if ((alam < alamin) || smallAbsDiff (oldParameters, x)) {
        function.setParameters(oldParameters);
        f = function.getValue();
        logger.warning("EXITING BACKTRACK: Jump too small (alamin="+alamin+"). Exiting and using xold. Value="+f);
        return 0.0;
      }

      function.setParameters(x);
      oldAlam = alam;
      f = function.getValue();
      if (logFine)
        logger.fine("value="+f);

      // sufficient function increase (Armijo / first Wolfe condition)
      if (f >= fold+ALF*alam*slope) {
        if (logFine)
          logger.fine("EXITING BACKTRACK: value="+f);
        if (f<fold)
          throw new IllegalStateException
            ("Function did not increase: f=" + f +
             " < " + fold + "=fold");
        return alam;
      }

      double tmplam;
      if (isUnstable(f) || isUnstable(f2)) {
        // We've jumped to unstable territory: the value is infinite or
        // NaN. Interpolating through it would yield a NaN step size, so
        // just scale the jump down. (No alamin check is needed here:
        // alam already passed that check at the top of this iteration.)
        logger.warning ("Value is infinite or NaN after jump " + oldAlam + ". f="+f+", f2="+f2+". Scaling back step size...");
        tmplam = .2 * alam;
      }
      else { // backtrack
        tmplam = interpolateStep (alam, alam2, f, f2, fold, slope);
      }

      alam2 = alam;
      f2 = f;
      if (logFine)
        logger.fine("tmplam:"+tmplam);
      alam = Math.max(tmplam, .1*alam); // lambda >= .1*lambda_1
    }

    // The loop only ends by returning or by exhausting its iterations.
    throw new IllegalStateException ("Too many iterations.");
  }

  /** Returns true iff <tt>value</tt> is infinite or NaN. */
  private static boolean isUnstable (double value)
  {
    return Double.isNaN(value) || Double.isInfinite(value);
  }

  /**
   * Proposes the next step fraction after a rejected step. A quadratic
   * model is used when the rejected step was the full step (alam == 1.0);
   * afterwards a cubic model through the two most recent steps is used,
   * capped at half the current step.
   *
   * @param alam current (rejected) step fraction
   * @param alam2 previous step fraction
   * @param f function value at <tt>alam</tt>
   * @param f2 function value at <tt>alam2</tt>
   * @param fold function value at the starting point
   * @param slope directional derivative at the starting point
   */
  private static double interpolateStep (double alam, double alam2,
                                         double f, double f2,
                                         double fold, double slope)
  {
    if (alam == 1.0) // first time through
      return -slope/(2.0*(f-fold-slope));

    double rhs1 = f-fold-alam*slope;
    double rhs2 = f2-fold-alam2*slope;
    assert((alam - alam2) != 0): "FAILURE: dividing by alam-alam2. alam="+alam;
    double a = (rhs1/(alam*alam)-rhs2/(alam2*alam2))/(alam-alam2);
    double b = (-alam2*rhs1/(alam*alam)+alam*rhs2/(alam2*alam2))/(alam-alam2);

    double tmplam;
    if (a == 0.0)
      tmplam = -slope/(2.0*b);
    else {
      double disc = b*b-3.0*a*slope;
      if (disc < 0.0)
        tmplam = .5 * alam;
      else if (b <= 0.0)
        tmplam = (-b+Math.sqrt(disc))/(3.0*a);
      else
        tmplam = -slope/(b+Math.sqrt(disc));
    }
    if (tmplam > .5*alam)
      tmplam = .5*alam; // lambda <= .5 lambda_1
    return tmplam;
  }

  /**
   * Returns true iff we've converged based on absolute x difference, i.e.
   * no coordinate of <tt>x</tt> differs from <tt>xold</tt> by more than
   * <tt>absTolx</tt>.
   */
  private boolean smallAbsDiff (double[] x, double[] xold)
  {
    for (int i = 0; i < x.length; i++) {
      if (Math.abs (x[i] - xold[i]) > absTolx) {
        return false;
      }
    }
    return true;
  }
}