package edu.stanford.nlp.optimization;

import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.logging.Redwood;

/**
 * <p>
 * Stochastic Meta Descent Minimizer based on
 * <p>
 * Accelerated training of conditional random fields with stochastic gradient methods.
 * S. V. N. Vishwanathan, Nicol N. Schraudolph, Mark W. Schmidt, Kevin P. Murphy.
 * June 2006. Proceedings of the 23rd International Conference on Machine Learning (ICML '06).
 * Publisher: ACM Press.
 * <p>
 * The basic way to use the minimizer is with a null constructor, then
 * the simple minimize method:
 * <p>
 * <code>Minimizer smd = new SMDMinimizer();</code>
 * <br><code>DiffFunction df = new SomeDiffFunction();</code>
 * <br><code>double tol = 1e-4;</code>
 * <br><code>double[] initial = getInitialGuess();</code>
 * <br><code>int maxIterations = someSafeNumber;</code>
 * <br><code>double[] minimum = smd.minimize(df,tol,initial,maxIterations);</code>
 * <p>
 * Constructing with a null constructor will use the default values of:
 * <p>
 * <code>batchSize = 15;</code>
 * <br><code>initialGain = 0.1;</code>
 * <br><code>useAlgorithmicDifferentiation = true;</code>
 *
 * @author <a href="mailto:akleeman@stanford.edu">Alex Kleeman</a>
 * @version 1.0
 * @since 1.0
 */
public class SMDMinimizer<T extends Function> extends StochasticMinimizer<T> {

  /** A logger for this class. */
  private static Redwood.RedwoodChannels log = Redwood.channels(SMDMinimizer.class);

  public double mu = 0.01;
  public double lam = 1.0;
  public double cPosDef = 0.00;
  public double meta;

  // DEBUG ONLY
  public boolean printMinMax = false;

  private double[] Hv, gains;

  StochasticCalculateMethods method; // = null;

  @Override
  public void shutUp() {
    this.quiet = true;
  }

  public void setBatchSize(int batchSize) {
    bSize = batchSize;
  }

  public SMDMinimizer() {
  }

  public SMDMinimizer(double initialSMDGain, int batchSize, StochasticCalculateMethods method, int passes) {
    this(initialSMDGain, batchSize, method, passes, false);
  }

  public SMDMinimizer(double initGain, int batchSize, StochasticCalculateMethods method, int passes, boolean outputToFile) {
    bSize = batchSize;
    gain = initGain;
    this.method = method;
    this.numPasses = passes;
    this.outputIterationsToFile = outputToFile;
  }

  @Override
  public double[] minimize(Function function, double functionTolerance, double[] initial) {
    return minimize(function, functionTolerance, initial, -1);
  }

  @Override
  protected void init(AbstractStochasticCachingDiffFunction func) {
    func.method = this.method;
    gains = new double[x.length];
    v = new double[x.length];
    Hv = new double[x.length];
    // Start every per-parameter gain at the global initial gain.
    for (int i = 0; i < gains.length; i++) {
      gains[i] = gain;
    }
  }

  private class setMu implements PropertySetter<Double> {
    SMDMinimizer<T> parent; // = null;

    public setMu(SMDMinimizer<T> smd) { parent = smd; }

    @Override
    public void set(Double in) { parent.mu = in; }
  }

  private class setLam implements PropertySetter<Double> {
    SMDMinimizer<T> parent; // = null;

    public setLam(SMDMinimizer<T> smd) { parent = smd; }

    @Override
    public void set(Double in) { parent.lam = in; }
  }

  @Override
  public Pair<Integer, Double> tune(edu.stanford.nlp.optimization.Function function, double[] initial, long msPerTest) {
    this.quiet = true;
    this.lam = 0.9;
    this.mu = tuneDouble(function, initial, msPerTest, new setMu(this), 1e-8, 1e-2);
    this.lam = tuneDouble(function, initial, msPerTest, new setLam(this), 0.1, 1.0);
    gain = tuneGain(function, initial, msPerTest, 1e-8, 1.0);
    bSize = tuneBatch(function, initial, msPerTest, 1);
    log.info("Results:  gain: " + nf.format(gain) + "  batch: " + bSize
        + "  mu: " + nf.format(this.mu) + "  lam: " + nf.format(this.lam));
    return new Pair<>(bSize, gain);
  }
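  /*
   * For reference, the per-parameter SMD recurrence implemented in
   * takeStep() below (following Vishwanathan et al., 2006) is:
   *
   *   meta_i  = 1 - mu * g_i * v_i
   *   gain_i <- gain_i * max(0.5, meta_i)
   *   v_i    <- lam * (1 + cPosDef * gain_i) * v_i - gain_i * (g_i + lam * (Hv)_i)
   *   x_i    <- x_i - gain_i * g_i
   *
   * where g is the stochastic gradient and Hv is the Hessian-vector product
   * returned by HdotVAt().
   */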
  @Override
  protected void takeStep(AbstractStochasticCachingDiffFunction dfunction) {
    dfunction.returnPreviousValues = true;
    System.arraycopy(dfunction.HdotVAt(x, v, grad, bSize), 0, Hv, 0, Hv.length);

    // Update the weights
    for (int i = 0; i < x.length; i++) {
      meta = 1 - mu * grad[i] * v[i];
      if (0.5 > meta) {
        gains[i] = gains[i] * 0.5;
      } else {
        gains[i] = gains[i] * meta;
      }
      // Update the gain history (the vector v)
      v[i] = lam * (1 + cPosDef * gains[i]) * v[i] - gains[i] * (grad[i] + lam * Hv[i]);
      // Get the next x
      newX[i] = x[i] - gains[i] * grad[i];
    }

    if (printMinMax) {
      say("vMin = " + ArrayMath.min(v) + "  ");
      say("vMax = " + ArrayMath.max(v) + "  ");
      say("gainMin = " + ArrayMath.min(gains) + "  ");
      say("gainMax = " + ArrayMath.max(gains) + "  ");
    }
  }

  @Override
  protected String getName() {
    int m = (int) (mu * 1000);
    int l = (int) (lam * 1000);
    int g = (int) (gain * 10000);
    return "SMD" + bSize + "_mu" + m + "_lam" + l + "_g" + g;
  }

  public static void main(String[] args) {
    // Optimizes a test function over doubles.
    // The test function is (0.5 * sum(x_i^2 * var_i)) ^ PI,
    // where var is a vector of nonnegative numbers and the
    // dimensionality is variable.
    final int dim = 500000;
    final double maxVar = 5;
    final double[] var = new double[dim];
    double[] init = new double[dim];

    for (int i = 0; i < dim; i++) {
      init[i] = ((i + 1) / (double) dim - 0.5); // init[i] = (Math.random() - 0.5);
      var[i] = maxVar * (i + 1) / dim;
    }

    final DiffFunction f = new DiffFunction() {
      @Override
      public double[] derivativeAt(double[] x) {
        double val = Math.PI * valuePow(x, Math.PI - 1);
        final double[] grads = new double[dim];
        for (int i = 0; i < dim; i++) {
          grads[i] = x[i] * var[i] * val;
        }
        return grads;
      }

      @Override
      public double valueAt(double[] x) {
        return 1.0 + valuePow(x, Math.PI);
      }

      private double valuePow(double[] x, double pow) {
        double val = 0.0;
        for (int i = 0; i < dim; i++) {
          val += x[i] * x[i] * var[i];
        }
        return Math.pow(val * 0.5, pow);
      }

      @Override
      public int domainDimension() {
        return dim;
      }
    };

    SMDMinimizer<DiffFunction> min = new SMDMinimizer<>();
    min.minimize(f, 1.0E-4, init);
  }

}
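/*
 * Construction with explicit hyperparameters is also possible. A minimal
 * sketch (SomeDiffFunction and getInitialGuess() are hypothetical, and the
 * StochasticCalculateMethods value shown is just one plausible choice):
 *
 *   SMDMinimizer<DiffFunction> smd = new SMDMinimizer<>(
 *       0.1,   // initial gain
 *       15,    // batch size
 *       StochasticCalculateMethods.AlgorithmicDifferentiation,
 *       50);   // number of passes
 *   double[] minimum = smd.minimize(new SomeDiffFunction(), 1e-4, getInitialGuess());
 */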