package edu.stanford.nlp.optimization;

import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.logging.Redwood;

/**
 * <p>
 * Stochastic Meta Descent Minimizer based on
 * <p>
 * Accelerated training of conditional random fields with stochastic gradient methods.
 * S. V. N. Vishwanathan, Nicol N. Schraudolph, Mark W. Schmidt, Kevin P. Murphy.
 * June 2006. Proceedings of the 23rd International Conference on Machine Learning (ICML '06).
 * Publisher: ACM Press.
 * <p>
 * The basic way to use the minimizer is with a null constructor, then
 * the simple minimize method:
 * <p>
 * <code>Minimizer smd = new SMDMinimizer();</code>
 * <br><code>DiffFunction df = new SomeDiffFunction();</code>
 * <br><code>double tol = 1e-4;</code>
 * <br><code>double[] initial = getInitialGuess();</code>
 * <br><code>int maxIterations = someSafeNumber;</code>
 * <br><code>double[] minimum = smd.minimize(df,tol,initial,maxIterations);</code>
 * <p>
 * Constructing with a null constructor will use the default values of:
 * <p>
 * <code>batchSize = 15;</code>
 * <br><code>initialGain = 0.1;</code>
 * <br><code>useAlgorithmicDifferentiation = true;</code>
 *
 * @author <a href="mailto:akleeman@stanford.edu">Alex Kleeman</a>
 * @version 1.0
 * @since 1.0
 */
public class SMDMinimizer<T extends Function> extends StochasticMinimizer<T> {

  /** A logger for this class. */
  private static Redwood.RedwoodChannels log = Redwood.channels(SMDMinimizer.class);

  public double mu = 0.01;
  public double lam = 1.0;
  public double cPosDef = 0.00;
  public double meta;

  // DEBUG ONLY
  public boolean printMinMax = false;

  private double[] Hv, gains;

  StochasticCalculateMethods method; // = null;

  @Override
  public void shutUp() {
    this.quiet = true;
  }

  public void setBatchSize(int batchSize) {
    bSize = batchSize;
  }

  public SMDMinimizer() {
  }

  public SMDMinimizer(double initialSMDGain, int batchSize, StochasticCalculateMethods method, int passes) {
    this(initialSMDGain, batchSize, method, passes, false);
  }

  public SMDMinimizer(double initGain, int batchSize, StochasticCalculateMethods method, int passes, boolean outputToFile) {
    bSize = batchSize;
    gain = initGain;
    this.method = method;
    this.numPasses = passes;
    this.outputIterationsToFile = outputToFile;
  }

  @Override
  public double[] minimize(Function function, double functionTolerance, double[] initial) {
    return minimize(function, functionTolerance, initial, -1);
  }

  @Override
  protected void init(AbstractStochasticCachingDiffFunction func) {
    func.method = this.method;
    gains = new double[x.length];
    v = new double[x.length];
    Hv = new double[x.length];
    // Start every per-parameter gain at the global initial gain.
    for (int i = 0; i < gains.length; i++) {
      gains[i] = gain;
    }
  }

  private class setMu implements PropertySetter<Double> {
    SMDMinimizer<T> parent; // = null;

    public setMu(SMDMinimizer<T> smd) { parent = smd; }

    @Override
    public void set(Double in) { parent.mu = in; }
  }

  private class setLam implements PropertySetter<Double> {
    SMDMinimizer<T> parent; // = null;

    public setLam(SMDMinimizer<T> smd) { parent = smd; }

    @Override
    public void set(Double in) { parent.lam = in; }
  }

  @Override
  public Pair<Integer, Double> tune(edu.stanford.nlp.optimization.Function function, double[] initial, long msPerTest) {
    this.quiet = true;
    this.lam = 0.9;
    this.mu = tuneDouble(function, initial, msPerTest, new setMu(this), 1e-8, 1e-2);
    this.lam = tuneDouble(function, initial, msPerTest, new setLam(this), 0.1, 1.0);
    gain = tuneGain(function, initial, msPerTest, 1e-8, 1.0);
    bSize = tuneBatch(function, initial, msPerTest, 1);
    log.info("Results:  gain: " + nf.format(gain) + "  batch: " + bSize
        + "  mu: " + nf.format(this.mu) + "  lam: " + nf.format(this.lam));
    return new Pair<>(bSize, gain);
  }
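  /*
   * For reference, the per-parameter SMD recurrence implemented in
   * takeStep() below (following Vishwanathan et al., 2006) is:
   *
   *   meta_i  = 1 - mu * g_i * v_i
   *   gain_i <- gain_i * max(0.5, meta_i)
   *   v_i    <- lam * (1 + cPosDef * gain_i) * v_i - gain_i * (g_i + lam * (Hv)_i)
   *   x_i    <- x_i - gain_i * g_i
   *
   * where g is the stochastic gradient and Hv is the Hessian-vector product
   * returned by HdotVAt().
   */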
  @Override
  protected void takeStep(AbstractStochasticCachingDiffFunction dfunction) {
    dfunction.returnPreviousValues = true;
    System.arraycopy(dfunction.HdotVAt(x, v, grad, bSize), 0, Hv, 0, Hv.length);

    // Update the weights
    for (int i = 0; i < x.length; i++) {
      meta = 1 - mu * grad[i] * v[i];
      if (0.5 > meta) {
        gains[i] = gains[i] * 0.5;
      } else {
        gains[i] = gains[i] * meta;
      }
      // Update the gain history (the vector v)
      v[i] = lam * (1 + cPosDef * gains[i]) * v[i] - gains[i] * (grad[i] + lam * Hv[i]);
      // Get the next x
      newX[i] = x[i] - gains[i] * grad[i];
    }

    if (printMinMax) {
      say("vMin = " + ArrayMath.min(v) + "  ");
      say("vMax = " + ArrayMath.max(v) + "  ");
      say("gainMin = " + ArrayMath.min(gains) + "  ");
      say("gainMax = " + ArrayMath.max(gains) + "  ");
    }
  }

  @Override
  protected String getName() {
    int m = (int) (mu * 1000);
    int l = (int) (lam * 1000);
    int g = (int) (gain * 10000);
    return "SMD" + bSize + "_mu" + m + "_lam" + l + "_g" + g;
  }

  public static void main(String[] args) {
    // Optimizes a test function over doubles.
    // The test function is (0.5 * sum(x_i^2 * var_i)) ^ PI,
    // where var is a vector of nonnegative numbers and the
    // dimensionality is variable.
    final int dim = 500000;
    final double maxVar = 5;
    final double[] var = new double[dim];
    double[] init = new double[dim];

    for (int i = 0; i < dim; i++) {
      init[i] = ((i + 1) / (double) dim - 0.5); // init[i] = (Math.random() - 0.5);
      var[i] = maxVar * (i + 1) / dim;
    }

    final DiffFunction f = new DiffFunction() {
      @Override
      public double[] derivativeAt(double[] x) {
        double val = Math.PI * valuePow(x, Math.PI - 1);
        final double[] grads = new double[dim];
        for (int i = 0; i < dim; i++) {
          grads[i] = x[i] * var[i] * val;
        }
        return grads;
      }

      @Override
      public double valueAt(double[] x) {
        return 1.0 + valuePow(x, Math.PI);
      }

      private double valuePow(double[] x, double pow) {
        double val = 0.0;
        for (int i = 0; i < dim; i++) {
          val += x[i] * x[i] * var[i];
        }
        return Math.pow(val * 0.5, pow);
      }

      @Override
      public int domainDimension() {
        return dim;
      }
    };

    SMDMinimizer<DiffFunction> min = new SMDMinimizer<>();
    min.minimize(f, 1.0E-4, init);
  }

}
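/*
 * Construction with explicit hyperparameters is also possible. A minimal
 * sketch (SomeDiffFunction and getInitialGuess() are hypothetical, and the
 * StochasticCalculateMethods value shown is just one plausible choice):
 *
 *   SMDMinimizer<DiffFunction> smd = new SMDMinimizer<>(
 *       0.1,   // initial gain
 *       15,    // batch size
 *       StochasticCalculateMethods.AlgorithmicDifferentiation,
 *       50);   // number of passes
 *   double[] minimum = smd.minimize(new SomeDiffFunction(), 1e-4, getInitialGuess());
 */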