InefficientSGDMinimizer.java example

Explorer
Stanford-NLP-master
- CoreNLP-master
package edu.stanford.nlp.optimization;

import edu.stanford.nlp.util.Pair;

/**
 * Stochastic Gradient Descent Minimizer.
 *
 *
 * The basic way to use the minimizer is with a null constructor, then
 * the simple minimize method:
 * <p/>
 * <p><code>Minimizer smd = new InefficientSGDMinimizer();</code>
 * <br><code>DiffFunction df = new SomeDiffFunction(); //Note that it must be an instance of AbstractStochasticCachingDiffFunction</code>
 * <br><code>double tol = 1e-4;</code>
 * <br><code>double[] initial = getInitialGuess();</code>
 * <br><code>int maxIterations = someSafeNumber;</code>
 * <br><code>double[] minimum = smd.minimize(df,tol,initial,maxIterations);</code>
 * <p/>
 * Constructing with a null constructor will use the default values of
 * <p>
 * <br><code>batchSize = 15;</code>
 * <br><code>initialGain = 0.1;</code>
 * <p/>
 * <br> NOTE: This class was previously called SGDMinimizer. SGDMinimizer is now what was StochasticInPlaceMinimizer. New projects should use that class.
 * <p/>
 *
 * @author <a href="mailto:akleeman@stanford.edu">Alex Kleeman</a>
 * @version 1.0
 * @since 1.0
 */
public class InefficientSGDMinimizer<T extends Function> extends StochasticMinimizer<T> {

  /** Sets the inherited {@code quiet} flag to {@code true}. */
  @Override
  public void shutUp() {
    this.quiet = true;
  }

  /**
   * Stores the given batch size in the inherited {@code bSize} field.
   *
   * @param batchSize the batch size to use
   */
  public void setBatchSize(int batchSize) {
    bSize = batchSize;
  }

  /** Creates a minimizer without touching any of the inherited settings. */
  public InefficientSGDMinimizer() {
  }

  /**
   * Creates a minimizer with the given gain and batch size, using 50 passes,
   * a max time of {@code Long.MAX_VALUE} and no output to file.
   *
   * @param SGDGain   value stored in {@code gain}
   * @param batchSize value stored in {@code bSize}
   */
  public InefficientSGDMinimizer(double SGDGain, int batchSize) {
    this(SGDGain, batchSize, 50);
  }

  /**
   * Creates a minimizer with the given gain, batch size and number of passes,
   * using a max time of {@code Long.MAX_VALUE} and no output to file.
   *
   * @param SGDGain   value stored in {@code gain}
   * @param batchSize value stored in {@code bSize}
   * @param passes    value stored in {@code numPasses}
   */
  public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes) {
    this(SGDGain, batchSize, passes, Long.MAX_VALUE, false);
  }

  /**
   * Creates a minimizer with the given gain, batch size, number of passes and
   * file-output flag, using a max time of {@code Long.MAX_VALUE}.
   *
   * @param SGDGain      value stored in {@code gain}
   * @param batchSize    value stored in {@code bSize}
   * @param passes       value stored in {@code numPasses}
   * @param outputToFile value stored in {@code outputIterationsToFile}
   */
  public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, boolean outputToFile) {
    this(SGDGain, batchSize, passes, Long.MAX_VALUE, outputToFile);
  }

  /**
   * Creates a minimizer with the given gain, batch size, number of passes and
   * max time, with no output to file.
   *
   * @param SGDGain   value stored in {@code gain}
   * @param batchSize value stored in {@code bSize}
   * @param passes    value stored in {@code numPasses}
   * @param maxTime   value stored in {@code maxTime}
   */
  public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, long maxTime) {
    this(SGDGain, batchSize, passes, maxTime, false);
  }

  /**
   * Creates a fully specified minimizer; every other constructor that takes
   * arguments delegates here.
   *
   * @param SGDGain      value stored in {@code gain}
   * @param batchSize    value stored in {@code bSize}
   * @param passes       value stored in {@code numPasses}
   * @param maxTime      value stored in {@code maxTime}
   * @param outputToFile value stored in {@code outputIterationsToFile}
   */
  public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, long maxTime, boolean outputToFile) {
    bSize = batchSize;
    gain = SGDGain;
    this.numPasses = passes;
    this.outputIterationsToFile = outputToFile;
    this.maxTime = maxTime;
  }


  /**
   * Builds this minimizer's name: {@code "SGD"}, the batch size, {@code "_g"},
   * and the gain multiplied by 1000 and truncated to an int (a gain of 0.1
   * with batch size 15 gives {@code "SGD15_g100"}).
   *
   * @return the name string described above
   */
  @Override
  protected String getName() {
    // The product must be parenthesized: a cast binds tighter than '*', so
    // "(int) gain*1000" truncates the gain first and yields 0 for any gain below 1.
    int g = (int) (gain * 1000);
    return "SGD" + bSize + "_g" + g;
  }


  /**
   * Tunes the gain and then the batch size, storing both results in this
   * minimizer. Also sets the {@code quiet} flag to {@code true}, and leaves it set.
   *
   * @param function  function passed to {@code tuneGain} and {@code tuneBatch}
   * @param initial   initial point passed to both tuning routines
   * @param msPerTest passed to both tuning routines (presumably milliseconds per trial)
   * @param gainLow   lower gain bound passed to {@code tuneGain}
   * @param gainHigh  upper gain bound passed to {@code tuneGain}
   * @return a pair of the tuned batch size and the tuned gain
   */
  public Pair<Integer, Double> tune(Function function, double[] initial, long msPerTest, double gainLow, double gainHigh) {
    this.quiet = true;
    // Gain is tuned first; tuneBatch is then called with the new gain already stored.
    gain = tuneGain(function, initial, msPerTest, gainLow, gainHigh);
    bSize = tuneBatch(function, initial, msPerTest, 1);

    return new Pair<>(bSize, gain);
  }

  /**
   * Tunes gain and batch size using the gain range {@code [1e-7, 1.0]}.
   *
   * @param function  function to tune against
   * @param initial   initial point
   * @param msPerTest passed through to the five-argument {@code tune}
   * @return a pair of the tuned batch size and the tuned gain
   */
  @Override
  public Pair<Integer, Double> tune(Function function, double[] initial, long msPerTest) {
    return this.tune(function, initial, msPerTest, 1e-7, 1.0);
  }

  /**
   * Writes the next point into {@code newX}: each coordinate of {@code x}
   * minus {@code gain * gainSchedule(k, 5 * numBatches)} times the matching
   * entry of {@code grad}.
   *
   * @param dfunction not used by this implementation
   */
  @Override
  protected void takeStep(AbstractStochasticCachingDiffFunction dfunction) {
    for (int i = 0; i < x.length; i++) {
      newX[i] = x[i] - gain * gainSchedule(k, 5 * numBatches) * grad[i];
    }
  }


  /**
   * Demo entry point: builds a 500000-dimensional test function and minimizes
   * it with a default-constructed minimizer.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    // Test function value is 1.0 + (0.5 * sum(x_i^2 * var_i)) ^ PI,
    // where var_i = maxVar * (i + 1) / dim (deterministic, positive).
    final int dim = 500000;
    final double maxVar = 5;
    final double[] var = new double[dim];
    double[] init = new double[dim];

    for (int i = 0; i < dim; i++) {
      init[i] = ((i + 1) / (double) dim - 0.5);//init[i] = (Math.random() - 0.5);
      var[i] = maxVar * (i + 1) / dim;
    }

    // Shared gradient buffer: derivativeAt overwrites and returns this same array on every call.
    final double[] grads = new double[dim];

    final DiffFunction f = new DiffFunction() {
      @Override
      public double[] derivativeAt(double[] x) {
        // Chain rule: d/dx_i (s^PI) = PI * s^(PI - 1) * x_i * var_i, with s = 0.5 * sum(x_j^2 * var_j).
        double val = Math.PI * valuePow(x, Math.PI - 1);
        for (int i = 0; i < dim; i++) {
          grads[i] = x[i] * var[i] * val;
        }
        return grads;
      }

      @Override
      public double valueAt(double[] x) {
        return 1.0 + valuePow(x, Math.PI);
      }

      // Returns (0.5 * sum(x_i^2 * var_i)) raised to the given power.
      private double valuePow(double[] x, double pow) {
        double val = 0.0;
        for (int i = 0; i < dim; i++) {
          val += x[i] * x[i] * var[i];
        }
        return Math.pow(val * 0.5, pow);
      }

      @Override
      public int domainDimension() {
        return dim;
      }
    };

    InefficientSGDMinimizer<DiffFunction> min = new InefficientSGDMinimizer<>();
    min.minimize(f, 1.0E-4, init);
  }

}