package edu.stanford.nlp.optimization;
import edu.stanford.nlp.util.Pair;
/**
* Stochastic Gradient Descent Minimizer.
*
*
* The basic way to use the minimizer is with a null constructor, then
* the simple minimize method:
* <p/>
* <p><code>Minimizer smd = new InefficientSGDMinimizer();</code>
* <br><code>DiffFunction df = new SomeDiffFunction(); //Note that it must be a incidence of AbstractStochasticCachingDiffFunction</code>
* <br><code>double tol = 1e-4;</code>
* <br><code>double[] initial = getInitialGuess();</code>
* <br><code>int maxIterations = someSafeNumber;
* <br><code>double[] minimum = qnm.minimize(df,tol,initial,maxIterations);</code>
* <p/>
* Constructing with a null constructor will use the default values of
* <p>
* <br><code>batchSize = 15;</code>
* <br><code>initialGain = 0.1;</code>
* <p/>
* <br> NOTE: This class was previously called SGDMinimizer. SGDMinimizer is now what was StochasticInPlaceMinimizer. New projects should use that class.
* <p/>
*
* @author <a href="mailto:akleeman@stanford.edu">Alex Kleeman</a>
* @version 1.0
* @since 1.0
*/
public class InefficientSGDMinimizer<T extends Function> extends StochasticMinimizer<T> {
@Override
public void shutUp() {
this.quiet = true;
}
public void setBatchSize(int batchSize) {
bSize = batchSize;
}
public InefficientSGDMinimizer() {
}
public InefficientSGDMinimizer(double SGDGain, int batchSize){
this(SGDGain,batchSize,50);
}
public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes){
this(SGDGain,batchSize,passes,Long.MAX_VALUE,false);
}
public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, boolean outputToFile){
this(SGDGain, batchSize, passes, Long.MAX_VALUE ,outputToFile );
}
public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, long maxTime){
this(SGDGain,batchSize,passes,maxTime,false);
}
public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, long maxTime, boolean outputToFile){
bSize = batchSize;
gain = SGDGain;
this.numPasses = passes;
this.outputIterationsToFile = outputToFile;
this.maxTime = maxTime;
}
@Override
protected String getName(){
int g = (int) gain*1000;
return "SGD" + bSize + "_g" + g;
}
public Pair <Integer,Double> tune(Function function, double[] initial,long msPerTest,double gainLow,double gainHigh){
this.quiet = true;
gain = tuneGain(function, initial, msPerTest, gainLow,gainHigh);
bSize = tuneBatch(function,initial,msPerTest,1);
return new Pair<>(bSize, gain);
}
@Override
public Pair<Integer,Double> tune(Function function,double[] initial, long msPerTest){
return this.tune(function, initial, msPerTest, 1e-7,1.0);
}
@Override
protected void takeStep(AbstractStochasticCachingDiffFunction dfunction){
for(int i = 0; i < x.length; i++){
newX[i] = x[i] - gain*gainSchedule(k,5*numBatches)*grad[i];
}
}
public static void main(String[] args) {
// optimizes test function using doubles and floats
// test function is (0.5 sum(x_i^2 * var_i)) ^ PI
// where var is a vector of random nonnegative numbers
// dimensionality is variable.
final int dim = 500000;
final double maxVar = 5;
final double[] var = new double[dim];
double[] init = new double[dim];
for (int i = 0; i < dim; i++) {
init[i] = ((i + 1) / (double) dim - 0.5);//init[i] = (Math.random() - 0.5);
var[i] = maxVar * (i + 1) / dim;
}
final double[] grads = new double[dim];
final DiffFunction f = new DiffFunction() {
@Override
public double[] derivativeAt(double[] x) {
double val = Math.PI * valuePow(x, Math.PI - 1);
for (int i = 0; i < dim; i++) {
grads[i] = x[i] * var[i] * val;
}
return grads;
}
@Override
public double valueAt(double[] x) {
return 1.0 + valuePow(x, Math.PI);
}
private double valuePow(double[] x, double pow) {
double val = 0.0;
for (int i = 0; i < dim; i++) {
val += x[i] * x[i] * var[i];
}
return Math.pow(val * 0.5, pow);
}
@Override
public int domainDimension() {
return dim;
}
};
InefficientSGDMinimizer<DiffFunction> min = new InefficientSGDMinimizer<>();
min.minimize(f, 1.0E-4, init);
}
}