package LBJ2.learn;

import java.io.PrintStream;

import LBJ2.classify.Feature;
import LBJ2.classify.FeatureVector;
import LBJ2.classify.RealPrimitiveStringFeature;
import LBJ2.classify.ScoreSet;
import LBJ2.util.ExceptionlessInputStream;
import LBJ2.util.ExceptionlessOutputStream;


/**
 * Gradient descent is a batch learning algorithm for function approximation
 * in which the learner tries to follow the gradient of the error function to
 * the solution of minimal error.  This implementation is a stochastic
 * approximation to gradient descent in which the approximated function is
 * assumed to have linear form.
 *
 * <p> This algorithm's user-configurable parameters are stored in member
 * fields of this class.  They may be set via either a constructor that names
 * each parameter explicitly or a constructor that takes an instance of
 * {@link LBJ2.learn.StochasticGradientDescent.Parameters Parameters} as
 * input.  The documentation in each member field in this class indicates the
 * default value of the associated parameter when using the former type of
 * constructor.  The documentation of the associated member field in the
 * {@link LBJ2.learn.StochasticGradientDescent.Parameters Parameters} class
 * indicates the default value of the parameter when using the latter type of
 * constructor.
 *
 * @author Nick Rizzolo
 **/
public class StochasticGradientDescent extends Learner
{
  /** Default value for {@link #learningRate}. */
  public static final double defaultLearningRate = 0.1;
  /** Default for {@link #weightVector}. */
  public static final SparseWeightVector defaultWeightVector =
    new SparseWeightVector();

  /** The hypothesis vector; default {@link #defaultWeightVector}. */
  protected SparseWeightVector weightVector;
  /**
   * The bias is stored here rather than as an element of the weight vector.
   **/
  protected double bias;
  /**
   * The rate at which weights are updated; default
   * {@link #defaultLearningRate}.
   **/
  protected double learningRate;


  /**
   * The learning rate takes the default value, while the name of the
   * classifier gets the empty string.
   **/
  public StochasticGradientDescent() { this(""); }

  /**
   * Sets the learning rate to the specified value, while the name of the
   * classifier gets the empty string.
   *
   * @param r  The desired learning rate value.
   **/
  public StochasticGradientDescent(double r) { this("", r); }

  /**
   * Initializing constructor.  Sets all member variables to their associated
   * settings in the {@link StochasticGradientDescent.Parameters} object.
   *
   * @param p  The settings of all parameters.
   **/
  public StochasticGradientDescent(Parameters p) { this("", p); }

  /**
   * The learning rate takes the default value.
   *
   * @param n  The name of the classifier.
   **/
  public StochasticGradientDescent(String n) {
    this(n, defaultLearningRate);
  }

  /**
   * Sets the learning rate to the specified value.
   *
   * @param n  The name of the classifier.
   * @param r  The desired learning rate value.
   **/
  public StochasticGradientDescent(String n, double r) {
    super(n);
    Parameters p = new Parameters();
    p.learningRate = r;
    setParameters(p);
  }

  /**
   * Initializing constructor.  Sets all member variables to their associated
   * settings in the {@link StochasticGradientDescent.Parameters} object.
   *
   * @param n  The name of the classifier.
   * @param p  The settings of all parameters.
   **/
  public StochasticGradientDescent(String n, Parameters p) {
    super(n);
    setParameters(p);
  }


  /**
   * Sets the values of parameters that control the behavior of this learning
   * algorithm.
   *
   * @param p  The parameters.
   **/
  public void setParameters(Parameters p) {
    weightVector = p.weightVector;
    learningRate = p.learningRate;
  }


  /**
   * Retrieves the parameters that are set in this learner.
   *
   * @return An object containing all the values of the parameters that
   *         control the behavior of this learning algorithm.
   **/
  public Learner.Parameters getParameters() {
    Parameters p = new Parameters(super.getParameters());
    // Note: the returned parameters hold an *empty* clone of the weight
    // vector (same run-time type, no learned weights), not the learned
    // weights themselves.
    p.weightVector = weightVector.emptyClone();
    p.learningRate = learningRate;
    return p;
  }


  /**
   * Returns the current value of the {@link #learningRate} variable.
   *
   * @return The value of the {@link #learningRate} variable.
   **/
  public double getLearningRate() { return learningRate; }


  /**
   * Sets the {@link #learningRate} member variable to the specified value.
   *
   * @param t  The new value for {@link #learningRate}.
   **/
  public void setLearningRate(double t) { learningRate = t; }


  /** Resets the weight vector to all zeros. */
  public void forget() {
    super.forget();
    weightVector = weightVector.emptyClone();
    bias = 0;
  }


  /**
   * Returns a string describing the output feature type of this classifier.
   *
   * @return <code>"real"</code>
   **/
  public String getOutputType() { return "real"; }


  /**
   * Trains the learning algorithm given an object as an example.  Performs
   * one stochastic gradient step for the squared error of a linear
   * hypothesis: the residual (label minus current prediction, including the
   * bias) scaled by the learning rate is added to the weights along the
   * example's features and to the bias.
   *
   * @param exampleFeatures  The example's array of feature indices.
   * @param exampleValues    The example's array of feature values.
   * @param exampleLabels    The example's label(s).
   * @param labelValues      The labels' values.
   **/
  public void learn(int[] exampleFeatures, double[] exampleValues,
                    int[] exampleLabels, double[] labelValues) {
    assert exampleLabels.length == 1
      : "Example must have a single label.";

    double labelValue = labelValues[0];
    double multiplier =
      learningRate
      * (labelValue - weightVector.dot(exampleFeatures, exampleValues)
         - bias);
    weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
    bias += multiplier;
  }


  /**
   * Since this algorithm returns a real feature, it does not return scores.
   *
   * @param exampleFeatures  The example's array of feature indices.
   * @param exampleValues    The example's array of feature values.
   * @return <code>null</code>
   **/
  public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) {
    return null;
  }


  /**
   * Returns the classification of the given example as a single feature
   * instead of a {@link FeatureVector}.
   *
   * @param f  The features array.
   * @param v  The values array.
   * @return The classification of the example as a feature.
   **/
  public Feature featureValue(int[] f, double[] v) {
    return
      new RealPrimitiveStringFeature(containingPackage, name, "",
                                     realValue(f, v));
  }


  /**
   * Simply computes the dot product of the weight vector and the example
   * plus the bias.
   *
   * @param exampleFeatures  The example's array of feature indices.
   * @param exampleValues    The example's array of feature values.
   * @return The computed real value.
   **/
  public double realValue(int[] exampleFeatures, double[] exampleValues) {
    return weightVector.dot(exampleFeatures, exampleValues) + bias;
  }


  /**
   * Simply computes the dot product of the weight vector and the feature
   * vector extracted from the example object.
   *
   * @param exampleFeatures  The example's array of feature indices.
   * @param exampleValues    The example's array of feature values.
   * @return The computed feature (in a vector).
   **/
  public FeatureVector classify(int[] exampleFeatures,
                                double[] exampleValues) {
    return new FeatureVector(featureValue(exampleFeatures, exampleValues));
  }


  /**
   * Writes the algorithm's internal representation as text.  In the first
   * line of output, the name of the classifier is printed, followed by
   * {@link #learningRate} and {@link #bias}.
   *
   * @param out  The output stream.
   **/
  public void write(PrintStream out) {
    out.println(name + ": " + learningRate + ", " + bias);
    // An empty lexicon means features were never mapped to strings, so the
    // weight vector is written with raw indices.
    if (lexicon.size() == 0) weightVector.write(out);
    else weightVector.write(out, lexicon);
  }


  /**
   * Writes the learned function's internal representation in binary form.
   *
   * @param out  The output stream.
   **/
  public void write(ExceptionlessOutputStream out) {
    super.write(out);
    out.writeDouble(learningRate);
    out.writeDouble(bias);
    weightVector.write(out);
  }


  /**
   * Reads the binary representation of a learner with this object's run-time
   * type, overwriting any and all learned or manually specified parameters
   * as well as the label lexicon but without modifying the feature lexicon.
   *
   * @param in  The input stream.
   **/
  public void read(ExceptionlessInputStream in) {
    super.read(in);
    learningRate = in.readDouble();
    bias = in.readDouble();
    weightVector = SparseWeightVector.readWeightVector(in);
  }


  /** Returns a deep clone of this learning algorithm. */
  public Object clone() {
    StochasticGradientDescent clone = null;

    try { clone = (StochasticGradientDescent) super.clone(); }
    catch (Exception e) {
      // Library-wide convention: cloning failures are fatal.
      System.err.println("Error cloning StochasticGradientDescent: " + e);
      System.exit(1);
    }

    // The weight vector is deep-copied so the clone learns independently.
    clone.weightVector = (SparseWeightVector) weightVector.clone();
    return clone;
  }


  /**
   * Simply a container for all of {@link StochasticGradientDescent}'s
   * configurable parameters.  Using instances of this class should make code
   * more readable and constructors less complicated.
   *
   * @author Nick Rizzolo
   **/
  public static class Parameters extends Learner.Parameters
  {
    /**
     * The hypothesis vector; default
     * {@link StochasticGradientDescent#defaultWeightVector}.
     **/
    public SparseWeightVector weightVector;
    /**
     * The rate at which weights are updated; default
     * {@link StochasticGradientDescent#defaultLearningRate}.
     **/
    public double learningRate;


    /** Sets all the default values. */
    public Parameters() {
      weightVector = (SparseWeightVector) defaultWeightVector.clone();
      learningRate = defaultLearningRate;
    }


    /**
     * Sets the parameters from the parent's parameters object, giving
     * defaults to all parameters declared in this object.
     **/
    public Parameters(Learner.Parameters p) {
      super(p);
      weightVector = (SparseWeightVector) defaultWeightVector.clone();
      learningRate = defaultLearningRate;
    }


    /** Copy constructor. */
    public Parameters(Parameters p) {
      super(p);
      // NOTE(review): shallow copy — the weight vector is shared with p,
      // unlike the cloning constructors above; confirm this is intended.
      weightVector = p.weightVector;
      learningRate = p.learningRate;
    }


    /**
     * Calls the appropriate <code>Learner.setParameters(Parameters)</code>
     * method for this <code>Parameters</code> object.
     *
     * @param l  The learner whose parameters will be set.
     **/
    public void setParameters(Learner l) {
      ((StochasticGradientDescent) l).setParameters(this);
    }


    /**
     * Creates a string representation of these parameters in which only
     * those parameters that differ from their default values are mentioned.
     **/
    public String nonDefaultString() {
      String result = super.nonDefaultString();

      if (learningRate != StochasticGradientDescent.defaultLearningRate)
        result += ", learningRate = " + learningRate;

      if (result.startsWith(", ")) result = result.substring(2);
      return result;
    }
  }
}