package LBJ2.learn;

import java.io.PrintStream;

import LBJ2.classify.Feature;
import LBJ2.classify.FeatureVector;
import LBJ2.classify.RealPrimitiveStringFeature;
import LBJ2.classify.ScoreSet;
import LBJ2.util.ExceptionlessInputStream;
import LBJ2.util.ExceptionlessOutputStream;


/**
 * Gradient descent is a batch learning algorithm for function approximation
 * in which the learner tries to follow the gradient of the error function to
 * the solution of minimal error.  This implementation is a stochastic
 * approximation to gradient descent in which the approximated function is
 * assumed to have linear form.
 *
 * <p> This algorithm's user-configurable parameters are stored in member
 * fields of this class.  They may be set via either a constructor that names
 * each parameter explicitly or a constructor that takes an instance of
 * {@link LBJ2.learn.StochasticGradientDescent.Parameters Parameters} as
 * input.  The documentation in each member field in this class indicates the
 * default value of the associated parameter when using the former type of
 * constructor.  The documentation of the associated member field in the
 * {@link LBJ2.learn.StochasticGradientDescent.Parameters Parameters} class
 * indicates the default value of the parameter when using the latter type of
 * constructor.
 *
 * @author Nick Rizzolo
 **/
public class StochasticGradientDescent extends Learner
{
  /** Default value for {@link #learningRate}. */
  public static final double defaultLearningRate = 0.1;
  /** Default for {@link #weightVector}. */
  public static final SparseWeightVector defaultWeightVector =
    new SparseWeightVector();

  /** The hypothesis vector; default {@link #defaultWeightVector}. */
  protected SparseWeightVector weightVector;
  /**
   * The bias is stored here rather than as an element of the weight vector.
   **/
  protected double bias;
  /**
   * The rate at which weights are updated; default
   * {@link #defaultLearningRate}.
   **/
  protected double learningRate;


  /**
   * The learning rate takes the default value, while the name of the
   * classifier gets the empty string.
   **/
  public StochasticGradientDescent() { this(""); }

  /**
   * Sets the learning rate to the specified value, while the name of the
   * classifier gets the empty string.
   *
   * @param r  The desired learning rate value.
   **/
  public StochasticGradientDescent(double r) { this("", r); }

  /**
   * Initializing constructor.  Sets all member variables to their associated
   * settings in the {@link StochasticGradientDescent.Parameters} object.
   *
   * @param p  The settings of all parameters.
   **/
  public StochasticGradientDescent(Parameters p) { this("", p); }

  /**
   * The learning rate takes the default value.
   *
   * @param n  The name of the classifier.
   **/
  public StochasticGradientDescent(String n) {
    this(n, defaultLearningRate);
  }

  /**
   * Sets the learning rate to the specified value.
   *
   * @param n  The name of the classifier.
   * @param r  The desired learning rate value.
   **/
  public StochasticGradientDescent(String n, double r) {
    super(n);
    Parameters p = new Parameters();
    p.learningRate = r;
    setParameters(p);
  }

  /**
   * Initializing constructor.  Sets all member variables to their associated
   * settings in the {@link StochasticGradientDescent.Parameters} object.
   *
   * @param n  The name of the classifier.
   * @param p  The settings of all parameters.
   **/
  public StochasticGradientDescent(String n, Parameters p) {
    super(n);
    setParameters(p);
  }


  /**
   * Sets the values of parameters that control the behavior of this learning
   * algorithm.
   *
   * @param p  The parameters.
   **/
  public void setParameters(Parameters p) {
    weightVector = p.weightVector;
    learningRate = p.learningRate;
  }


  /**
   * Retrieves the parameters that are set in this learner.
   *
   * @return An object containing all the values of the parameters that
   *         control the behavior of this learning algorithm.
   **/
  public Learner.Parameters getParameters() {
    Parameters p = new Parameters(super.getParameters());
    // Note: the returned parameters hold an *empty* clone of the weight
    // vector (same run-time type, no learned weights), not the learned
    // weights themselves.
    p.weightVector = weightVector.emptyClone();
    p.learningRate = learningRate;
    return p;
  }


  /**
   * Returns the current value of the {@link #learningRate} variable.
   *
   * @return The value of the {@link #learningRate} variable.
   **/
  public double getLearningRate() { return learningRate; }


  /**
   * Sets the {@link #learningRate} member variable to the specified value.
   *
   * @param t  The new value for {@link #learningRate}.
   **/
  public void setLearningRate(double t) { learningRate = t; }


  /** Resets the weight vector to all zeros. */
  public void forget() {
    super.forget();
    weightVector = weightVector.emptyClone();
    bias = 0;
  }


  /**
   * Returns a string describing the output feature type of this classifier.
   *
   * @return <code>"real"</code>
   **/
  public String getOutputType() { return "real"; }


  /**
   * Trains the learning algorithm given an object as an example.  Performs
   * one stochastic gradient step for the squared error of a linear
   * hypothesis: the residual (label minus current prediction, including the
   * bias) scaled by the learning rate is added to the weights along the
   * example's features and to the bias.
   *
   * @param exampleFeatures  The example's array of feature indices.
   * @param exampleValues    The example's array of feature values.
   * @param exampleLabels    The example's label(s).
   * @param labelValues      The labels' values.
   **/
  public void learn(int[] exampleFeatures, double[] exampleValues,
                    int[] exampleLabels, double[] labelValues) {
    assert exampleLabels.length == 1
      : "Example must have a single label.";

    double labelValue = labelValues[0];
    double multiplier =
      learningRate
      * (labelValue - weightVector.dot(exampleFeatures, exampleValues)
         - bias);
    weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
    bias += multiplier;
  }


  /**
   * Since this algorithm returns a real feature, it does not return scores.
   *
   * @param exampleFeatures  The example's array of feature indices.
   * @param exampleValues    The example's array of feature values.
   * @return <code>null</code>
   **/
  public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) {
    return null;
  }


  /**
   * Returns the classification of the given example as a single feature
   * instead of a {@link FeatureVector}.
   *
   * @param f  The features array.
   * @param v  The values array.
   * @return The classification of the example as a feature.
   **/
  public Feature featureValue(int[] f, double[] v) {
    return
      new RealPrimitiveStringFeature(containingPackage, name, "",
                                     realValue(f, v));
  }


  /**
   * Simply computes the dot product of the weight vector and the example
   * plus the bias.
   *
   * @param exampleFeatures  The example's array of feature indices.
   * @param exampleValues    The example's array of feature values.
   * @return The computed real value.
   **/
  public double realValue(int[] exampleFeatures, double[] exampleValues) {
    return weightVector.dot(exampleFeatures, exampleValues) + bias;
  }


  /**
   * Simply computes the dot product of the weight vector and the feature
   * vector extracted from the example object.
   *
   * @param exampleFeatures  The example's array of feature indices.
   * @param exampleValues    The example's array of feature values.
   * @return The computed feature (in a vector).
   **/
  public FeatureVector classify(int[] exampleFeatures,
                                double[] exampleValues) {
    return new FeatureVector(featureValue(exampleFeatures, exampleValues));
  }


  /**
   * Writes the algorithm's internal representation as text.  In the first
   * line of output, the name of the classifier is printed, followed by
   * {@link #learningRate} and {@link #bias}.
   *
   * @param out  The output stream.
   **/
  public void write(PrintStream out) {
    out.println(name + ": " + learningRate + ", " + bias);
    // An empty lexicon means features were never mapped to strings, so the
    // weight vector is written with raw indices.
    if (lexicon.size() == 0) weightVector.write(out);
    else weightVector.write(out, lexicon);
  }


  /**
   * Writes the learned function's internal representation in binary form.
   *
   * @param out  The output stream.
   **/
  public void write(ExceptionlessOutputStream out) {
    super.write(out);
    out.writeDouble(learningRate);
    out.writeDouble(bias);
    weightVector.write(out);
  }


  /**
   * Reads the binary representation of a learner with this object's run-time
   * type, overwriting any and all learned or manually specified parameters
   * as well as the label lexicon but without modifying the feature lexicon.
   *
   * @param in  The input stream.
   **/
  public void read(ExceptionlessInputStream in) {
    super.read(in);
    learningRate = in.readDouble();
    bias = in.readDouble();
    weightVector = SparseWeightVector.readWeightVector(in);
  }


  /** Returns a deep clone of this learning algorithm. */
  public Object clone() {
    StochasticGradientDescent clone = null;

    try { clone = (StochasticGradientDescent) super.clone(); }
    catch (Exception e) {
      // Library-wide convention: cloning failures are fatal.
      System.err.println("Error cloning StochasticGradientDescent: " + e);
      System.exit(1);
    }

    // The weight vector is deep-copied so the clone learns independently.
    clone.weightVector = (SparseWeightVector) weightVector.clone();
    return clone;
  }


  /**
   * Simply a container for all of {@link StochasticGradientDescent}'s
   * configurable parameters.  Using instances of this class should make code
   * more readable and constructors less complicated.
   *
   * @author Nick Rizzolo
   **/
  public static class Parameters extends Learner.Parameters
  {
    /**
     * The hypothesis vector; default
     * {@link StochasticGradientDescent#defaultWeightVector}.
     **/
    public SparseWeightVector weightVector;
    /**
     * The rate at which weights are updated; default
     * {@link StochasticGradientDescent#defaultLearningRate}.
     **/
    public double learningRate;


    /** Sets all the default values. */
    public Parameters() {
      weightVector = (SparseWeightVector) defaultWeightVector.clone();
      learningRate = defaultLearningRate;
    }


    /**
     * Sets the parameters from the parent's parameters object, giving
     * defaults to all parameters declared in this object.
     **/
    public Parameters(Learner.Parameters p) {
      super(p);
      weightVector = (SparseWeightVector) defaultWeightVector.clone();
      learningRate = defaultLearningRate;
    }


    /** Copy constructor. */
    public Parameters(Parameters p) {
      super(p);
      // NOTE(review): shallow copy — the weight vector is shared with p,
      // unlike the cloning constructors above; confirm this is intended.
      weightVector = p.weightVector;
      learningRate = p.learningRate;
    }


    /**
     * Calls the appropriate <code>Learner.setParameters(Parameters)</code>
     * method for this <code>Parameters</code> object.
     *
     * @param l  The learner whose parameters will be set.
     **/
    public void setParameters(Learner l) {
      ((StochasticGradientDescent) l).setParameters(this);
    }


    /**
     * Creates a string representation of these parameters in which only
     * those parameters that differ from their default values are mentioned.
     **/
    public String nonDefaultString() {
      String result = super.nonDefaultString();

      if (learningRate != StochasticGradientDescent.defaultLearningRate)
        result += ", learningRate = " + learningRate;

      if (result.startsWith(", ")) result = result.substring(2);
      return result;
    }
  }
}