package LBJ2.learn;

import java.util.Arrays;
import LBJ2.classify.Classifier;
import LBJ2.classify.DiscretePrimitiveStringFeature;
import LBJ2.classify.Feature;
import LBJ2.classify.FeatureVector;
import LBJ2.classify.ScoreSet;
import LBJ2.util.ExceptionlessInputStream;
import LBJ2.util.ExceptionlessOutputStream;
import LBJ2.util.FVector;


/**
  * A <code>LinearThresholdUnit</code> is a {@link Learner} for binary
  * classification in which a score is computed as a linear function of a
  * <i>weight vector</i> and the input example, and the decision is made by
  * comparing the score to some threshold quantity.  Deriving a linear
  * threshold algorithm from this class gives the programmer more flexible
  * access to the score it computes as well as its promotion and demotion
  * methods (if it's on-line).
  *
  * <p> On-line, mistake driven algorithms derived from this class need only
  * override the {@link #promote(int[],double[],double)} and
  * {@link #demote(int[],double[],double)} methods, assuming the score
  * returned by the {@link #score(Object)} method need only be compared with
  * {@link #threshold} to make a prediction.  Otherwise, the
  * {@link #classify(Object)} method also needs to be overridden.  If the
  * algorithm is not mistake driven, the {@link #learn(Object)} method needs
  * to be overridden as well.
  *
  * <p> It is assumed that {@link Learner#labeler} is a single discrete
  * classifier that produces the same feature for every example object and
  * that the values that feature may take are available through the
  * {@link Classifier#allowableValues()} method.  The first value returned
  * from {@link Classifier#allowableValues()} is treated as "negative", and
  * it is assumed there are exactly 2 allowable values.  Assertions will
  * produce error messages if these assumptions do not hold.
  *
  * <p> Fitting a "thick separator" instead of just a hyperplane is also
  * supported through this class.
 *
 * <p> This algorithm's user-configurable parameters are stored in member
 * fields of this class.  They may be set via either a constructor that names
 * each parameter explicitly or a constructor that takes an instance of
 * {@link LBJ2.learn.LinearThresholdUnit.Parameters Parameters} as input.
 * The documentation in each member field in this class indicates the default
 * value of the associated parameter when using the former type of
 * constructor.  The documentation of the associated member field in the
 * {@link LBJ2.learn.LinearThresholdUnit.Parameters Parameters} class
 * indicates the default value of the parameter when using the latter type of
 * constructor.
 *
 * @author Nick Rizzolo
**/
public abstract class LinearThresholdUnit extends Learner
{
  /** Default for {@link #initialWeight}. */
  public static final double defaultInitialWeight = 0;
  /** Default for {@link #threshold}. */
  public static final double defaultThreshold = 0;
  /** Default for {@link #positiveThickness}. */
  public static final double defaultThickness = 0;
  /** Default value for {@link #learningRate}. */
  public static final double defaultLearningRate = 0.1;
  /** Default for {@link #weightVector}. */
  public static final SparseWeightVector defaultWeightVector =
    new SparseWeightVector();

  /**
    * The rate at which weights are updated; default
    * {@link #defaultLearningRate}.
   **/
  protected double learningRate;
  /** The LTU's weight vector; default is an empty vector. */
  protected SparseWeightVector weightVector;
  /**
    * The weight associated with a feature when first added to the vector;
    * default {@link #defaultInitialWeight}.
   **/
  protected double initialWeight;
  /**
    * The score is compared against this value to make predictions; default
    * {@link LinearThresholdUnit#defaultThreshold}.
   **/
  protected double threshold;
  /**
    * The bias is stored here rather than as an element of the weight vector.
   **/
  protected double bias;
  /**
    * The thickness of the hyperplane on the positive side; default
    * {@link #defaultThickness}.
   **/
  protected double positiveThickness;
  /**
    * The thickness of the hyperplane on the negative side; default equal to
    * {@link #positiveThickness}.
   **/
  protected double negativeThickness;
  /** The label producing classifier's allowable values. */
  protected String[] allowableValues;


  /**
    * Default constructor.  The learning rate and threshold take default
    * values, while the name of the classifier gets the empty string.
   **/
  public LinearThresholdUnit() { this(""); }

  /**
    * Initializing constructor.  Sets the learning rate to the specified
    * value, and the threshold and thickness take the default, while the name
    * of the classifier gets the empty string.
    *
    * @param r  The desired learning rate.
   **/
  public LinearThresholdUnit(double r) { this("", r); }

  /**
    * Sets the learning rate and threshold to the specified values, while the
    * name of the classifier gets the empty string.
    *
    * @param r  The desired learning rate value.
    * @param t  The desired threshold value.
   **/
  public LinearThresholdUnit(double r, double t) { this("", r, t); }

  /**
    * Use this constructor to fit a thick separator, where both the positive
    * and negative sides of the hyperplane will be given the specified
    * thickness, while the name of the classifier gets the empty string.
    *
    * @param r   The desired learning rate value.
    * @param t   The desired threshold value.
    * @param pt  The desired thickness.
   **/
  public LinearThresholdUnit(double r, double t, double pt) {
    this("", r, t, pt);
  }

  /**
    * Use this constructor to fit a thick separator, where the positive and
    * negative sides of the hyperplane will be given the specified separate
    * thicknesses, while the name of the classifier gets the empty string.
    *
    * @param r   The desired learning rate value.
    * @param t   The desired threshold value.
    * @param pt  The desired positive thickness.
    * @param nt  The desired negative thickness.
   **/
  public LinearThresholdUnit(double r, double t, double pt, double nt) {
    this("", r, t, pt, nt);
  }

  /**
    * Initializing constructor.  Sets the threshold, positive thickness, and
    * negative thickness to their default values.
    *
    * @param n  The name of the classifier.
   **/
  protected LinearThresholdUnit(String n) { this(n, defaultLearningRate); }

  /**
    * Initializing constructor.  Sets the learning rate to the specified
    * value, while the threshold, positive thickness, and negative thickness
    * take their default values.
    *
    * @param n  The name of the classifier.
    * @param r  The desired learning rate.
   **/
  protected LinearThresholdUnit(String n, double r) {
    this(n, r, defaultThreshold);
  }

  /**
    * Initializing constructor.  Sets the threshold to the specified value,
    * while the positive and negative thicknesses get their defaults.
    *
    * @param n  The name of the classifier.
    * @param r  The desired learning rate.
    * @param t  The desired value for the threshold.
   **/
  protected LinearThresholdUnit(String n, double r, double t) {
    this(n, r, t, defaultThickness);
  }

  /**
    * Initializing constructor.  Sets the threshold and positive thickness to
    * the specified values, and the negative thickness is set to the same
    * value as the positive thickness.
    *
    * @param n   The name of the classifier.
    * @param r   The desired learning rate.
    * @param t   The desired value for the threshold.
    * @param pt  The desired thickness.
   **/
  protected LinearThresholdUnit(String n, double r, double t, double pt) {
    this(n, r, t, pt, pt);
  }

  /**
    * Initializing constructor.  Sets the threshold, positive thickness, and
    * negative thickness to the specified values.  The weight vector is a
    * fresh clone of the default.
    *
    * @param n   The name of the classifier.
    * @param r   The desired learning rate.
    * @param t   The desired value for the threshold.
    * @param pt  The desired positive thickness.
    * @param nt  The desired negative thickness.
   **/
  protected LinearThresholdUnit(String n, double r, double t, double pt,
                                double nt) {
    this(n, r, t, pt, nt, (SparseWeightVector) defaultWeightVector.clone());
  }

  /**
    * Initializing constructor.
    *  Sets the threshold, positive thickness,
    * and negative thickness to the specified values.
    *
    * @param n   The name of the classifier.
    * @param r   The desired learning rate.
    * @param t   The desired value for the threshold.
    * @param pt  The desired positive thickness.
    * @param nt  The desired negative thickness.
    * @param v   An initial weight vector.
   **/
  protected LinearThresholdUnit(String n, double r, double t, double pt,
                                double nt, SparseWeightVector v) {
    super(n);
    // Funnel every explicit-argument constructor through
    // setParameters(Parameters) so all of them initialize the learner
    // identically; fields not listed here keep the Parameters defaults.
    Parameters p = new Parameters();
    p.weightVector = v;
    p.threshold = t;
    p.learningRate = r;
    p.positiveThickness = pt;
    p.negativeThickness = nt;
    setParameters(p);
  }

  /**
    * Initializing constructor.  Sets all member variables to their
    * associated settings in the {@link LinearThresholdUnit.Parameters}
    * object.  The name of the classifier is the empty string.
    *
    * @param p  The settings of all parameters.
   **/
  protected LinearThresholdUnit(Parameters p) { this("", p); }

  /**
    * Initializing constructor.  Sets all member variables to their
    * associated settings in the {@link LinearThresholdUnit.Parameters}
    * object.
    *
    * @param n  The name of the classifier.
    * @param p  The settings of all parameters.
   **/
  protected LinearThresholdUnit(String n, Parameters p) {
    super(n);
    setParameters(p);
  }

  /**
    * Sets the values of parameters that control the behavior of this
    * learning algorithm.
    *
    * @param p  The parameters.
   **/
  public void setParameters(Parameters p) {
    learningRate = p.learningRate;
    weightVector = p.weightVector;
    initialWeight = p.initialWeight;
    threshold = p.threshold;
    // The bias starts out equal to the weight a brand new feature receives.
    bias = p.initialWeight;
    // p.thickness is a base amount added to both sides of the separator.
    positiveThickness = p.thickness + p.positiveThickness;
    negativeThickness = p.thickness + p.negativeThickness;
  }

  /**
    * Retrieves the parameters that are set in this learner.
    *
    * @return An object containing all the values of the parameters that
    *         control the behavior of this learning algorithm.
**/ public Learner.Parameters getParameters() { Parameters p = new Parameters(super.getParameters()); p.learningRate = learningRate; p.weightVector = weightVector.emptyClone(); p.initialWeight = initialWeight; p.threshold = threshold; p.positiveThickness = positiveThickness; p.negativeThickness = negativeThickness; return p; } /** * Sets the labels list. * * @param l A new label producing classifier. **/ public void setLabeler(Classifier l) { if (!(l == null || l.allowableValues().length == 2)) { System.err.println( "Error: " + name + ": An LTU must be given a single binary label classifier."); new Exception().printStackTrace(); System.exit(1); } super.setLabeler(l); allowableValues = l == null ? null : l.allowableValues(); labelLexicon.clear(); labelLexicon.lookup( new DiscretePrimitiveStringFeature( l.containingPackage, l.name, "", allowableValues[0], (short) 0, (short) 2), true); labelLexicon.lookup( new DiscretePrimitiveStringFeature( l.containingPackage, l.name, "", allowableValues[1], (short) 1, (short) 2), true); predictions = new FVector(2); createPrediction(0); createPrediction(1); } /** * Returns the current value of the {@link #initialWeight} variable. * * @return The value of the {@link #initialWeight} variable. **/ public double getInitialWeight() { return initialWeight; } /** * Sets the {@link #initialWeight} member variable to the specified value. * * @param w The new value for {@link #initialWeight}. **/ public void setInitialWeight(double w) { initialWeight = w; } /** * Returns the current value of the {@link #threshold} variable. * * @return The value of the {@link #threshold} variable. **/ public double getThreshold() { return threshold; } /** * Sets the {@link #threshold} member variable to the specified value. * * @param t The new value for {@link #threshold}. **/ public void setThreshold(double t) { threshold = t; } /** * Returns the current value of the {@link #positiveThickness} variable. 
    *
    * @return The value of the {@link #positiveThickness} variable.
   **/
  public double getPositiveThickness() { return positiveThickness; }

  /**
    * Sets the {@link #positiveThickness} member variable to the specified
    * value.
    *
    * @param t  The new value for {@link #positiveThickness}.
   **/
  public void setPositiveThickness(double t) { positiveThickness = t; }

  /**
    * Returns the current value of the {@link #negativeThickness} variable.
    *
    * @return The value of the {@link #negativeThickness} variable.
   **/
  public double getNegativeThickness() { return negativeThickness; }

  /**
    * Sets the {@link #negativeThickness} member variable to the specified
    * value.
    *
    * @param t  The new value for {@link #negativeThickness}.
   **/
  public void setNegativeThickness(double t) { negativeThickness = t; }

  /**
    * Sets the {@link #positiveThickness} and {@link #negativeThickness}
    * member variables to the specified value.
    *
    * @param t  The new thickness value.
   **/
  public void setThickness(double t) {
    positiveThickness = negativeThickness = t;
  }

  /**
    * Returns the array of allowable values that a feature returned by this
    * classifier may take.
    *
    * @return If a labeler has not yet been established for this LTU, byte
    *         strings equivalent to <code>{ "*", "*" }</code> are returned,
    *         which indicates to the compiler that classifiers derived from
    *         this learner will return features that take one of two values
    *         that are specified in the source code.  Otherwise, the
    *         allowable values of the labeler are returned.
   **/
  public String[] allowableValues() {
    // Lazily fall back to wildcard values when no labeler has been set.
    if (allowableValues == null) allowableValues = new String[]{ "*", "*" };
    return allowableValues;
  }

  /**
    * The default training algorithm for a linear threshold unit consists of
    * evaluating the example object with the {@link #score(Object)} method
    * and {@link #threshold}, checking the result of evaluation against the
    * label, and, if they are different, promoting when the label is positive
    * or demoting when the label is negative.
    *
    * <p> This method does not call {@link #classify(Object)}; it calls
    * {@link #score(Object)} directly.
    *
    * @param exampleFeatures  The example's array of feature indices
    * @param exampleValues    The example's array of feature values
    * @param exampleLabels    The example's label(s)
    * @param labelValues      The labels' values
   **/
  public void learn(int[] exampleFeatures, double[] exampleValues,
                    int[] exampleLabels, double[] labelValues) {
    assert exampleLabels.length == 1
      : "Example must have a single label.";
    assert exampleLabels[0] == 0 || exampleLabels[0] == 1
      : "Example has unallowed label value.";

    boolean label = (exampleLabels[0] == 1);
    double s = score(exampleFeatures, exampleValues);

    // Both hooks are consulted independently (not if/else) because
    // subclasses may override shouldPromote(...) / shouldDemote(...) with
    // conditions that are not mutually exclusive.
    if (shouldPromote(label, s, threshold, positiveThickness))
      promote(exampleFeatures, exampleValues,
              computeLearningRate(exampleFeatures, exampleValues, s, label));
    if (shouldDemote(label, s, threshold, negativeThickness))
      demote(exampleFeatures, exampleValues,
             computeLearningRate(exampleFeatures, exampleValues, s, label));
  }

  /**
    * Computes the value of the {@link #learningRate} variable if needed and
    * returns the value.  By default, the current value of
    * {@link #learningRate} is returned unchanged; subclasses may override
    * this to implement example-dependent rates.
    *
    * @param exampleFeatures  The example's array of feature indices
    * @param exampleValues    The example's array of feature values
    * @param s                The score of the example object
    * @param label            The label of the example object
    * @return The computed value of the {@link #learningRate} variable
   **/
  public double computeLearningRate(int[] exampleFeatures,
                                    double[] exampleValues, double s,
                                    boolean label) {
    return learningRate;
  }

  /**
    * Determines if the weights should be promoted.
    *
    * @param label              The label of the example object
    * @param s                  The score of the example object
    * @param threshold          The LTU threshold
    * @param positiveThickness  The thickness of the hyperplane on the
    *                           positive side
    * @return True if the weights should be promoted, false otherwise.
**/ public boolean shouldPromote(boolean label, double s, double threshold, double positiveThickness) { return (label && s < threshold + positiveThickness); } /** * Determines if the weights should be demoted * * @param label The label of the example object * @param s The score of the example object * @param threshold The LTU threshold * @param negativeThickness The thickness of the hyperplane on the negative side * @return True if the weights should be demoted, false otherwise. **/ public boolean shouldDemote(boolean label, double s, double threshold, double negativeThickness) { return (!label && s >= threshold - negativeThickness); } /** * Initializes the weight vector array to the size of the specified number * of features, setting each weight equal to {@link #initialWeight}. **/ public void initialize(int numExamples, int numFeatures) { double[] weights = new double[numFeatures]; Arrays.fill(weights, initialWeight); weightVector = new SparseWeightVector(weights); } /** * An LTU returns two scores; one for the negative classification and one * for the positive classification. By default, the score for the positive * classification is the result of {@link #score(Object)} minus the * {@link #threshold}, and the score for the negative classification is the * opposite of the positive classification's score. * * @param exampleFeatures The example's array of feature indices * @param exampleValues The example's array of feature values * @return Two scores as described above. **/ public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { double s = score(exampleFeatures, exampleValues) - threshold; ScoreSet result = new ScoreSet(); result.put(allowableValues[0], -s); result.put(allowableValues[1], s); return result; } /** * Returns the classification of the given example as a single feature * instead of a {@link FeatureVector}. * * @param f The features array. * @param v The values array. * @return The classification of the example as a feature. 
**/ public Feature featureValue(int[] f, double[] v) { int index = score(f, v) >= threshold ? 1 : 0; return predictions.get(index); } /** * The default evaluation method simply computes the score for the example * and returns a {@link DiscretePrimitiveStringFeature} set to either the * second value from the label classifier's array of allowable values if * the score is greater than or equal to {@link #threshold} or the first * otherwise. * * @param exampleFeatures The example's array of feature indices * @param exampleValues The example's array of feature values * @return The computed feature (in a vector). **/ public String discreteValue(int[] exampleFeatures, double[] exampleValues) { int index = score(exampleFeatures, exampleValues) >= threshold ? 1 : 0; return allowableValues[index]; } /** * The default evaluation method simply computes the score for the example * and returns a {@link DiscretePrimitiveStringFeature} set to either the * second value from the label classifier's array of allowable values if * the score is greater than or equal to {@link #threshold} or the first * otherwise. * * @param exampleFeatures The example's array of feature indices * @param exampleValues The example's array of feature values * @return The computed feature (in a vector). **/ public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { return new FeatureVector(featureValue(exampleFeatures, exampleValues)); } /** * Computes the score for the specified example vector which will be * thresholded to make the binary classification. * * @param example The example object. * @return The score for the given example vector. **/ public double score(Object example) { Object[] exampleArray = getExampleArray(example, false); return score((int[]) exampleArray[0], (double[]) exampleArray[1]); } /** * Computes the score for the specified example vector which will be * thresholded to make the binary classification. 
    *
    * @param exampleFeatures  The example's array of feature indices
    * @param exampleValues    The example's array of feature values
    * @return The score for the given example vector.
   **/
  public double score(int[] exampleFeatures, double[] exampleValues) {
    // Features absent from the weight vector contribute initialWeight each;
    // the bias term is kept separately from the weight vector.
    return weightVector.dot(exampleFeatures, exampleValues, initialWeight)
           + bias;
  }

  /**
    * Resets the weight vector to associate the default weight with all
    * features.
   **/
  public void forget() {
    super.forget();
    weightVector = weightVector.emptyClone();
    bias = initialWeight;
    // Re-establish the label lexicon and prediction features.
    setLabeler(labeler);
  }

  /**
    * If the <code>LinearThresholdUnit</code> is mistake driven, this method
    * should be overridden and used to update the internal representation
    * when a mistake is made on a positive example.
    *
    * @param exampleFeatures  The example's array of feature indices
    * @param exampleValues    The example's array of feature values
    * @param rate             The learning rate at which the weights are
    *                         updated.
   **/
  public abstract void promote(int[] exampleFeatures,
                               double[] exampleValues, double rate);

  /**
    * If the <code>LinearThresholdUnit</code> is mistake driven, this method
    * should be overridden and used to update the internal representation
    * when a mistake is made on a negative example.
    *
    * @param exampleFeatures  The example's array of feature indices
    * @param exampleValues    The example's array of feature values
    * @param rate             The learning rate at which the weights are
    *                         updated.
   **/
  public abstract void demote(int[] exampleFeatures, double[] exampleValues,
                              double rate);

  /**
    * Writes the learned function's internal representation in binary form.
    *
    * @param out  The output stream.
   **/
  public void write(ExceptionlessOutputStream out) {
    super.write(out);

    // The element count is written first so read(...) can distinguish a
    // null array (count 0) from a populated one.
    if (allowableValues == null) out.writeInt(0);
    else {
      out.writeInt(allowableValues.length);
      for (int i = 0; i < allowableValues.length; ++i)
        out.writeString(allowableValues[i]);
    }

    // NOTE: the field order here must stay in sync with read(...).
    out.writeDouble(initialWeight);
    out.writeDouble(threshold);
    out.writeDouble(learningRate);
    out.writeDouble(positiveThickness);
    out.writeDouble(negativeThickness);
    out.writeDouble(bias);
    weightVector.write(out);
  }

  /**
    * Reads the binary representation of a learner with this object's
    * run-time type, overwriting any and all learned or manually specified
    * parameters as well as the label lexicon but without modifying the
    * feature lexicon.
    *
    * @param in  The input stream.
   **/
  public void read(ExceptionlessInputStream in) {
    super.read(in);

    int N = in.readInt();
    if (N == 0) allowableValues = null;
    else {
      allowableValues = new String[N];
      for (int i = 0; i < N; ++i) allowableValues[i] = in.readString();
    }

    // Field order mirrors write(...).
    initialWeight = in.readDouble();
    threshold = in.readDouble();
    learningRate = in.readDouble();
    positiveThickness = in.readDouble();
    negativeThickness = in.readDouble();
    bias = in.readDouble();
    weightVector = SparseWeightVector.readWeightVector(in);
  }

  /** Returns a deep clone of this learning algorithm. */
  public Object clone() {
    LinearThresholdUnit clone = (LinearThresholdUnit) super.clone();
    if (weightVector != null)
      clone.weightVector = (SparseWeightVector) weightVector.clone();
    return clone;
  }

  /**
    * Simply a container for all of {@link LinearThresholdUnit}'s
    * configurable parameters.  Using instances of this class should make
    * code more readable and constructors less complicated.
    *
    * @author Nick Rizzolo
   **/
  public static class Parameters extends Learner.Parameters
  {
    /**
      * The rate at which weights are updated; default
      * {@link LinearThresholdUnit#defaultLearningRate}.
     **/
    public double learningRate;
    /** The LTU's weight vector; default is an empty vector.
     */
    public SparseWeightVector weightVector;
    /**
      * The weight associated with a feature when first added to the vector;
      * default {@link LinearThresholdUnit#defaultInitialWeight}.
     **/
    public double initialWeight;
    /**
      * The score is compared against this value to make predictions;
      * default {@link LinearThresholdUnit#defaultThreshold}.
     **/
    public double threshold;
    /**
      * This thickness will be added to both {@link #positiveThickness} and
      * {@link #negativeThickness}; default
      * {@link LinearThresholdUnit#defaultThickness}.
     **/
    public double thickness;
    /** The thickness of the hyperplane on the positive side; default 0. */
    public double positiveThickness;
    /** The thickness of the hyperplane on the negative side; default 0. */
    public double negativeThickness;


    /** Sets all the default values. */
    public Parameters() {
      learningRate = defaultLearningRate;
      weightVector = (SparseWeightVector) defaultWeightVector.clone();
      initialWeight = defaultInitialWeight;
      threshold = defaultThreshold;
      thickness = defaultThickness;
    }

    /**
      * Sets the parameters from the parent's parameters object, giving
      * defaults to all parameters declared in this object.
      *
      * <p> NOTE(review): the argument <code>p</code> is currently ignored,
      * so none of the parent's settings are copied -- confirm whether a
      * call such as <code>super(p)</code> was intended here.
     **/
    public Parameters(Learner.Parameters p) { this(); }

    /**
      * Copy constructor.
      *
      * <p> NOTE(review): <code>weightVector</code> is copied by reference,
      * so the copy shares (and can mutate) the original's weights --
      * confirm this aliasing is intended.
     **/
    public Parameters(Parameters p) {
      learningRate = p.learningRate;
      weightVector = p.weightVector;
      initialWeight = p.initialWeight;
      threshold = p.threshold;
      thickness = p.thickness;
      positiveThickness = p.positiveThickness;
      negativeThickness = p.negativeThickness;
    }

    /**
      * Calls the appropriate <code>Learner.setParameters(Parameters)</code>
      * method for this <code>Parameters</code> object.
      *
      * @param l  The learner whose parameters will be set.
     **/
    public void setParameters(Learner l) {
      ((LinearThresholdUnit) l).setParameters(this);
    }

    /**
      * Creates a string representation of these parameters in which only
      * those parameters that differ from their default values are mentioned.
**/ public String nonDefaultString() { String result = super.nonDefaultString(); if (learningRate != LinearThresholdUnit.defaultLearningRate) result += ", learningRate = " + learningRate; if (initialWeight != LinearThresholdUnit.defaultInitialWeight) result += ", initialWeight = " + initialWeight; if (threshold != LinearThresholdUnit.defaultThreshold) result += ", threshold = " + threshold; if (thickness != LinearThresholdUnit.defaultThickness) result += ", thickness = " + thickness; if (positiveThickness != 0) result += ", positiveThickness = " + positiveThickness; if (negativeThickness != 0) result += ", negativeThickness = " + negativeThickness; if (result.startsWith(", ")) result = result.substring(2); return result; } } }