package LBJ2.learn;
import java.util.Arrays;
import LBJ2.classify.Classifier;
import LBJ2.classify.DiscretePrimitiveStringFeature;
import LBJ2.classify.Feature;
import LBJ2.classify.FeatureVector;
import LBJ2.classify.ScoreSet;
import LBJ2.util.ExceptionlessInputStream;
import LBJ2.util.ExceptionlessOutputStream;
import LBJ2.util.FVector;
/**
* A <code>LinearThresholdUnit</code> is a {@link Learner} for binary
* classification in which a score is computed as a linear function of a
* <i>weight vector</i> and the input example, and the decision is made by
* comparing the score to some threshold quantity. Deriving a linear
* threshold algorithm from this class gives the programmer more flexible
* access to the score it computes as well as its promotion and demotion
* methods (if it's on-line).
*
* <p> On-line, mistake driven algorithms derived from this class need only
* override the {@link #promote(int[],double[],double)}, and
* {@link #demote(int[],double[],double)}
* methods, assuming the score returned by the {@link #score(Object)} method
* need only be compared with {@link #threshold} to make a prediction.
* Otherwise, the {@link #classify(Object)} method also needs to be
* overridden. If the algorithm is not mistake driven, the
* {@link #learn(Object)} method needs to be overridden as well.
*
* <p> It is assumed that {@link Learner#labeler} is a single discrete
* classifier that produces the same feature for every example object and
* that the values that feature may take are available through the
* {@link Classifier#allowableValues()} method. The first value returned
* from {@link Classifier#allowableValues()} is treated as "negative", and it
* is assumed there are exactly 2 allowable values. Assertions will produce
* error messages if these assumptions do not hold.
*
* <p> Fitting a "thick separator" instead of just a hyperplane is also
* supported through this class.
*
* <p> This algorithm's user-configurable parameters are stored in member
* fields of this class. They may be set via either a constructor that names
* each parameter explicitly or a constructor that takes an instance of
* {@link LBJ2.learn.LinearThresholdUnit.Parameters Parameters} as input.
* The documentation in each member field in this class indicates the default
* value of the associated parameter when using the former type of
* constructor. The documentation of the associated member field in the
* {@link LBJ2.learn.LinearThresholdUnit.Parameters Parameters} class
* indicates the default value of the parameter when using the latter type of
* constructor.
*
* @author Nick Rizzolo
**/
public abstract class LinearThresholdUnit extends Learner
{
/** Default for {@link #initialWeight}. */
public static final double defaultInitialWeight = 0;
/** Default for {@link #threshold}. */
public static final double defaultThreshold = 0;
/**
 * Default thickness. The constructors that take no thickness argument pass
 * this value as the positive thickness, which is then reused for the
 * negative side; it is also the default of {@link Parameters#thickness}.
 **/
public static final double defaultThickness = 0;
/** Default value for {@link #learningRate}. */
public static final double defaultLearningRate = 0.1;
/**
 * Default for {@link #weightVector}. This single instance is static, so the
 * code in this class clones it instead of storing it in a learner directly.
 **/
public static final SparseWeightVector defaultWeightVector =
new SparseWeightVector();
/**
 * The rate at which weights are updated; default
 * {@link #defaultLearningRate}.
 **/
protected double learningRate;
/** The LTU's weight vector; default is an empty vector. */
protected SparseWeightVector weightVector;
/**
 * The weight associated with a feature when first added to the vector;
 * default {@link #defaultInitialWeight}.
 **/
protected double initialWeight;
/**
 * The score is compared against this value to make predictions; default
 * {@link LinearThresholdUnit#defaultThreshold}.
 **/
protected double threshold;
/**
 * The bias is stored here rather than as an element of the weight vector.
 * It is set to the initial weight by {@link #setParameters(Parameters)}
 * and {@link #forget()}, and is added to the dot product in
 * {@link #score(int[],double[])}.
 **/
protected double bias;
/**
 * The thickness of the hyperplane on the positive side; default
 * {@link #defaultThickness}.
 **/
protected double positiveThickness;
/**
 * The thickness of the hyperplane on the negative side; default equal to
 * {@link #positiveThickness}.
 **/
protected double negativeThickness;
/**
 * The label producing classifier's allowable values. Assigned by
 * {@link #setLabeler(Classifier)} and {@link #read(ExceptionlessInputStream)};
 * {@link #allowableValues()} replaces a <code>null</code> value with a
 * placeholder array.
 **/
protected String[] allowableValues;
/**
 * Creates a learner whose name is the empty string and whose parameters
 * all take their default values.
 **/
public LinearThresholdUnit() {
  this("");
}
/**
 * Creates a learner named with the empty string that uses the given
 * learning rate; the threshold and both thicknesses take their defaults.
 *
 * @param r The desired learning rate.
 **/
public LinearThresholdUnit(double r) {
  this("", r);
}
/**
 * Creates a learner named with the empty string that uses the given
 * learning rate and threshold; both thicknesses take their defaults.
 *
 * @param r The desired learning rate value.
 * @param t The desired threshold value.
 **/
public LinearThresholdUnit(double r, double t) {
  this("", r, t);
}
/**
 * Creates a learner named with the empty string that fits a thick
 * separator; the same thickness is applied to the positive and the negative
 * side of the hyperplane.
 *
 * @param r  The desired learning rate value.
 * @param t  The desired threshold value.
 * @param pt The desired thickness for both sides.
 **/
public LinearThresholdUnit(double r, double t, double pt)
{
  this("", r, t, pt);
}
/**
 * Creates a learner named with the empty string that fits a thick
 * separator whose positive and negative sides have separately specified
 * thicknesses.
 *
 * @param r  The desired learning rate value.
 * @param t  The desired threshold value.
 * @param pt The desired positive thickness.
 * @param nt The desired negative thickness.
 **/
public LinearThresholdUnit(double r, double t, double pt, double nt)
{
  this("", r, t, pt, nt);
}
/**
 * Creates a named learner in which the learning rate, threshold, positive
 * thickness, and negative thickness all take their default values.
 *
 * @param n The name of the classifier.
 **/
protected LinearThresholdUnit(String n) {
  this(n, defaultLearningRate);
}
/**
 * Initializing constructor. Uses the given learning rate, while the
 * threshold, positive thickness, and negative thickness take their default
 * values.
 *
 * @param n The name of the classifier.
 * @param r The desired learning rate.
 **/
protected LinearThresholdUnit(String n, double r)
{
  this(n, r, defaultThreshold);
}
/**
 * Initializing constructor. Uses the given learning rate and threshold,
 * while the positive and negative thicknesses get
 * {@link #defaultThickness}.
 *
 * @param n The name of the classifier.
 * @param r The desired learning rate.
 * @param t The desired value for the threshold.
 **/
protected LinearThresholdUnit(String n, double r, double t)
{
  this(n, r, t, defaultThickness);
}
/**
 * Initializing constructor. Uses the given learning rate and threshold and
 * applies one thickness value to both the positive and the negative side of
 * the hyperplane.
 *
 * @param n  The name of the classifier.
 * @param r  The desired learning rate.
 * @param t  The desired value for the threshold.
 * @param pt The desired thickness for both sides.
 **/
protected LinearThresholdUnit(String n, double r, double t, double pt)
{
  this(n, r, t, pt, pt);
}
/**
 * Initializing constructor. Uses the given learning rate, threshold,
 * positive thickness, and negative thickness, and starts from a clone of
 * {@link #defaultWeightVector}.
 *
 * @param n  The name of the classifier.
 * @param r  The desired learning rate.
 * @param t  The desired value for the threshold.
 * @param pt The desired positive thickness.
 * @param nt The desired negative thickness.
 **/
protected LinearThresholdUnit(String n, double r, double t, double pt,
                              double nt)
{
  // Clone so that no learner ever stores the static default vector itself.
  this(n, r, t, pt, nt, (SparseWeightVector) defaultWeightVector.clone());
}
/**
 * Initializing constructor. Collects the arguments in a fresh
 * {@link Parameters} object and applies it with
 * {@link #setParameters(Parameters)}. The initial weight is not an argument
 * here and therefore keeps the default that {@link Parameters} gives it.
 *
 * @param n  The name of the classifier.
 * @param r  The desired learning rate.
 * @param t  The desired value for the threshold.
 * @param pt The desired positive thickness.
 * @param nt The desired negative thickness.
 * @param v  An initial weight vector; it is stored as given, not copied.
 **/
protected LinearThresholdUnit(String n, double r, double t, double pt,
                              double nt, SparseWeightVector v)
{
  super(n);

  Parameters settings = new Parameters();
  settings.learningRate = r;
  settings.threshold = t;
  settings.positiveThickness = pt;
  settings.negativeThickness = nt;
  settings.weightVector = v;

  setParameters(settings);
}
/**
 * Initializing constructor. Creates a learner named with the empty string
 * whose member variables are taken from the given
 * {@link LinearThresholdUnit.Parameters} object.
 *
 * @param p The settings of all parameters.
 **/
protected LinearThresholdUnit(Parameters p) {
  this("", p);
}
/**
 * Initializing constructor. Names the learner and then copies every
 * setting out of the given {@link LinearThresholdUnit.Parameters} object
 * via {@link #setParameters(Parameters)}.
 *
 * @param n The name of the classifier.
 * @param p The settings of all parameters.
 **/
protected LinearThresholdUnit(String n, Parameters p)
{
  super(n);
  setParameters(p);
}
/**
 * Copies the values that control the behavior of this learning algorithm
 * out of the given parameters object. The weight vector reference is stored
 * as is (no copy is made), the bias is reset to the initial weight, and the
 * shared {@link Parameters#thickness} is added to each side's own
 * thickness.
 *
 * @param p The parameters.
 **/
public void setParameters(Parameters p)
{
  weightVector = p.weightVector;
  learningRate = p.learningRate;
  threshold = p.threshold;

  // The bias starts out with the same value a new weight would get.
  initialWeight = p.initialWeight;
  bias = p.initialWeight;

  // Each side's effective thickness is the common part plus its own part.
  final double common = p.thickness;
  positiveThickness = common + p.positiveThickness;
  negativeThickness = common + p.negativeThickness;
}
/**
 * Retrieves the parameters that are set in this learner. The returned
 * object carries an empty clone of the weight vector rather than the
 * learned weights, and the current thicknesses are reported through the
 * per-side fields (the shared {@link Parameters#thickness} keeps its
 * default).
 *
 * <p> NOTE(review): the object is built with
 * {@link Parameters#Parameters(Learner.Parameters)}, which as written does
 * not read its argument -- confirm that nothing from the parent's
 * parameters is expected to survive.
 *
 * @return An object containing all the values of the parameters that
 *         control the behavior of this learning algorithm.
 **/
public Learner.Parameters getParameters()
{
  Parameters snapshot = new Parameters(super.getParameters());

  snapshot.weightVector = weightVector.emptyClone();
  snapshot.learningRate = learningRate;
  snapshot.initialWeight = initialWeight;
  snapshot.threshold = threshold;
  snapshot.positiveThickness = positiveThickness;
  snapshot.negativeThickness = negativeThickness;

  return snapshot;
}
/**
 * Sets the label producing classifier and rebuilds everything derived from
 * it: {@link #allowableValues}, the label lexicon, and the two prediction
 * features.
 *
 * <p> A non-<code>null</code> labeler must report exactly two allowable
 * values; otherwise an error message and a stack trace are printed to
 * <code>System.err</code> and the JVM exits with status 1.
 *
 * <p> A <code>null</code> labeler is accepted (for instance when
 * {@link #forget()} is called before any labeler has been set). In that
 * case the labeler is passed to the superclass, the label lexicon is
 * cleared, {@link #allowableValues} becomes <code>null</code>, and the
 * prediction features are left untouched. Previously this case fell
 * through to code that dereferenced the <code>null</code> labeler and threw
 * a <code>NullPointerException</code>.
 *
 * @param l A new label producing classifier, or <code>null</code>.
 **/
public void setLabeler(Classifier l) {
  if (l != null && l.allowableValues().length != 2) {
    System.err.println(
        "Error: " + name
        + ": An LTU must be given a single binary label classifier.");
    new Exception().printStackTrace();
    System.exit(1);
  }

  super.setLabeler(l);
  labelLexicon.clear();

  if (l == null) {
    // Without a labeler there are no label values to register.
    allowableValues = null;
    return;
  }

  allowableValues = l.allowableValues();

  // Register the negative (index 0) and positive (index 1) label values.
  for (short i = 0; i < 2; ++i) {
    labelLexicon.lookup(
        new DiscretePrimitiveStringFeature(
          l.containingPackage, l.name, "", allowableValues[i], i,
          (short) 2),
        true);
  }

  predictions = new FVector(2);
  createPrediction(0);
  createPrediction(1);
}
/**
 * Reports the weight given to a feature when it is first added to the
 * weight vector.
 *
 * @return The value of the {@link #initialWeight} variable.
 **/
public double getInitialWeight() {
  return initialWeight;
}
/**
 * Overwrites the {@link #initialWeight} member variable. Only that field is
 * assigned; {@link #bias} is not changed by this call.
 *
 * @param w The new value for {@link #initialWeight}.
 **/
public void setInitialWeight(double w) {
  initialWeight = w;
}
/**
 * Reports the value that scores are compared against to make predictions.
 *
 * @return The value of the {@link #threshold} variable.
 **/
public double getThreshold() {
  return threshold;
}
/**
 * Overwrites the {@link #threshold} member variable.
 *
 * @param t The new value for {@link #threshold}.
 **/
public void setThreshold(double t) {
  threshold = t;
}
/**
 * Reports the thickness of the hyperplane on the positive side.
 *
 * @return The value of the {@link #positiveThickness} variable.
 **/
public double getPositiveThickness() {
  return positiveThickness;
}
/**
 * Overwrites the {@link #positiveThickness} member variable; the negative
 * thickness is not affected.
 *
 * @param t The new value for {@link #positiveThickness}.
 **/
public void setPositiveThickness(double t)
{
  positiveThickness = t;
}
/**
 * Reports the thickness of the hyperplane on the negative side.
 *
 * @return The value of the {@link #negativeThickness} variable.
 **/
public double getNegativeThickness() {
  return negativeThickness;
}
/**
 * Overwrites the {@link #negativeThickness} member variable; the positive
 * thickness is not affected.
 *
 * @param t The new value for {@link #negativeThickness}.
 **/
public void setNegativeThickness(double t) {
  negativeThickness = t;
}
/**
 * Gives both sides of the hyperplane the same thickness by assigning the
 * specified value to {@link #positiveThickness} and
 * {@link #negativeThickness}.
 *
 * @param t The new thickness value.
 **/
public void setThickness(double t)
{
  negativeThickness = t;
  positiveThickness = t;
}
/**
 * Returns the array of allowable values that a feature returned by this
 * classifier may take.
 *
 * <p> When {@link #allowableValues} is still <code>null</code>, the
 * placeholder array <code>{ "*", "*" }</code> is created, stored in that
 * field, and returned, so later calls return the same array.
 *
 * @return If no allowable values have been established for this LTU, the
 *         placeholder <code>{ "*", "*" }</code>, which indicates to the
 *         compiler that classifiers derived from this learner will return
 *         features that take one of two values that are specified in the
 *         source code. Otherwise, the stored allowable values.
 **/
public String[] allowableValues()
{
  if (allowableValues != null) return allowableValues;

  allowableValues = new String[]{ "*", "*" };
  return allowableValues;
}
/**
 * The default, mistake-driven training step for a linear threshold unit.
 * The example is scored once with {@link #score(int[],double[])}. Then
 * {@link #shouldPromote(boolean,double,double,double)} and
 * {@link #shouldDemote(boolean,double,double,double)} are each consulted
 * with that score, and {@link #promote(int[],double[],double)} or
 * {@link #demote(int[],double[],double)} is invoked with the rate obtained
 * from {@link #computeLearningRate(int[],double[],double,boolean)}.
 *
 * <p> This method does not call <code>classify</code>; it works from the
 * score directly. Label index 1 is treated as the positive label.
 *
 * @param exampleFeatures The example's array of feature indices
 * @param exampleValues   The example's array of feature values
 * @param exampleLabels   The example's label(s); exactly one label with
 *                        index 0 or 1 is expected (checked by assertion)
 * @param labelValues     The labels' values (not read by this method)
 **/
public void learn(int[] exampleFeatures, double[] exampleValues,
                  int[] exampleLabels, double[] labelValues)
{
  assert exampleLabels.length == 1
    : "Example must have a single label.";
  assert exampleLabels[0] == 0 || exampleLabels[0] == 1
    : "Example has unallowed label value.";

  final boolean isPositive = exampleLabels[0] == 1;
  final double exampleScore = score(exampleFeatures, exampleValues);

  if (shouldPromote(isPositive, exampleScore, threshold,
                    positiveThickness)) {
    double rate =
      computeLearningRate(exampleFeatures, exampleValues, exampleScore,
                          isPositive);
    promote(exampleFeatures, exampleValues, rate);
  }

  // Deliberately tested with the score computed before any promotion.
  if (shouldDemote(isPositive, exampleScore, threshold,
                   negativeThickness)) {
    double rate =
      computeLearningRate(exampleFeatures, exampleValues, exampleScore,
                          isPositive);
    demote(exampleFeatures, exampleValues, rate);
  }
}
/**
 * Supplies the learning rate to use for an update on the given example.
 * This default implementation ignores all of its arguments and simply
 * returns the current value of {@link #learningRate}; the arguments are
 * there for subclasses that override this method.
 *
 * @param exampleFeatures The example's array of feature indices
 * @param exampleValues   The example's array of feature values
 * @param s               The score of the example object
 * @param label           The label of the example object
 * @return The learning rate to apply to the update.
 **/
public double computeLearningRate(int[] exampleFeatures,
                                  double[] exampleValues, double s,
                                  boolean label)
{
  return learningRate;
}
/**
 * Determines if the weights should be promoted: the example must be
 * positive and its score must fall strictly below the threshold plus the
 * positive thickness. Only the arguments are consulted; the parameters
 * named <code>threshold</code> and <code>positiveThickness</code> shadow
 * the member fields of the same names.
 *
 * @param label             The label of the example object
 * @param s                 The score of the example object
 * @param threshold         The LTU threshold
 * @param positiveThickness The thickness of the hyperplane on the positive
 *                          side
 * @return True if the weights should be promoted, false otherwise.
 **/
public boolean shouldPromote(boolean label, double s, double threshold,
                             double positiveThickness)
{
  if (!label) return false;
  return s < threshold + positiveThickness;
}
/**
 * Determines if the weights should be demoted: the example must be negative
 * and its score must be at least the threshold minus the negative
 * thickness. Only the arguments are consulted; the parameters named
 * <code>threshold</code> and <code>negativeThickness</code> shadow the
 * member fields of the same names.
 *
 * @param label             The label of the example object
 * @param s                 The score of the example object
 * @param threshold         The LTU threshold
 * @param negativeThickness The thickness of the hyperplane on the negative
 *                          side
 * @return True if the weights should be demoted, false otherwise.
 **/
public boolean shouldDemote(boolean label, double s, double threshold,
                            double negativeThickness)
{
  if (label) return false;
  return s >= threshold - negativeThickness;
}
/**
 * Replaces the weight vector with a new one built from an array holding one
 * weight per feature, each equal to {@link #initialWeight}.
 *
 * @param numExamples The number of examples (not used by this method).
 * @param numFeatures The number of weights to allocate.
 **/
public void initialize(int numExamples, int numFeatures)
{
  double[] initial = new double[numFeatures];
  for (int i = 0; i < initial.length; ++i) initial[i] = initialWeight;
  weightVector = new SparseWeightVector(initial);
}
/**
 * An LTU returns two scores; one for the negative classification and one
 * for the positive classification. The positive score is the result of
 * {@link #score(int[],double[])} minus the {@link #threshold}; the negative
 * score is its negation. They are stored under the second and the first
 * entry of {@link #allowableValues} respectively, which is read directly
 * from the field.
 *
 * @param exampleFeatures The example's array of feature indices
 * @param exampleValues   The example's array of feature values
 * @return Two scores as described above.
 **/
public ScoreSet scores(int[] exampleFeatures, double[] exampleValues)
{
  final double margin = score(exampleFeatures, exampleValues) - threshold;

  ScoreSet both = new ScoreSet();
  both.put(allowableValues[0], -margin);
  both.put(allowableValues[1], margin);
  return both;
}
/**
 * Returns the classification of the given example as a single feature
 * instead of a {@link FeatureVector}. The prediction stored at index 1 is
 * chosen when the example's score is greater than or equal to
 * {@link #threshold}, and the one at index 0 otherwise.
 *
 * @param f The features array.
 * @param v The values array.
 * @return The classification of the example as a feature.
 **/
public Feature featureValue(int[] f, double[] v)
{
  final boolean positive = score(f, v) >= threshold;
  return predictions.get(positive ? 1 : 0);
}
/**
 * Computes the score for the example and returns the predicted label value
 * as a string: the second entry of {@link #allowableValues} if the score is
 * greater than or equal to {@link #threshold}, or the first entry
 * otherwise.
 *
 * @param exampleFeatures The example's array of feature indices
 * @param exampleValues   The example's array of feature values
 * @return The predicted label value.
 **/
public String discreteValue(int[] exampleFeatures, double[] exampleValues)
{
  final boolean positive =
    score(exampleFeatures, exampleValues) >= threshold;
  return allowableValues[positive ? 1 : 0];
}
/**
 * The default evaluation method: obtains the predicted feature from
 * {@link #featureValue(int[],double[])} (the second allowable value if the
 * score is greater than or equal to {@link #threshold}, the first
 * otherwise) and wraps it in a new {@link FeatureVector}.
 *
 * @param exampleFeatures The example's array of feature indices
 * @param exampleValues   The example's array of feature values
 * @return The computed feature (in a vector).
 **/
public FeatureVector classify(int[] exampleFeatures, double[] exampleValues)
{
  Feature prediction = featureValue(exampleFeatures, exampleValues);
  return new FeatureVector(prediction);
}
/**
 * Computes the score for the specified example object which will be
 * thresholded to make the binary classification. The object is first
 * converted to its array form, whose first two elements are used as the
 * feature indices and the feature values.
 *
 * @param example The example object.
 * @return The score for the given example.
 **/
public double score(Object example)
{
  Object[] parts = getExampleArray(example, false);
  int[] indices = (int[]) parts[0];
  double[] values = (double[]) parts[1];
  return score(indices, values);
}
/**
 * Computes the score for the specified example vector which will be
 * thresholded to make the binary classification: the dot product of the
 * weight vector with the example (passing {@link #initialWeight} to the
 * dot product) plus the {@link #bias}.
 *
 * @param exampleFeatures The example's array of feature indices
 * @param exampleValues   The example's array of feature values
 * @return The score for the given example vector.
 **/
public double score(int[] exampleFeatures, double[] exampleValues)
{
  final double dotProduct =
    weightVector.dot(exampleFeatures, exampleValues, initialWeight);
  return dotProduct + bias;
}
/**
 * Discards what has been learned. After the superclass has reset its own
 * state, the weight vector is replaced by an empty clone of itself, the
 * bias goes back to {@link #initialWeight}, and the current labeler is
 * installed again so that the data derived from it is rebuilt.
 **/
public void forget()
{
  super.forget();

  weightVector = weightVector.emptyClone();
  bias = initialWeight;

  // Re-installing the labeler repopulates the label-related state.
  setLabeler(labeler);
}
/**
 * If the <code>LinearThresholdUnit</code> is mistake driven, this method
 * should be overridden and used to update the internal representation when
 * a mistake is made on a positive example. The default
 * {@link #learn(int[],double[],int[],double[])} calls it when
 * {@link #shouldPromote(boolean,double,double,double)} returns true.
 *
 * @param exampleFeatures The example's array of feature indices
 * @param exampleValues The example's array of feature values
 * @param rate The learning rate at which the weights are updated.
 **/
public abstract void promote(int[] exampleFeatures, double[] exampleValues,
double rate);
/**
 * If the <code>LinearThresholdUnit</code> is mistake driven, this method
 * should be overridden and used to update the internal representation when
 * a mistake is made on a negative example. The default
 * {@link #learn(int[],double[],int[],double[])} calls it when
 * {@link #shouldDemote(boolean,double,double,double)} returns true.
 *
 * @param exampleFeatures The example's array of feature indices
 * @param exampleValues The example's array of feature values
 * @param rate The learning rate at which the weights are updated.
 **/
public abstract void demote(int[] exampleFeatures, double[] exampleValues,
double rate);
/**
 * Writes the learned function's internal representation in binary form.
 * After the superclass's data come, in this order: the number of allowable
 * values (0 when there are none) followed by each value, the initial
 * weight, the threshold, the learning rate, the positive and negative
 * thicknesses, the bias, and finally the weight vector.
 *
 * @param out The output stream.
 **/
public void write(ExceptionlessOutputStream out)
{
  super.write(out);

  final int valueCount =
    allowableValues == null ? 0 : allowableValues.length;
  out.writeInt(valueCount);
  for (int i = 0; i < valueCount; ++i) {
    out.writeString(allowableValues[i]);
  }

  // The field order below must mirror the order used when reading.
  out.writeDouble(initialWeight);
  out.writeDouble(threshold);
  out.writeDouble(learningRate);
  out.writeDouble(positiveThickness);
  out.writeDouble(negativeThickness);
  out.writeDouble(bias);

  weightVector.write(out);
}
/**
 * Reads the binary representation of a learner with this object's run-time
 * type, overwriting any and all learned or manually specified parameters
 * as well as the label lexicon but without modifying the feature lexicon.
 * The fields are consumed in the same order in which
 * {@link #write(ExceptionlessOutputStream)} emits them; a stored count of 0
 * allowable values leaves {@link #allowableValues} <code>null</code>.
 *
 * @param in The input stream.
 **/
public void read(ExceptionlessInputStream in)
{
  super.read(in);

  final int valueCount = in.readInt();
  if (valueCount != 0) {
    allowableValues = new String[valueCount];
    for (int i = 0; i < valueCount; ++i) {
      allowableValues[i] = in.readString();
    }
  }
  else {
    allowableValues = null;
  }

  initialWeight = in.readDouble();
  threshold = in.readDouble();
  learningRate = in.readDouble();
  positiveThickness = in.readDouble();
  negativeThickness = in.readDouble();
  bias = in.readDouble();

  weightVector = SparseWeightVector.readWeightVector(in);
}
/**
 * Returns a clone of this learning algorithm. The copy produced by the
 * superclass is given its own clone of the weight vector (when there is
 * one), so that the two learners do not share weights.
 **/
public Object clone()
{
  LinearThresholdUnit copy = (LinearThresholdUnit) super.clone();
  if (weightVector == null) return copy;

  copy.weightVector = (SparseWeightVector) weightVector.clone();
  return copy;
}
/**
 * Simply a container for all of {@link LinearThresholdUnit}'s configurable
 * parameters. Using instances of this class should make code more
 * readable and constructors less complicated.
 *
 * @author Nick Rizzolo
 **/
public static class Parameters extends Learner.Parameters
{
  /**
   * The rate at which weights are updated; default
   * {@link LinearThresholdUnit#defaultLearningRate}.
   **/
  public double learningRate;
  /** The LTU's weight vector; default is an empty vector. */
  public SparseWeightVector weightVector;
  /**
   * The weight associated with a feature when first added to the vector;
   * default {@link LinearThresholdUnit#defaultInitialWeight}.
   **/
  public double initialWeight;
  /**
   * The score is compared against this value to make predictions; default
   * {@link LinearThresholdUnit#defaultThreshold}.
   **/
  public double threshold;
  /**
   * This thickness will be added to both {@link #positiveThickness} and
   * {@link #negativeThickness}; default
   * {@link LinearThresholdUnit#defaultThickness}.
   **/
  public double thickness;
  /** The thickness of the hyperplane on the positive side; default 0. */
  public double positiveThickness;
  /** The thickness of the hyperplane on the negative side; default 0. */
  public double negativeThickness;


  /**
   * Sets all the default values. The weight vector is a clone of
   * {@link LinearThresholdUnit#defaultWeightVector}; the two per-side
   * thicknesses are not assigned and so stay at 0.
   **/
  public Parameters()
  {
    thickness = defaultThickness;
    threshold = defaultThreshold;
    initialWeight = defaultInitialWeight;
    learningRate = defaultLearningRate;
    weightVector = (SparseWeightVector) defaultWeightVector.clone();
  }


  /**
   * Gives defaults to all parameters declared in this object.
   *
   * <p> NOTE(review): the argument is not read here -- this constructor
   * only delegates to {@link #Parameters()}. If the parent's parameters
   * object carries settings of its own they are not copied; confirm
   * whether that is intended.
   *
   * @param p The parent's parameters object (currently unused).
   **/
  public Parameters(Learner.Parameters p)
  {
    this();
  }


  /**
   * Copy constructor. Copies every field declared in this class; the
   * weight vector reference is shared with <code>p</code>, not cloned.
   *
   * <p> NOTE(review): only fields declared in this class are copied here;
   * nothing is passed on to the parent class's constructor -- confirm.
   *
   * @param p The parameters object to copy.
   **/
  public Parameters(Parameters p)
  {
    weightVector = p.weightVector;
    learningRate = p.learningRate;
    initialWeight = p.initialWeight;
    threshold = p.threshold;
    thickness = p.thickness;
    positiveThickness = p.positiveThickness;
    negativeThickness = p.negativeThickness;
  }


  /**
   * Calls the appropriate <code>Learner.setParameters(Parameters)</code>
   * method for this <code>Parameters</code> object.
   *
   * @param l The learner whose parameters will be set; it is cast to
   *          {@link LinearThresholdUnit}.
   **/
  public void setParameters(Learner l)
  {
    LinearThresholdUnit target = (LinearThresholdUnit) l;
    target.setParameters(this);
  }


  /**
   * Creates a string representation of these parameters in which only
   * those parameters that differ from their default values are mentioned.
   * The text starts with the parent's description, each mentioned
   * parameter is appended as <code>", name = value"</code>, and a leading
   * <code>", "</code> is removed from the final result.
   *
   * @return The description of the non-default parameters.
   **/
  public String nonDefaultString()
  {
    String text = super.nonDefaultString();

    if (learningRate != LinearThresholdUnit.defaultLearningRate) {
      text += ", learningRate = " + learningRate;
    }
    if (initialWeight != LinearThresholdUnit.defaultInitialWeight) {
      text += ", initialWeight = " + initialWeight;
    }
    if (threshold != LinearThresholdUnit.defaultThreshold) {
      text += ", threshold = " + threshold;
    }
    if (thickness != LinearThresholdUnit.defaultThickness) {
      text += ", thickness = " + thickness;
    }
    if (positiveThickness != 0) {
      text += ", positiveThickness = " + positiveThickness;
    }
    if (negativeThickness != 0) {
      text += ", negativeThickness = " + negativeThickness;
    }

    return text.startsWith(", ") ? text.substring(2) : text;
  }
}
}