package LBJ2.learn;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;
import LBJ2.classify.Classifier;
import LBJ2.classify.Feature;
import LBJ2.util.DVector;
import LBJ2.util.ExceptionlessInputStream;
import LBJ2.util.ExceptionlessOutputStream;
/**
* An approximation to voted Perceptron, in which a weighted average of the
* weight vectors arrived at during training becomes the weight vector used
* to make predictions after training.
*
* <p> During training, after each example <i>e<sub>i</sub></i> is processed,
* the weight vector <i>w<sub>i</sub></i> becomes the active weight vector
* used to make predictions on future training examples. If a mistake was
* made on <i>e<sub>i</sub></i>, <i>w<sub>i</sub></i> will differ from
* <i>w<sub>i - 1</sub></i>. Otherwise, it will remain unchanged.
*
* <p> After training, each distinct weight vector arrived at during training
* is associated with an integer weight equal to the number of examples whose
* training made that weight vector active. A new weight vector
* <i>w<sup>*</sup></i> is computed by taking the average of all these weight
* vectors weighted as described. <i>w<sup>*</sup></i> is used to make all
* predictions returned to the user through methods such as
* {@link Classifier#classify(Object)} or
* {@link Classifier#discreteValue(Object)}.
*
* <p> The above description is a useful way to think about the operation of
* this {@link Learner}. However, the user should note that this
* implementation never explicitly stores <i>w<sup>*</sup></i>. Instead, it
* is computed efficiently on demand. Thus, interspersed online training and
* evaluation is efficient and operates as expected.
*
* <p> It is assumed that {@link Learner#labeler} is a single discrete
* classifier that produces the same feature for every example object and
* that the values that feature may take are available through the
* {@link Classifier#allowableValues()} method. The second value returned
* from {@link Classifier#allowableValues()} is treated as "positive", and it
* is assumed there are exactly 2 allowable values. Assertions will produce
* error messages if these assumptions do not hold.
*
* @author Nick Rizzolo
**/
public class SparseAveragedPerceptron extends SparsePerceptron
{
  /** Default for {@link LinearThresholdUnit#weightVector}. */
  public static final AveragedWeightVector defaultWeightVector =
    new AveragedWeightVector();

  /**
   * Holds the same reference as {@link LinearThresholdUnit#weightVector}
   * cast to {@link SparseAveragedPerceptron.AveragedWeightVector}.
   **/
  protected AveragedWeightVector awv;
  /**
   * Keeps the extra information necessary to compute the averaged bias.
   * Each promotion adds <code>examples * rate</code> to it and each demotion
   * subtracts the same quantity, where <code>examples</code> is the example
   * count reported by {@link #awv} at the time of the update.
   **/
  protected double averagedBias;


  /**
   * The learning rate and threshold take default values, while the name of
   * the classifier gets the empty string.
   **/
  public SparseAveragedPerceptron() { this(""); }

  /**
   * Sets the learning rate to the specified value, and the threshold takes
   * the default, while the name of the classifier gets the empty string.
   *
   * @param r  The desired learning rate value.
   **/
  public SparseAveragedPerceptron(double r) { this("", r); }

  /**
   * Sets the learning rate and threshold to the specified values, while the
   * name of the classifier gets the empty string.
   *
   * @param r  The desired learning rate value.
   * @param t  The desired threshold value.
   **/
  public SparseAveragedPerceptron(double r, double t) { this("", r, t); }

  /**
   * Use this constructor to fit a thick separator, where both the positive
   * and negative sides of the hyperplane will be given the specified
   * thickness, while the name of the classifier gets the empty string.
   *
   * @param r  The desired learning rate value.
   * @param t  The desired threshold value.
   * @param pt The desired thickness.
   **/
  public SparseAveragedPerceptron(double r, double t, double pt) {
    this("", r, t, pt);
  }

  /**
   * Use this constructor to fit a thick separator, where the positive and
   * negative sides of the hyperplane will be given the specified separate
   * thicknesses, while the name of the classifier gets the empty string.
   *
   * @param r  The desired learning rate value.
   * @param t  The desired threshold value.
   * @param pt The desired positive thickness.
   * @param nt The desired negative thickness.
   **/
  public SparseAveragedPerceptron(double r, double t, double pt, double nt) {
    this("", r, t, pt, nt);
  }

  /**
   * Initializing constructor.  Sets all member variables to their associated
   * settings in the {@link SparseAveragedPerceptron.Parameters} object.
   *
   * @param p  The settings of all parameters.
   **/
  public SparseAveragedPerceptron(SparseAveragedPerceptron.Parameters p) {
    this("", p);
  }

  /**
   * The learning rate and threshold take default values.
   *
   * @param n  The name of the classifier.
   **/
  public SparseAveragedPerceptron(String n) { this(n, defaultLearningRate); }

  /**
   * Sets the learning rate to the specified value, and the threshold takes
   * the default.
   *
   * @param n  The name of the classifier.
   * @param r  The desired learning rate value.
   **/
  public SparseAveragedPerceptron(String n, double r) {
    this(n, r, defaultThreshold);
  }

  /**
   * Sets the learning rate and threshold to the specified values.
   *
   * @param n  The name of the classifier.
   * @param r  The desired learning rate value.
   * @param t  The desired threshold value.
   **/
  public SparseAveragedPerceptron(String n, double r, double t) {
    this(n, r, t, defaultThickness);
  }

  /**
   * Use this constructor to fit a thick separator, where both the positive
   * and negative sides of the hyperplane will be given the specified
   * thickness.
   *
   * @param n  The name of the classifier.
   * @param r  The desired learning rate value.
   * @param t  The desired threshold value.
   * @param pt The desired thickness.
   **/
  public SparseAveragedPerceptron(String n, double r, double t, double pt) {
    this(n, r, t, pt, pt);
  }

  /**
   * Use this constructor to fit a thick separator, where the positive and
   * negative sides of the hyperplane will be given the specified separate
   * thicknesses.  The values are packed into a fresh
   * {@link SparseAveragedPerceptron.Parameters} object (which supplies a
   * clone of {@link #defaultWeightVector}) and applied through
   * {@link #setParameters(Parameters)}.
   *
   * @param n  The name of the classifier.
   * @param r  The desired learning rate value.
   * @param t  The desired threshold value.
   * @param pt The desired positive thickness.
   * @param nt The desired negative thickness.
   **/
  public SparseAveragedPerceptron(String n, double r, double t, double pt,
                                  double nt) {
    super(n);
    Parameters p = new Parameters();
    p.learningRate = r;
    p.threshold = t;
    p.positiveThickness = pt;
    p.negativeThickness = nt;
    setParameters(p);
  }

  /**
   * Initializing constructor.  Sets all member variables to their associated
   * settings in the {@link SparseAveragedPerceptron.Parameters} object.
   *
   * @param n  The name of the classifier.
   * @param p  The settings of all parameters.
   **/
  public SparseAveragedPerceptron(String n,
                                  SparseAveragedPerceptron.Parameters p) {
    super(n);
    setParameters(p);
  }


  /**
   * Retrieves the parameters that are set in this learner.
   *
   * @return An object containing all the values of the parameters that
   *         control the behavior of this learning algorithm.
   **/
  public Learner.Parameters getParameters() {
    return
      new Parameters((SparsePerceptron.Parameters) super.getParameters());
  }


  /**
   * Sets the values of parameters that control the behavior of this learning
   * algorithm, then refreshes {@link #awv} from the learner's weight vector.
   * The cast performed here fails with a <code>ClassCastException</code> if
   * that weight vector is not an
   * {@link SparseAveragedPerceptron.AveragedWeightVector}.
   *
   * @param p  The parameters.
   **/
  public void setParameters(Parameters p) {
    super.setParameters(p);
    awv = (AveragedWeightVector) weightVector;
  }


  /**
   * The score of the specified object is equal to <code>w * x + b</code>
   * where <code>*</code> is dot product, <code>w</code> is the
   * <i>averaged</i> weight vector, <code>x</code> is the feature vector
   * produced by the extractor, and <code>b</code> is the averaged bias,
   * computed as <code>(examples * bias - averagedBias) / examples</code>.
   * When the weight vector has seen no examples, no bias term is added.
   *
   * @param exampleFeatures  The example's array of feature indices.
   * @param exampleValues    The example's array of feature values.
   * @return The result of the dot product plus the averaged bias.
   **/
  public double score(int[] exampleFeatures, double[] exampleValues) {
    double result = awv.dot(exampleFeatures, exampleValues, initialWeight);
    int examples = awv.getExamples();
    // Guard against dividing by zero before any example has been seen.
    if (examples > 0)
      result += (examples * bias - averagedBias) / (double) examples;
    return result;
  }


  /**
   * Scales the feature vector produced by the extractor by the given rate
   * and adds it to the weight vector.  The bias is increased by the same
   * rate, and {@link #averagedBias} is increased by the rate multiplied by
   * the number of examples seen before this update.
   *
   * @param exampleFeatures  The example's array of feature indices.
   * @param exampleValues    The example's array of feature values.
   * @param rate             The amount by which the example is scaled.
   **/
  public void promote(int[] exampleFeatures, double[] exampleValues,
                      double rate) {
    bias += rate;
    // Read the count before scaledAdd() increments it.
    int examples = awv.getExamples();
    averagedBias += examples * rate;
    awv.scaledAdd(exampleFeatures, exampleValues, rate, initialWeight);
  }


  /**
   * Scales the feature vector produced by the extractor by the given rate
   * and subtracts it from the weight vector.  The bias is decreased by the
   * same rate, and {@link #averagedBias} is decreased by the rate multiplied
   * by the number of examples seen before this update.
   *
   * @param exampleFeatures  The example's array of feature indices.
   * @param exampleValues    The example's array of feature values.
   * @param rate             The amount by which the example is scaled.
   **/
  public void demote(int[] exampleFeatures, double[] exampleValues,
                     double rate) {
    bias -= rate;
    // Read the count before scaledAdd() increments it.
    int examples = awv.getExamples();
    averagedBias -= examples * rate;
    awv.scaledAdd(exampleFeatures, exampleValues, -rate, initialWeight);
  }


  /**
   * This method works just like
   * {@link LinearThresholdUnit#learn(int[],double[],int[],double[])}, except
   * it notifies its weight vector when it got an example correct in addition
   * to updating it when it makes a mistake.  The decision is made with the
   * current (non-averaged) weights and bias.
   *
   * @param exampleFeatures  The example's array of feature indices
   * @param exampleValues    The example's array of feature values
   * @param exampleLabels    The example's label(s)
   * @param labelValues      The labels' values
   **/
  public void learn(int[] exampleFeatures, double[] exampleValues,
                    int[] exampleLabels, double[] labelValues) {
    assert exampleLabels.length == 1
      : "Example must have a single label.";
    assert exampleLabels[0] == 0 || exampleLabels[0] == 1
      : "Example has unallowed label value.";

    boolean label = (exampleLabels[0] == 1);

    double s =
      awv.simpleDot(exampleFeatures, exampleValues, initialWeight) + bias;
    if (label && s < threshold + positiveThickness)
      promote(exampleFeatures, exampleValues, getLearningRate());
    else if (!label && s >= threshold - negativeThickness)
      demote(exampleFeatures, exampleValues, getLearningRate());
    // No update was needed; the example still counts toward the average.
    else awv.correctExample();
  }


  /**
   * Initializes the weight vector array to the size of
   * the supplied number of features, with each cell taking
   * the default value of {@link #initialWeight}.  The number of examples is
   * not used by this implementation.
   *
   * @param numExamples  The number of examples
   * @param numFeatures  The number of features
   **/
  public void initialize(int numExamples, int numFeatures) {
    double[] weights = new double[numFeatures];
    Arrays.fill(weights, initialWeight);
    weightVector = awv = new AveragedWeightVector(weights);
  }


  /**
   * Discards what has been learned: delegates to the superclass, re-caches
   * the weight vector reference in {@link #awv}, and zeroes
   * {@link #averagedBias}.
   **/
  public void forget() {
    super.forget();
    awv = (AveragedWeightVector) weightVector;
    averagedBias = 0;
  }


  /**
   * Writes the algorithm's internal representation as text.  In the first
   * line of output, the name of the classifier is printed, followed by
   * {@link SparsePerceptron#learningRate},
   * {@link LinearThresholdUnit#initialWeight},
   * {@link LinearThresholdUnit#threshold},
   * {@link LinearThresholdUnit#positiveThickness},
   * {@link LinearThresholdUnit#negativeThickness},
   * {@link LinearThresholdUnit#bias}, and finally {@link #averagedBias}.
   * The averaged weights follow, labeled with their features when a
   * non-empty lexicon is available.
   *
   * @param out  The output stream.
   **/
  public void write(PrintStream out) {
    out.println(name + ": " + learningRate + ", " + initialWeight + ", "
                + threshold + ", " + positiveThickness + ", "
                + negativeThickness + ", " + bias + ", " + averagedBias);
    if (lexicon == null || lexicon.size() == 0) awv.write(out);
    else awv.write(out, lexicon);
  }


  /**
   * Writes the learned function's internal representation in binary form:
   * the superclass's representation followed by {@link #averagedBias}.
   *
   * @param out  The output stream.
   **/
  public void write(ExceptionlessOutputStream out) {
    super.write(out);
    out.writeDouble(averagedBias);
  }


  /**
   * Reads the binary representation of a learner with this object's run-time
   * type, overwriting any and all learned or manually specified parameters
   * as well as the label lexicon but without modifying the feature lexicon.
   * The fields are read in the order in which
   * {@link #write(ExceptionlessOutputStream)} writes them.
   *
   * @param in The input stream.
   **/
  public void read(ExceptionlessInputStream in) {
    super.read(in);
    awv = (AveragedWeightVector) weightVector;
    averagedBias = in.readDouble();
  }


  /**
   * Simply a container for all of {@link SparseAveragedPerceptron}'s
   * configurable parameters.  Using instances of this class should make code
   * more readable and constructors less complicated.  Note that if the
   * object referenced by {@link LinearThresholdUnit.Parameters#weightVector}
   * is replaced via an instance of this class, it must be replaced with an
   * {@link SparseAveragedPerceptron.AveragedWeightVector}.
   *
   * @author Nick Rizzolo
   **/
  public static class Parameters extends SparsePerceptron.Parameters
  {
    /**
     * Sets all the default values; the weight vector becomes a clone of
     * {@link SparseAveragedPerceptron#defaultWeightVector}.
     **/
    public Parameters() {
      weightVector = (AveragedWeightVector) defaultWeightVector.clone();
    }


    /**
     * Sets the parameters from the parent's parameters object, giving
     * defaults to all parameters declared in this object.
     *
     * @param p  The parent's parameters object.
     **/
    public Parameters(SparsePerceptron.Parameters p) { super(p); }


    /**
     * Copy constructor.
     *
     * @param p  The object to copy.
     **/
    public Parameters(Parameters p) { super(p); }


    /**
     * Calls the appropriate <code>Learner.setParameters(Parameters)</code>
     * method for this <code>Parameters</code> object.
     *
     * @param l  The learner whose parameters will be set; it must be a
     *           {@link SparseAveragedPerceptron}.
     **/
    public void setParameters(Learner l) {
      ((SparseAveragedPerceptron) l).setParameters(this);
    }
  }


  /**
   * This implementation of a sparse weight vector associates two
   * <code>double</code>s with each {@link Feature}.  The first plays the
   * role of the usual weight vector, and the second accumulates multiples of
   * examples on which mistakes were made to help implement the weighted
   * average.
   *
   * @author Nick Rizzolo
   **/
  public static class AveragedWeightVector extends SparseWeightVector
  {
    /**
     * Together with {@link SparseWeightVector#weights}, this vector provides
     * enough information to reconstruct the average of all weight vectors
     * arrived at during the course of learning.
     **/
    public DVector averagedWeights;
    /** Counts the total number of training examples this vector has seen. */
    protected int examples;


    /** Simply instantiates the weight vectors. */
    public AveragedWeightVector() { this(new DVector(defaultCapacity)); }

    /**
     * Simply initializes the weight vectors.
     *
     * @param w  An array of weights.
     **/
    public AveragedWeightVector(double[] w) { this(new DVector(w)); }

    /**
     * Simply initializes the weight vectors.  The superclass receives a
     * clone of the argument, while {@link #averagedWeights} keeps the
     * argument itself, so the two vectors are distinct objects.
     *
     * @param w  A vector of weights.
     **/
    public AveragedWeightVector(DVector w) {
      super((DVector) w.clone());
      averagedWeights = w;
    }


    /** Increments the {@link #examples} variable. */
    public void correctExample() { ++examples; }


    /** Returns the {@link #examples} variable. */
    public int getExamples() { return examples; }


    /**
     * Returns the averaged weight of the given feature, computed as
     * <code>(examples * w - aw) / examples</code> where <code>w</code> is
     * the feature's current weight and <code>aw</code> is its entry in
     * {@link #averagedWeights}.  Returns 0 when no examples have been seen.
     *
     * @param featureIndex  The feature index.
     * @param defaultW      The default weight.
     * @return The averaged weight of the feature.
     **/
    public double getAveragedWeight(int featureIndex, double defaultW) {
      if (examples == 0) return 0;
      // NOTE(review): absent accumulator entries are read with defaultW
      // here but with defaultWeight in updateAveragedWeight() -- confirm
      // that this difference is intended.
      double aw = averagedWeights.get(featureIndex, defaultW);
      double w = getWeight(featureIndex, defaultW);
      return (examples*w - aw) / (double) examples;
    }


    /**
     * Takes the dot product of this <code>AveragedWeightVector</code> with
     * the argument vector, using the hard coded default weight.
     *
     * @param exampleFeatures  The example's array of feature indices.
     * @param exampleValues    The example's array of feature values.
     * @return The computed dot product.
     **/
    public double dot(int[] exampleFeatures, double[] exampleValues) {
      return dot(exampleFeatures, exampleValues, defaultWeight);
    }


    /**
     * Takes the dot product of this <code>AveragedWeightVector</code> with
     * the argument vector, using the specified default weight when one is
     * not yet present in this vector.  The averaged weights (see
     * {@link #getAveragedWeight(int,double)}) are the ones used.
     *
     * @param exampleFeatures  The example's array of feature indices.
     * @param exampleValues    The example's array of feature values.
     * @param defaultW         The default weight.
     * @return The computed dot product.
     **/
    public double dot(int[] exampleFeatures, double[] exampleValues,
                      double defaultW) {
      double sum = 0;

      for (int i = 0; i < exampleFeatures.length; i++) {
        double w = getAveragedWeight(exampleFeatures[i], defaultW);
        sum += w * exampleValues[i];
      }

      return sum;
    }


    /**
     * Takes the dot product of the regular, non-averaged, Perceptron weight
     * vector with the given vector, using the hard coded default weight.
     *
     * @param exampleFeatures  The example's array of feature indices.
     * @param exampleValues    The example's array of feature values.
     * @return The computed dot product.
     **/
    public double simpleDot(int[] exampleFeatures, double[] exampleValues) {
      return super.dot(exampleFeatures, exampleValues, defaultWeight);
    }


    /**
     * Takes the dot product of the regular, non-averaged, Perceptron weight
     * vector with the given vector, using the specified default weight when
     * a feature is not yet present in this vector.
     *
     * @param exampleFeatures  The example's array of feature indices.
     * @param exampleValues    The example's array of feature values.
     * @param defaultW         An initial weight for new features.
     * @return The computed dot product.
     **/
    public double simpleDot(int[] exampleFeatures, double[] exampleValues,
                            double defaultW) {
      return super.dot(exampleFeatures, exampleValues, defaultW);
    }


    /**
     * Performs pairwise addition of the feature values in the given vector
     * scaled by the given factor, modifying this weight vector, using the
     * hard coded default weight when a feature from the given vector is not
     * yet present in this vector.
     *
     * @param exampleFeatures  The example's array of feature indices.
     * @param exampleValues    The example's array of feature values.
     * @param factor           The scaling factor.
     **/
    public void scaledAdd(int[] exampleFeatures, double[] exampleValues,
                          double factor) {
      scaledAdd(exampleFeatures, exampleValues, factor, defaultWeight);
    }


    /**
     * Performs pairwise addition of the feature values in the given vector
     * scaled by the given factor, modifying this weight vector, using the
     * specified default weight when a feature from the given vector is not
     * yet present in this vector.  For every feature, the change in weight
     * multiplied by the current example count is also added to
     * {@link #averagedWeights}.  The example count is incremented once per
     * call, after all features have been processed.
     *
     * @param exampleFeatures  The example's array of feature indices.
     * @param exampleValues    The example's array of feature values.
     * @param factor           The scaling factor.
     * @param defaultW         An initial weight for new features.
     **/
    public void scaledAdd(int[] exampleFeatures, double[] exampleValues,
                          double factor, double defaultW) {
      for (int i = 0; i < exampleFeatures.length; i++) {
        int featureIndex = exampleFeatures[i];
        double currentWeight = getWeight(featureIndex, defaultW);
        double w = currentWeight + factor*exampleValues[i];

        double difference = w - currentWeight;
        updateAveragedWeight(featureIndex, examples*difference);
        setWeight(featureIndex, w);
      }

      ++examples;
    }


    /**
     * Adds a new value to the current averaged weight indexed
     * by the supplied feature index.  Entries not yet present are read and
     * filled in using the inherited <code>defaultWeight</code>.
     *
     * @param featureIndex  The feature index.
     * @param w             The value to add to the current weight.
     **/
    protected void updateAveragedWeight(int featureIndex, double w) {
      double newWeight = averagedWeights.get(featureIndex, defaultWeight) + w;
      averagedWeights.set(featureIndex, newWeight, defaultWeight);
    }


    /**
     * Outputs the averaged weights of this
     * <code>AveragedWeightVector</code> into the specified
     * <code>PrintStream</code>.  The string representation starts with a
     * <code>"Begin"</code> annotation, ends with an <code>"End"</code>
     * annotation, and without a <code>Lexicon</code> passed as a parameter,
     * the weights are simply printed in the order of their integer indices.
     *
     * @param out  The stream to write to.
     **/
    public void write(PrintStream out) {
      out.println("Begin AveragedWeightVector");
      for (int i = 0; i < averagedWeights.size(); ++i)
        out.println(getAveragedWeight(i, 0));
      out.println("End AveragedWeightVector");
    }


    /**
     * Outputs the averaged weights of this
     * <code>AveragedWeightVector</code> into the specified
     * <code>PrintStream</code>.  The string representation starts with a
     * <code>"Begin"</code> annotation, ends with an <code>"End"</code>
     * annotation, and lists each feature with its corresponding weight on
     * the same, separate line in between.  Features whose index lies inside
     * the weight vector are listed first; the remaining ones follow, marked
     * <code>" (pruned)"</code>.  Within each group the features are sorted
     * by their own ordering.
     *
     * @param out  The stream to write to.
     * @param lex  The feature lexicon.
     **/
    public void write(PrintStream out, Lexicon lex) {
      out.println("Begin AveragedWeightVector");

      Map map = lex.getMap();
      Map.Entry[] entries =
        (Map.Entry[]) map.entrySet().toArray(new Map.Entry[map.size()]);
      Arrays.sort(entries,
          new Comparator() {
            public int compare(Object o1, Object o2) {
              Map.Entry e1 = (Map.Entry) o1;
              Map.Entry e2 = (Map.Entry) o2;
              int i1 = lexiconIndex(e1);
              int i2 = lexiconIndex(e2);
              // Exactly one of the two indices is in range here, so they
              // cannot be equal; compare explicitly instead of
              // subtracting.
              if ((i1 < weights.size()) != (i2 < weights.size()))
                return i1 < i2 ? -1 : 1;
              return ((Feature) e1.getKey()).compareTo(e2.getKey());
            }
          });

      // Find the widest label so the weights line up in one column.
      int i, biggest = 0;
      for (i = 0; i < entries.length; ++i)
        biggest = Math.max(biggest, lexiconLabel(entries[i]).length());

      // Round the column width up to the next odd number, leaving at
      // least one space after the widest label.
      if (biggest % 2 == 0) biggest += 2;
      else ++biggest;

      for (i = 0; i < entries.length; ++i) {
        String key = lexiconLabel(entries[i]);
        out.print(key);
        for (int j = 0; key.length() + j < biggest; ++j) out.print(" ");

        int index = lexiconIndex(entries[i]);
        double weight = getAveragedWeight(index, 0);
        out.println(weight);
      }

      out.println("End AveragedWeightVector");
    }


    /**
     * Extracts the integer feature index stored as the value of a lexicon
     * map entry.
     *
     * @param entry  An entry of the lexicon's map.
     * @return The entry's value as an <code>int</code>.
     **/
    private static int lexiconIndex(Map.Entry entry) {
      return ((Integer) entry.getValue()).intValue();
    }


    /**
     * Builds the label printed for a lexicon map entry: the key's string
     * form, followed by <code>" (pruned)"</code> when the entry's index is
     * not smaller than the current size of the weight vector.
     *
     * @param entry  An entry of the lexicon's map.
     * @return The label to print for the entry.
     **/
    private String lexiconLabel(Map.Entry entry) {
      return entry.getKey().toString()
             + (lexiconIndex(entry) < weights.size() ? "" : " (pruned)");
    }


    /**
     * Writes the weight vector's internal representation in binary form:
     * the superclass's representation, then {@link #examples}, then
     * {@link #averagedWeights}.
     *
     * @param out  The output stream.
     **/
    public void write(ExceptionlessOutputStream out) {
      super.write(out);
      out.writeInt(examples);
      averagedWeights.write(out);
    }


    /**
     * Reads the representation of a weight vector with this object's
     * run-time type from the given stream, overwriting the data in this
     * object.
     *
     * <p> This method is appropriate for reading weight vectors as written
     * by {@link #write(ExceptionlessOutputStream)}.
     *
     * @param in  The input stream.
     **/
    public void read(ExceptionlessInputStream in) {
      super.read(in);
      examples = in.readInt();
      averagedWeights.read(in);
    }


    /**
     * Returns a copy of this <code>AveragedWeightVector</code>.  The copy
     * receives its own clone of {@link #averagedWeights}.
     *
     * @return A copy of this <code>AveragedWeightVector</code>.
     **/
    public Object clone() {
      AveragedWeightVector clone = (AveragedWeightVector) super.clone();
      clone.averagedWeights = (DVector) averagedWeights.clone();
      return clone;
    }


    /**
     * Returns a new, empty weight vector with the same parameter settings as
     * this one.
     *
     * @return An empty weight vector.
     **/
    public SparseWeightVector emptyClone() {
      return new AveragedWeightVector();
    }
  }
}