package LBJ2.learn;
import java.io.PrintStream;
import LBJ2.classify.FeatureVector;
import LBJ2.util.ExceptionlessInputStream;
import LBJ2.util.ExceptionlessOutputStream;
/**
* This is an implementation of the approximate "variance algorithm" of
* <i>Confidence Weighted Linear Classification</i>, Dredze, et al. (ICML,
* 2008). This algorithm envisions each parameter stored in a linear
* threshold unit's weight vector as having been drawn independently from a
* normal distribution with an independent mean and variance representing our
* estimate and confidence in that parameter. Given a training example, this
* algorithm then tries to find new values for all these means and
* confidences such that both of the following hold:
*
* <ul>
* <li> the KL-divergence between the old and new distributions is
* minimized, and
* <li> the current example is classified correctly when a weight vector is
* drawn according to the current distributions with user-specified
* confidence.
* </ul>
*
* <p> In this implementation, the user-specified confidence parameter is a
* real value representing the result of applying the inverse cumulative
* distribution function of the normal distribution to a probability (i.e.,
* to a real value greater than or equal to 0 and less than or equal to 1).
* The inverse of the normal CDF is a monotonically increasing function.
*
* <p> It is assumed that {@link Learner#labeler} is a single discrete
* classifier that produces the same feature for every example object and
* that the values that feature may take are available through the
* {@link LBJ2.classify.Classifier#allowableValues()} method. The second
* value returned from {@link LBJ2.classify.Classifier#allowableValues()} is
* treated as "positive", and it is assumed there are exactly 2 allowable
* values. Assertions will produce error messages if these assumptions do
* not hold.
*
* <p> This algorithm's user-configurable parameters are stored in member
* fields of this class. They may be set via either a constructor that names
* each parameter explicitly or a constructor that takes an instance of
* {@link LBJ2.learn.SparseConfidenceWeighted.Parameters Parameters} as
* input. The documentation in each member field in this class indicates the
* default value of the associated parameter when using the former type of
* constructor. The documentation of the associated member field in the
* {@link LBJ2.learn.SparseConfidenceWeighted.Parameters Parameters} class
* indicates the default value of the parameter when using the latter type of
* constructor.
*
* @author Nick Rizzolo
**/
public class SparseConfidenceWeighted extends LinearThresholdUnit
{
  /** Default value for {@link #confidence}. */
  public static final double defaultConfidence = 2;
  /** Default value for {@link #initialVariance}. */
  public static final double defaultInitialVariance = 1;

  /**
   * The confidence parameter as described above; default
   * {@link #defaultConfidence}.
   **/
  protected double confidence;
  /**
   * The strictly positive initial variance of the parameters; default
   * {@link #defaultInitialVariance}.
   **/
  protected double initialVariance;
  /**
   * The <i>inverses of</i> the current variances of the parameters.  Storing
   * inverses lets {@link #learn(int[],double[],int[],double[])} update this
   * vector with a simple scaled addition rather than a division.
   **/
  protected SparseWeightVector variances;
  /**
   * The bias element of the {@link #variances} vector; like the rest of that
   * vector, it holds the <i>inverse</i> of a variance.
   **/
  protected double variancesBias;


  /** All parameters get default values. */
  public SparseConfidenceWeighted() { this(""); }

  /**
   * Sets the {@link #confidence} parameter.
   *
   * @param c  The desired confidence value.
   **/
  public SparseConfidenceWeighted(double c) { this("", c); }

  /**
   * Sets the {@link #confidence} and {@link #initialVariance} parameters.
   *
   * @param c  The desired confidence value.
   * @param v  The desired initial variance.
   **/
  public SparseConfidenceWeighted(double c, double v) {
    this("", c, v);
  }

  /**
   * Sets the {@link #confidence}, {@link #initialVariance}, and
   * {@link LinearThresholdUnit#weightVector} parameters.
   *
   * @param c   The desired confidence value.
   * @param v   The desired initial variance.
   * @param vm  An empty sparse weight vector of means, perhaps of an
   *            alternative subclass of {@link SparseWeightVector}.
   **/
  public SparseConfidenceWeighted(double c, double v, SparseWeightVector vm) {
    this("", c, v, vm);
  }

  /**
   * Sets the {@link #confidence}, {@link #initialVariance},
   * {@link LinearThresholdUnit#weightVector}, and {@link #variances}
   * parameters.  Make sure that the references passed to the last two
   * arguments refer to different objects.
   *
   * @param c   The desired confidence value.
   * @param v   The desired initial variance.
   * @param vm  An empty sparse weight vector of means, perhaps of an
   *            alternative subclass of {@link SparseWeightVector}.
   * @param vv  An empty sparse weight vector of variances, perhaps of an
   *            alternative subclass of {@link SparseWeightVector}.
   **/
  public SparseConfidenceWeighted(double c, double v, SparseWeightVector vm,
                                  SparseWeightVector vv) {
    this("", c, v, vm, vv);
  }

  /**
   * Initializing constructor.  Sets all member variables to their associated
   * settings in the {@link SparseConfidenceWeighted.Parameters} object.
   *
   * @param p  The settings of all parameters.
   **/
  public SparseConfidenceWeighted(Parameters p) { this("", p); }

  /**
   * All parameters get default values.
   *
   * @param n  The name of the classifier.
   **/
  public SparseConfidenceWeighted(String n) { this(n, defaultConfidence); }

  /**
   * Sets the {@link #confidence} parameter.
   *
   * @param n  The name of the classifier.
   * @param c  The desired confidence value.
   **/
  public SparseConfidenceWeighted(String n, double c) {
    this(n, c, defaultInitialVariance);
  }

  /**
   * Sets the {@link #confidence} and {@link #initialVariance} parameters.
   *
   * @param n  The name of the classifier.
   * @param c  The desired confidence value.
   * @param v  The desired initial variance.
   **/
  public SparseConfidenceWeighted(String n, double c, double v) {
    this(n, c, v, (SparseWeightVector) defaultWeightVector.clone());
  }

  /**
   * Sets the {@link #confidence}, {@link #initialVariance}, and
   * {@link LinearThresholdUnit#weightVector} parameters.
   *
   * @param n   The name of the classifier.
   * @param c   The desired confidence value.
   * @param v   The desired initial variance.
   * @param vm  An empty sparse weight vector of means, perhaps of an
   *            alternative subclass of {@link SparseWeightVector}.
   **/
  public SparseConfidenceWeighted(String n, double c, double v,
                                  SparseWeightVector vm) {
    this(n, c, v, vm, (SparseWeightVector) defaultWeightVector.clone());
  }

  /**
   * Sets the {@link #confidence}, {@link #initialVariance},
   * {@link LinearThresholdUnit#weightVector}, and {@link #variances}
   * parameters.  Make sure that the references passed to the last two
   * arguments refer to different objects.
   *
   * @param n   The name of the classifier.
   * @param c   The desired confidence value.
   * @param v   The desired initial variance.
   * @param vm  An empty sparse weight vector of means, perhaps of an
   *            alternative subclass of {@link SparseWeightVector}.
   * @param vv  An empty sparse weight vector of variances, perhaps of an
   *            alternative subclass of {@link SparseWeightVector}.
   **/
  public SparseConfidenceWeighted(String n, double c, double v,
                                  SparseWeightVector vm,
                                  SparseWeightVector vv) {
    super(n);
    // Funnel all explicit-argument construction through setParameters() so
    // that initialization logic lives in exactly one place.
    Parameters p = new Parameters();
    p.confidence = c;
    p.initialVariance = v;
    p.weightVector = vm;
    p.variances = vv;
    setParameters(p);
  }

  /**
   * Initializing constructor.  Sets all member variables to their associated
   * settings in the {@link SparseConfidenceWeighted.Parameters} object.
   *
   * @param n  The name of the classifier.
   * @param p  The settings of all parameters.
   **/
  public SparseConfidenceWeighted(String n, Parameters p) {
    super(n);
    setParameters(p);
  }


  /**
   * Sets the values of parameters that control the behavior of this learning
   * algorithm.
   *
   * @param p  The parameters.
   **/
  public void setParameters(Parameters p) {
    super.setParameters(p);
    confidence = p.confidence;
    initialVariance = p.initialVariance;
    variances = p.variances;
    // The variances vector stores inverses, so the bias confidence starts at
    // the inverse of the initial variance.
    variancesBias = 1 / initialVariance;
  }


  /**
   * Retrieves the parameters that are set in this learner.
   *
   * @return An object containing all the values of the parameters that
   *         control the behavior of this learning algorithm.
   **/
  public Learner.Parameters getParameters() {
    Parameters p =
      new Parameters((LinearThresholdUnit.Parameters) super.getParameters());
    p.confidence = confidence;
    p.initialVariance = initialVariance;
    // An empty clone is stored so that the returned object conveys the
    // run-time type of the variance vector without sharing this learner's
    // learned state.
    p.variances = variances.emptyClone();
    return p;
  }


  /**
   * Returns the current value of the {@link #confidence} variable.
   *
   * @return The value of the {@link #confidence} variable.
   **/
  public double getConfidence() { return confidence; }

  /**
   * Sets the {@link #confidence} member variable to the specified
   * value.
   *
   * @param c  The new value for {@link #confidence}.
   **/
  public void setConfidence(double c) { confidence = c; }

  /**
   * Returns the current value of the {@link #initialVariance} variable.
   *
   * @return The value of the {@link #initialVariance} variable.
   **/
  public double getInitialVariance() { return initialVariance; }

  /**
   * Sets the {@link #initialVariance} member variable to the specified
   * value.
   *
   * @param v  The new value for {@link #initialVariance}.
   **/
  public void setInitialVariance(double v) { initialVariance = v; }


  /**
   * Updates the means and variances according to the new labeled example,
   * following the closed-form "variance algorithm" update of Dredze, et al.
   * (ICML, 2008).  No update is performed when the example is already
   * classified correctly with the requested confidence.
   *
   * @param exampleFeatures  The example's array of feature indices
   * @param exampleValues    The example's array of feature values
   * @param exampleLabels    The example's label(s)
   * @param labelValues      The labels' values
   **/
  public void learn(int[] exampleFeatures, double[] exampleValues,
                    int[] exampleLabels, double[] labelValues) {
    assert exampleLabels.length == 1
      : "Example must have a single label.";
    assert exampleLabels[0] == 0 || exampleLabels[0] == 1
      : "Example has unallowed label value.";

    // Map the {0, 1} label to y in {-1, +1}.
    double y = 2 * exampleLabels[0] - 1;
    // m: the signed margin of the current mean weight vector on this example.
    double m = y * (weightVector.dot(exampleFeatures, exampleValues) + bias);
    // sigmaX: the example scaled coordinate-wise by the parameter variances
    // (Sigma x).  Since {@link #variances} stores inverted variances,
    // pairwiseMultiply is presumably responsible for inverting them and for
    // applying initialVariance to unseen features -- TODO confirm against
    // SparseWeightVector.pairwiseMultiply.
    Object sigmaX[] =
      variances.pairwiseMultiply(exampleFeatures, exampleValues,
                                 initialVariance, true);
    int sigmaXFeatures[] = (int[])sigmaX[0];
    double sigmaXValues[] = (double[])sigmaX[1];
    // v: the variance of the margin, x' Sigma x plus the bias variance
    // (recovered from its stored inverse).
    double v =
      FeatureVector.dot(exampleFeatures, exampleValues, sigmaXFeatures,
                        sigmaXValues)
      + 1 / variancesBias;
    // Solve the quadratic in alpha that arises from the CW "variance"
    // approximation; the positive root is the Lagrange multiplier.
    double t = 2 * confidence * m + 1;
    double sqrtTerm = t * t - 8 * confidence * (m - confidence * v);
    double alpha = (-t + Math.sqrt(sqrtTerm)) / (4 * confidence * v);

    // alpha <= 0 means the confidence constraint is already satisfied, so
    // the distributions are left unchanged.
    if (alpha > 0) {
      // Means move in the direction y * Sigma x; the (inverted) variances
      // grow additively in the example's coordinates, i.e. confidence in
      // those parameters increases.
      weightVector.scaledAdd(sigmaXFeatures, sigmaXValues, alpha * y);
      bias += alpha * y / variancesBias;
      variances.scaledAdd(exampleFeatures, exampleValues,
                          2 * alpha * confidence);
      variancesBias += 2 * alpha * confidence;
    }
  }


  /**
   * This method does nothing.  The entire implementation is in
   * {@link #learn(Object)}.
   */
  public void demote(int[] exampleFeatures, double[] exampleValues,
                     double rate) {
  }

  /**
   * This method does nothing.  The entire implementation is in
   * {@link #learn(Object)}.
   */
  public void promote(int[] exampleFeatures, double[] exampleValues,
                      double rate) {
  }


  /**
   * Reinitializes the learner to the state it started at before any learning
   * was performed.
   **/
  public void forget() {
    super.forget();
    // Discard the learned confidences; every parameter reverts to the
    // (inverted) initial variance.
    variances = variances.emptyClone();
    variancesBias = 1 / initialVariance;
  }


  /**
   * Writes the algorithm's internal representation as text.  In the first
   * line of output, the name of the classifier is printed, followed by
   * {@link #confidence} and {@link #initialVariance}.  Next, the line
   * <code>Means:</code> is followed by the contents of
   * {@link LinearThresholdUnit#weightVector}.  Finally, the line
   * <code>Variances:</code>, preceded by a blank line, is followed by the
   * contents of {@link #variances}.
   *
   * @param out  The output stream.
   **/
  public void write(PrintStream out) {
    out.println(name + ": " + confidence + ", " + initialVariance);
    out.println("Means:");
    // An empty lexicon means features cannot be named, so the vectors are
    // written by index only.
    if (lexicon.size() == 0) weightVector.write(out);
    else weightVector.write(out, lexicon);
    out.println("\nVariances:");
    if (lexicon.size() == 0) variances.write(out);
    else variances.write(out, lexicon);
  }


  /**
   * Writes the learned function's internal representation in binary form.
   *
   * @param out  The output stream.
   **/
  public void write(ExceptionlessOutputStream out) {
    super.write(out);
    out.writeDouble(confidence);
    out.writeDouble(initialVariance);
    out.writeDouble(variancesBias);
    variances.write(out);
  }


  /**
   * Reads the binary representation of a learner with this object's run-time
   * type, overwriting any and all learned or manually specified parameters
   * as well as the label lexicon but without modifying the feature lexicon.
   *
   * <p> Fields are read in the same order they are written by
   * {@link #write(ExceptionlessOutputStream)}.
   *
   * @param in  The input stream.
   **/
  public void read(ExceptionlessInputStream in) {
    super.read(in);
    confidence = in.readDouble();
    initialVariance = in.readDouble();
    variancesBias = in.readDouble();
    variances = SparseWeightVector.readWeightVector(in);
  }


  /** Returns a deep clone of this learning algorithm. */
  public Object clone() {
    SparseConfidenceWeighted clone = null;
    try { clone = (SparseConfidenceWeighted) super.clone(); }
    catch (Exception e) {
      System.err.println("Error cloning SparseConfidenceWeighted: " + e);
      System.exit(1);
    }

    // NOTE(review): the means are assumed to be deep-copied by
    // super.clone(); only the variances are copied here -- confirm against
    // LinearThresholdUnit.clone().
    if (variances != null)
      clone.variances = (SparseWeightVector) variances.clone();
    return clone;
  }


  /**
   * Simply a container for all of {@link SparseConfidenceWeighted}'s
   * configurable parameters.  Using instances of this class should make code
   * more readable and constructors less complicated.
   *
   * @author Nick Rizzolo
   **/
  public static class Parameters extends LinearThresholdUnit.Parameters
  {
    /**
     * The confidence parameter as described above; default
     * {@link SparseConfidenceWeighted#defaultConfidence}.
     **/
    protected double confidence;
    /**
     * The strictly positive initial variance of the parameters; default
     * {@link SparseConfidenceWeighted#defaultInitialVariance}.
     **/
    protected double initialVariance;
    /**
     * The current variances of the parameters; default
     * {@link LinearThresholdUnit#defaultWeightVector}.
     **/
    protected SparseWeightVector variances;


    /** Sets all the default values. */
    public Parameters() {
      confidence = defaultConfidence;
      initialVariance = defaultInitialVariance;
      variances = (SparseWeightVector) defaultWeightVector.clone();
    }

    /**
     * Sets the parameters from the parent's parameters object, giving
     * defaults to all parameters declared in this object.
     **/
    public Parameters(LinearThresholdUnit.Parameters p) {
      super(p);
      confidence = defaultConfidence;
      initialVariance = defaultInitialVariance;
      variances = (SparseWeightVector) defaultWeightVector.clone();
    }

    /** Copy constructor. */
    public Parameters(Parameters p) {
      super(p);
      confidence = p.confidence;
      initialVariance = p.initialVariance;
      // NOTE(review): this copies the reference, so both Parameters objects
      // share a single variance vector, unlike the other constructors which
      // clone -- confirm this sharing is intended.
      variances = p.variances;
    }


    /**
     * Calls the appropriate <code>Learner.setParameters(Parameters)</code>
     * method for this <code>Parameters</code> object.
     *
     * @param l  The learner whose parameters will be set.
     **/
    public void setParameters(Learner l) {
      ((SparseConfidenceWeighted) l).setParameters(this);
    }


    /**
     * Creates a string representation of these parameters in which only
     * those parameters that differ from their default values are mentioned.
     **/
    public String nonDefaultString() {
      String result = super.nonDefaultString();

      if (confidence != SparseConfidenceWeighted.defaultConfidence)
        result += ", confidence = " + confidence;
      if (initialVariance != SparseConfidenceWeighted.defaultInitialVariance)
        result += ", initialVariance = " + initialVariance;

      // Trim the leading separator left behind when the parent contributed
      // nothing.
      if (result.startsWith(", ")) result = result.substring(2);
      return result;
    }
  }
}