NaiveBayes.java example

Explorer
MinorThird-master
package LBJ2.learn;

import java.io.IOException;
import java.io.PrintStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;

import LBJ2.classify.Classifier;
import LBJ2.classify.Feature;
import LBJ2.classify.FeatureVector;
import LBJ2.classify.RealFeature;
import LBJ2.classify.Score;
import LBJ2.classify.ScoreSet;
import LBJ2.util.ExceptionlessInputStream;
import LBJ2.util.ExceptionlessOutputStream;
import LBJ2.util.OVector;


/**
  * Naive Bayes is a multi-class learner that uses prediction value counts and
  * feature counts given a particular prediction value to select the most
  * likely prediction value.  More precisely, a score <i>s<sub>v</sub></i> for
  * a given prediction value <i>v</i> is computed such that
  * <i>e<sup>s<sub>v</sub></sup></i> is proportional to
  *
  * <blockquote>
  *   <i>P(v) Prod<sub>f</sub> P(f|v)</i>
  * </blockquote>
  *
  * where <i>Prod</i> is a multiplication quantifier over <i>f</i>, and
  * <i>f</i> stands for a feature.  The value corresponding to the highest
  * score is selected as the prediction.  Feature values that were never
  * observed given a particular prediction value during training are smoothed
  * with a configurable constant that defaults to <i>e<sup>-15</sup></i>.
  *
  * <p> This {@link Learner} learns a <code>discrete</code> classifier from
  * other <code>discrete</code> classifiers.  <i>Features coming from
  * <code>real</code> classifiers are ignored</i>.  It is also assumed that a
  * single discrete label feature will be produced in association with each
  * example object.  A feature taking one of the values observed in that label
  * feature will be produced by the learned classifier.
  *
  * <p> This algorithm's user-configurable parameters are stored in member
  * fields of this class.  They may be set via either a constructor that names
  * each parameter explicitly or a constructor that takes an instance of
  * {@link LBJ2.learn.NaiveBayes.Parameters Parameters} as input.  The
  * documentation in each member field in this class indicates the default
  * value of the associated parameter when using the former type of
  * constructor.  The documentation of the associated member field in the
  * {@link LBJ2.learn.NaiveBayes.Parameters Parameters} class indicates the
  * default value of the parameter when using the latter type of constructor.
  *
  * @see    NaiveBayesVector
  * @author Nick Rizzolo
 **/
public class NaiveBayes extends Learner
{
  /**
    * The default conditional feature probability is
    * <i>e<sup><code>defaultSmoothing</code></sup></i>.
   **/
  public static final int defaultSmoothing = -15;


  /**
    * The exponential of this number is used as the conditional probability of
    * a feature that was never observed during training; default
    * {@link #defaultSmoothing}.
   **/
  protected double smoothing;
  /** One {@link NaiveBayesVector} for each observed prediction value. */
  protected OVector network;


  /** Default constructor. */
  public NaiveBayes() { this(""); }

  /**
    * Initializes the smoothing constant.
    *
    * @param smooth The exponential of this number is used as the conditional
    *               probability of a feature that was never observed during
    *               training.
   **/
  public NaiveBayes(double smooth) { this("", smooth); }

  /**
    * Initializing constructor.  Sets all member variables to their associated
    * settings in the {@link NaiveBayes.Parameters} object.
    *
    * @param p  The settings of all parameters.
   **/
  public NaiveBayes(Parameters p) { this("", p); }

  /**
    * Initializes the name of the classifier.
    *
    * @param n  The classifier's name.
   **/
  public NaiveBayes(String n) { this(n, defaultSmoothing); }

  /**
    * Initializes the name and smoothing constant.
    *
    * @param name   The classifier's name.
    * @param smooth The exponential of this number is used as the conditional
    *               probability of a feature that was never observed during
    *               training.
   **/
  public NaiveBayes(String name, double smooth) {
    super(name);
    network = new OVector();
    smoothing = smooth;
  }

  /**
    * Initializing constructor.  Sets all member variables to their associated
    * settings in the {@link NaiveBayes.Parameters} object.
    *
    * @param n  The name of the classifier.
    * @param p  The settings of all parameters.
   **/
  public NaiveBayes(String n, Parameters p) {
    super(n);
    network = new OVector();
    setParameters(p);
  }


  /**
    * Sets the values of parameters that control the behavior of this learning
    * algorithm.
    *
    * @param p  The parameters.
   **/
  public void setParameters(Parameters p) {
    smoothing = p.smoothing;
  }


  /**
    * Retrieves the parameters that are set in this learner.
    *
    * @return An object containing all the values of the parameters that
    *         control the behavior of this learning algorithm.
   **/
  public Learner.Parameters getParameters() {
    Parameters p = new Parameters(super.getParameters());
    p.smoothing = smoothing;
    return p;
  }


  /**
    * Sets the smoothing parameter to the specified value.
    *
    * @param s  The new value for the smoothing parameter.
   **/
  public void setSmoothing(double s) { smoothing = s; }


  /**
    * Sets the labeler.
    *
    * @param l  A labeling classifier.
   **/
  public void setLabeler(Classifier l) {
    if (!l.getOutputType().equals("discrete")) {
      System.err.println(
          "LBJ WARNING: NaiveBayes will only work with a label classifier "
          + "that returns discrete.");
      System.err.println(
          "             The given label classifier, " + l.getClass().getName()
          + ", returns " + l.getOutputType() + ".");
    }

    super.setLabeler(l);
  }


  /**
    * Trains the learning algorithm given an object as an example.
    *
    * @param exampleFeatures  The example's array of feature indices.
    * @param exampleValues    The example's array of feature values.
    * @param exampleLabels    The example's label(s).
    * @param labelValues      The labels' values.
   **/
  public void learn(int[] exampleFeatures, double[] exampleValues,
                    int[] exampleLabels, double[] labelValues) {
    int label = exampleLabels[0];
    int N = network.size();

    NaiveBayesVector labelVector = null;
    if (label >= N) {
      while (N++ < label)
        network.add(new NaiveBayesVector());
      labelVector = new NaiveBayesVector();
      network.add(labelVector);
    }
    else labelVector = (NaiveBayesVector) network.get(label);

    labelVector.scaledAdd(exampleFeatures, exampleValues, 1.0);
  }


  /** Clears the network. */
  public void forget() {
    super.forget();
    network = new OVector();
  }


  /**
    * The scores in the returned {@link ScoreSet} are the posterior
    * probabilities of each possible label given the example.
    *
    * @param exampleFeatures  The example's array of feature indices.
    * @param exampleValues    The example's array of feature values.
    * @return A set of scores indicating the degree to which each possible
    *         discrete classification value is associated with the given
    *         example object.
   **/
  public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) {
    ScoreSet s = new ScoreSet();

    for (int l = 0; l < network.size(); l++) {
      NaiveBayesVector vector = (NaiveBayesVector) network.get(l);
      double score = vector.dot(exampleFeatures, exampleValues);
      s.put(labelLexicon.lookupKey(l).getStringValue(), score);
    }

    Score[] original = s.toArray();
    ScoreSet result = new ScoreSet();

    // This code would clearly run quicker if you computed each exp(score)
    // ahead of time, and divided them each by their sum.  However, each score
    // is likely to be a very negative number, so exp(score) may not be
    // numerically stable.  Subtracting two scores, however, hopefully leaves
    // you with a "less negative" number, so exp applied to the subtraction
    // hopefully behaves better.

    for (int i = 0; i < original.length; ++i) {
      double score = 1;

      for (int j = 0; j < original.length; ++j) {
        if (i == j) continue;
        score += Math.exp(original[j].score - original[i].score);
      }

      result.put(original[i].value, 1 / score);
    }

    return result;
  }


  /**
    * Returns the classification of the given example as a single feature
    * instead of a {@link FeatureVector}.
    *
    * @param f  The features array.
    * @param v  The values array.
    * @return The classification of the example as a feature.
   **/
  public Feature featureValue(int[] f, double[] v) {
    double bestScore = -Double.MAX_VALUE;
    int bestLabel = -1;

    for (int l = 0; l < network.size(); l++) {
      NaiveBayesVector vector = (NaiveBayesVector) network.get(l);
      double score = vector.dot(f, v);

      if (score > bestScore) {
        bestLabel = l;
        bestScore = score;
      }
    }

    if (bestLabel == -1) return null;
    return predictions.get(bestLabel);
  }


  /**
    * Prediction value counts and feature counts given a particular prediction
    * value are used to select the most likely prediction value.
    *
    * @param exampleFeatures  The example's array of feature indices.
    * @param exampleValues    The example's array of feature values.
    * @return The most likely discrete value.
   **/
  public String discreteValue(int[] exampleFeatures, double[] exampleValues) {
    return featureValue(exampleFeatures, exampleValues).getStringValue();
  }


  /**
    * Prediction value counts and feature counts given a particular prediction
    * value are used to select the most likely prediction value.
    *
    * @param exampleFeatures  The example's array of feature indices.
    * @param exampleValues    The example's array of feature values.
    * @return A single discrete feature, set to the most likely value.
   **/
  public FeatureVector classify(int[] exampleFeatures, double[] exampleValues)
  {
    return new FeatureVector(featureValue(exampleFeatures, exampleValues));
  }


  /**
    * Writes the algorithm's internal representation as text.
    *
    * @param out  The output stream.
   **/
  public void write(PrintStream out) {
    int N = network.size();
    for (int i = 0; i < N; ++i) {
      out.println("label: " + labelLexicon.lookupKey(i).getStringValue());
      ((NaiveBayesVector) network.get(i)).write(out);
    }

    out.println("End of NaiveBayes");
  }


  /**
    * Writes the learned function's internal representation in binary form.
    *
    * @param out  The output stream.
   **/
  public void write(ExceptionlessOutputStream out) {
    super.write(out);
    out.writeDouble(smoothing);
    int N = network.size();
    out.writeInt(N);
    for (int i = 0; i < N; ++i)
      ((NaiveBayesVector) network.get(i)).write(out);
  }


  /**
    * Reads the binary representation of a learner with this object's run-time
    * type, overwriting any and all learned or manually specified parameters
    * as well as the label lexicon but without modifying the feature lexicon.
    *
    * @param in The input stream.
   **/
  public void read(ExceptionlessInputStream in) {
    super.read(in);
    smoothing = in.readDouble();
    int N = in.readInt();
    network = new OVector(N);

    for (int i = 0; i < N; ++i) {
      NaiveBayesVector nbv = new NaiveBayesVector();
      nbv.read(in);
      network.add(nbv);
    }
  }


  /** Returns a deep clone of this learning algorithm. */
  public Object clone() {
    NaiveBayes clone = (NaiveBayes) super.clone();
    int N = network.size();
    clone.network = new OVector(N);
    for (int i = 0; i < N; ++i)
      clone.network.add(((NaiveBayesVector) network.get(i)).clone());
    return clone;
  }


  /**
    * Simply a container for all of {@link NaiveBayes}'s configurable
    * parameters.  Using instances of this class should make code more
    * readable and constructors less complicated.
    *
    * @author Nick Rizzolo
   **/
  public static class Parameters extends Learner.Parameters
  {
    /**
      * The exponential of this number is used as the conditional probability
      * of a feature that was never observed during training; default
      * {@link NaiveBayes#defaultSmoothing}.
     **/
    public double smoothing;


    /** Sets all the default values. */
    public Parameters() {
      smoothing = defaultSmoothing;
    }


    /**
      * Sets the parameters from the parent's parameters object, giving
      * defaults to all parameters declared in this object.
     **/
    public Parameters(Learner.Parameters p) {
      super(p);
      smoothing = defaultSmoothing;
    }


    /** Copy constructor. */
    public Parameters(Parameters p) {
      super(p);
      smoothing = p.smoothing;
    }


    /**
      * Calls the appropriate <code>Learner.setParameters(Parameters)</code>
      * method for this <code>Parameters</code> object.
      *
      * @param l  The learner whose parameters will be set.
     **/
    public void setParameters(Learner l) {
      ((NaiveBayes) l).setParameters(this);
    }


    /**
      * Creates a string representation of these parameters in which only
      * those parameters that differ from their default values are mentioned.
     **/
    public String nonDefaultString() {
      String result = super.nonDefaultString();

      if (smoothing != NaiveBayes.defaultSmoothing)
        result += ", smoothing = " + smoothing;

      if (result.startsWith(", ")) result = result.substring(2);
      return result;
    }
  }


  /**
    * A <code>Count</code> object stores two <code>doubles</code>, one which
    * holds a accumulated count value and the other intended to hold the
    * natural logarithm of the count.  The object also contains a
    * <code>boolean</code> flag that is set when the log needs to be updated.
    *
    * @author Nick Rizzolo
   **/
  protected static class Count implements Cloneable, Serializable
  {
    /** The accumulated value. */
    protected double count;
    /** The natural logartihm of {@link #count} is sometimes stored here. */
    protected transient double logCount;
    /** A flag that is set iff {@link #logCount} is not up to date. */
    protected transient boolean updateLog;


    /** Sets the count to 0. */
    public Count() {
      count = 0;
      logCount = 0;
      updateLog = true;
    }


    /** Returns the integer count. */
    public double getCount() { return count; }


    /**
      * Increments the count, but does not update the log.
      *
      * @param inc  The amount the count should be incremented by.
     **/
    public void increment(double inc) {
      count += inc;
      updateLog = true;
    }


    /** Returns the log after updating it. */
    public double getLog() {
      if (updateLog) {
        logCount = Math.log(count);
        updateLog = false;
      }

      return logCount;
    }


    /**
      * The string representation of a <code>Count</code> object is simply the
      * integer count.
     **/
    public String toString() { return "" + count; }


    /**
      * Writes the count's internal representation in binary form.
      *
      * @param out  The output stream.
     **/
    public void write(ExceptionlessOutputStream out) {
      out.writeDouble(count);
    }


    /**
      * Reads the binary representation of a count into this object,
      * overwriting any data that may already be here.
      *
      * @param in The input stream.
     **/
    public void read(ExceptionlessInputStream in) {
      count = in.readDouble();
      updateLog = true;
    }


    /**
      * This method returns a shallow clone.
      *
      * @return A shallow clone.
     **/
    public Object clone() {
      Object clone = null;

      try { clone = super.clone(); }
      catch (Exception e) {
        System.err.println("Error cloning " + getClass().getName() + ":");
        e.printStackTrace();
        System.exit(1);
      }

      return clone;
    }


    /**
      * Special handling during deserialization to ensure that
      * {@link #updateLog} is set to <code>true</code>.
      *
      * @param in The stream to deserialize from.
     **/
    private void readObject(java.io.ObjectInputStream in)
            throws IOException, ClassNotFoundException {
      in.defaultReadObject();
      updateLog = true;
    }
  }


  /**
    * Keeps track of all the counts associated with a given label.
    * Features are associated with {@link NaiveBayes.Count}s.  Those not
    * appearing in this vector are assumed to have a count of 0.  The
    * invocation of either of the <code>scaledAdd</code> methods increments
    * the prior count for the label.
    *
    * <p> {@link RealFeature}s' strengths are ignored by this vector; they are
    * assumed to be equal to 1, as if the feature were an active Boolean
    * feature.
    *
    * @author Nick Rizzolo
   **/
  protected class NaiveBayesVector extends SparseWeightVector
  {
    /** The counts in the vector indexed by their {@link Lexicon} key. */
    protected OVector counts;
    /**
      * The prior count is the number of times either <code>scaledAdd</code>
      * method has been called.
     **/
    protected Count priorCount;


    /** Simply instantiates {@link NaiveBayes.NaiveBayesVector#counts}. */
    public NaiveBayesVector() { this(new OVector(defaultCapacity)); }

    /**
      * Simply initializes {@link #counts}.
      *
      * @param w  An array of counts.
     **/
    public NaiveBayesVector(Count[] w) { this(new OVector(w)); }

    /**
      * Simply initializes {@link #counts}.
      *
      * @param w  A vector of counts.
     **/
    public NaiveBayesVector(OVector w) {
      counts = w;
      priorCount = new Count();
    }


    /**
      * Returns the prior count of the prediction value associated with this
      * vector.
     **/
    public Count getPrior() { return priorCount; }


    /**
      * Takes the dot product of this vector with the given vector, using the
      * hard coded smoothing weight.
      *
      * @param exampleFeatures  The example's array of feature indices.
      * @param exampleValues    The example's array of feature values.
      * @return The computed dot product.
     **/
    public double dot(int[] exampleFeatures, double[] exampleValues) {
      return
        dot(exampleFeatures, exampleValues, priorCount.getLog() + smoothing);
    }


    /**
      * Takes the dot product of this vector with the given vector,
      * using the specified default weight when encountering a feature that is
      * not yet present in this vector.  Here, weights are taken as
      * <i>log(feature count / prior count)</i>.  The output of this method is
      * related to the empirical probability of the example <i>e</i> as
      * follows: <br><br>
      *
      * <i>exp(dot(e)) / (sum of all labels' prior counts)) =</i><br>
      * <i>P(e's label && e)</i>
      *
      * @param exampleFeatures  The example's array of feature indices.
      * @param exampleValues    The example's array of feature values.
      * @param defaultW         The default weight.
      * @return The computed dot product.
     **/
    public double dot(int[] exampleFeatures, double[] exampleValues,
                      double defaultW) {
      double sum = (1 - exampleFeatures.length) * priorCount.getLog();
      for (int i = 0; i < exampleFeatures.length; i++)
        sum += getWeight(exampleFeatures[i], defaultW);
      return sum;
    }


    /**
      * Returns the count of the given feature.
      *
      * @param featureIndex The feature index.
      * @return The count of the feature.
     **/
    public double getCount(int featureIndex) {
      while (counts.size() <= featureIndex) counts.add(new Count());
      return ((Count) counts.get(featureIndex)).getCount();
    }


    /**
      * Returns the weight of the given feature
      *
      * @param featureIndex The feature index.
      * @param defaultW     The default count.
      * @return The weight of the feature.
     **/
    public double getWeight(int featureIndex, double defaultW) {
      while (counts.size() <= featureIndex) counts.add(new Count());
      Count c = (Count) counts.get(featureIndex);
      if (c.getCount() == 0) return defaultW;
      return c.getLog();
    }


    /**
      * This method is overridden to do nothing; use
      * {@link #incrementCount(int,double)} instead.
      *
      * @param f  Unused.
      * @param w  Unused.
     **/
    public void setWeight(int f, double w) { }


    /**
      * Increments the count of the given feature.
      *
      * @param featureIndex  The index of the feature to update.
      * @param factor        The factor by which to increment.
     **/
    public void incrementCount(int featureIndex, double factor) {
      if (featureIndex < counts.size())
        ((Count) counts.get(featureIndex)).increment(factor);
      else {
        while (counts.size() < featureIndex) counts.add(new Count());
        Count c = new Count();
        c.increment(factor);
        counts.add(c);
      }
    }


    /**
      * This method is similar to the implementation in
      * {@link SparseWeightVector} except that
      * {@link NaiveBayes.NaiveBayesVector#incrementCount(int,double)}
      * is called instead of
      * {@link SparseWeightVector#setWeight(int,double)}.
      *
      * @param exampleFeatures  The example's array of feature indices.
      * @param exampleValues    The example's array of feature values.
      * @param factor           The scaling factor.
     **/
    public void scaledAdd(int[] exampleFeatures, double[] exampleValues,
                          double factor) {
      priorCount.increment(factor);
      for (int i = 0; i < exampleFeatures.length; i++)
        incrementCount(exampleFeatures[i], factor);
    }


    /**
      * This method is similar to the implementation in
      * {@link SparseWeightVector} except that the <code>defaultW</code>
      * argument is ignored and
      * {@link NaiveBayes.NaiveBayesVector#incrementCount(int,double)}
      * is called instead of
      * {@link SparseWeightVector#setWeight(int,double)}.
      *
      * @param exampleFeatures  The example's array of feature indices.
      * @param exampleValues    The example's array of feature values.
      * @param factor           The scaling factor.
      * @param defaultW         Unused.
     **/
    public void scaledAdd(int[] exampleFeatures, double[] exampleValues,
                          double factor, double defaultW) {
      scaledAdd(exampleFeatures, exampleValues, factor);
    }


    /**
      * Outputs the contents of this vector into the specified
      * <code>PrintStream</code>.  The string representation is the same as in
      * the super class, except the <code>"Begin"</code> annotation line also
      * contains the value of {@link #priorCount} in parentheses.  In
      * addition, this method has access to the lexicon, so the output of this
      * method is equivalent to that of {@link #write(PrintStream,Lexicon)}.
      *
      * @param out  The stream to write to.
     **/
    public void write(PrintStream out) {
      write(out, lexicon);
    }


    /**
      * Outputs the contents of this vector into the specified
      * <code>PrintStream</code>.  The string representation is the same as in
      * the super class, except the <code>"Begin"</code> annotation line also
      * contains the value of {@link #priorCount} in parentheses.
      *
      * @param out  The stream to write to.
      * @param lex  The feature lexicon.
     **/
    public void write(PrintStream out, Lexicon lex) {
      out.println("Begin NaiveBayesVector (" + priorCount + ")");

      Map map = lex.getMap();
      Map.Entry[] entries =
        (Map.Entry[]) map.entrySet().toArray(new Map.Entry[map.size()]);
      Arrays.sort(entries,
                  new Comparator() {
                    public int compare(Object o1, Object o2) {
                      Map.Entry e1 = (Map.Entry) o1;
                      Map.Entry e2 = (Map.Entry) o2;
                      return ((Feature) e1.getKey()).compareTo(e2.getKey());
                    }
                  });

      int i, biggest = 0;
      for (i = 0; i < entries.length; ++i) {
        String key = entries[i].getKey().toString();
        biggest = Math.max(biggest, key.length());
      }

      if (biggest % 2 == 0) biggest += 2;
      else ++biggest;

      for (i = 0; i < entries.length; ++i) {
        String key = entries[i].getKey().toString();
        int index = ((Integer) entries[i].getValue()).intValue();
        out.print(key);
        for (int j = 0; key.length() + j < biggest; ++j) out.print(" ");
        out.println(getCount(index));
      }

      out.println("End NaiveBayesVector");
    }


    /**
      * Writes the weight vector's internal representation in binary form.
      * <b>Note:</b> this method does not call
      * {@link SparseWeightVector#write(ExceptionlessOutputStream)} and does
      * not output its class name or the contents of
      * {@link SparseWeightVector#weights} since there shouldn't be any.
      *
      * @param out  The output stream.
     **/
    public void write(ExceptionlessOutputStream out) {
      priorCount.write(out);
      out.writeInt(counts.size());
      for (int i = 0; i < counts.size(); ++i)
        ((Count) counts.get(i)).write(out);
    }


    /**
      * Reads the representation of a weight vector with this object's
      * run-time type from the given stream, overwriting the data in this
      * object.
      *
      * <p> This method is appropriate for reading weight vectors as written
      * by {@link #write(ExceptionlessOutputStream)}.
      *
      * @param in The input stream.
     **/
    public void read(ExceptionlessInputStream in) {
      priorCount = new Count();
      priorCount.read(in);
      int N = in.readInt();
      counts = new OVector(N);
      for (int i = 0; i < N; ++i) {
        Count c = new Count();
        c.read(in);
        counts.add(c);
      }
    }


    /**
      * Returns a copy of this <code>NaiveBayesVector</code>.
      *
      * @return A copy of this <code>NaiveBayesVector</code>.
     **/
    public Object clone() {
      NaiveBayesVector clone = (NaiveBayesVector) super.clone();
      Count[] array = new Count[counts.size()];
      for (int i = 0; i < counts.size(); ++i)
        array[i] = (Count) ((Count) counts.get(i)).clone();
      clone.counts = new OVector(array);
      return clone;
    }


    /**
      * Returns a new, empty weight vector with the same parameter settings as
      * this one.
      *
      * @return An empty weight vector.
     **/
    public SparseWeightVector emptyClone() {
      return new NaiveBayesVector();
    }
  }
}