/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* MIBoost.java
* Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.mi;
import weka.classifiers.Classifier;
import weka.classifiers.SingleClassifierEnhancer;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Optimization;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Discretize;
import weka.filters.unsupervised.attribute.MultiInstanceToPropositional;
import java.util.Enumeration;
import java.util.Vector;
/**
<!-- globalinfo-start -->
* MI AdaBoost method, considers the geometric mean of posterior of instances inside a bag (arithmetic mean of log-posterior) and the expectation for a bag is taken inside the loss function.<br/>
* <br/>
* For more information about Adaboost, see:<br/>
* <br/>
* Yoav Freund, Robert E. Schapire: Experiments with a new boosting algorithm. In: Thirteenth International Conference on Machine Learning, San Francisco, 148-156, 1996.
* <p/>
<!-- globalinfo-end -->
*
<!-- technical-bibtex-start -->
* BibTeX:
* <pre>
* @inproceedings{Freund1996,
* address = {San Francisco},
* author = {Yoav Freund and Robert E. Schapire},
* booktitle = {Thirteenth International Conference on Machine Learning},
* pages = {148-156},
* publisher = {Morgan Kaufmann},
* title = {Experiments with a new boosting algorithm},
* year = {1996}
* }
* </pre>
* <p/>
<!-- technical-bibtex-end -->
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -D
* Turn on debugging output.</pre>
*
* <pre> -B <num>
* The number of bins in discretization
* (default 0, no discretization)</pre>
*
* <pre> -R <num>
* Maximum number of boost iterations.
* (default 10)</pre>
*
* <pre> -W <class name>
* Full name of classifier to boost.
* eg: weka.classifiers.bayes.NaiveBayes</pre>
*
* <pre> -D
* If set, classifier is run in debug mode and
* may output additional info to the console</pre>
*
<!-- options-end -->
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @author Xin Xu (xx5@cs.waikato.ac.nz)
* @version $Revision: 1.6 $
*/
public class MIBoost
extends SingleClassifierEnhancer
implements OptionHandler, MultiInstanceCapabilitiesHandler,
TechnicalInformationHandler {
/** for serialization */
static final long serialVersionUID = -3808427225599279539L;
/** the base models built in the boosting iterations */
protected Classifier[] m_Models;
/** The number of the class labels */
protected int m_NumClasses;
/** Class labels for each bag */
protected int[] m_Classes;
/** header (attribute information) of the propositionalized dataset used to build the models */
protected Instances m_Attributes;
/** Number of boosting iterations actually performed (may be fewer than m_MaxIterations) */
private int m_NumIterations = 100;
/** Voting weights of models */
protected double[] m_Beta;
/** the maximum number of boost iterations */
protected int m_MaxIterations = 10;
/** the number of discretization bins (0 means no discretization) */
protected int m_DiscretizeBin = 0;
/** filter used for discretization; only created when m_DiscretizeBin > 0 */
protected Discretize m_Filter = null;
/** filter used to convert the MI dataset into single-instance dataset */
protected MultiInstanceToPropositional m_ConvertToSI = new MultiInstanceToPropositional();
/**
 * Returns a string describing this classifier.
 *
 * @return a description of the classifier suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
  // fixed typo: "arithmatic" -> "arithmetic" in the user-visible description
  return
      "MI AdaBoost method, considers the geometric mean of posterior "
    + "of instances inside a bag (arithmetic mean of log-posterior) and "
    + "the expectation for a bag is taken inside the loss function.\n\n"
    + "For more information about Adaboost, see:\n\n"
    + getTechnicalInformation().toString();
}
/**
 * Returns an instance of a TechnicalInformation object, containing
 * detailed information about the technical background of this class,
 * e.g., paper reference or book this class is based on.
 *
 * @return the technical information about this class
 */
public TechnicalInformation getTechnicalInformation() {
  // reference to the Freund & Schapire AdaBoost paper
  TechnicalInformation paper = new TechnicalInformation(Type.INPROCEEDINGS);
  paper.setValue(Field.AUTHOR, "Yoav Freund and Robert E. Schapire");
  paper.setValue(Field.TITLE, "Experiments with a new boosting algorithm");
  paper.setValue(Field.BOOKTITLE, "Thirteenth International Conference on Machine Learning");
  paper.setValue(Field.YEAR, "1996");
  paper.setValue(Field.PAGES, "148-156");
  paper.setValue(Field.PUBLISHER, "Morgan Kaufmann");
  paper.setValue(Field.ADDRESS, "San Francisco");
  return paper;
}
/**
 * Returns an enumeration describing the available options
 *
 * @return an enumeration of all the available options
 */
public Enumeration listOptions() {
  Vector options = new Vector();

  options.addElement(new Option(
      "\tTurn on debugging output.",
      "D", 0, "-D"));
  options.addElement(new Option(
      "\tThe number of bins in discretization\n"
      + "\t(default 0, no discretization)",
      "B", 1, "-B <num>"));
  options.addElement(new Option(
      "\tMaximum number of boost iterations.\n"
      + "\t(default 10)",
      "R", 1, "-R <num>"));
  options.addElement(new Option(
      "\tFull name of classifier to boost.\n"
      + "\teg: weka.classifiers.bayes.NaiveBayes",
      "W", 1, "-W <class name>"));

  // append the options of the base classifier
  Enumeration enm = ((OptionHandler) m_Classifier).listOptions();
  while (enm.hasMoreElements())
    options.addElement(enm.nextElement());

  return options.elements();
}
/**
 * Parses a given list of options. <p/>
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 * Turn on debugging output.</pre>
 *
 * <pre> -B <num>
 * The number of bins in discretization
 * (default 0, no discretization)</pre>
 *
 * <pre> -R <num>
 * Maximum number of boost iterations.
 * (default 10)</pre>
 *
 * <pre> -W <class name>
 * Full name of classifier to boost.
 * eg: weka.classifiers.bayes.NaiveBayes</pre>
 *
 * <pre> -D
 * If set, classifier is run in debug mode and
 * may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  setDebug(Utils.getFlag('D', options));

  // -B: number of discretization bins (default 0 = no discretization)
  String binString = Utils.getOption('B', options);
  setDiscretizeBin((binString.length() == 0) ? 0 : Integer.parseInt(binString));

  // -R: maximum number of boosting iterations (default 10)
  String iterString = Utils.getOption('R', options);
  setMaxIterations((iterString.length() == 0) ? 10 : Integer.parseInt(iterString));

  // remaining options (e.g. -W) are handled by the superclass
  super.setOptions(options);
}
/**
 * Gets the current settings of the classifier.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String[] getOptions() {
  Vector result = new Vector();

  result.add("-R");
  result.add("" + getMaxIterations());
  result.add("-B");
  result.add("" + getDiscretizeBin());

  // append the options of the superclass (base classifier etc.)
  String[] superOptions = super.getOptions();
  for (int i = 0; i < superOptions.length; i++) {
    result.add(superOptions[i]);
  }

  return (String[]) result.toArray(new String[result.size()]);
}
/**
 * Returns the tip text for the maxIterations property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String maxIterationsTipText() {
  return "The maximum number of boost iterations.";
}
/**
 * Set the maximum number of boost iterations.
 *
 * @param value the maximum number of boost iterations
 */
public void setMaxIterations(int value) {
  this.m_MaxIterations = value;
}
/**
 * Get the maximum number of boost iterations.
 *
 * @return the maximum number of boost iterations
 */
public int getMaxIterations() {
  return this.m_MaxIterations;
}
/**
 * Returns the tip text for the discretizeBin property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String discretizeBinTipText() {
  return "The number of bins in discretization.";
}
/**
 * Set the number of bins in discretization.
 *
 * @param value the number of bins in discretization
 */
public void setDiscretizeBin(int value) {
  this.m_DiscretizeBin = value;
}
/**
 * Get the number of bins in discretization.
 *
 * @return the number of bins in discretization
 */
public int getDiscretizeBin() {
  return this.m_DiscretizeBin;
}
private class OptEng
extends Optimization {
private double[] weights, errs;
public void setWeights(double[] w){
weights = w;
}
public void setErrs(double[] e){
errs = e;
}
/**
* Evaluate objective function
* @param x the current values of variables
* @return the value of the objective function
* @throws Exception if result is NaN
*/
protected double objectiveFunction(double[] x) throws Exception{
double obj=0;
for(int i=0; i<weights.length; i++){
obj += weights[i]*Math.exp(x[0]*(2.0*errs[i]-1.0));
if(Double.isNaN(obj))
throw new Exception("Objective function value is NaN!");
}
return obj;
}
/**
* Evaluate Jacobian vector
* @param x the current values of variables
* @return the gradient vector
* @throws Exception if gradient is NaN
*/
protected double[] evaluateGradient(double[] x) throws Exception{
double[] grad = new double[1];
for(int i=0; i<weights.length; i++){
grad[0] += weights[i]*(2.0*errs[i]-1.0)*Math.exp(x[0]*(2.0*errs[i]-1.0));
if(Double.isNaN(grad[0]))
throw new Exception("Gradient is NaN!");
}
return grad;
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 1.6 $");
}
}
/**
 * Returns default capabilities of the classifier.
 *
 * @return the capabilities of this classifier
 */
public Capabilities getCapabilities() {
  Capabilities caps = super.getCapabilities();

  // attributes
  caps.enable(Capability.NOMINAL_ATTRIBUTES);
  caps.enable(Capability.RELATIONAL_ATTRIBUTES);
  caps.enable(Capability.MISSING_VALUES);

  // class: only binary, and only if the base classifier supports it
  caps.disableAllClasses();
  caps.disableAllClassDependencies();
  if (super.getCapabilities().handles(Capability.BINARY_CLASS))
    caps.enable(Capability.BINARY_CLASS);
  caps.enable(Capability.MISSING_CLASS_VALUES);

  // other
  caps.enable(Capability.ONLY_MULTIINSTANCE);

  return caps;
}
/**
 * Returns the capabilities of this multi-instance classifier for the
 * relational data.
 *
 * @return the capabilities of this object
 * @see Capabilities
 */
public Capabilities getMultiInstanceCapabilities() {
  Capabilities caps = super.getCapabilities();

  // the instances inside a bag carry no class attribute of their own
  caps.disableAllClasses();
  caps.enable(Capability.NO_CLASS);

  return caps;
}
/**
 * Builds the boosted classifier from the given multi-instance data.
 * The bags are propositionalized into a single-instance dataset, then a
 * base classifier is built per boosting iteration; each iteration's
 * voting weight (beta) is found numerically by minimizing an exponential
 * loss over the per-bag error rates.
 *
 * @param exps the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances exps) throws Exception {

  // can classifier handle the data?
  getCapabilities().testWithFail(exps);

  // remove instances with missing class
  Instances train = new Instances(exps);
  train.deleteWithMissingClass();

  m_NumClasses = train.numClasses();
  // will be reduced below if boosting stops early
  m_NumIterations = m_MaxIterations;

  if (m_Classifier == null)
    throw new Exception("A base classifier has not been specified!");

  if(!(m_Classifier instanceof WeightedInstancesHandler))
    throw new Exception("Base classifier cannot handle weighted instances!");

  m_Models = Classifier.makeCopies(m_Classifier, getMaxIterations());
  if(m_Debug)
    System.err.println("Base classifier: "+m_Classifier.getClass().getName());

  m_Beta = new double[m_NumIterations];

  /* modified by Lin Dong. (use MIToSingleInstance filter to convert the MI datasets) */

  // Initialize the bags' weights: each bag starts with weight sumNi/N
  // (the average bag size), so bag weights sum to sumNi
  double N = (double)train.numInstances(), sumNi=0;
  for(int i=0; i<N; i++)
    sumNi += train.instance(i).relationalValue(1).numInstances();
  for(int i=0; i<N; i++){
    train.instance(i).setWeight(sumNi/N);
  }

  //convert the training dataset into single-instance dataset
  m_ConvertToSI.setInputFormat(train);
  Instances data = Filter.useFilter( train, m_ConvertToSI);
  data.deleteAttributeAt(0); //remove the bagIndex attribute;

  // Assume the order of the instances are preserved in the Discretize filter
  if(m_DiscretizeBin > 0){
    m_Filter = new Discretize();
    m_Filter.setInputFormat(new Instances(data, 0));
    m_Filter.setBins(m_DiscretizeBin);
    data = Filter.useFilter(data, m_Filter);
  }

  // Main algorithm
  int dataIdx;
  iterations:
  for(int m=0; m < m_MaxIterations; m++){
    if(m_Debug)
      System.err.println("\nIteration "+m);
    // Build a model
    m_Models[m].buildClassifier(data);

    // Prediction of each bag: walk the propositionalized instances via
    // dataIdx, which stays in sync with the bag order of 'train'
    double[] err=new double[(int)N], weights=new double[(int)N];
    boolean perfect = true, tooWrong=true;
    dataIdx = 0;
    for(int n=0; n<N; n++){
      Instance exn = train.instance(n);
      // Prediction of each instance and the predicted class distribution
      // of the bag
      double nn = (double)exn.relationalValue(1).numInstances();
      for(int p=0; p<nn; p++){
        Instance testIns = data.instance(dataIdx++);
        if((int)m_Models[m].classifyInstance(testIns)
           != (int)exn.classValue()) // Weighted instance-wise 0-1 errors
          err[n] ++;
      }
      weights[n] = exn.weight();
      err[n] /= nn;
      // perfect stays true if no bag error exceeds 0.5; tooWrong stays
      // true if no bag error falls below 0.5 -- if either holds, the
      // exponential loss is monotone in beta and has no finite minimizer
      if(err[n] > 0.5)
        perfect = false;
      if(err[n] < 0.5)
        tooWrong = false;
    }

    if(perfect || tooWrong){ // No or 100% classification error, cannot find beta
      if (m == 0)
        m_Beta[m] = 1.0;
      else
        m_Beta[m] = 0;
      m_NumIterations = m+1;
      if(m_Debug) System.err.println("No errors");
      break iterations;
    }

    // single-variable optimization of beta, unconstrained (NaN bounds)
    double[] x = new double[1];
    x[0] = 0;
    double[][] b = new double[2][x.length];
    b[0][0] = Double.NaN;
    b[1][0] = Double.NaN;

    OptEng opt = new OptEng();
    opt.setWeights(weights);
    opt.setErrs(err);
    //opt.setDebug(m_Debug);
    if (m_Debug)
      System.out.println("Start searching for c... ");
    x = opt.findArgmin(x, b);
    // findArgmin returns null if it ran out of iterations; restart from
    // the current variable values until it converges
    while(x==null){
      x = opt.getVarbValues();
      if (m_Debug)
        System.out.println("200 iterations finished, not enough!");
      x = opt.findArgmin(x, b);
    }
    if (m_Debug)
      System.out.println("Finished.");
    m_Beta[m] = x[0];

    if(m_Debug)
      System.err.println("c = "+m_Beta[m]);

    // Stop if error too small or error too big and ignore this model
    if (Double.isInfinite(m_Beta[m])
        || Utils.smOrEq(m_Beta[m], 0)
       ) {
      if (m == 0)
        m_Beta[m] = 1.0;
      else
        m_Beta[m] = 0;
      m_NumIterations = m+1;
      if(m_Debug)
        System.err.println("Errors out of range!");
      break iterations;
    }

    // Update weights of data and class label of wfData
    // First pass: exponential reweighting of the bags
    dataIdx=0;
    double totWeights=0;
    for(int r=0; r<N; r++){
      Instance exr = train.instance(r);
      exr.setWeight(weights[r]*Math.exp(m_Beta[m]*(2.0*err[r]-1.0)));
      totWeights += exr.weight();
    }

    if(m_Debug)
      System.err.println("Total weights = "+totWeights);

    // Second pass: renormalize bag weights to sum to sumNi and spread
    // each bag's weight evenly over its propositionalized instances
    for(int r=0; r<N; r++){
      Instance exr = train.instance(r);
      double num = (double)exr.relationalValue(1).numInstances();
      exr.setWeight(sumNi*exr.weight()/totWeights);
      for(int s=0; s<num; s++){
        Instance inss = data.instance(dataIdx);
        inss.setWeight(exr.weight()/num);
        if(Double.isNaN(inss.weight()))
          throw new Exception("instance "+s+" in bag "+r+" has weight NaN!");
        dataIdx++;
      }
    }
  }
}
/**
 * Computes the distribution for a given exemplar (bag).
 *
 * @param exmp the exemplar for which distribution is computed
 * @return the classification
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance exmp)
  throws Exception {

  double[] distribution = new double[m_NumClasses];

  // wrap the bag in a one-instance dataset and propositionalize it
  Instances bagData = new Instances(exmp.dataset(), 0);
  bagData.add(exmp);
  bagData = Filter.useFilter(bagData, m_ConvertToSI);
  bagData.deleteAttributeAt(0); //remove the bagIndex attribute

  // count instances before discretization, matching the training setup
  double numInsts = bagData.numInstances();
  if (m_DiscretizeBin > 0)
    bagData = Filter.useFilter(bagData, m_Filter);

  // accumulate each model's vote, averaged over the bag's instances
  for (int i = 0; i < numInsts; i++) {
    Instance ins = bagData.instance(i);
    for (int j = 0; j < m_NumIterations; j++) {
      distribution[(int) m_Models[j].classifyInstance(ins)]
        += m_Beta[j] / numInsts;
    }
  }

  // exponentiate the accumulated log-scores and normalize
  for (int i = 0; i < distribution.length; i++)
    distribution[i] = Math.exp(distribution[i]);
  Utils.normalize(distribution);

  return distribution;
}
/**
 * Gets a string describing the classifier.
 *
 * @return a string describing the classifer built.
 */
public String toString() {
  if (m_Models == null)
    return "No model built yet!";

  StringBuffer buf = new StringBuffer();
  buf.append("MIBoost: number of bins in discretization = "+m_DiscretizeBin+"\n");

  if (m_NumIterations == 0) {
    buf.append("No model built yet.\n");
  } else if (m_NumIterations == 1) {
    buf.append("No boosting possible, one classifier used: Weight = "
        + Utils.roundDouble(m_Beta[0], 2)+"\n");
    buf.append("Base classifiers:\n"+m_Models[0].toString());
  } else {
    buf.append("Base classifiers and their weights: \n");
    for (int i = 0; i < m_NumIterations; i++) {
      buf.append("\n\n"+i+": Weight = " + Utils.roundDouble(m_Beta[i], 2)
          +"\nBase classifier:\n"+m_Models[i].toString() );
    }
  }

  buf.append("\n\nNumber of performed Iterations: "
      + m_NumIterations + "\n");

  return buf.toString();
}
/**
 * Returns the revision string.
 *
 * @return the revision
 */
public String getRevision() {
  String revision = "$Revision: 1.6 $";
  return RevisionUtils.extract(revision);
}
/**
 * Main method for testing this class.
 *
 * @param argv should contain the command line arguments to the
 * scheme (see Evaluation)
 */
public static void main(String[] argv) {
  MIBoost scheme = new MIBoost();
  runClassifier(scheme, argv);
}
}