/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* MIBoost.java
* Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.mi;
import weka.classifiers.Classifier;
import weka.classifiers.SingleClassifierEnhancer;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Optimization;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Discretize;
import weka.filters.unsupervised.attribute.MultiInstanceToPropositional;
import java.util.Enumeration;
import java.util.Vector;
/**
<!-- globalinfo-start -->
* MI AdaBoost method, considers the geometric mean of posterior of instances inside a bag (arithmetic mean of log-posterior) and the expectation for a bag is taken inside the loss function.<br/>
* <br/>
* For more information about Adaboost, see:<br/>
* <br/>
* Yoav Freund, Robert E. Schapire: Experiments with a new boosting algorithm. In: Thirteenth International Conference on Machine Learning, San Francisco, 148-156, 1996.
* <p/>
<!-- globalinfo-end -->
*
<!-- technical-bibtex-start -->
* BibTeX:
* <pre>
* @inproceedings{Freund1996,
* address = {San Francisco},
* author = {Yoav Freund and Robert E. Schapire},
* booktitle = {Thirteenth International Conference on Machine Learning},
* pages = {148-156},
* publisher = {Morgan Kaufmann},
* title = {Experiments with a new boosting algorithm},
* year = {1996}
* }
* </pre>
* <p/>
<!-- technical-bibtex-end -->
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -D
* Turn on debugging output.</pre>
*
* <pre> -B <num>
* The number of bins in discretization
* (default 0, no discretization)</pre>
*
* <pre> -R <num>
* Maximum number of boost iterations.
* (default 10)</pre>
*
* <pre> -W <class name>
* Full name of classifier to boost.
* eg: weka.classifiers.bayes.NaiveBayes</pre>
*
* <pre> -D
* If set, classifier is run in debug mode and
* may output additional info to the console</pre>
*
<!-- options-end -->
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @author Xin Xu (xx5@cs.waikato.ac.nz)
* @version $Revision: 1.6 $
*/
public class MIBoost
extends SingleClassifierEnhancer
implements OptionHandler, MultiInstanceCapabilitiesHandler,
TechnicalInformationHandler {
/** for serialization */
static final long serialVersionUID = -3808427225599279539L;
/** the base models built in the boosting iterations */
protected Classifier[] m_Models;
/** The number of the class labels */
protected int m_NumClasses;
/** Class labels for each bag */
protected int[] m_Classes;
/** header (attribute information) of the propositionalized dataset used to build the models */
protected Instances m_Attributes;
/** Number of boosting iterations actually performed (may be fewer than m_MaxIterations) */
private int m_NumIterations = 100;
/** Voting weights of models */
protected double[] m_Beta;
/** the maximum number of boost iterations */
protected int m_MaxIterations = 10;
/** the number of discretization bins (0 means no discretization) */
protected int m_DiscretizeBin = 0;
/** filter used for discretization; only created when m_DiscretizeBin > 0 */
protected Discretize m_Filter = null;
/** filter used to convert the MI dataset into single-instance dataset */
protected MultiInstanceToPropositional m_ConvertToSI = new MultiInstanceToPropositional();
/**
 * Returns a string describing this classifier.
 *
 * @return a description of the classifier suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
  // fixed typo: "arithmatic" -> "arithmetic" in the user-visible description
  return
      "MI AdaBoost method, considers the geometric mean of posterior "
    + "of instances inside a bag (arithmetic mean of log-posterior) and "
    + "the expectation for a bag is taken inside the loss function.\n\n"
    + "For more information about Adaboost, see:\n\n"
    + getTechnicalInformation().toString();
}
/**
 * Returns an instance of a TechnicalInformation object, containing
 * detailed information about the technical background of this class,
 * e.g., paper reference or book this class is based on.
 *
 * @return the technical information about this class
 */
public TechnicalInformation getTechnicalInformation() {
  // reference to the Freund & Schapire AdaBoost paper
  TechnicalInformation paper = new TechnicalInformation(Type.INPROCEEDINGS);
  paper.setValue(Field.AUTHOR, "Yoav Freund and Robert E. Schapire");
  paper.setValue(Field.TITLE, "Experiments with a new boosting algorithm");
  paper.setValue(Field.BOOKTITLE, "Thirteenth International Conference on Machine Learning");
  paper.setValue(Field.YEAR, "1996");
  paper.setValue(Field.PAGES, "148-156");
  paper.setValue(Field.PUBLISHER, "Morgan Kaufmann");
  paper.setValue(Field.ADDRESS, "San Francisco");
  return paper;
}
/**
 * Returns an enumeration describing the available options
 *
 * @return an enumeration of all the available options
 */
public Enumeration listOptions() {
  Vector options = new Vector();

  options.addElement(new Option(
      "\tTurn on debugging output.",
      "D", 0, "-D"));
  options.addElement(new Option(
      "\tThe number of bins in discretization\n"
      + "\t(default 0, no discretization)",
      "B", 1, "-B <num>"));
  options.addElement(new Option(
      "\tMaximum number of boost iterations.\n"
      + "\t(default 10)",
      "R", 1, "-R <num>"));
  options.addElement(new Option(
      "\tFull name of classifier to boost.\n"
      + "\teg: weka.classifiers.bayes.NaiveBayes",
      "W", 1, "-W <class name>"));

  // append the options of the base classifier
  Enumeration enm = ((OptionHandler) m_Classifier).listOptions();
  while (enm.hasMoreElements())
    options.addElement(enm.nextElement());

  return options.elements();
}
/**
 * Parses a given list of options. <p/>
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 * Turn on debugging output.</pre>
 *
 * <pre> -B <num>
 * The number of bins in discretization
 * (default 0, no discretization)</pre>
 *
 * <pre> -R <num>
 * Maximum number of boost iterations.
 * (default 10)</pre>
 *
 * <pre> -W <class name>
 * Full name of classifier to boost.
 * eg: weka.classifiers.bayes.NaiveBayes</pre>
 *
 * <pre> -D
 * If set, classifier is run in debug mode and
 * may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  setDebug(Utils.getFlag('D', options));

  // -B: number of discretization bins (default 0 = no discretization)
  String binString = Utils.getOption('B', options);
  setDiscretizeBin((binString.length() == 0) ? 0 : Integer.parseInt(binString));

  // -R: maximum number of boosting iterations (default 10)
  String iterString = Utils.getOption('R', options);
  setMaxIterations((iterString.length() == 0) ? 10 : Integer.parseInt(iterString));

  // remaining options (e.g. -W) are handled by the superclass
  super.setOptions(options);
}
/**
 * Gets the current settings of the classifier.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String[] getOptions() {
  Vector result = new Vector();

  result.add("-R");
  result.add("" + getMaxIterations());
  result.add("-B");
  result.add("" + getDiscretizeBin());

  // append the options of the superclass (base classifier etc.)
  String[] superOptions = super.getOptions();
  for (int i = 0; i < superOptions.length; i++) {
    result.add(superOptions[i]);
  }

  return (String[]) result.toArray(new String[result.size()]);
}
/**
 * Returns the tip text for the maxIterations property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String maxIterationsTipText() {
  return "The maximum number of boost iterations.";
}
/**
 * Set the maximum number of boost iterations.
 *
 * @param value the maximum number of boost iterations
 */
public void setMaxIterations(int value) {
  this.m_MaxIterations = value;
}
/**
 * Get the maximum number of boost iterations.
 *
 * @return the maximum number of boost iterations
 */
public int getMaxIterations() {
  return this.m_MaxIterations;
}
/**
 * Returns the tip text for the discretizeBin property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String discretizeBinTipText() {
  return "The number of bins in discretization.";
}
/**
 * Set the number of bins in discretization.
 *
 * @param value the number of bins in discretization
 */
public void setDiscretizeBin(int value) {
  this.m_DiscretizeBin = value;
}
/**
 * Get the number of bins in discretization.
 *
 * @return the number of bins in discretization
 */
public int getDiscretizeBin() {
  return this.m_DiscretizeBin;
}
private class OptEng
extends Optimization {
private double[] weights, errs;
public void setWeights(double[] w){
weights = w;
}
public void setErrs(double[] e){
errs = e;
}
/**
* Evaluate objective function
* @param x the current values of variables
* @return the value of the objective function
* @throws Exception if result is NaN
*/
protected double objectiveFunction(double[] x) throws Exception{
double obj=0;
for(int i=0; i<weights.length; i++){
obj += weights[i]*Math.exp(x[0]*(2.0*errs[i]-1.0));
if(Double.isNaN(obj))
throw new Exception("Objective function value is NaN!");
}
return obj;
}
/**
* Evaluate Jacobian vector
* @param x the current values of variables
* @return the gradient vector
* @throws Exception if gradient is NaN
*/
protected double[] evaluateGradient(double[] x) throws Exception{
double[] grad = new double[1];
for(int i=0; i<weights.length; i++){
grad[0] += weights[i]*(2.0*errs[i]-1.0)*Math.exp(x[0]*(2.0*errs[i]-1.0));
if(Double.isNaN(grad[0]))
throw new Exception("Gradient is NaN!");
}
return grad;
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 1.6 $");
}
}
/**
 * Returns default capabilities of the classifier.
 *
 * @return the capabilities of this classifier
 */
public Capabilities getCapabilities() {
  Capabilities caps = super.getCapabilities();

  // attributes
  caps.enable(Capability.NOMINAL_ATTRIBUTES);
  caps.enable(Capability.RELATIONAL_ATTRIBUTES);
  caps.enable(Capability.MISSING_VALUES);

  // class: only binary, and only if the base classifier supports it
  caps.disableAllClasses();
  caps.disableAllClassDependencies();
  if (super.getCapabilities().handles(Capability.BINARY_CLASS))
    caps.enable(Capability.BINARY_CLASS);
  caps.enable(Capability.MISSING_CLASS_VALUES);

  // other
  caps.enable(Capability.ONLY_MULTIINSTANCE);

  return caps;
}
/**
 * Returns the capabilities of this multi-instance classifier for the
 * relational data.
 *
 * @return the capabilities of this object
 * @see Capabilities
 */
public Capabilities getMultiInstanceCapabilities() {
  Capabilities caps = super.getCapabilities();

  // the instances inside a bag carry no class attribute of their own
  caps.disableAllClasses();
  caps.enable(Capability.NO_CLASS);

  return caps;
}
/**
 * Builds the boosted classifier from the given multi-instance data.
 * The bags are propositionalized into a single-instance dataset, then a
 * base classifier is built per boosting iteration; each iteration's
 * voting weight (beta) is found numerically by minimizing an exponential
 * loss over the per-bag error rates.
 *
 * @param exps the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances exps) throws Exception {

  // can classifier handle the data?
  getCapabilities().testWithFail(exps);

  // remove instances with missing class
  Instances train = new Instances(exps);
  train.deleteWithMissingClass();

  m_NumClasses = train.numClasses();
  // will be reduced below if boosting stops early
  m_NumIterations = m_MaxIterations;

  if (m_Classifier == null)
    throw new Exception("A base classifier has not been specified!");

  if(!(m_Classifier instanceof WeightedInstancesHandler))
    throw new Exception("Base classifier cannot handle weighted instances!");

  m_Models = Classifier.makeCopies(m_Classifier, getMaxIterations());
  if(m_Debug)
    System.err.println("Base classifier: "+m_Classifier.getClass().getName());

  m_Beta = new double[m_NumIterations];

  /* modified by Lin Dong. (use MIToSingleInstance filter to convert the MI datasets) */

  // Initialize the bags' weights: each bag starts with weight sumNi/N
  // (the average bag size), so bag weights sum to sumNi
  double N = (double)train.numInstances(), sumNi=0;
  for(int i=0; i<N; i++)
    sumNi += train.instance(i).relationalValue(1).numInstances();
  for(int i=0; i<N; i++){
    train.instance(i).setWeight(sumNi/N);
  }

  //convert the training dataset into single-instance dataset
  m_ConvertToSI.setInputFormat(train);
  Instances data = Filter.useFilter( train, m_ConvertToSI);
  data.deleteAttributeAt(0); //remove the bagIndex attribute;

  // Assume the order of the instances are preserved in the Discretize filter
  if(m_DiscretizeBin > 0){
    m_Filter = new Discretize();
    m_Filter.setInputFormat(new Instances(data, 0));
    m_Filter.setBins(m_DiscretizeBin);
    data = Filter.useFilter(data, m_Filter);
  }

  // Main algorithm
  int dataIdx;
  iterations:
  for(int m=0; m < m_MaxIterations; m++){
    if(m_Debug)
      System.err.println("\nIteration "+m);
    // Build a model
    m_Models[m].buildClassifier(data);

    // Prediction of each bag: walk the propositionalized instances via
    // dataIdx, which stays in sync with the bag order of 'train'
    double[] err=new double[(int)N], weights=new double[(int)N];
    boolean perfect = true, tooWrong=true;
    dataIdx = 0;
    for(int n=0; n<N; n++){
      Instance exn = train.instance(n);
      // Prediction of each instance and the predicted class distribution
      // of the bag
      double nn = (double)exn.relationalValue(1).numInstances();
      for(int p=0; p<nn; p++){
        Instance testIns = data.instance(dataIdx++);
        if((int)m_Models[m].classifyInstance(testIns)
           != (int)exn.classValue()) // Weighted instance-wise 0-1 errors
          err[n] ++;
      }
      weights[n] = exn.weight();
      err[n] /= nn;
      // perfect stays true if no bag error exceeds 0.5; tooWrong stays
      // true if no bag error falls below 0.5 -- if either holds, the
      // exponential loss is monotone in beta and has no finite minimizer
      if(err[n] > 0.5)
        perfect = false;
      if(err[n] < 0.5)
        tooWrong = false;
    }

    if(perfect || tooWrong){ // No or 100% classification error, cannot find beta
      if (m == 0)
        m_Beta[m] = 1.0;
      else
        m_Beta[m] = 0;
      m_NumIterations = m+1;
      if(m_Debug) System.err.println("No errors");
      break iterations;
    }

    // single-variable optimization of beta, unconstrained (NaN bounds)
    double[] x = new double[1];
    x[0] = 0;
    double[][] b = new double[2][x.length];
    b[0][0] = Double.NaN;
    b[1][0] = Double.NaN;

    OptEng opt = new OptEng();
    opt.setWeights(weights);
    opt.setErrs(err);
    //opt.setDebug(m_Debug);
    if (m_Debug)
      System.out.println("Start searching for c... ");
    x = opt.findArgmin(x, b);
    // findArgmin returns null if it ran out of iterations; restart from
    // the current variable values until it converges
    while(x==null){
      x = opt.getVarbValues();
      if (m_Debug)
        System.out.println("200 iterations finished, not enough!");
      x = opt.findArgmin(x, b);
    }
    if (m_Debug)
      System.out.println("Finished.");
    m_Beta[m] = x[0];

    if(m_Debug)
      System.err.println("c = "+m_Beta[m]);

    // Stop if error too small or error too big and ignore this model
    if (Double.isInfinite(m_Beta[m])
        || Utils.smOrEq(m_Beta[m], 0)
       ) {
      if (m == 0)
        m_Beta[m] = 1.0;
      else
        m_Beta[m] = 0;
      m_NumIterations = m+1;
      if(m_Debug)
        System.err.println("Errors out of range!");
      break iterations;
    }

    // Update weights of data and class label of wfData
    // First pass: exponential reweighting of the bags
    dataIdx=0;
    double totWeights=0;
    for(int r=0; r<N; r++){
      Instance exr = train.instance(r);
      exr.setWeight(weights[r]*Math.exp(m_Beta[m]*(2.0*err[r]-1.0)));
      totWeights += exr.weight();
    }

    if(m_Debug)
      System.err.println("Total weights = "+totWeights);

    // Second pass: renormalize bag weights to sum to sumNi and spread
    // each bag's weight evenly over its propositionalized instances
    for(int r=0; r<N; r++){
      Instance exr = train.instance(r);
      double num = (double)exr.relationalValue(1).numInstances();
      exr.setWeight(sumNi*exr.weight()/totWeights);
      for(int s=0; s<num; s++){
        Instance inss = data.instance(dataIdx);
        inss.setWeight(exr.weight()/num);
        if(Double.isNaN(inss.weight()))
          throw new Exception("instance "+s+" in bag "+r+" has weight NaN!");
        dataIdx++;
      }
    }
  }
}
/**
 * Computes the distribution for a given exemplar (bag).
 *
 * @param exmp the exemplar for which distribution is computed
 * @return the classification
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance exmp)
  throws Exception {

  double[] distribution = new double[m_NumClasses];

  // wrap the bag in a one-instance dataset and propositionalize it
  Instances bagData = new Instances(exmp.dataset(), 0);
  bagData.add(exmp);
  bagData = Filter.useFilter(bagData, m_ConvertToSI);
  bagData.deleteAttributeAt(0); //remove the bagIndex attribute

  // count instances before discretization, matching the training setup
  double numInsts = bagData.numInstances();
  if (m_DiscretizeBin > 0)
    bagData = Filter.useFilter(bagData, m_Filter);

  // accumulate each model's vote, averaged over the bag's instances
  for (int i = 0; i < numInsts; i++) {
    Instance ins = bagData.instance(i);
    for (int j = 0; j < m_NumIterations; j++) {
      distribution[(int) m_Models[j].classifyInstance(ins)]
        += m_Beta[j] / numInsts;
    }
  }

  // exponentiate the accumulated log-scores and normalize
  for (int i = 0; i < distribution.length; i++)
    distribution[i] = Math.exp(distribution[i]);
  Utils.normalize(distribution);

  return distribution;
}
/**
 * Gets a string describing the classifier.
 *
 * @return a string describing the classifer built.
 */
public String toString() {
  if (m_Models == null)
    return "No model built yet!";

  StringBuffer buf = new StringBuffer();
  buf.append("MIBoost: number of bins in discretization = "+m_DiscretizeBin+"\n");

  if (m_NumIterations == 0) {
    buf.append("No model built yet.\n");
  } else if (m_NumIterations == 1) {
    buf.append("No boosting possible, one classifier used: Weight = "
        + Utils.roundDouble(m_Beta[0], 2)+"\n");
    buf.append("Base classifiers:\n"+m_Models[0].toString());
  } else {
    buf.append("Base classifiers and their weights: \n");
    for (int i = 0; i < m_NumIterations; i++) {
      buf.append("\n\n"+i+": Weight = " + Utils.roundDouble(m_Beta[i], 2)
          +"\nBase classifier:\n"+m_Models[i].toString() );
    }
  }

  buf.append("\n\nNumber of performed Iterations: "
      + m_NumIterations + "\n");

  return buf.toString();
}
/**
 * Returns the revision string.
 *
 * @return the revision
 */
public String getRevision() {
  String revision = "$Revision: 1.6 $";
  return RevisionUtils.extract(revision);
}
/**
 * Main method for testing this class.
 *
 * @param argv should contain the command line arguments to the
 * scheme (see Evaluation)
 */
public static void main(String[] argv) {
  MIBoost scheme = new MIBoost();
  runClassifier(scheme, argv);
}
}