/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * MIWrapper.java * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand * */ package weka.classifiers.mi; import weka.classifiers.SingleClassifierEnhancer; import weka.core.Capabilities; import weka.core.Instance; import weka.core.Instances; import weka.core.MultiInstanceCapabilitiesHandler; import weka.core.Option; import weka.core.OptionHandler; import weka.core.RevisionUtils; import weka.core.SelectedTag; import weka.core.Tag; import weka.core.TechnicalInformation; import weka.core.TechnicalInformationHandler; import weka.core.Utils; import weka.core.Capabilities.Capability; import weka.core.TechnicalInformation.Field; import weka.core.TechnicalInformation.Type; import weka.filters.Filter; import weka.filters.unsupervised.attribute.MultiInstanceToPropositional; import java.util.Enumeration; import java.util.Vector; /** <!-- globalinfo-start --> * A simple Wrapper method for applying standard propositional learners to multi-instance data.<br/> * <br/> * For more information see:<br/> * <br/> * E. T. Frank, X. Xu (2003). Applying propositional learning algorithms to multi-instance data. Department of Computer Science, University of Waikato, Hamilton, NZ. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @techreport{Frank2003, * address = {Department of Computer Science, University of Waikato, Hamilton, NZ}, * author = {E. T. Frank and X. Xu}, * institution = {University of Waikato}, * month = {06}, * title = {Applying propositional learning algorithms to multi-instance data}, * year = {2003} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -P [1|2|3] * The method used in testing: * 1.arithmetic average * 2.geometric average * 3.max probability of positive bag. * (default: 1)</pre> * * <pre> -A [0|1|2|3] * The type of weight setting for each single-instance: * 0.keep the weight to be the same as the original value; * 1.weight = 1.0 * 2.weight = 1.0/Total number of single-instance in the * corresponding bag * 3. weight = Total number of single-instance / (Total * number of bags * Total number of single-instance * in the corresponding bag). * (default: 3)</pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * * <pre> -W * Full name of base classifier. * (default: weka.classifiers.rules.ZeroR)</pre> * * <pre> * Options specific to classifier weka.classifiers.rules.ZeroR: * </pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * <!-- options-end --> * * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @author Xin Xu (xx5@cs.waikato.ac.nz) * @version $Revision: 1.5 $ */ public class MIWrapper extends SingleClassifierEnhancer implements MultiInstanceCapabilitiesHandler, OptionHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = -7707766152904315910L; /** The number of the class labels */ protected int m_NumClasses; /** arithmetic average */ public static final int TESTMETHOD_ARITHMETIC = 1; /** geometric average */ public static final int TESTMETHOD_GEOMETRIC = 2; /** max probability of positive bag */ public static final int TESTMETHOD_MAXPROB = 3; /** the test methods */ public static final Tag[] TAGS_TESTMETHOD = { new Tag(TESTMETHOD_ARITHMETIC, "arithmetic average"), new Tag(TESTMETHOD_GEOMETRIC, "geometric average"), new Tag(TESTMETHOD_MAXPROB, "max probability of positive bag") }; /** the test method */ protected int m_Method = TESTMETHOD_GEOMETRIC; /** Filter used to convert MI dataset into single-instance dataset */ protected MultiInstanceToPropositional m_ConvertToProp = new MultiInstanceToPropositional(); /** the single-instance weight setting method */ protected int m_WeightMethod = MultiInstanceToPropositional.WEIGHTMETHOD_INVERSE2; /** * Returns a string describing this filter * * @return a description of the filter suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "A simple Wrapper method for applying standard propositional learners " + "to multi-instance data.\n\n" + "For more information see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.TECHREPORT); result.setValue(Field.AUTHOR, "E. T. Frank and X. Xu"); result.setValue(Field.TITLE, "Applying propositional learning algorithms to multi-instance data"); result.setValue(Field.YEAR, "2003"); result.setValue(Field.MONTH, "06"); result.setValue(Field.INSTITUTION, "University of Waikato"); result.setValue(Field.ADDRESS, "Department of Computer Science, University of Waikato, Hamilton, NZ"); return result; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector result = new Vector(); result.addElement(new Option( "\tThe method used in testing:\n" + "\t1.arithmetic average\n" + "\t2.geometric average\n" + "\t3.max probability of positive bag.\n" + "\t(default: 1)", "P", 1, "-P [1|2|3]")); result.addElement(new Option( "\tThe type of weight setting for each single-instance:\n" + "\t0.keep the weight to be the same as the original value;\n" + "\t1.weight = 1.0\n" + "\t2.weight = 1.0/Total number of single-instance in the\n" + "\t\tcorresponding bag\n" + "\t3. weight = Total number of single-instance / (Total\n" + "\t\tnumber of bags * Total number of single-instance \n" + "\t\tin the corresponding bag).\n" + "\t(default: 3)", "A", 1, "-A [0|1|2|3]")); Enumeration enu = super.listOptions(); while (enu.hasMoreElements()) { result.addElement(enu.nextElement()); } return result.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -P [1|2|3] * The method used in testing: * 1.arithmetic average * 2.geometric average * 3.max probability of positive bag. * (default: 1)</pre> * * <pre> -A [0|1|2|3] * The type of weight setting for each single-instance: * 0.keep the weight to be the same as the original value; * 1.weight = 1.0 * 2.weight = 1.0/Total number of single-instance in the * corresponding bag * 3. weight = Total number of single-instance / (Total * number of bags * Total number of single-instance * in the corresponding bag). * (default: 3)</pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * * <pre> -W * Full name of base classifier. * (default: weka.classifiers.rules.ZeroR)</pre> * * <pre> * Options specific to classifier weka.classifiers.rules.ZeroR: * </pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { setDebug(Utils.getFlag('D', options)); String methodString = Utils.getOption('P', options); if (methodString.length() != 0) { setMethod( new SelectedTag(Integer.parseInt(methodString), TAGS_TESTMETHOD)); } else { setMethod( new SelectedTag(TESTMETHOD_ARITHMETIC, TAGS_TESTMETHOD)); } String weightString = Utils.getOption('A', options); if (weightString.length() != 0) { setWeightMethod( new SelectedTag( Integer.parseInt(weightString), MultiInstanceToPropositional.TAGS_WEIGHTMETHOD)); } else { setWeightMethod( new SelectedTag( MultiInstanceToPropositional.WEIGHTMETHOD_INVERSE2, MultiInstanceToPropositional.TAGS_WEIGHTMETHOD)); } super.setOptions(options); } /** * Gets the current settings of the Classifier. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { Vector result; String[] options; int i; result = new Vector(); result.add("-P"); result.add("" + m_Method); result.add("-A"); result.add("" + m_WeightMethod); options = super.getOptions(); for (i = 0; i < options.length; i++) result.add(options[i]); return (String[]) result.toArray(new String[result.size()]); } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String weightMethodTipText() { return "The method used for weighting the instances."; } /** * The new method for weighting the instances. * * @param method the new method */ public void setWeightMethod(SelectedTag method){ if (method.getTags() == MultiInstanceToPropositional.TAGS_WEIGHTMETHOD) m_WeightMethod = method.getSelectedTag().getID(); } /** * Returns the current weighting method for instances. * * @return the current weighting method */ public SelectedTag getWeightMethod(){ return new SelectedTag( m_WeightMethod, MultiInstanceToPropositional.TAGS_WEIGHTMETHOD); } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String methodTipText() { return "The method used for testing."; } /** * Set the method used in testing. * * @param method the index of method to use. */ public void setMethod(SelectedTag method) { if (method.getTags() == TAGS_TESTMETHOD) m_Method = method.getSelectedTag().getID(); } /** * Get the method used in testing. * * @return the index of method used in testing. */ public SelectedTag getMethod() { return new SelectedTag(m_Method, TAGS_TESTMETHOD); } /** * Returns default capabilities of the classifier. * * @return the capabilities of this classifier */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); // class result.disableAllClasses(); result.disableAllClassDependencies(); if (super.getCapabilities().handles(Capability.NOMINAL_CLASS)) result.enable(Capability.NOMINAL_CLASS); if (super.getCapabilities().handles(Capability.BINARY_CLASS)) result.enable(Capability.BINARY_CLASS); result.enable(Capability.RELATIONAL_ATTRIBUTES); result.enable(Capability.MISSING_CLASS_VALUES); // other result.enable(Capability.ONLY_MULTIINSTANCE); return result; } /** * Returns the capabilities of this multi-instance classifier for the * relational data. * * @return the capabilities of this object * @see Capabilities */ public Capabilities getMultiInstanceCapabilities() { Capabilities result = super.getCapabilities(); // class result.disableAllClasses(); result.enable(Capability.NO_CLASS); return result; } /** * Builds the classifier * * @param data the training data to be used for generating the * boosted classifier. * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class Instances train = new Instances(data); train.deleteWithMissingClass(); if (m_Classifier == null) { throw new Exception("A base classifier has not been specified!"); } if (getDebug()) System.out.println("Start training ..."); m_NumClasses = train.numClasses(); //convert the training dataset into single-instance dataset m_ConvertToProp.setWeightMethod(getWeightMethod()); m_ConvertToProp.setInputFormat(train); train = Filter.useFilter(train, m_ConvertToProp); train.deleteAttributeAt(0); // remove the bag index attribute m_Classifier.buildClassifier(train); } /** * Computes the distribution for a given exemplar * * @param exmp the exemplar for which distribution is computed * @return the distribution * @throws Exception if the distribution can't be computed successfully */ public double[] distributionForInstance(Instance exmp) throws Exception { Instances testData = new Instances (exmp.dataset(),0); testData.add(exmp); // convert the training dataset into single-instance dataset m_ConvertToProp.setWeightMethod( new SelectedTag( MultiInstanceToPropositional.WEIGHTMETHOD_ORIGINAL, MultiInstanceToPropositional.TAGS_WEIGHTMETHOD)); testData = Filter.useFilter(testData, m_ConvertToProp); testData.deleteAttributeAt(0); //remove the bag index attribute // Compute the log-probability of the bag double [] distribution = new double[m_NumClasses]; double nI = (double)testData.numInstances(); double [] maxPr = new double [m_NumClasses]; for(int i=0; i<nI; i++){ double[] dist = m_Classifier.distributionForInstance(testData.instance(i)); for(int j=0; j<m_NumClasses; j++){ switch(m_Method){ case TESTMETHOD_ARITHMETIC: distribution[j] += dist[j]/nI; break; case TESTMETHOD_GEOMETRIC: // Avoid 0/1 probability if(dist[j]<0.001) dist[j] = 0.001; else if(dist[j]>0.999) dist[j] = 0.999; distribution[j] += Math.log(dist[j])/nI; break; case TESTMETHOD_MAXPROB: if (dist[j]>maxPr[j]) maxPr[j] = dist[j]; break; } } } if(m_Method == TESTMETHOD_GEOMETRIC) for(int j=0; j<m_NumClasses; j++) distribution[j] = Math.exp(distribution[j]); if(m_Method == TESTMETHOD_MAXPROB){ // for positive bag distribution[1] = maxPr[1]; distribution[0] = 1 - distribution[1]; } if (Utils.eq(Utils.sum(distribution), 0)) { for (int i = 0; i < distribution.length; i++) distribution[i] = 1.0 / (double) distribution.length; } else { Utils.normalize(distribution); } return distribution; } /** * Gets a string describing the classifier. * * @return a string describing the classifer built. */ public String toString() { return "MIWrapper with base classifier: \n"+m_Classifier.toString(); } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 1.5 $"); } /** * Main method for testing this class. * * @param argv should contain the command line arguments to the * scheme (see Evaluation) */ public static void main(String[] argv) { runClassifier(new MIWrapper(), argv); } }