/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * MILES.java * Copyright (C) 2008-09 University of Waikato, Hamilton, New Zealand */ package weka.filters.unsupervised.attribute; import weka.core.*; import weka.core.Capabilities.Capability; import weka.core.TechnicalInformation.Field; import weka.core.TechnicalInformation.Type; import weka.filters.SimpleBatchFilter; import weka.filters.UnsupervisedFilter; import java.util.Enumeration; import java.util.LinkedList; /** <!-- globalinfo-start --> * Implements the MILES transformation that maps multiple instance bags into a high-dimensional single-instance feature space.<br/> * For more information see:<br/> * <br/> * Y. Chen, J. Bi, J.Z. Wang (2006). MILES: Multiple-instance learning via embedded instance selection. IEEE PAMI. 28(12):1931-1947.<br/> * <br/> * James Foulds, Eibe Frank: Revisiting multiple-instance learning via embedded instance selection. In: 21st Australasian Joint Conference on Artificial Intelligence, 300-310, 2008. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @article{Chen2006, * author = {Y. Chen and J. Bi and J.Z. Wang}, * journal = {IEEE PAMI}, * number = {12}, * pages = {1931-1947}, * title = {MILES: Multiple-instance learning via embedded instance selection}, * volume = {28}, * year = {2006} * } * * @inproceedings{Foulds2008, * author = {James Foulds and Eibe Frank}, * booktitle = {21st Australasian Joint Conference on Artificial Intelligence}, * pages = {300-310}, * publisher = {Springer}, * title = {Revisiting multiple-instance learning via embedded instance selection}, * year = {2008} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S <num> * Specify the sigma parameter (default: sqrt(800000)</pre> * <!-- options-end --> * * @author Jimmy Foulds * @author Eibe Frank * @version $Revision: 5987 $ */ public class MILESFilter extends SimpleBatchFilter implements UnsupervisedFilter, OptionHandler, TechnicalInformationHandler { /** For serialization */ static final long serialVersionUID = 4694489111366063853L; /** Index of bag attribute */ public static final int BAG_ATTRIBUTE = 1; /** Index of label attribute */ public static final int LABEL_ATTRIBUTE = 2; /** Sigma parameter (default: square root of 800000) */ private double m_sigma = Math.sqrt(800000); /** Linked list of all instances collected */ private LinkedList<Instance> m_allInsts = null; /** * Returns the tip text for this property */ public String sigmaTipText() { return "The value of the sigma parameter."; } /** * Sets the sigma parameter. */ public void setSigma(double sigma) { m_sigma = sigma; } /** * Gets the sigma parameter. */ public double getSigma() { return m_sigma; } /** * Global info for the filter. */ public String globalInfo() { return "Implements the MILES transformation that maps multiple instance bags into" + " a high-dimensional single-instance feature space." + "\n" + "For more information see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; TechnicalInformation additional; result = new TechnicalInformation(Type.ARTICLE); result.setValue(Field.AUTHOR, "Y. Chen and J. Bi and J.Z. Wang"); result.setValue(Field.TITLE, "MILES: Multiple-instance learning via embedded instance selection"); result.setValue(Field.JOURNAL, "IEEE PAMI"); result.setValue(Field.YEAR, "2006"); result.setValue(Field.VOLUME, "28"); result.setValue(Field.PAGES, "1931-1947"); result.setValue(Field.NUMBER, "12"); additional = result.add(Type.INPROCEEDINGS); additional.setValue(Field.AUTHOR, "James Foulds and Eibe Frank"); additional.setValue(Field.TITLE, "Revisiting multiple-instance learning via embedded instance selection"); additional.setValue(Field.BOOKTITLE, "21st Australasian Joint Conference on Artificial Intelligence"); additional.setValue(Field.YEAR, "2008"); additional.setValue(Field.PAGES, "300-310"); additional.setValue(Field.PUBLISHER, "Springer"); return result; } /** * Capabilities for the filter. */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); result.enable(Capability.ONLY_MULTIINSTANCE); return result; } /** * Determines the output format for the filter. */ protected Instances determineOutputFormat(Instances inputFormat) { // Create attributes FastVector atts = new FastVector(); m_allInsts = new LinkedList<Instance>(); for (int i = 0; i < getInputFormat().numInstances(); i++) { Instances bag = getInputFormat().instance(i).relationalValue(BAG_ATTRIBUTE); for (int j = 0; j < bag.numInstances(); j++) { m_allInsts.add(bag.instance(j)); } } for (int i = 0; i < m_allInsts.size(); i++) { atts.addElement(new Attribute("" + i)); } atts.addElement(inputFormat.attribute(LABEL_ATTRIBUTE)); //class //TODO set relation name properly Instances returner = new Instances("", atts, 0); returner.setClassIndex(returner.numAttributes() - 1); return returner; } /** * Processes a set of instances. */ protected Instances process(Instances inst) { // Get instances object with correct output format Instances result = getOutputFormat(); result.setClassIndex(result.numAttributes() - 1); // Can't do much if bag is empty if (inst.numInstances() == 0) { return result; } // Go through all the instances in the bag to be transformed for (int i = 0; i < inst.numInstances(); i++) //for every bag { // Allocate memory for instance double[] outputInstance = new double[result.numAttributes()]; // Get the bag Instances bag = inst.instance(i).relationalValue(BAG_ATTRIBUTE); int k = 0; for (Instance x_k : m_allInsts) //for every instance in every bag { //TODO handle empty bags double dSquared = Double.MAX_VALUE; for (int j = 0; j < bag.numInstances(); j++) //for every instance in the current bag { // Compute sum of squared differences double total = 0; Instance x_ij = bag.instance(j); double numMissingValues = 0; for (int l = 0; l < x_k.numAttributes(); l++) //for every attribute { // Can skip missing values in reference instance if (x_k.isMissing(l)) { continue; } // Need to keep track of how many values in current instance are missing if (!x_ij.isMissing(l)) { total += (x_ij.value(l) - x_k.value(l)) * (x_ij.value(l) - x_k.value(l)); } else { numMissingValues++; } } // Adjust for missing values total *= x_k.numAttributes() / (x_k.numAttributes() - numMissingValues); // Update minimum if (total < dSquared || dSquared == Double.MAX_VALUE) { dSquared = total; } } if (dSquared == Double.MAX_VALUE) outputInstance[k] = 0; //TODO is this ok? else outputInstance[k] = Math.exp(-1.0 * dSquared / (m_sigma * m_sigma)); k++; } // Set class label double label = inst.instance(i).value(LABEL_ATTRIBUTE); outputInstance[outputInstance.length - 1] = label; // Add instance to result result.add(new DenseInstance(inst.instance(i).weight(), outputInstance)); } return result; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { FastVector newVector = new FastVector(1); newVector.addElement(new Option( "\tSpecify the sigma parameter (default: sqrt(800000)", "S", 1, "-S <num>")); return newVector.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S <num> * Specify the sigma parameter (default: sqrt(800000)</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String sigmaString = Utils.getOption('S', options); if (sigmaString.length() != 0) { setSigma(Double.parseDouble(sigmaString)); } else { setSigma(Math.sqrt(800000)); } } /** * Gets the current settings of the filter. * * @return an array of strings suitable for passing to setOptions */ public String [] getOptions() { String [] options = new String [2]; int current = 0; options[current++] = "-S"; options[current++] = "" + getSigma(); while (current < options.length) { options[current++] = ""; } return options; } public static void main(String[] args) { runFilter(new MILESFilter(), args); } public String getRevision() { return RevisionUtils.extract("$Revision: 5987 $"); } }