PartitionedMultiFilter.java example

Explorer
TimeSeriesClassification-master
- TimeSeriesClassification
  - src
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * PartitionedMultiFilter.java
 * Copyright (C) 2006-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters.unsupervised.attribute;

import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.filters.AllFilter;
import weka.filters.Filter;
import weka.filters.SimpleBatchFilter;

/**
 <!-- globalinfo-start -->
 * A filter that applies filters on subsets of attributes and assembles the output into a new dataset. Attributes that are not covered by any of the ranges can be either retained or removed from the output.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 *  Turns on output of debugging information.</pre>
 *
 * <pre> -F <classname [options]>
 *  A filter to apply (can be specified multiple times).</pre>
 *
 * <pre> -R <range>
 *  An attribute range (can be specified multiple times).
 *  For each filter a range must be supplied. 'first' and 'last'
 *  are valid indices. 'inv(...)' around the range denotes an
 *  inverted range.</pre>
 *
 * <pre> -U
 *  Flag for leaving unused attributes out of the output, by default
 *  these are included in the filter output.</pre>
 *
 <!-- options-end -->
 *
 * @author  FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 8034 $
 * @see     weka.filters.StreamableFilter
 */
public class PartitionedMultiFilter
  extends SimpleBatchFilter {

  /** for serialization. */
  private static final long serialVersionUID = -6293720886005713120L;

  /** The filters. */
  protected Filter m_Filters[] = {new AllFilter()};

  /** The attribute ranges. */
  protected Range m_Ranges[] = {new Range("first-last")};

  /** Whether unused attributes are left out of the output. */
  protected boolean m_RemoveUnused = false;

  /** the indices of the unused attributes. */
  protected int[] m_IndicesUnused = new int[0];

  /**
   * Returns a string describing this filter.
   * @return 		a description of the filter suitable for
   * 			displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return
        "A filter that applies filters on subsets of attributes and "
      + "assembles the output into a new dataset. Attributes that are "
      + "not covered by any of the ranges can be either retained or removed "
      + "from the output.";
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return 		an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector result = new Vector();
    Enumeration enm = super.listOptions();
    while (enm.hasMoreElements())
      result.add(enm.nextElement());

    result.addElement(new Option(
        "\tA filter to apply (can be specified multiple times).",
        "F", 1, "-F <classname [options]>"));

    result.addElement(new Option(
        "\tAn attribute range (can be specified multiple times).\n"
	+ "\tFor each filter a range must be supplied. 'first' and 'last'\n"
	+ "\tare valid indices. 'inv(...)' around the range denotes an\n"
	+ "\tinverted range.",
        "R", 1, "-R <range>"));

    result.addElement(new Option(
        "\tFlag for leaving unused attributes out of the output, by default\n"
	+ "\tthese are included in the filter output.",
        "U", 0, "-U"));

    return result.elements();
  }

  /**
   * Parses a list of options for this object. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -D
   *  Turns on output of debugging information.</pre>
   *
   * <pre> -F <classname [options]>
   *  A filter to apply (can be specified multiple times).</pre>
   *
   * <pre> -R <range>
   *  An attribute range (can be specified multiple times).
   *  For each filter a range must be supplied. 'first' and 'last'
   *  are valid indices. 'inv(...)' around the range denotes an
   *  inverted range.</pre>
   *
   * <pre> -U
   *  Flag for leaving unused attributes out of the output, by default
   *  these are included in the filter output.</pre>
   *
   <!-- options-end -->
   *
   * @param options 	the list of options as an array of strings
   * @throws Exception 	if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String	tmpStr;
    String	classname;
    String[]	options2;
    Vector	objects;
    Range	range;

    super.setOptions(options);

    setRemoveUnused(Utils.getFlag("U", options));

    objects = new Vector();
    while ((tmpStr = Utils.getOption("F", options)).length() != 0) {
      options2    = Utils.splitOptions(tmpStr);
      classname      = options2[0];
      options2[0] = "";
      objects.add(Utils.forName(Filter.class, classname, options2));
    }

    // at least one filter
    if (objects.size() == 0)
      objects.add(new AllFilter());

    setFilters((Filter[]) objects.toArray(new Filter[objects.size()]));

    objects = new Vector();
    while ((tmpStr = Utils.getOption("R", options)).length() != 0) {
      if (tmpStr.startsWith("inv(") && tmpStr.endsWith(")")) {
	range = new Range(tmpStr.substring(4, tmpStr.length() - 1));
	range.setInvert(true);
      }
      else {
	range = new Range(tmpStr);
      }
      objects.add(range);
    }

    // at least one Range
    if (objects.size() == 0)
      objects.add(new Range("first-last"));

    setRanges((Range[]) objects.toArray(new Range[objects.size()]));

    // is number of filters the same as ranges?
    checkDimensions();
  }

  /**
   * Gets the current settings of the filter.
   *
   * @return 		an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    Vector	result;
    String[]	options;
    int		i;
    String	tmpStr;

    result = new Vector();

    options = super.getOptions();
    for (i = 0; i < options.length; i++)
      result.add(options[i]);

    if (getRemoveUnused())
      result.add("-U");

    for (i = 0; i < getFilters().length; i++) {
      result.add("-F");
      result.add(getFilterSpec(getFilter(i)));
    }

    for (i = 0; i < getRanges().length; i++) {
      tmpStr = getRange(i).getRanges();
      if (getRange(i).getInvert())
	tmpStr = "inv(" + tmpStr + ")";
      result.add("-R");
      result.add(tmpStr);
    }

    return (String[]) result.toArray(new String[result.size()]);
  }

  /**
   * checks whether the dimensions of filters and ranges fit together.
   *
   * @throws Exception	if dimensions differ
   */
  protected void checkDimensions() throws Exception {
    if (getFilters().length != getRanges().length)
      throw new IllegalArgumentException(
	  "Number of filters (= " + getFilters().length + ") "
	  + "and ranges (= " + getRanges().length + ") don't match!");
  }

  /**
   * tests the data whether the filter can actually handle it.
   *
   * @param instanceInfo	the data to test
   * @throws Exception		if the test fails
   */
  protected void testInputFormat(Instances instanceInfo) throws Exception {
    for (int i = 0; i < getRanges().length; i++) {
      Instances newi = new Instances(instanceInfo, 0);
      if (instanceInfo.size() > 0){
	newi.add((Instance)instanceInfo.get(0).copy());
      }
      Range range = getRanges()[i];
      range.setUpper(instanceInfo.numAttributes() - 1);
      Instances subset = generateSubset(newi, range);
      getFilters()[i].setInputFormat(subset);
    }
  }

  /**
   * Sets whether unused attributes (ones that are not covered by any of the
   * ranges) are removed from the output.
   *
   * @param value	if true then the unused attributes get removed
   */
  public void setRemoveUnused(boolean value) {
    m_RemoveUnused = value;
  }

  /**
   * Gets whether unused attributes (ones that are not covered by any of the
   * ranges) are removed from the output.
   *
   * @return		true if unused attributes are removed
   */
  public boolean getRemoveUnused() {
    return m_RemoveUnused;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return    	tip text for this property suitable for
   *            	displaying in the explorer/experimenter gui
   */
  public String removeUnusedTipText() {
    return
        "If true then unused attributes (ones that are not covered by any "
      + "of the ranges) will be removed from the output.";
  }

  /**
   * Sets the list of possible filters to choose from.
   * Also resets the state of the filter (this reset doesn't affect the
   * options).
   *
   * @param filters	an array of filters with all options set.
   * @see #reset()
   */
  public void setFilters(Filter[] filters) {
    m_Filters = filters;
    reset();
  }

  /**
   * Gets the list of possible filters to choose from.
   *
   * @return 		the array of Filters
   */
  public Filter[] getFilters() {
    return m_Filters;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return    	tip text for this property suitable for
   *            	displaying in the explorer/experimenter gui
   */
  public String filtersTipText() {
    return "The base filters to be used.";
  }

  /**
   * Gets a single filter from the set of available filters.
   *
   * @param index 	the index of the filter wanted
   * @return 		the Filter
   */
  public Filter getFilter(int index) {
    return m_Filters[index];
  }

  /**
   * returns the filter classname and the options as one string.
   *
   * @param filter	the filter to get the specs for
   * @return		the classname plus options
   */
  protected String getFilterSpec(Filter filter) {
    String        result;

    if (filter == null) {
      result = "";
    }
    else {
      result  = filter.getClass().getName();
      if (filter instanceof OptionHandler)
        result += " "
          + Utils.joinOptions(((OptionHandler) filter).getOptions());
    }

    return result;
  }

  /**
   * Sets the list of possible Ranges to choose from.
   * Also resets the state of the Range (this reset doesn't affect the
   * options).
   *
   * @param Ranges	an array of Ranges with all options set.
   * @see #reset()
   */
  public void setRanges(Range[] Ranges) {
    m_Ranges = Ranges;
    reset();
  }

  /**
   * Gets the list of possible Ranges to choose from.
   *
   * @return 		the array of Ranges
   */
  public Range[] getRanges() {
    return m_Ranges;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return    	tip text for this property suitable for
   *            	displaying in the explorer/experimenter gui
   */
  public String rangesTipText() {
    return "The attribute ranges to be used; 'inv(...)' denotes an inverted range.";
  }

  /**
   * Gets a single Range from the set of available Ranges.
   *
   * @param index 	the index of the Range wanted
   * @return 		the Range
   */
  public Range getRange(int index) {
    return m_Ranges[index];
  }

  /**
   * determines the indices of unused attributes (ones that are not covered
   * by any of the range).
   *
   * @param data	the data to base the determination on
   * @see 		#m_IndicesUnused
   */
  protected void determineUnusedIndices(Instances data) {
    Vector<Integer>	indices;
    int			i;
    int			n;
    boolean		covered;

    // traverse all ranges
    indices = new Vector<Integer>();
    for (i = 0; i < data.numAttributes(); i++) {
      if (i == data.classIndex())
	continue;

      covered = false;
      for (n = 0; n < getRanges().length; n++) {
	if (getRanges()[n].isInRange(i)) {
	  covered = true;
	  break;
	}
      }

      if (!covered)
	indices.add(new Integer(i));
    }

    // create array
    m_IndicesUnused = new int[indices.size()];
    for (i = 0; i < indices.size(); i++)
      m_IndicesUnused[i] = indices.get(i).intValue();

    if (getDebug())
      System.out.println(
	  "Unused indices: " + Utils.arrayToString(m_IndicesUnused));
  }

  /**
   * generates a subset of the dataset with only the attributes from the range
   * (class is always added if present).
   *
   * @param data	the data to work on
   * @param range	the range of attribute to use
   * @return		the generated subset
   * @throws Exception	if creation fails
   */
  protected Instances generateSubset(Instances data, Range range) throws Exception {
    Remove		filter;
    StringBuilder	atts;
    Instances		result;
    int[]		indices;
    int			i;

    // determine attributes
    indices = range.getSelection();
    atts    = new StringBuilder();
    for (i = 0; i < indices.length; i++) {
      if (i > 0)
	atts.append(",");
      atts.append("" + (indices[i] + 1));
    }
    if ((data.classIndex() > -1) && (!range.isInRange(data.classIndex())))
      atts.append("," + (data.classIndex() + 1));

    // setup filter
    filter = new Remove();
    filter.setAttributeIndices(atts.toString());
    filter.setInvertSelection(true);
    filter.setInputFormat(data);

    // generate output
    result = Filter.useFilter(data, filter);

    return result;
  }

  /**
   * renames all the attributes in the dataset (excluding the class if present)
   * by adding the prefix to the name.
   *
   * @param data	the data to work on
   * @param prefix	the prefix for the attributes
   * @return		a copy of the data with the attributes renamed
   * @throws Exception	if renaming fails
   */
  protected Instances renameAttributes(Instances data, String prefix) throws Exception {
    Instances			result;
    int				i;
    ArrayList<Attribute>	atts;

    // rename attributes
    atts = new ArrayList<Attribute>();
    for (i = 0; i < data.numAttributes(); i++) {
      if (i == data.classIndex())
	atts.add((Attribute) data.attribute(i).copy());
      else
	atts.add(data.attribute(i).copy(prefix + data.attribute(i).name()));
    }

    // create new dataset
    result = new Instances(data.relationName(), atts, data.numInstances());
    for (i = 0; i < data.numInstances(); i++) {
      result.add((Instance) data.instance(i).copy());
    }

    // set class if present
    if (data.classIndex() > -1)
      result.setClassIndex(data.classIndex());

    return result;
  }

  /**
   * Determines the output format based only on the full input dataset and
   * returns this otherwise null is returned. In case the output format cannot
   * be returned immediately, i.e., immediateOutputFormat() returns false,
   * then this method will be called from batchFinished().
   *
   * @param inputFormat     the input format to base the output format on
   * @return                the output format
   * @throws Exception      in case the determination goes wrong
   * @see                   #hasImmediateOutputFormat()
   * @see                   #batchFinished()
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    Instances   		result;
    Instances			processed;
    int         		i;
    int				n;
    ArrayList<Attribute>	atts;
    Attribute			att;

    if (!isFirstBatchDone()) {
      // we need the full dataset here, see process(Instances)
      if (inputFormat.numInstances() == 0)
	return null;

      checkDimensions();

      // determine unused indices
      determineUnusedIndices(inputFormat);

      atts = new ArrayList<Attribute>();
      for (i = 0; i < getFilters().length; i++) {
	if (!isFirstBatchDone()) {
	  // generate subset
	  processed = generateSubset(inputFormat, getRange(i));
	  // set input format
	  if (!getFilter(i).setInputFormat(processed))
	    Filter.useFilter(processed, getFilter(i));
	}

	// get output format
	processed = getFilter(i).getOutputFormat();

	// rename attributes
	processed = renameAttributes(processed, "filtered-" + i + "-");

	// add attributes
	for (n = 0; n < processed.numAttributes(); n++) {
	  if (n == processed.classIndex())
	    continue;
	  atts.add((Attribute) processed.attribute(n).copy());
	}
      }

      // add unused attributes
      if (!getRemoveUnused()) {
	for (i = 0; i < m_IndicesUnused.length; i++) {
	  att = inputFormat.attribute(m_IndicesUnused[i]);
	  atts.add(att.copy("unfiltered-" + att.name()));
	}
      }

      // add class if present
      if (inputFormat.classIndex() > -1)
	atts.add((Attribute) inputFormat.classAttribute().copy());

      // generate new dataset
      result = new Instances(inputFormat.relationName(), atts, 0);
      if (inputFormat.classIndex() > -1)
	result.setClassIndex(result.numAttributes() - 1);
    }
    else {
      result = getOutputFormat();
    }

    return result;
  }

  /**
   * Processes the given data (may change the provided dataset) and returns
   * the modified version. This method is called in batchFinished().
   *
   * @param instances   the data to process
   * @return            the modified data
   * @throws Exception  in case the processing goes wrong
   * @see               #batchFinished()
   */
  public Instances process(Instances instances) throws Exception {
    Instances		result;
    int        		i;
    int			n;
    int			m;
    int			index;
    Instances[]		processed;
    Instance		inst;
    Instance		newInst;
    double[]		values;
    Vector		errors;

    if (!isFirstBatchDone()) {
      checkDimensions();

      // set upper limits
      for (i = 0; i < m_Ranges.length; i++)
	m_Ranges[i].setUpper(instances.numAttributes() - 1);

      // determine unused indices
      determineUnusedIndices(instances);
    }

    // pass data through all filters
    processed = new Instances[getFilters().length];
    for (i = 0; i < getFilters().length; i++) {
      processed[i] = generateSubset(instances, getRange(i));
      if (!isFirstBatchDone())
	getFilter(i).setInputFormat(processed[i]);
      processed[i] = Filter.useFilter(processed[i], getFilter(i));
    }

    // set output format (can only be determined with full dataset, hence here)
    if (!isFirstBatchDone()) {
      result = determineOutputFormat(instances);
      setOutputFormat(result);
    }
    else {
      result = getOutputFormat();
    }

    // check whether all filters didn't change the number of instances
    errors = new Vector();
    for (i = 0; i < processed.length; i++) {
      if (processed[i].numInstances() != instances.numInstances())
	errors.add(new Integer(i));
    }
    if (errors.size() > 0)
      throw new IllegalStateException(
	  "The following filter(s) changed the number of instances: " + errors);

    // assemble data
    for (i = 0; i < instances.numInstances(); i++) {
      inst   = instances.instance(i);
      values = new double[result.numAttributes()];

      // filtered data
      index = 0;
      for (n = 0; n < processed.length; n++) {
	for (m = 0; m < processed[n].numAttributes(); m++) {
	  if (m == processed[n].classIndex())
	    continue;
	  if (result.attribute(index).isString())
	    values[index] = result.attribute(index).addStringValue(processed[n].instance(i).stringValue(m));
	  else if (result.attribute(index).isRelationValued())
	    values[index] = result.attribute(index).addRelation(processed[n].instance(i).relationalValue(m));
	  else
	    values[index] = processed[n].instance(i).value(m);
	  index++;
	}
      }

      // unused attributes
      if (!getRemoveUnused()) {
	for (n = 0; n < m_IndicesUnused.length; n++) {
	  if (result.attribute(index).isString())
	    values[index] = result.attribute(index).addStringValue(inst.stringValue(m_IndicesUnused[n]));
	  else if (result.attribute(index).isRelationValued())
	    values[index] = result.attribute(index).addRelation(inst.relationalValue(m_IndicesUnused[n]));
	  else
	    values[index] = inst.value(m_IndicesUnused[n]);
	  index++;
	}
      }

      // class
      if (instances.classIndex() > -1)
	values[values.length - 1] = inst.value(instances.classIndex());

      // generate and add instance
      if (inst instanceof SparseInstance)
	newInst = new SparseInstance(instances.instance(i).weight(), values);
      else
	newInst = new DenseInstance(instances.instance(i).weight(), values);
      result.add(newInst);
    }

    return result;
  }

  /**
   * Returns the revision string.
   *
   * @return		the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 8034 $");
  }

  /**
   * Main method for executing this class.
   *
   * @param args should contain arguments for the filter: use -h for help
   */
  public static void main(String[] args) {
    runFilter(new PartitionedMultiFilter(), args);
  }
}