/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* AddCluster.java
* Copyright (C) 2002 Richard Kirkby
*
*/
package weka.filters.unsupervised.attribute;
import weka.filters.*;
import weka.clusterers.Clusterer;
import weka.core.*;
import java.util.Enumeration;
import java.util.Vector;
/**
* A filter that adds a new nominal attribute representing the cluster assigned
* to each instance by the specified clustering algorithm.<p>
*
* Valid filter-specific options are: <p>
*
* -W clusterer string <br>
* Full class name of clusterer to use, followed by scheme options. (required)<p>
*
* -I range string <br>
* The range of attributes the clusterer should ignore.<p>
*
* @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
* @version $Revision: 1.1.1.1 $
*/
public class AddCluster extends Filter implements UnsupervisedFilter, OptionHandler {

  /** The clusterer used to assign a cluster to each instance (SimpleKMeans by default). */
  protected Clusterer m_Clusterer = new weka.clusterers.SimpleKMeans();

  /** Range of attributes the clusterer should ignore; null means nothing is ignored. */
  protected Range m_IgnoreAttributesRange = null;

  /**
   * Sets the format of the input instances.
   *
   * The output format is not known at this point: it contains a nominal
   * attribute with one value per cluster, and the clusterer is only built
   * in {@link #batchFinished()}. Hence this method always returns false.
   *
   * @param instanceInfo an Instances object containing the input instance
   * structure (any instances contained in the object are ignored - only the
   * structure is required).
   * @return false, because the output format cannot be collected immediately
   * @exception Exception if the inputFormat can't be set successfully
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {
    super.setInputFormat(instanceInfo);
    return false;
  }

  /**
   * Signify that this batch of input to the filter is finished.
   *
   * Builds the clusterer on the buffered input (minus any ignored
   * attributes), defines the output format as the input format plus a
   * trailing nominal attribute named "cluster", and pushes one output
   * instance per input instance carrying the assigned cluster.
   *
   * @return true if there are instances pending output
   * @exception IllegalStateException if no input structure has been defined
   * @exception Exception any exception propagated from the clusterer or from
   * the attribute-removal filter
   */
  public boolean batchFinished() throws Exception {
    Instances toFilter = getInputFormat();
    if (toFilter == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    // The data handed to the clusterer; identical to the input unless some
    // attributes have to be ignored.
    Instances toFilterIgnoringAttributes = toFilter;
    if (m_IgnoreAttributesRange != null) {
      toFilterIgnoringAttributes = removeIgnoredAttributes(toFilter);
    }

    // build the clusterer
    m_Clusterer.buildClusterer(toFilterIgnoringAttributes);

    // create output dataset with new attribute
    Instances filtered = new Instances(toFilter, 0);
    int numClusters = m_Clusterer.numberOfClusters();
    FastVector nominal_values = new FastVector(numClusters);
    for (int i = 0; i < numClusters; i++) {
      // labels are 1-based: cluster1, cluster2, ...
      nominal_values.addElement("cluster" + (i+1));
    }
    filtered.insertAttributeAt(new Attribute("cluster", nominal_values),
                               filtered.numAttributes());
    setOutputFormat(filtered);

    int numInputAtts = toFilter.numAttributes();
    int numOutputAtts = filtered.numAttributes();
    Instance original, processed;

    // build new dataset
    for (int i = 0; i < toFilter.numInstances(); i++) {
      original = toFilter.instance(i);

      // copy the original values into the (one slot longer) output array
      double[] instanceVals = new double[numOutputAtts];
      for (int j = 0; j < numInputAtts; j++) {
        instanceVals[j] = original.value(j);
      }

      // add cluster to end; instance i of the reduced data is taken to be
      // the counterpart of instance i of the input
      instanceVals[numInputAtts]
        = m_Clusterer.clusterInstance(toFilterIgnoringAttributes.instance(i));

      // create new instance, keeping the sparse/dense representation
      if (original instanceof SparseInstance) {
        processed = new SparseInstance(original.weight(), instanceVals);
      } else {
        processed = new Instance(original.weight(), instanceVals);
      }

      // NOTE(review): this call is handed 'original' (the input instance),
      // not 'processed'. Whether that is intended depends on how
      // Filter.copyStringValues and Filter.push treat string attributes;
      // left unchanged here - confirm against the Filter base class.
      copyStringValues(original, false, original.dataset(), getOutputStringIndex(),
                       getOutputFormat(), getOutputStringIndex());
      processed.setDataset(filtered);
      push(processed);
    }
    return (numPendingOutput() != 0);
  }

  /**
   * Runs the given data through a Remove filter configured with the ignored
   * attribute range and returns the resulting dataset.
   *
   * Must only be called when m_IgnoreAttributesRange is not null.
   *
   * @param data the full input data
   * @return the filter's output format filled with every instance the filter
   * produced
   * @exception Exception if the Remove filter fails
   */
  private Instances removeIgnoredAttributes(Instances data) throws Exception {
    Remove remover = new Remove();
    remover.setAttributeIndices(m_IgnoreAttributesRange.getRanges());
    remover.setInvertSelection(false);
    remover.setInputFormat(data);

    for (int i = 0; i < data.numInstances(); i++) {
      remover.input(data.instance(i));
    }
    remover.batchFinished();

    // drain the filter's output queue into a dataset with the reduced header
    Instances reduced = remover.getOutputFormat();
    Instance next;
    while ((next = remover.output()) != null) {
      reduced.add(next);
    }
    return reduced;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(2);

    newVector.addElement(new Option(
      "\tFull class name of clusterer to use, followed\n"
      + "\tby scheme options. (required)\n"
      + "\teg: \"weka.clusterers.SimpleKMeans -N 3\"",
      "W", 1, "-W <clusterer specification>"));

    newVector.addElement(new Option(
      "\tThe range of attributes the clusterer should ignore.\n",
      "I", 1,"-I <att1,att2-att4,...>"));

    return newVector.elements();
  }

  /**
   * Parses the options for this object. Valid options are: <p>
   *
   * -W clusterer string <br>
   * Full class name of clusterer to use, followed by scheme options. (required)<p>
   *
   * -I range string <br>
   * The range of attributes the clusterer should ignore.<p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if -W is missing or empty, or if an option is not
   * supported
   */
  public void setOptions(String[] options) throws Exception {
    String clustererString = Utils.getOption('W', options);
    if (clustererString.length() == 0) {
      throw new Exception("A clusterer must be specified"
                          + " with the -W option.");
    }

    String[] clustererSpec = Utils.splitOptions(clustererString);
    if (clustererSpec.length == 0) {
      throw new Exception("Invalid clusterer specification string");
    }

    // the first token is the class name; blank it out so that only the
    // clusterer's own options remain in the array passed to forName
    String clustererName = clustererSpec[0];
    clustererSpec[0] = "";
    setClusterer(Clusterer.forName(clustererName, clustererSpec));

    setIgnoredAttributeIndices(Utils.getOption('I', options));

    Utils.checkForRemainingOptions(options);
  }

  /**
   * Gets the current settings of the filter.
   *
   * The returned array always has five slots; slots not needed for -W
   * (and -I, when an ignore range is set) are filled with empty strings.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {
    String [] options = new String [5];
    int current = 0;

    options[current++] = "-W";
    options[current++] = "" + getClustererSpec();

    String ignored = getIgnoredAttributeIndices();
    if (!ignored.equals("")) {
      options[current++] = "-I";
      options[current++] = ignored;
    }

    // pad the remaining slots
    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /**
   * Returns a string describing this filter
   *
   * @return a description of the filter suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "A filter that adds a new nominal attribute representing the cluster "
      + "assigned to each instance by the specified clustering algorithm.";
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String clustererTipText() {
    return "The clusterer to assign clusters with.";
  }

  /**
   * Sets the clusterer to assign clusters with.
   *
   * @param clusterer The clusterer to be used (with its options set).
   */
  public void setClusterer(Clusterer clusterer) {
    m_Clusterer = clusterer;
  }

  /**
   * Gets the clusterer used by the filter.
   *
   * @return The clusterer being used.
   */
  public Clusterer getClusterer() {
    return m_Clusterer;
  }

  /**
   * Gets the clusterer specification string, which contains the class name of
   * the clusterer and, if the clusterer is an OptionHandler, its options.
   *
   * @return the clusterer string.
   */
  protected String getClustererSpec() {
    Clusterer c = getClusterer();
    String className = c.getClass().getName();
    if (c instanceof OptionHandler) {
      return className + " "
        + Utils.joinOptions(((OptionHandler)c).getOptions());
    }
    return className;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String ignoredAttributeIndicesTipText() {
    return "The range of attributes to be ignored by the clusterer. eg: first-3,5,9-last";
  }

  /**
   * Gets ranges of attributes to be ignored.
   *
   * @return a string containing a comma-separated list of ranges, or the
   * empty string if no range is set
   */
  public String getIgnoredAttributeIndices() {
    if (m_IgnoreAttributesRange == null) {
      return "";
    }
    return m_IgnoreAttributesRange.getRanges();
  }

  /**
   * Sets the ranges of attributes to be ignored. If provided string
   * is null or empty, no attributes will be ignored.
   *
   * @param rangeList a string representing the list of attributes.
   * eg: first-3,5,6-last
   * @exception IllegalArgumentException if an invalid range list is supplied
   */
  public void setIgnoredAttributeIndices(String rangeList) {
    if ((rangeList == null) || (rangeList.length() == 0)) {
      m_IgnoreAttributesRange = null;
    } else {
      m_IgnoreAttributesRange = new Range();
      m_IgnoreAttributesRange.setRanges(rangeList);
    }
  }

  /**
   * Main method for testing this class.
   *
   * Runs the filter in batch mode when the -b flag is present, otherwise in
   * single-file mode; any exception message is printed to standard output.
   *
   * @param argv should contain arguments to the filter: use -h for help
   */
  public static void main(String [] argv) {
    try {
      if (Utils.getFlag('b', argv)) {
        Filter.batchFilterFile(new AddCluster(), argv);
      } else {
        Filter.filterFile(new AddCluster(), argv);
      }
    } catch (Exception ex) {
      System.out.println(ex.getMessage());
    }
  }
}