/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* ClassOrder.java
* Copyright (C) 2002 Xin Xu
*
*/
package weka.filters.supervised.attribute;
import weka.filters.*;
import java.util.Enumeration;
import java.util.Vector;
import java.util.Random;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
/**
* A filter that reorders the class values so that, after filtering, they
* are no longer in the order given in the header file.
* The values of the class will be in the order specified by the user
* -- it could be either in ascending/descending order by the class
* frequency or in random order.<p>
*
* The format of the header is thus not changed in this filter
* (although it still uses <code>setInputFormat()</code>), but
* the class value of each instance is converted to sorted
* values within the same range. The value can also be converted back
* using <code>originalValue(double value)</code> procedure.<p>
*
* @author Xin Xu (xx5@cs.waikato.ac.nz)
* @version $Revision: 1.1.1.1 $
*/
public class ClassOrder extends Filter implements SupervisedFilter,
                                                  OptionHandler {

  /** The class values are sorted in ascending order based on their frequencies */
  public static final int FREQ_ASCEND = 0;

  /** The class values are sorted in descending order based on their frequencies */
  public static final int FREQ_DESCEND = 1;

  /** The class values are sorted in random order */
  public static final int RANDOM = 2;

  /** The seed of randomization */
  private long m_Seed = 1;

  /**
   * The random object. May be null; it is (re)created from
   * <code>m_Seed</code> the next time a randomization is needed.
   */
  private Random m_Random = null;

  /**
   * The 1-1 converting table from the original class values
   * to the new values: <code>m_Converter[original] == new</code>
   */
  private double[] m_Converter = null;

  /** Class attribute of the data */
  private Attribute m_ClassAttribute = null;

  /** The class order to be sorted */
  private int m_ClassOrder = FREQ_ASCEND;

  /**
   * This class can provide the class distribution in the sorted order
   * as side effect. Until the first batch is finished the counts are
   * indexed by the original class value; afterwards by the new one.
   */
  private double[] m_ClassCounts = null;

  /**
   * Whether the converting table has been built from a finished batch.
   * Once set, further batches reuse the table and no longer touch the
   * class counts; it is cleared again by <code>setInputFormat()</code>.
   */
  private boolean m_ConverterReady = false;

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(2);

    newVector.addElement(new Option("\tSpecify the seed of randomization\n"
                                    + "\tused to randomize the class\n"
                                    + "\torder (default: 1)",
                                    "R", 1, "-R <seed>"));

    newVector.addElement(new Option("\tSpecify the class order to be\n"
                                    + "\tsorted, could be 0: ascending\n"
                                    + "\t1: descending and 2: random.(default: 0)",
                                    "C", 1, "-C <order>"));

    return newVector.elements();
  }

  /**
   * Parses a given list of options controlling the behaviour of this object.
   * Valid options are:<p>
   *
   * -R &lt;seed&gt; <br>
   * Specify the seed of randomization used to randomize the class order
   * (default: 1)<p>
   *
   * -C &lt;order&gt; <br>
   * Specify the class order to be sorted, could be 0: ascending, 1: descending
   * and 2: random (default: 0)<p>
   *
   * If an input format has already been defined it is re-applied, which
   * resets the class counts and the converting table.
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String seedString = Utils.getOption('R', options);
    if (seedString.length() != 0) {
      m_Seed = Long.parseLong(seedString);
    } else {
      m_Seed = 1;
    }

    String orderString = Utils.getOption('C', options);
    if (orderString.length() != 0) {
      m_ClassOrder = Integer.parseInt(orderString);
    } else {
      m_ClassOrder = FREQ_ASCEND;
    }

    // Drop the old generator BEFORE re-applying the input format:
    // setInputFormat() creates a fresh one from the new seed, and clearing
    // it afterwards would leave the filter without a generator.
    m_Random = null;
    if (getInputFormat() != null) {
      setInputFormat(getInputFormat());
    }
  }

  /**
   * Gets the current settings of the filter.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    return new String[] {
      "-R", "" + m_Seed,
      "-C", "" + m_ClassOrder
    };
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String seedTipText() {
    return "Specify the seed of randomization of the class order";
  }

  /**
   * Get the current randomization seed
   *
   * @return a seed
   */
  public long getSeed() {
    return m_Seed;
  }

  /**
   * Set randomization seed. The current random object is discarded, so
   * the next randomization starts from the new seed.
   *
   * @param seed the set seed
   */
  public void setSeed(long seed) {
    m_Seed = seed;
    m_Random = null;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String classOrderTipText() {
    return "Specify the class order after the filtering";
  }

  /**
   * Get the wanted class order
   *
   * @return class order
   */
  public int getClassOrder() {
    return m_ClassOrder;
  }

  /**
   * Set the wanted class order. The value is not validated here; an
   * unknown order is rejected when the converting table is built in
   * <code>batchFinished()</code>.
   *
   * @param order the class order, one of FREQ_ASCEND, FREQ_DESCEND
   * or RANDOM
   */
  public void setClassOrder(int order) {
    m_ClassOrder = order;
  }

  /**
   * Sets the format of the input instances. This also resets the random
   * object, the class counts and the converting table, so the next
   * finished batch determines the class order again.
   *
   * @param instanceInfo an Instances object containing the input instance
   * structure (any instances contained in the object are ignored - only the
   * structure is required).
   * @return true if the outputFormat may be collected immediately
   *         (always false here)
   * @exception Exception if the input format cannot be set
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {
    super.setInputFormat(new Instances(instanceInfo, 0));

    m_ClassAttribute = instanceInfo.classAttribute();
    m_Random = new Random(m_Seed);

    int numClasses = instanceInfo.numClasses();
    m_Converter = new double[numClasses];
    m_ClassCounts = new double[numClasses];
    m_ConverterReady = false;

    return false;
  }

  /**
   * Input an instance for filtering. The instance is only buffered here;
   * output becomes available after <code>batchFinished()</code>. While
   * the converting table has not been built yet, the weight of every
   * instance with a known nominal class is added to the class counts.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be
   * collected with output() (always false here).
   * @exception IllegalStateException if no input format has been defined.
   */
  public boolean input(Instance instance) {
    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    // Counts are frozen once the table exists: from then on they are
    // indexed by the NEW class value and must not be mixed with counts
    // indexed by the original one.
    if (!m_ConverterReady
        && m_ClassAttribute.isNominal()
        && !instance.isMissing(m_ClassAttribute)) {
      m_ClassCounts[(int) instance.classValue()] += instance.weight();
    }

    bufferInput(instance);
    return false;
  }

  /**
   * Signify that this batch of input to the filter is finished. If
   * the filter requires all instances prior to filtering, output()
   * may now be called to retrieve the filtered instances. Any
   * subsequent instances filtered are filtered based on the setting
   * obtained from the first batch (unless the inputFormat has been
   * re-assigned or new options have been set). This implementation
   * sorts the class values and provides class counts in the output format.
   * Instances with a missing class value are removed.
   *
   * @return true if there are instances pending output
   * @exception NullPointerException if no input structure has been defined,
   * @exception Exception if there was a problem finishing the batch.
   */
  public boolean batchFinished() throws Exception {
    Instances data = getInputFormat();
    if (data == null) {
      throw new NullPointerException("No input instance format defined");
    }

    data.deleteWithMissingClass();
    if (data.numInstances() == 0) {
      throw new Exception(" No instances with a class value!");
    }

    Instances newInsts = new Instances(data, 0);
    boolean nominal = m_ClassAttribute.isNominal();

    if (nominal) {
      if (!m_ConverterReady) {
        buildConverter();
      }

      // Relabel the class values of the output header.
      // NOTE(review): for a non-identity table this passes through states
      // in which two values carry the same label; confirm that
      // renameAttributeValue() tolerates that.
      int classIndex = newInsts.classIndex();
      for (int z = 0; z < m_Converter.length; z++) {
        newInsts.renameAttributeValue(classIndex, (int) m_Converter[z],
                                      m_ClassAttribute.value(z));
      }

      if (!m_ConverterReady) {
        // Re-index the counts by the new class value, exactly once
        double[] cls = new double[m_ClassCounts.length];
        for (int z = 0; z < m_Converter.length; z++) {
          cls[(int) m_Converter[z]] = m_ClassCounts[z];
        }
        m_ClassCounts = cls;
        m_ConverterReady = true;
      }
    }

    setOutputFormat(newInsts);

    // Process all instances
    for (int i = 0; i < data.numInstances(); i++) {
      Instance datum = data.instance(i);
      if (nominal) {
        datum.setClassValue(m_Converter[(int) datum.classValue()]);
      }

      // Add back the String attributes
      copyStringValues(datum, false, data, getOutputFormat());
      datum.setDataset(getOutputFormat());
      push(datum);
    }

    flushInput();
    m_NewBatch = true;
    return (numPendingOutput() != 0);
  }

  /**
   * Fills the converting table according to the wanted class order,
   * using the class counts collected so far.
   *
   * @exception Exception if the class order is not one of the
   * defined constants
   */
  private void buildConverter() throws Exception {
    switch (m_ClassOrder) {
    case FREQ_ASCEND:
    case FREQ_DESCEND:
      buildFrequencyConverter();
      break;

    case RANDOM:
      for (int x = 0; x < m_Converter.length; x++) {
        m_Converter[x] = (double) x;
      }
      randomize(m_Converter, 0, m_Converter.length - 1);
      break;

    default:
      throw new Exception("Class order not defined!");
    }
  }

  /**
   * Fills the converting table so that classes are ranked by their
   * counts. Classes with equal counts are put in random order among
   * themselves.
   */
  private void buildFrequencyConverter() {
    // Original class indices, ascending by count, kept as doubles so
    // that randomize() can shuffle runs of ties
    int[] sorted = Utils.sort(m_ClassCounts);
    double[] classOrder = new double[sorted.length];
    for (int t = 0; t < sorted.length; t++) {
      classOrder[t] = (double) sorted[t];
    }

    int runStart = -1;  // first rank of the current run of ties, -1 if none
    int max = m_Converter.length - 1;

    for (int y = 0; y <= max; y++) {  // y is the rank in ascending order
      // -1.0 acts as a sentinel after the last class, closing any open run
      double next = (y == max) ? -1.0 : m_ClassCounts[(int) classOrder[y + 1]];

      if (Utils.eq(m_ClassCounts[(int) classOrder[y]], next)) {
        if (runStart == -1) {
          runStart = y;
        }
      } else if (runStart != -1) {
        // End of a run: randomize the order of classes with same size
        randomize(classOrder, runStart, y);
        for (int yy = runStart; yy <= y; yy++) {
          m_Converter[(int) classOrder[yy]] = newLabel(yy, max);
        }
        runStart = -1;
      } else {
        // Record in the converting table
        m_Converter[(int) classOrder[y]] = newLabel(y, max);
      }
    }
  }

  /**
   * Maps an ascending-frequency rank to the new class label for the
   * current class order.
   *
   * @param rank the rank in ascending order of frequency
   * @param max the largest class label
   * @return the rank itself for FREQ_ASCEND, otherwise the mirrored rank
   */
  private double newLabel(int rank, int max) {
    return (m_ClassOrder == FREQ_ASCEND) ? (double) rank
                                         : (double) (max - rank);
  }

  /**
   * Helper function to randomize the given double array from the
   * the given low index to the given high index (both inclusive).
   * Creates the random object from the current seed if there is none.
   *
   * @param array the given double array
   * @param low low index
   * @param high high index
   */
  private void randomize(double[] array, int low, int high) {
    if (m_Random == null) {
      // setSeed() discards the generator; recreate it on demand
      m_Random = new Random(m_Seed);
    }
    for (int y = low; y <= high; y++) {
      int swapPos = m_Random.nextInt(high - y + 1) + y;
      double temp = array[y];
      array[y] = array[swapPos];
      array[swapPos] = temp;
    }
  }

  /**
   * Get the class distribution of the sorted class values. If class is
   * numeric, or no input format has been set yet, it returns null.
   *
   * @return the class counts, or null
   */
  public double[] getClassCounts() {
    if (m_ClassAttribute != null && m_ClassAttribute.isNominal()) {
      return m_ClassCounts;
    }
    return null;
  }

  /**
   * Convert the given class distribution back to the distributions
   * with the original internal class index
   *
   * @param before the given class distribution, indexed by the new
   * class values
   * @return the distribution converted back
   */
  public double[] distributionsByOriginalIndex(double[] before) {
    double[] after = new double[m_Converter.length];
    for (int i = 0; i < m_Converter.length; i++) {
      after[i] = before[(int) m_Converter[i]];
    }
    return after;
  }

  /**
   * Return the original internal class value given the randomized
   * class value, i.e. the string presentations of the two indices
   * are the same. It's useful when the filter is used within a classifier
   * so that the filtering procedure should be transparent to the
   * evaluation
   *
   * @param value the given value
   * @return the original internal value, -1 if not found
   * @exception IllegalStateException if the converter table is not set yet
   */
  public double originalValue(double value) throws Exception {
    if (m_Converter == null) {
      throw new IllegalStateException("Coverter table not defined yet!");
    }

    for (int i = 0; i < m_Converter.length; i++) {
      if ((int) value == (int) m_Converter[i]) {
        return (double) i;
      }
    }
    return -1;
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain arguments to the filter: use -h for help
   */
  public static void main(String[] argv) {
    try {
      if (Utils.getFlag('b', argv)) {
        Filter.batchFilterFile(new ClassOrder(), argv);
      } else {
        Filter.filterFile(new ClassOrder(), argv);
      }
    } catch (Exception ex) {
      System.out.println(ex.getMessage());
    }
  }
}