/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* PKIDiscretize.java
* Copyright (C) 2003 Richard Kirkby
*
*/
package weka.filters.unsupervised.attribute;
import weka.filters.*;
import weka.core.*;
import java.util.*;
/**
* Discretizes numeric attributes using equal frequency binning where the
* number of bins is equal to the square root of the number of non-missing
* values.<p>
*
* Valid filter-specific options are: <p>
*
* -R col1,col2-col4,... <br>
* Specifies list of columns to Discretize. First
* and last are valid indexes. (default: first-last) <p>
*
* -V <br>
* Invert matching sense.<p>
*
* -D <br>
* Make binary nominal attributes. <p>
*
* @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
* @version $Revision: 1.1.1.1 $
*/
public class PKIDiscretize extends Discretize {

  /**
   * Sets the format of the input instances. Before delegating to the
   * superclass, the inherited m_FindNumBins flag is switched on.
   *
   * @param instanceInfo an Instances object describing the structure of the
   * input data (only the structure is needed; any instances it holds are
   * ignored).
   * @return true if the outputFormat may be collected immediately
   * @exception Exception if the input format can't be set successfully
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {

    // Force the inherited flag on before the superclass does its work.
    // NOTE(review): presumably this is what routes the superclass to
    // findNumBins(int) below - confirm against Discretize.
    m_FindNumBins = true;

    boolean outputReady = super.setInputFormat(instanceInfo);
    return outputReady;
  }

  /**
   * Chooses the number of bins for one attribute and creates its cut points.
   * The bin count is the square root (truncated to an int) of the number of
   * instances whose value for the attribute is not missing. When that count
   * is zero no cut points are calculated.
   *
   * @param index the attribute index
   */
  protected void findNumBins(int index) {

    Instances data = getInputFormat();
    int total = data.numInstances();

    // Count the instances that have no value for this attribute
    int missing = 0;
    for (int row = 0; row < total; row++) {
      if (data.instance(row).isMissing(index)) {
        missing++;
      }
    }

    m_NumBins = (int) Math.sqrt(total - missing);

    // Nothing to bin when there are no usable values
    if (m_NumBins <= 0) {
      return;
    }
    calculateCutPointsByEqualFrequencyBinning(index);
  }

  /**
   * Describes the options this filter understands (-R, -V and -D).
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    Option columnsOption = new Option(
        "\tSpecifies list of columns to Discretize. First"
        + " and last are valid indexes.\n"
        + "\t(default: first-last)",
        "R", 1, "-R <col1,col2-col4,...>");

    Option invertOption = new Option(
        "\tInvert matching sense of column indexes.",
        "V", 0, "-V");

    Option binaryOption = new Option(
        "\tOutput binary attributes for discretized attributes.",
        "D", 0, "-D");

    Vector available = new Vector(7);
    available.addElement(columnsOption);
    available.addElement(invertOption);
    available.addElement(binaryOption);
    return available.elements();
  }

  /**
   * Parses the options for this object. Valid options are: <p>
   *
   * -R col1,col2-col4,... <br>
   * Specifies list of columns to Discretize. First
   * and last are valid indexes. (default: first-last) <p>
   *
   * -V <br>
   * Invert matching sense.<p>
   *
   * -D <br>
   * Make binary nominal attributes. <p>
   *
   * If an input format has already been set, it is set again once the
   * options have been applied.
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    // Each option is read and applied in turn: -D, then -V, then -R
    boolean binary = Utils.getFlag('D', options);
    setMakeBinary(binary);

    boolean inverted = Utils.getFlag('V', options);
    setInvertSelection(inverted);

    String columns = Utils.getOption('R', options);
    setAttributeIndices(columns.length() == 0 ? "first-last" : columns);

    // Re-apply an existing input format now that the settings have changed
    Instances currentFormat = getInputFormat();
    if (currentFormat != null) {
      setInputFormat(currentFormat);
    }
  }

  /**
   * Gets the current settings of the filter. The returned array always has
   * twelve entries; entries not needed for an option are empty strings.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    String [] settings = new String [12];

    // Start from an all-blank array, then overwrite the leading entries
    for (int slot = 0; slot < settings.length; slot++) {
      settings[slot] = "";
    }

    int next = 0;
    if (getMakeBinary()) {
      settings[next] = "-D";
      next++;
    }
    if (getInvertSelection()) {
      settings[next] = "-V";
      next++;
    }
    String columns = getAttributeIndices();
    if (columns.length() != 0) {
      settings[next] = "-R";
      settings[next + 1] = columns;
    }
    return settings;
  }

  /**
   * Returns a string describing this filter
   *
   * @return a description of the filter suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {

    String description =
        "Discretizes numeric attributes using equal frequency binning,"
        + " where the number of bins is equal to the square root of the"
        + " number of non-missing values.";
    return description;
  }

  /**
   * Returns the tip text for the findNumBins property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String findNumBinsTipText() {

    return "Ignored.";
  }

  /**
   * Get the value of FindNumBins. This implementation always reports false.
   *
   * @return always false
   */
  public boolean getFindNumBins() {

    return false;
  }

  /**
   * Set the value of FindNumBins. This implementation does nothing with the
   * supplied value.
   *
   * @param newFindNumBins ignored
   */
  public void setFindNumBins(boolean newFindNumBins) {
    // intentionally empty
  }

  /**
   * Returns the tip text for the useEqualFrequency property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String useEqualFrequencyTipText() {

    return "Always true.";
  }

  /**
   * Get the value of UseEqualFrequency. This implementation always reports
   * true.
   *
   * @return always true
   */
  public boolean getUseEqualFrequency() {

    return true;
  }

  /**
   * Set the value of UseEqualFrequency. This implementation does nothing
   * with the supplied value.
   *
   * @param newUseEqualFrequency ignored
   */
  public void setUseEqualFrequency(boolean newUseEqualFrequency) {
    // intentionally empty
  }

  /**
   * Returns the tip text for the bins property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String binsTipText() {

    return "Ignored.";
  }

  /**
   * Ignored. This implementation always reports zero.
   *
   * @return always 0
   */
  public int getBins() {

    return 0;
  }

  /**
   * Ignored. This implementation does nothing with the supplied value.
   *
   * @param numBins ignored
   */
  public void setBins(int numBins) {
    // intentionally empty
  }

  /**
   * Main method for testing this class. With the -b flag the filter is run
   * through Filter.batchFilterFile, otherwise through Filter.filterFile.
   * If an exception is thrown, its message is printed to standard output.
   *
   * @param argv should contain arguments to the filter: use -h for help
   */
  public static void main(String [] argv) {

    try {
      boolean batchMode = Utils.getFlag('b', argv);
      PKIDiscretize filter = new PKIDiscretize();
      if (batchMode) {
        Filter.batchFilterFile(filter, argv);
      } else {
        Filter.filterFile(filter, argv);
      }
    } catch (Exception ex) {
      System.out.println(ex.getMessage());
    }
  }
}