/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * SortLabels.java
 * Copyright (C) 2009-2012 University of Waikato, Hamilton, New Zealand
 */

package weka.filters.unsupervised.attribute;

import java.io.Serializable;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.DenseInstance;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.Utils;
import weka.filters.SimpleStreamFilter;

/**
 <!-- globalinfo-start -->
 * A simple filter for sorting the labels of nominal attributes.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 *  Turns on output of debugging information.</pre>
 *
 * <pre> -R &lt;index1,index2-index4,...&gt;
 *  Specify list of attributes to process; only nominal attributes
 *  in the range have their labels sorted.
 *  (default: first-last)</pre>
 *
 * <pre> -V
 *  Inverts the matching sense of the selection.</pre>
 *
 * <pre> -S &lt;CASE|NON-CASE&gt;
 *  Determines the type of sorting:
 *  CASE = Case-sensitive
 *  NON-CASE = Case-insensitive
 *  (default: CASE)</pre>
 *
 <!-- options-end -->
 *
 * @author  fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 8034 $
 */
public class SortLabels
  extends SimpleStreamFilter {

  /** for serialization. */
  private static final long serialVersionUID = 7815204879694105691L;

  /**
   * Represents a case-sensitive comparator for two strings.
   * <code>null</code> is ordered before any non-null string.
   *
   * @author  fracpete (fracpete at waikato dot ac dot nz)
   * @version $Revision: 8034 $
   */
  public static class CaseSensitiveComparator
    implements Comparator, Serializable {

    /** for serialization. */
    private static final long serialVersionUID = 7071450356783873277L;

    /**
     * Compares the two strings using {@link String#compareTo(String)}.
     *
     * @param o1	the first string to compare, may be null
     * @param o2	the second string to compare, may be null
     * @return		a negative value if o1 is smaller than o2, 0 if
     * 			equal and a positive value if greater
     */
    @Override
    public int compare(Object o1, Object o2) {
      if (o1 == null)
	return (o2 == null) ? 0 : -1;
      if (o2 == null)
	return +1;

      return ((String) o1).compareTo((String) o2);
    }
  }

  /**
   * Represents a case-insensitive comparator for two strings.
   * <code>null</code> is ordered before any non-null string.
   *
   * @author  fracpete (fracpete at waikato dot ac dot nz)
   * @version $Revision: 8034 $
   */
  public static class CaseInsensitiveComparator
    implements Comparator, Serializable {

    /** for serialization. */
    private static final long serialVersionUID = -4515292733342486066L;

    /**
     * Compares the lower-cased versions of the two strings.
     *
     * @param o1	the first string to compare, may be null
     * @param o2	the second string to compare, may be null
     * @return		a negative value if o1 is smaller than o2, 0 if
     * 			equal and a positive value if greater
     */
    @Override
    public int compare(Object o1, Object o2) {
      if (o1 == null)
	return (o2 == null) ? 0 : -1;
      if (o2 == null)
	return +1;

      // Locale.ROOT keeps the resulting label order independent of the
      // platform's default locale (e.g. Turkish dotless i).
      String s1 = ((String) o1).toLowerCase(Locale.ROOT);
      String s2 = ((String) o2).toLowerCase(Locale.ROOT);

      return s1.compareTo(s2);
    }
  }

  /** sorts the strings case-sensitive. */
  public final static int SORT_CASESENSITIVE = 0;

  /** sorts the strings case-insensitive. */
  public final static int SORT_CASEINSENSITIVE = 1;

  /** Tag allowing selection of sort type. */
  public final static Tag[] TAGS_SORTTYPE = {
    new Tag(SORT_CASESENSITIVE, "case", "Case-sensitive"),
    new Tag(SORT_CASEINSENSITIVE, "non-case", "Case-insensitive")
  };

  /** the range of attributes to process (only nominal ones will be processed). */
  protected Range m_AttributeIndices = new Range("first-last");

  /**
   * the new order for the labels: for each attribute index, maps the old
   * label index to the new one (empty array for untouched attributes).
   */
  protected int[][] m_NewOrder = null;

  /** the sort type; kept in sync with {@link #m_Comparator}. */
  protected int m_SortType = SORT_CASESENSITIVE;

  /** the comparator to use for sorting; kept in sync with {@link #m_SortType}. */
  protected Comparator m_Comparator = new CaseSensitiveComparator();

  /**
   * Returns a string describing this filter.
   *
   * @return 		a description of the filter suitable for
   * 			displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "A simple filter for sorting the labels of nominal attributes.";
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return 		an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector	result;
    Enumeration	en;
    String	desc;
    int		i;

    result = new Vector();

    // options of the super class come first
    en = super.listOptions();
    while (en.hasMoreElements())
      result.addElement(en.nextElement());

    result.addElement(new Option(
	"\tSpecify list of attributes to process; only nominal attributes\n"
	+ "\tin the range have their labels sorted.\n"
	+ "\t(default: first-last)",
	"R", 1, "-R <index1,index2-index4,...>"));

    result.addElement(new Option(
	"\tInverts the matching sense of the selection.",
	"V", 0, "-V"));

    // one line per available sort type
    desc = "";
    for (i = 0; i < TAGS_SORTTYPE.length; i++) {
      desc += "\t" + TAGS_SORTTYPE[i].getIDStr()
	+ " = " + TAGS_SORTTYPE[i].getReadable()
	+ "\n";
    }
    result.addElement(new Option(
	"\tDetermines the type of sorting:\n"
	+ desc
	+ "\t(default: " + new SelectedTag(SORT_CASESENSITIVE, TAGS_SORTTYPE) + ")",
	"S", 1, "-S " + Tag.toOptionList(TAGS_SORTTYPE)));

    return result.elements();
  }

  /**
   * Parses the options for this object. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -D
   *  Turns on output of debugging information.</pre>
   *
   * <pre> -R &lt;index1,index2-index4,...&gt;
   *  Specify list of attributes to process; only nominal attributes
   *  in the range have their labels sorted.
   *  (default: first-last)</pre>
   *
   * <pre> -V
   *  Inverts the matching sense of the selection.</pre>
   *
   * <pre> -S &lt;CASE|NON-CASE&gt;
   *  Determines the type of sorting:
   *  CASE = Case-sensitive
   *  NON-CASE = Case-insensitive
   *  (default: CASE)</pre>
   *
   <!-- options-end -->
   *
   * @param options	the options to use
   * @throws Exception	if setting of options fails
   */
  public void setOptions(String[] options) throws Exception {
    String	tmpStr;

    tmpStr = Utils.getOption('R', options);
    if (tmpStr.length() != 0)
      setAttributeIndices(tmpStr);
    else
      setAttributeIndices("first-last");

    setInvertSelection(Utils.getFlag('V', options));

    tmpStr = Utils.getOption('S', options);
    if (tmpStr.length() != 0)
      setSortType(new SelectedTag(tmpStr, TAGS_SORTTYPE));
    else
      setSortType(new SelectedTag(SORT_CASESENSITIVE, TAGS_SORTTYPE));

    super.setOptions(options);
  }

  /**
   * Gets the current settings of the filter.
   *
   * @return 		an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    int			i;
    Vector<String>	result;
    String[]		options;

    result = new Vector<String>();

    options = super.getOptions();
    for (i = 0; i < options.length; i++)
      result.add(options[i]);

    result.add("-R");
    result.add(getAttributeIndices().getRanges());

    if (getInvertSelection())
      result.add("-V");

    result.add("-S");
    result.add("" + getSortType());

    return result.toArray(new String[result.size()]);
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the explorer/experimenter gui
   */
  public String attributeIndicesTipText() {
    return
        "Specify range of attributes to act on; "
      + "this is a comma separated list of attribute indices, with "
      + "\"first\" and \"last\" valid values; Specify an inclusive "
      + "range with \"-\"; eg: \"first-3,5,6-10,last\".";
  }

  /**
   * Set the range of attributes to process. The current invert-selection
   * setting is left untouched.
   *
   * @param value 	the new range.
   */
  public void setAttributeIndices(String value) {
    // update the existing Range instead of replacing it, otherwise a
    // previously configured invert flag would silently be dropped
    m_AttributeIndices.setRanges(value);
  }

  /**
   * Gets the current selected attributes.
   *
   * @return 		current selection.
   */
  public Range getAttributeIndices() {
    return m_AttributeIndices;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the explorer/experimenter gui
   */
  public String invertSelectionTipText() {
    return
        "Set attribute selection mode. If false, only selected "
      + "attributes in the range will be worked on; if "
      + "true, only non-selected attributes will be processed.";
  }

  /**
   * Sets whether selected columns should be processed or skipped.
   *
   * @param value 	the new invert setting
   */
  public void setInvertSelection(boolean value) {
    m_AttributeIndices.setInvert(value);
  }

  /**
   * Gets whether the supplied columns are to be processed or skipped.
   *
   * @return 		true if the supplied columns will be skipped and
   * 			all other columns processed
   */
  public boolean getInvertSelection() {
    return m_AttributeIndices.getInvert();
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the explorer/experimenter gui
   */
  public String sortTypeTipText() {
    return "The type of sorting to use.";
  }

  /**
   * Sets the sort type to be used. Tags that do not belong to
   * {@link #TAGS_SORTTYPE} are ignored.
   *
   * @param type 	the type of sorting
   * @throws IllegalStateException	if the selected tag has an ID this
   * 			filter does not handle; the current settings are
   * 			left unchanged in that case
   */
  public void setSortType(SelectedTag type) {
    int		sortType;

    if (type.getTags() != TAGS_SORTTYPE)
      return;

    sortType = type.getSelectedTag().getID();
    switch (sortType) {
      case SORT_CASESENSITIVE:
	m_Comparator = new CaseSensitiveComparator();
	break;
      case SORT_CASEINSENSITIVE:
	m_Comparator = new CaseInsensitiveComparator();
	break;
      default:
	throw new IllegalStateException("Unhandled sort type '" + type + "'!");
    }

    // only record the type once a matching comparator has been installed
    m_SortType = sortType;
  }

  /**
   * Gets the sort type to be used.
   *
   * @return 		the sort type
   */
  public SelectedTag getSortType() {
    return new SelectedTag(m_SortType, TAGS_SORTTYPE);
  }

  /**
   * Returns the Capabilities of this filter.
   *
   * @return            the capabilities of this object
   * @see               Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enableAllAttributes();
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enableAllClasses();
    result.enable(Capability.MISSING_CLASS_VALUES);
    result.enable(Capability.NO_CLASS);

    return result;
  }

  /**
   * Determines the output format based on the input format and returns
   * this. Nominal attributes inside the selected range get their labels
   * sorted with the current comparator; all other attributes are copied
   * as they are. As a side effect the old-to-new label index mapping is
   * stored in {@link #m_NewOrder} for use by {@link #process(Instance)}.
   *
   * @param inputFormat     the input format to base the output format on
   * @return                the output format
   * @throws Exception      in case the determination goes wrong
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    Instances			result;
    Attribute			att;
    Attribute			attSorted;
    FastVector			atts;
    FastVector			values;
    Vector<String>		sorted;
    Map<String,Integer>		newIndex;
    String			label;
    int				i;
    int				n;

    m_AttributeIndices.setUpper(inputFormat.numAttributes() - 1);

    // determine sorted indices
    atts       = new FastVector();
    m_NewOrder = new int[inputFormat.numAttributes()][];
    for (i = 0; i < inputFormat.numAttributes(); i++) {
      att = inputFormat.attribute(i);
      if (!att.isNominal() || !m_AttributeIndices.isInRange(i)) {
	// attribute stays untouched, no mapping required
	m_NewOrder[i] = new int[0];
	atts.addElement(inputFormat.attribute(i).copy());
	continue;
      }

      // sort labels
      sorted = new Vector<String>(att.numValues());
      for (n = 0; n < att.numValues(); n++)
	sorted.add(att.value(n));
      Collections.sort(sorted, m_Comparator);

      // record the position of each label in the sorted list; the first
      // occurrence wins, which matches a linear indexOf lookup
      newIndex = new HashMap<String,Integer>();
      values   = new FastVector();
      for (n = 0; n < sorted.size(); n++) {
	label = sorted.get(n);
	if (!newIndex.containsKey(label))
	  newIndex.put(label, n);
	values.addElement(label);
      }

      // determine new indices
      m_NewOrder[i] = new int[att.numValues()];
      for (n = 0; n < att.numValues(); n++)
	m_NewOrder[i][n] = newIndex.get(att.value(n));

      attSorted = new Attribute(att.name(), values);
      attSorted.setWeight(att.weight());
      atts.addElement(attSorted);
    }

    // generate new header
    result = new Instances(inputFormat.relationName(), atts, 0);
    result.setClassIndex(inputFormat.classIndex());

    return result;
  }

  /**
   * Processes the given instance and returns a new dense instance in which
   * the values of the sorted nominal attributes have been replaced by the
   * indices of the same labels in the sorted label list. Missing values
   * and all other attributes are copied unchanged.
   *
   * @param instance    the instance to process
   * @return            the modified data
   * @throws Exception  in case the processing goes wrong
   */
  protected Instance process(Instance instance) throws Exception {
    Instance	result;
    Attribute	att;
    double[]	values;
    int		i;

    // adjust indices
    values = new double[instance.numAttributes()];
    for (i = 0; i < instance.numAttributes(); i++) {
      att = instance.attribute(i);
      if (!att.isNominal() || !m_AttributeIndices.isInRange(i) || instance.isMissing(i))
	values[i] = instance.value(i);
      else
	values[i] = m_NewOrder[i][(int) instance.value(i)];
    }

    // create new instance
    result = new DenseInstance(instance.weight(), values);

    return result;
  }

  /**
   * Returns the revision string.
   *
   * @return		the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 8034 $");
  }

  /**
   * runs the filter with the given arguments.
   *
   * @param args      the commandline arguments
   */
  public static void main(String[] args) {
    runFilter(new SortLabels(), args);
  }
}