/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * RandomSearch.java * Copyright (C) 1999 Mark Hall * */ package weka.attributeSelection; import java.io.*; import java.util.*; import weka.core.*; /** * Class for performing a random search. <p> * * Valid options are: <p> * * -P <start set> <br> * Specify a starting set of attributes. Eg 1,4,7-9. <p> * * -F <percent) <br> * Percentage of the search space to consider. (default = 25). <p> * * -V <br> * Verbose output. Output new best subsets as the search progresses. <p> * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 1.1.1.1 $ */ public class RandomSearch extends ASSearch implements StartSetHandler, OptionHandler { /** * holds a starting set as an array of attributes. */ private int[] m_starting; /** holds the start set as a range */ private Range m_startRange; /** the best feature set found during the search */ private BitSet m_bestGroup; /** the merit of the best subset found */ private double m_bestMerit; /** * only accept a feature set as being "better" than the best if its * merit is better or equal to the best, and it contains fewer * features than the best (this allows LVF to be implimented). */ private boolean m_onlyConsiderBetterAndSmaller; /** does the data have a class */ private boolean m_hasClass; /** holds the class index */ private int m_classIndex; /** number of attributes in the data */ private int m_numAttribs; /** seed for random number generation */ private int m_seed; /** percentage of the search space to consider */ private double m_searchSize; /** the number of iterations performed */ private int m_iterations; /** random number object */ private Random m_random; /** output new best subsets as the search progresses */ private boolean m_verbose; /** * Returns a string describing this search method * @return a description of the search suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "RandomSearch : \n\nPerforms a Random search in " +"the space of attribute subsets. If no start set is supplied, Random " +"search starts from a random point and reports the best subset found. " +"If a start set is supplied, Random searches randomly for subsets " +"that are as good or better than the start point with the same or " +"or fewer attributes. Using RandomSearch in conjunction with a start " +"set containing all attributes equates to the LVF algorithm of Liu " +"and Setiono (ICML-96).\n"; } /** * Constructor */ public RandomSearch () { resetOptions(); } /** * Returns an enumeration describing the available options. * @return an enumeration of all the available options. **/ public Enumeration listOptions () { Vector newVector = new Vector(3); newVector.addElement(new Option("\tSpecify a starting set of attributes." + "\n\tEg. 1,3,5-7." +"\n\tIf a start point is supplied," +"\n\trandom search evaluates the start" +"\n\tpoint and then randomly looks for" +"\n\tsubsets that are as good as or better" +"\n\tthan the start point with the same" +"\n\tor lower cardinality." ,"P",1 , "-P <start set>")); newVector.addElement(new Option("\tPercent of search space to consider." +"\n\t(default = 25%)." , "F", 1 , "-F <percent> ")); newVector.addElement(new Option("\tOutput subsets as the search progresses." +"\n\t(default = false)." , "V", 0 , "-V")); return newVector.elements(); } /** * Parses a given list of options. * * Valid options are: <p> * * -P <start set> <br> * Specify a starting set of attributes. Eg 1,4,7-9. <p> * * -F <percent) <br> * Percentage of the search space to consider. (default = 25). <p> * * -V <br> * Verbose output. Output new best subsets as the search progresses. <p> * * @param options the list of options as an array of strings * @exception Exception if an option is not supported * **/ public void setOptions (String[] options) throws Exception { String optionString; resetOptions(); optionString = Utils.getOption('P', options); if (optionString.length() != 0) { setStartSet(optionString); } optionString = Utils.getOption('F',options); if (optionString.length() != 0) { setSearchPercent((new Double(optionString)).doubleValue()); } setVerbose(Utils.getFlag('V',options)); } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String startSetTipText() { return "Set the start point for the search. This is specified as a comma " +"seperated list off attribute indexes starting at 1. It can include " +"ranges. Eg. 1,2,5-9,17. If specified, Random searches for subsets " +"of attributes that are as good as or better than the start set with " +"the same or lower cardinality."; } /** * Sets a starting set of attributes for the search. It is the * search method's responsibility to report this start set (if any) * in its toString() method. * @param startSet a string containing a list of attributes (and or ranges), * eg. 1,2,6,10-15. "" indicates no start point. * If a start point is supplied, random search evaluates the * start point and then looks for subsets that are as good as or better * than the start point with the same or lower cardinality. * @exception Exception if start set can't be set. */ public void setStartSet (String startSet) throws Exception { m_startRange.setRanges(startSet); } /** * Returns a list of attributes (and or attribute ranges) as a String * @return a list of attributes (and or attribute ranges) */ public String getStartSet () { return m_startRange.getRanges(); } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String verboseTipText() { return "Print progress information. Sends progress info to the terminal " +"as the search progresses."; } /** * set whether or not to output new best subsets as the search proceeds * @param v true if output is to be verbose */ public void setVerbose(boolean v) { m_verbose = v; } /** * get whether or not output is verbose * @return true if output is set to verbose */ public boolean getVerbose() { return m_verbose; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String searchPercentTipText() { return "Percentage of the search space to explore."; } /** * set the percentage of the search space to consider * @param p percent of the search space ( 0 < p <= 100) */ public void setSearchPercent(double p) { p = Math.abs(p); if (p == 0) { p = 25; } if (p > 100.0) { p = 100; } m_searchSize = (p/100.0); } /** * get the percentage of the search space to consider * @return the percent of the search space explored */ public double getSearchPercent() { return m_searchSize; } /** * Gets the current settings of RandomSearch. * @return an array of strings suitable for passing to setOptions() */ public String[] getOptions () { String[] options = new String[5]; int current = 0; if (m_verbose) { options[current++] = "-V"; } if (!(getStartSet().equals(""))) { options[current++] = "-P"; options[current++] = ""+startSetToString(); } options[current++] = "-F"; options[current++] = "" + m_searchSize; while (current < options.length) { options[current++] = ""; } return options; } /** * converts the array of starting attributes to a string. This is * used by getOptions to return the actual attributes specified * as the starting set. This is better than using m_startRanges.getRanges() * as the same start set can be specified in different ways from the * command line---eg 1,2,3 == 1-3. This is to ensure that stuff that * is stored in a database is comparable. * @return a comma seperated list of individual attribute numbers as a String */ private String startSetToString() { StringBuffer FString = new StringBuffer(); boolean didPrint; if (m_starting == null) { return getStartSet(); } for (int i = 0; i < m_starting.length; i++) { didPrint = false; if ((m_hasClass == false) || (m_hasClass == true && i != m_classIndex)) { FString.append((m_starting[i] + 1)); didPrint = true; } if (i == (m_starting.length - 1)) { FString.append(""); } else { if (didPrint) { FString.append(","); } } } return FString.toString(); } /** * prints a description of the search * @return a description of the search as a string */ public String toString() { StringBuffer text = new StringBuffer(); text.append("\tRandom search.\n\tStart set: "); if (m_starting == null) { text.append("no attributes\n"); } else { text.append(startSetToString()+"\n"); } text.append("\tNumber of iterations: "+m_iterations+" (" +(m_searchSize * 100.0)+"% of the search space)\n"); text.append("\tMerit of best subset found: " +Utils.doubleToString(Math.abs(m_bestMerit),8,3)+"\n"); return text.toString(); } /** * Searches the attribute subset space using a genetic algorithm. * * @param ASEvaluator the attribute evaluator to guide the search * @param data the training instances. * @return an array (not necessarily ordered) of selected attribute indexes * @exception Exception if the search can't be completed */ public int[] search (ASEvaluation ASEval, Instances data) throws Exception { double best_merit; int sizeOfBest = m_numAttribs; BitSet temp; m_bestGroup = new BitSet(m_numAttribs); m_onlyConsiderBetterAndSmaller = false; if (!(ASEval instanceof SubsetEvaluator)) { throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!"); } m_random = new Random(m_seed); if (ASEval instanceof UnsupervisedSubsetEvaluator) { m_hasClass = false; } else { m_hasClass = true; m_classIndex = data.classIndex(); } SubsetEvaluator ASEvaluator = (SubsetEvaluator)ASEval; m_numAttribs = data.numAttributes(); m_startRange.setUpper(m_numAttribs-1); if (!(getStartSet().equals(""))) { m_starting = m_startRange.getSelection(); } // If a starting subset has been supplied, then initialise the bitset if (m_starting != null) { for (int i = 0; i < m_starting.length; i++) { if ((m_starting[i]) != m_classIndex) { m_bestGroup.set(m_starting[i]); } } m_onlyConsiderBetterAndSmaller = true; best_merit = ASEvaluator.evaluateSubset(m_bestGroup); sizeOfBest = countFeatures(m_bestGroup); } else { // do initial random subset m_bestGroup = generateRandomSubset(); best_merit = ASEvaluator.evaluateSubset(m_bestGroup); } if (m_verbose) { System.out.println("Initial subset (" +Utils.doubleToString(Math. abs(best_merit),8,5) +"): "+printSubset(m_bestGroup)); } int i; if (m_hasClass) { i = m_numAttribs -1; } else { i = m_numAttribs; } m_iterations = (int)((m_searchSize * Math.pow(2, i))); int tempSize; double tempMerit; // main loop for (i=0;i<m_iterations;i++) { temp = generateRandomSubset(); if (m_onlyConsiderBetterAndSmaller) { tempSize = countFeatures(temp); if (tempSize <= sizeOfBest) { tempMerit = ASEvaluator.evaluateSubset(temp); if (tempMerit >= best_merit) { sizeOfBest = tempSize; m_bestGroup = temp; best_merit = tempMerit; if (m_verbose) { System.out.print("New best subset (" +Utils.doubleToString(Math. abs(best_merit),8,5) +"): "+printSubset(m_bestGroup) + " :"); System.out.println(Utils. doubleToString((((double)i)/ ((double)m_iterations)* 100.0),5,1) +"% done"); } } } } else { tempMerit = ASEvaluator.evaluateSubset(temp); if (tempMerit > best_merit) { m_bestGroup = temp; best_merit = tempMerit; if (m_verbose) { System.out.print("New best subset (" +Utils.doubleToString(Math.abs(best_merit),8,5) +"): "+printSubset(m_bestGroup) + " :"); System.out.println(Utils. doubleToString((((double)i)/ ((double)m_iterations) *100.0),5,1) +"% done"); } } } } m_bestMerit = best_merit; return attributeList(m_bestGroup); } /** * prints a subset as a series of attribute numbers * @param temp the subset to print * @return a subset as a String of attribute numbers */ private String printSubset(BitSet temp) { StringBuffer text = new StringBuffer(); for (int j=0;j<m_numAttribs;j++) { if (temp.get(j)) { text.append((j+1)+" "); } } return text.toString(); } /** * converts a BitSet into a list of attribute indexes * @param group the BitSet to convert * @return an array of attribute indexes **/ private int[] attributeList (BitSet group) { int count = 0; // count how many were selected for (int i = 0; i < m_numAttribs; i++) { if (group.get(i)) { count++; } } int[] list = new int[count]; count = 0; for (int i = 0; i < m_numAttribs; i++) { if (group.get(i)) { list[count++] = i; } } return list; } /** * generates a random subset * @return a random subset as a BitSet */ private BitSet generateRandomSubset() { BitSet temp = new BitSet(m_numAttribs); double r; for (int i=0;i<m_numAttribs;i++) { r = m_random.nextDouble(); if (r <= 0.5) { if (m_hasClass && i == m_classIndex) { } else { temp.set(i); } } } return temp; } /** * counts the number of features in a subset * @param featureSet the feature set for which to count the features * @return the number of features in the subset */ private int countFeatures(BitSet featureSet) { int count = 0; for (int i=0;i<m_numAttribs;i++) { if (featureSet.get(i)) { count++; } } return count; } /** * resets to defaults */ private void resetOptions() { m_starting = null; m_startRange = new Range(); m_searchSize = 0.25; m_seed = 1; m_onlyConsiderBetterAndSmaller = false; m_verbose = false; } }