/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* ExhaustiveSearch.java
* Copyright (C) 1999 Mark Hall
*
*/
package weka.attributeSelection;
import java.io.*;
import java.util.*;
import weka.core.*;
/**
 * Class for performing an exhaustive search through the space of attribute
 * subsets. <p>
 *
 * Subsets are enumerated in order of increasing size. Without a start set
 * the whole space is explored and the best subset is reported. With a start
 * set the search stops at the first subset whose merit is at least as good
 * as that of the start set. <p>
 *
 * Candidate subsets are enumerated as bit patterns held in a
 * <code>long</code>, so at most 63 attributes are supported. <p>
 *
 * Valid options are: <p>
 *
 * -P &lt;start set&gt; <br>
 * Specify a starting set of attributes. Eg 1,4,7-9. <p>
 *
 * -V <br>
 * Verbose output. Output new best subsets as the search progresses. <p>
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 1.1.1.1 $
 */
public class ExhaustiveSearch extends ASSearch
  implements StartSetHandler, OptionHandler {

  /**
   * Largest number of attributes that can be searched. Subsets are coded as
   * bit patterns in a long and the enumeration relies on the pattern staying
   * positive, so bit 63 (the sign bit) cannot be used.
   */
  private static final int MAX_ATTRIBUTES = 63;

  /**
   * holds a starting set as an array of (0-based) attribute indexes.
   * Only populated by search(); null if no start set is in effect.
   */
  private int[] m_starting;

  /** the start set as a Range */
  private Range m_startRange;

  /** the best feature set found during the search */
  private BitSet m_bestGroup;

  /** the merit of the best subset found */
  private double m_bestMerit;

  /** does the data have a class */
  private boolean m_hasClass;

  /** holds the class index (-1 when the evaluator is unsupervised) */
  private int m_classIndex;

  /** number of attributes in the data */
  private int m_numAttribs;

  /** if true, then output new best subsets as the search progresses */
  private boolean m_verbose;

  /**
   * stop after finding the first subset equal to or better than the
   * supplied start set (set to true if start set is supplied).
   */
  private boolean m_stopAfterFirst;

  /** the number of subsets evaluated during the most recent search */
  private int m_evaluations;

  /**
   * Returns a string describing this search method
   * @return a description of the search suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "ExhaustiveSearch : \n\nPerforms an exhaustive search through "
      +"the space of attribute subsets starting from the empty set of "
      +"attrubutes. Reports the best subset found. If a start set is "
      +"supplied, the algorithm searches backward from the start point "
      +"and reports the smallest subset with as good or better evaluation "
      +"as the start point.\n";
  }

  /**
   * Constructor. Initialises all options to their defaults.
   */
  public ExhaustiveSearch () {
    resetOptions();
  }

  /**
   * Returns an enumeration describing the available options.
   * @return an enumeration of all the available options.
   **/
  public Enumeration listOptions () {
    Vector newVector = new Vector(2);

    newVector.addElement(new Option("\tSpecify a starting set of attributes."
                                    + "\n\tEg. 1,3,5-7."
                                    +"\n\tIf a start point is supplied,"
                                    +"\n\tExhaustive search stops after"
                                    +"\n\tfinding the smallest possible subset"
                                    +"\n\twith merit as good as or better than"
                                    +"\n\tthe start set."
                                    ,"P",1
                                    , "-P <start set>"));
    newVector.addElement(new Option("\tOutput subsets as the search progresses."
                                    +"\n\t(default = false)."
                                    , "V", 0
                                    , "-V"));
    return newVector.elements();
  }

  /**
   * Parses a given list of options. All options are first reset to their
   * defaults, so anything not present in the list is cleared.
   *
   * Valid options are: <p>
   *
   * -P &lt;start set&gt; <br>
   * Specify a starting set of attributes. Eg 1,4,7-9. <p>
   *
   * -V <br>
   * Verbose output. Output new best subsets as the search progresses. <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   *
   **/
  public void setOptions (String[] options)
    throws Exception {
    String optionString;

    resetOptions();

    optionString = Utils.getOption('P', options);
    if (optionString.length() != 0) {
      setStartSet(optionString);
    }

    setVerbose(Utils.getFlag('V',options));
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String startSetTipText() {
    return "Set the start point for the search. This is specified as a comma "
      +"seperated list off attribute indexes starting at 1. It can include "
      +"ranges. Eg. 1,2,5-9,17.";
  }

  /**
   * Sets a starting set of attributes for the search. It is the
   * search method's responsibility to report this start set (if any)
   * in its toString() method.
   * @param startSet a string containing a list of attributes (and or ranges),
   * eg. 1,2,6,10-15. "" indicates no start set.
   * If a start point is supplied, Exhaustive search stops after finding
   * the smallest possible subset with merit as good as or better than the
   * start set. Otherwise, the search space is explored FULLY, and the
   * best subset returned.
   * @exception Exception if start set can't be set.
   */
  public void setStartSet (String startSet) throws Exception {
    m_startRange.setRanges(startSet);
  }

  /**
   * Returns a list of attributes (and or attribute ranges) as a String
   * @return a list of attributes (and or attribute ranges)
   */
  public String getStartSet () {
    return m_startRange.getRanges();
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String verboseTipText() {
    return "Print progress information. Sends progress info to the terminal "
      +"as the search progresses.";
  }

  /**
   * set whether or not to output new best subsets as the search proceeds
   * @param v true if output is to be verbose
   */
  public void setVerbose(boolean v) {
    m_verbose = v;
  }

  /**
   * get whether or not output is verbose
   * @return true if output is set to verbose
   */
  public boolean getVerbose() {
    return m_verbose;
  }

  /**
   * Gets the current settings of ExhaustiveSearch.
   * @return an array of strings suitable for passing to setOptions().
   * The array always has three elements; unused slots hold "".
   */
  public String[] getOptions () {
    String[] options = new String[3];
    int current = 0;

    if (!(getStartSet().equals(""))) {
      options[current++] = "-P";
      options[current++] = ""+startSetToString();
    }

    if (m_verbose) {
      options[current++] = "-V";
    }

    // pad the remainder so that callers never see null entries
    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /**
   * converts the array of starting attributes to a string. This is
   * used by getOptions to return the actual attributes specified
   * as the starting set. This is better than using m_startRanges.getRanges()
   * as the same start set can be specified in different ways from the
   * command line---eg 1,2,3 == 1-3. This is to ensure that stuff that
   * is stored in a database is comparable. The class attribute (if any)
   * is left out. If no search has resolved the start set yet, the range
   * string is returned as it was supplied.
   * @return a comma separated list of individual (1-based) attribute
   * numbers as a String
   */
  private String startSetToString() {
    if (m_starting == null) {
      return getStartSet();
    }

    StringBuffer FString = new StringBuffer();
    for (int i = 0; i < m_starting.length; i++) {
      // test the attribute itself, not its position in the array
      if (isClassAttribute(m_starting[i])) {
        continue;
      }
      // separator goes in front of every element but the first, so a
      // skipped trailing element cannot leave a dangling comma
      if (FString.length() > 0) {
        FString.append(",");
      }
      FString.append((m_starting[i] + 1));
    }
    return FString.toString();
  }

  /**
   * prints a description of the search
   * @return a description of the search as a string
   */
  public String toString() {
    StringBuffer text = new StringBuffer();
    text.append("\tExhaustive Search.\n\tStart set: ");

    if (m_starting == null) {
      text.append("no attributes\n");
    }
    else {
      text.append(startSetToString()+"\n");
    }
    text.append("\tNumber of evaluations: "+m_evaluations+"\n");
    text.append("\tMerit of best subset found: "
                +Utils.doubleToString(Math.abs(m_bestMerit),8,3)+"\n");
    return text.toString();
  }

  /**
   * Searches the attribute subset space using an exhaustive search.
   * Per-search state (evaluation count, resolved start set, stop flag)
   * is re-initialised on every call.
   *
   * @param ASEval the attribute evaluator to guide the search; must be
   * a SubsetEvaluator
   * @param data the training instances.
   * @return an array of selected (0-based) attribute indexes in
   * ascending order
   * @exception Exception if the evaluator is not a subset evaluator, if
   * the data has more than 63 attributes, or if the search can't be
   * completed
   */
  public int[] search (ASEvaluation ASEval, Instances data)
    throws Exception {

    if (!(ASEval instanceof SubsetEvaluator)) {
      throw new Exception(ASEval.getClass().getName()
                          + " is not a "
                          + "Subset evaluator!");
    }
    SubsetEvaluator ASEvaluator = (SubsetEvaluator)ASEval;

    m_numAttribs = data.numAttributes();
    if (m_numAttribs > MAX_ATTRIBUTES) {
      // beyond this the bit patterns used for enumeration would overflow
      // and the search would silently visit the wrong subsets
      throw new Exception("Exhaustive search supports at most "
                          + MAX_ATTRIBUTES
                          + " attributes, but the data has "
                          + m_numAttribs + ".");
    }

    if (ASEval instanceof UnsupervisedSubsetEvaluator) {
      m_hasClass = false;
      // make sure no stale/default index is mistaken for a class attribute
      m_classIndex = -1;
    }
    else {
      m_hasClass = true;
      m_classIndex = data.classIndex();
    }

    m_evaluations = 0;
    m_bestGroup = new BitSet(m_numAttribs);

    // resolve the start set afresh so that clearing it between searches
    // takes effect
    m_startRange.setUpper(m_numAttribs-1);
    if (getStartSet().equals("")) {
      m_starting = null;
    }
    else {
      m_starting = m_startRange.getSelection();
    }
    m_stopAfterFirst = (m_starting != null);

    // If a starting subset has been supplied, then initialise the bitset
    if (m_starting != null) {
      for (int i = 0; i < m_starting.length; i++) {
        if (!isClassAttribute(m_starting[i])) {
          m_bestGroup.set(m_starting[i]);
        }
      }
    }

    m_bestMerit = ASEvaluator.evaluateSubset(m_bestGroup);
    m_evaluations++;

    if (m_verbose && m_stopAfterFirst) {
      System.out.println("Initial subset ("
                         +Utils.doubleToString(Math.
                                               abs(m_bestMerit),8,5)
                         +"): "+printSubset(m_bestGroup));
    }

    // the zero feature subset; only evaluate it if the initial subset
    // was not already empty
    BitSet tempGroup = new BitSet(m_numAttribs);
    double tempMerit;
    if (countFeatures(m_bestGroup) == 0) {
      tempMerit = m_bestMerit;
    }
    else {
      tempMerit = ASEvaluator.evaluateSubset(tempGroup);
      m_evaluations++;
    }

    if (m_verbose) {
      System.out.println("Zero feature subset ("
                         +Utils.doubleToString(Math.
                                               abs(tempMerit),8,5)
                         +")");
    }

    boolean done = updateBest(tempGroup, tempMerit, false)
      && m_stopAfterFirst;

    for (int setSize = 1; !done && setSize <= m_numAttribs; setSize++) {
      // first pattern of this size: the setSize lowest bits
      long subset = 0;
      tempGroup = new BitSet(m_numAttribs);
      for (int i = 0; i < setSize; i++) {
        subset |= (1L << i);
        if (!isClassAttribute(i)) {
          tempGroup.set(i);
        }
      }

      // evaluate it, then every other pattern of this size;
      // generateNextSubset returns 0 once the patterns are exhausted
      do {
        tempMerit = ASEvaluator.evaluateSubset(tempGroup);
        m_evaluations++;
        if (updateBest(tempGroup, tempMerit, true) && m_stopAfterFirst) {
          done = true;
          break;
        }
        subset = generateNextSubset(subset, setSize, tempGroup);
      } while (subset > 0);
    }

    return attributeList(m_bestGroup);
  }

  /**
   * Compares a candidate subset with the best found so far. The candidate
   * becomes the new best if its merit is strictly higher, or equal with
   * fewer features.
   * @param candidate the subset that was evaluated
   * @param merit the merit of the candidate
   * @param report true if a new best subset should be printed when
   * verbose output is on
   * @return true if the merit of the candidate is at least as good as
   * the best merit (whether or not the candidate replaced the best)
   */
  private boolean updateBest(BitSet candidate, double merit, boolean report) {
    if (merit >= m_bestMerit) {
      if (merit > m_bestMerit
          || countFeatures(candidate) < countFeatures(m_bestGroup)) {
        m_bestMerit = merit;
        m_bestGroup = (BitSet)(candidate.clone());
        if (report && m_verbose) {
          System.out.println("New best subset ("
                             +Utils.doubleToString(Math.
                                                   abs(m_bestMerit),8,5)
                             +"): "+printSubset(m_bestGroup));
        }
      }
      return true;
    }
    return false;
  }

  /**
   * tests whether an attribute index is the class attribute
   * @param index the (0-based) attribute index to test
   * @return true if the data has a class and index is its class index
   */
  private boolean isClassAttribute(int index) {
    return m_hasClass && index == m_classIndex;
  }

  /**
   * counts the number of features in a subset
   * @param featureSet the feature set for which to count the features
   * @return the number of features in the subset
   */
  private int countFeatures(BitSet featureSet) {
    int count = 0;
    for (int i = 0; i < m_numAttribs; i++) {
      if (featureSet.get(i)) {
        count++;
      }
    }
    return count;
  }

  /**
   * prints a subset as a series of attribute numbers
   * @param temp the subset to print
   * @return a subset as a String of (1-based) attribute numbers, each
   * followed by a space
   */
  private String printSubset(BitSet temp) {
    StringBuffer text = new StringBuffer();

    for (int j = 0; j < m_numAttribs; j++) {
      if (temp.get(j)) {
        text.append((j+1)+" ");
      }
    }
    return text.toString();
  }

  /**
   * converts a BitSet into a list of attribute indexes
   * @param group the BitSet to convert
   * @return an array of (0-based) attribute indexes in ascending order
   **/
  private int[] attributeList (BitSet group) {
    int[] list = new int[countFeatures(group)];
    int count = 0;

    for (int i = 0; i < m_numAttribs; i++) {
      if (group.get(i)) {
        list[count++] = i;
      }
    }
    return list;
  }

  /**
   * generates the next subset of size "size" given the subset "set"
   * coded as a bit pattern (bit i stands for attribute i). The next
   * pattern is returned and temp contains the corresponding subset as a
   * BitSet. The class attribute takes part in the enumeration but is
   * never put into temp, so temp may hold one feature fewer than "size".
   * @param set the current subset coded as a bit pattern
   * @param size the size of the feature subset (eg. 2 means that the
   * current subset contains two features and the next generated subset
   * should also contain 2 features).
   * @param temp will hold the generated subset as a BitSet
   * @return the next bit pattern, or 0 if there are no more patterns
   * of this size
   */
  private long generateNextSubset(long set, int size, BitSet temp) {
    int i, j;
    int counter = 0;   // number of bits found parked at the top end
    boolean done = false;

    for (i = 0; i < m_numAttribs; i++) {
      temp.clear(i);
    }

    while ((!done) && (counter < size)) {
      // highest position the next bit may occupy, given that "counter"
      // bits have already been taken off the positions above it
      int limit = m_numAttribs - 1 - counter;

      i = limit;
      while (i >= 0 && (set & (1L << i)) == 0) {
        i--;
      }
      if (i < 0) {
        // fewer than "size" bits were set; nothing left to move
        break;
      }

      // erase and move
      set = (set ^ (1L << i));
      if (i != limit) {
        // shift this bit up by one and re-pack the erased bits
        // directly above it
        set = (set ^ (1L << (i + 1)));
        for (j = 0; j < counter; j++) {
          set = (set ^ (1L << (i + 2 + j)));
        }
        done = true;
      }
      else {
        // bit is already as high as it can go; try the next one down
        counter++;
      }
    }

    for (i = m_numAttribs-1; i >= 0; i--) {
      if ((set & (1L << i)) != 0 && !isClassAttribute(i)) {
        temp.set(i);
      }
    }
    return set;
  }

  /**
   * resets to defaults
   */
  private void resetOptions() {
    m_starting = null;
    m_startRange = new Range();
    m_stopAfterFirst = false;
    m_verbose = false;
    m_evaluations = 0;
  }
}