/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* BestFirst.java
* Copyright (C) 1999 Mark Hall
*
*/
package weka.attributeSelection;
import java.io.*;
import java.util.*;
import weka.core.*;
/**
* Class for performing a best first search. <p>
*
* Valid options are: <p>
*
* -P <start set> <br>
* Specify a starting set of attributes. Eg 1,4,7-9. <p>
*
* -D <0 = backward | 1 = forward | 2 = bi-directional> <br>
* Direction of the search. (default = 1). <p>
*
* -N <num> <br>
* Number of non improving nodes to consider before terminating search.
* (default = 5). <p>
*
* @author Mark Hall (mhall@cs.waikato.ac.nz)
* @version $Revision: 1.1.1.1 $
*/
public class BestFirst extends ASSearch
  implements OptionHandler, StartSetHandler
{

  // Inner classes

  /**
   * A node of the open list used by the best first search: an attribute
   * subset together with its merit.
   * @author Mark Hall (mhall@cs.waikato.ac.nz)
   **/
  public class Link2 {

    /** the attribute subset (bit i set == attribute i is in the subset) */
    BitSet group;

    /** the merit of the subset */
    double merit;

    /**
     * Creates a node that holds its own copy of the supplied subset.
     * @param gr the attribute subset
     * @param mer the merit of the subset
     */
    public Link2 (BitSet gr, double mer) {
      group = (BitSet)gr.clone();
      merit = mer;
    }

    /**
     * Get the attribute subset held by this node.
     * @return the subset (the node's internal BitSet, not a copy)
     */
    public BitSet getGroup () {
      return group;
    }

    /**
     * @return a textual description of this node
     */
    public String toString () {
      return ("Node: " + group.toString() + " " + merit);
    }
  }

  /**
   * A bounded list of Link2 nodes kept in order of decreasing merit.
   * Used as the open list of the best first search.
   * Extends the FastVector class.
   * @author Mark Hall (mhall@cs.waikato.ac.nz)
   **/
  public class LinkedList2
    extends FastVector
  {

    /** Max number of elements in the list */
    int m_MaxSize;

    /**
     * Creates an empty list.
     * @param sz the maximum number of elements the list may hold
     */
    public LinkedList2 (int sz) {
      super();
      m_MaxSize = sz;
    }

    /**
     * removes an element (Link) at a specific index from the list.
     * @param index the index of the element to be removed.
     * @exception Exception if the index is out of range
     **/
    public void removeLinkAt (int index)
      throws Exception {
      if ((index < 0) || (index >= size())) {
        throw new Exception("index out of range (removeLinkAt)");
      }
      removeElementAt(index);
    }

    /**
     * returns the element (Link) at a specific index from the list.
     * @param index the index of the element to be returned.
     * @return the element at the given index
     * @exception Exception if the list is empty or the index is out of range
     **/
    public Link2 getLinkAt (int index)
      throws Exception {
      if (size() == 0) {
        throw new Exception("List is empty (getLinkAt)");
      }
      if ((index < 0) || (index >= size())) {
        throw new Exception("index out of range (getLinkAt)");
      }
      return ((Link2)(elementAt(index)));
    }

    /**
     * adds an element (Link) to the list, keeping the list sorted by
     * decreasing merit. A new element is placed after existing elements of
     * equal merit. If the list already holds the maximum number of elements,
     * the worst element is dropped to make room; if the new element is no
     * better than the worst one it is not inserted at all.
     * @param gr the attribute set specification
     * @param mer the "merit" of this attribute set
     * @exception Exception if the list can't be updated
     **/
    public void addToList (BitSet gr, double mer)
      throws Exception {
      Link2 newL = new Link2(gr, mer);
      int size = size();

      // position of the first element that is strictly worse than the new one
      int pos = 0;
      while ((pos < size) && !(mer > ((Link2)(elementAt(pos))).merit)) {
        pos++;
      }

      if ((size > 0) && (size == m_MaxSize)) {
        if (pos == size) {
          // list is full and the new element is no better than the last one
          return;
        }
        // make room by discarding the current worst element
        removeLinkAt(size - 1);
        insertElementAt(newL, pos);
      }
      else if (pos == size) {
        addElement(newL);
      }
      else {
        insertElementAt(newL, pos);
      }
    }
  }

  // member variables

  /** maximum number of stale nodes before terminating search */
  private int m_maxStale;

  /** 0 == backward search, 1 == forward search, 2 == bidirectional */
  private int m_searchDirection;

  /** search directions */
  private static final int SELECTION_BACKWARD = 0;
  private static final int SELECTION_FORWARD = 1;
  private static final int SELECTION_BIDIRECTIONAL = 2;
  public static final Tag [] TAGS_SELECTION = {
    new Tag(SELECTION_BACKWARD, "Backward"),
    new Tag(SELECTION_FORWARD, "Forward"),
    new Tag(SELECTION_BIDIRECTIONAL, "Bi-directional"),
  };

  /** holds an array of starting attributes */
  private int[] m_starting;

  /** holds the start set for the search as a Range */
  private Range m_startRange;

  /** does the data have a class */
  private boolean m_hasClass;

  /** holds the class index (-1 if there is no class) */
  private int m_classIndex;

  /** number of attributes in the data */
  private int m_numAttribs;

  /** total number of subsets evaluated during a search */
  private int m_totalEvals;

  /** for debugging */
  private boolean m_debug;

  /** holds the merit of the best subset found */
  private double m_bestMerit;

  /**
   * Returns a string describing this search method
   * @return a description of the search method suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "BestFirst:\n\n"
      +"Searches the space of attribute subsets by greedy hillclimbing "
      +"augmented with a backtracking facility. Setting the number of "
      +"consecutive non-improving nodes allowed controls the level of "
      +"backtracking done. Best first may start with the empty set of "
      +"attributes and search forward, or start with the full set of "
      +"attributes and search backward, or start at any point and search "
      +"in both directions (by considering all possible single attribute "
      +"additions and deletions at a given point).\n";
  }

  /**
   * Constructor. Initialises all options to their default values.
   */
  public BestFirst () {
    resetOptions();
  }

  /**
   * Returns an enumeration describing the available options.
   * @return an enumeration of all the available options.
   *
   **/
  public Enumeration listOptions () {
    Vector newVector = new Vector(3);
    newVector.addElement(new Option("\tSpecify a starting set of attributes."
                                    + "\n\tEg. 1,3,5-7."
                                    ,"P",1
                                    , "-P <start set>"));
    newVector.addElement(new Option("\tDirection of search. (default = 1)."
                                    , "D", 1
                                    , "-D <0 = backward | 1 = forward "
                                    + "| 2 = bi-directional>"));
    newVector.addElement(new Option("\tNumber of non-improving nodes to"
                                    + "\n\tconsider before terminating search."
                                    , "N", 1, "-N <num>"));
    return newVector.elements();
  }

  /**
   * Parses a given list of options. All options are first reset to their
   * defaults.
   *
   * Valid options are: <p>
   *
   * -P <start set> <br>
   * Specify a starting set of attributes. Eg 1,4,7-9. <p>
   *
   * -D <0 = backward | 1 = forward | 2 = bi-directional> <br>
   * Direction of the search. (default = 1). <p>
   *
   * -N <num> <br>
   * Number of non improving nodes to consider before terminating search.
   * (default = 5). <p>
   *
   * The flag -Z (not listed by listOptions) switches on debugging output.
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   *
   **/
  public void setOptions (String[] options)
    throws Exception {
    String optionString;
    resetOptions();

    optionString = Utils.getOption('P', options);
    if (optionString.length() != 0) {
      setStartSet(optionString);
    }

    optionString = Utils.getOption('D', options);
    if (optionString.length() != 0) {
      setDirection(new SelectedTag(Integer.parseInt(optionString),
                                   TAGS_SELECTION));
    } else {
      setDirection(new SelectedTag(SELECTION_FORWARD, TAGS_SELECTION));
    }

    optionString = Utils.getOption('N', options);
    if (optionString.length() != 0) {
      setSearchTermination(Integer.parseInt(optionString));
    }

    m_debug = Utils.getFlag('Z', options);
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String startSetTipText() {
    return "Set the start point for the search. This is specified as a comma "
      +"seperated list off attribute indexes starting at 1. It can include "
      +"ranges. Eg. 1,2,5-9,17.";
  }

  /**
   * Sets a starting set of attributes for the search. It is the
   * search method's responsibility to report this start set (if any)
   * in its toString() method.
   * @param startSet a string containing a list of attributes (and or ranges),
   * eg. 1,2,6,10-15.
   * @exception Exception if start set can't be set.
   */
  public void setStartSet (String startSet) throws Exception {
    m_startRange.setRanges(startSet);
  }

  /**
   * Returns a list of attributes (and or attribute ranges) as a String
   * @return a list of attributes (and or attribute ranges)
   */
  public String getStartSet () {
    return m_startRange.getRanges();
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String searchTerminationTipText() {
    return "Set the amount of backtracking. Specify the number of "
      +"consecutive non-improving nodes to allow before terminating "
      +"the search.";
  }

  /**
   * Set the number of non-improving nodes to consider before terminating
   * search.
   *
   * @param t the number of non-improving nodes
   * @exception Exception if t is less than 1
   */
  public void setSearchTermination (int t)
    throws Exception {
    if (t < 1) {
      throw new Exception("Value of -N must be > 0.");
    }
    m_maxStale = t;
  }

  /**
   * Get the termination criterion (number of non-improving nodes).
   *
   * @return the number of non-improving nodes
   */
  public int getSearchTermination () {
    return m_maxStale;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String directionTipText() {
    return "Set the direction of the search.";
  }

  /**
   * Set the search direction. The request is ignored unless the supplied
   * tag was created from TAGS_SELECTION.
   *
   * @param d the direction of the search
   */
  public void setDirection (SelectedTag d) {
    if (d.getTags() == TAGS_SELECTION) {
      m_searchDirection = d.getSelectedTag().getID();
    }
  }

  /**
   * Get the search direction
   *
   * @return the direction of the search
   */
  public SelectedTag getDirection () {
    return new SelectedTag(m_searchDirection, TAGS_SELECTION);
  }

  /**
   * Gets the current settings of BestFirst.
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions () {
    String[] options = new String[6];
    int current = 0;

    if (!(getStartSet().equals(""))) {
      options[current++] = "-P";
      options[current++] = ""+startSetToString();
    }
    options[current++] = "-D";
    options[current++] = "" + m_searchDirection;
    options[current++] = "-N";
    options[current++] = "" + m_maxStale;

    // pad the unused slots with empty strings
    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /**
   * converts the array of starting attributes to a string. This is
   * used by getOptions to return the actual attributes specified
   * as the starting set. This is better than using m_startRanges.getRanges()
   * as the same start set can be specified in different ways from the
   * command line---eg 1,2,3 == 1-3. This is to ensure that stuff that
   * is stored in a database is comparable. The class attribute (if any)
   * is left out. If no search has been run yet, the start set is returned
   * as it was specified.
   * @return a comma separated list of individual attribute numbers
   * (starting at 1) as a String
   */
  private String startSetToString() {
    if (m_starting == null) {
      return getStartSet();
    }

    StringBuffer FString = new StringBuffer();
    for (int i = 0; i < m_starting.length; i++) {
      // compare the attribute index itself (not its position in the array)
      // with the class index
      if (m_hasClass && (m_starting[i] == m_classIndex)) {
        continue;
      }
      if (FString.length() > 0) {
        FString.append(",");
      }
      FString.append((m_starting[i] + 1));
    }
    return FString.toString();
  }

  /**
   * returns a description of the search as a String
   * @return a description of the search
   */
  public String toString () {
    StringBuffer BfString = new StringBuffer();
    BfString.append("\tBest first.\n\tStart set: ");
    if (m_starting == null) {
      BfString.append("no attributes\n");
    }
    else {
      BfString.append(startSetToString()+"\n");
    }

    BfString.append("\tSearch direction: ");
    if (m_searchDirection == SELECTION_BACKWARD) {
      BfString.append("backward\n");
    }
    else if (m_searchDirection == SELECTION_FORWARD) {
      BfString.append("forward\n");
    }
    else {
      BfString.append("bi-directional\n");
    }

    BfString.append("\tStale search after "
                    + m_maxStale + " node expansions\n");
    BfString.append("\tTotal number of subsets evaluated: "
                    + m_totalEvals + "\n");
    BfString.append("\tMerit of best subset found: "
                    +Utils.doubleToString(Math.abs(m_bestMerit),8,3)+"\n");
    return BfString.toString();
  }

  /**
   * Prints the (1-based) indexes of the attributes in a subset to
   * standard out, followed by a newline. Used for debugging output.
   * @param tt the attribute subset
   * @param numAttribs the number of attributes to inspect
   */
  private void printGroup (BitSet tt, int numAttribs) {
    for (int i = 0; i < numAttribs; i++) {
      if (tt.get(i)) {
        System.out.print((i + 1) + " ");
      }
    }
    System.out.println();
  }

  /**
   * Searches the attribute subset space by best first search
   *
   * @param ASEval the attribute evaluator to guide the search; must be
   * a SubsetEvaluator
   * @param data the training instances.
   * @return an array (not necessarily ordered) of selected attribute indexes
   * @exception Exception if the search can't be completed
   */
  public int[] search (ASEvaluation ASEval, Instances data)
    throws Exception {
    m_totalEvals = 0;
    if (!(ASEval instanceof SubsetEvaluator)) {
      throw new Exception(ASEval.getClass().getName()
                          + " is not a "
                          + "Subset evaluator!");
    }

    if (ASEval instanceof UnsupervisedSubsetEvaluator) {
      m_hasClass = false;
      // don't keep a class index left over from an earlier supervised run
      m_classIndex = -1;
    }
    else {
      m_hasClass = true;
      m_classIndex = data.classIndex();
    }

    SubsetEvaluator ASEvaluator = (SubsetEvaluator)ASEval;
    m_numAttribs = data.numAttributes();
    int i, j;
    int best_size = 0;
    int size;
    int childSize;
    int done;
    int sd = m_searchDirection;
    BitSet best_group, temp_group;
    int stale = 0;
    double best_merit;
    double merit;
    boolean candidate;
    boolean better;
    boolean added;
    Link2 tl;
    // subsets that have already been evaluated (and so are, or have been,
    // in the open list)
    Hashtable lookup = new Hashtable((int)(200.0*m_numAttribs*1.5));
    LinkedList2 bfList = new LinkedList2(m_maxStale);
    best_group = new BitSet(m_numAttribs);

    m_startRange.setUpper(m_numAttribs-1);
    if (!(getStartSet().equals(""))) {
      m_starting = m_startRange.getSelection();
    }

    // If a starting subset has been supplied, then initialise the bitset
    if (m_starting != null) {
      for (i = 0; i < m_starting.length; i++) {
        if ((m_starting[i]) != m_classIndex) {
          // count only attributes that actually end up in the subset
          if (!best_group.get(m_starting[i])) {
            best_size++;
          }
          best_group.set(m_starting[i]);
        }
      }
      m_totalEvals++;
    }
    else {
      if (m_searchDirection == SELECTION_BACKWARD) {
        setStartSet("1-last");
        // init initial subset to all attributes (except the class)
        int[] all = new int[m_numAttribs];
        for (i = 0, j = 0; i < m_numAttribs; i++) {
          if (i != m_classIndex) {
            best_group.set(i);
            all[j++] = i;
          }
        }
        // keep exactly the attributes that were selected
        m_starting = new int[j];
        System.arraycopy(all, 0, m_starting, 0, j);
        best_size = j;
        m_totalEvals++;
      }
    }

    // evaluate the initial subset
    best_merit = ASEvaluator.evaluateSubset(best_group);

    // add the initial group to the list and the hash table
    bfList.addToList(best_group, best_merit);
    BitSet tt = (BitSet)best_group.clone();
    lookup.put(tt, "");

    while (stale < m_maxStale) {
      added = false;

      if (m_searchDirection == SELECTION_BIDIRECTIONAL) {
        // bi-directional search: a forward pass followed by a backward pass
        done = 2;
        sd = SELECTION_FORWARD;
      }
      else {
        done = 1;
      }

      // finished search?
      if (bfList.size() == 0) {
        stale = m_maxStale;
        break;
      }

      // copy the attribute set at the head of the list
      tl = bfList.getLinkAt(0);
      temp_group = (BitSet)(tl.getGroup().clone());
      // remove the head of the list
      bfList.removeLinkAt(0);

      // count the number of bits set (attributes) in the node being expanded
      size = 0;
      for (i = 0; i < m_numAttribs; i++) {
        if (temp_group.get(i)) {
          size++;
        }
      }

      do {
        for (i = 0; i < m_numAttribs; i++) {
          // forward: try attributes not yet in the subset;
          // backward: try attributes currently in the subset
          if (sd == SELECTION_FORWARD) {
            candidate = ((i != m_classIndex) && (!temp_group.get(i)));
          }
          else {
            candidate = ((i != m_classIndex) && (temp_group.get(i)));
          }

          if (candidate) {
            // set the bit (attribute to add/delete)
            if (sd == SELECTION_FORWARD) {
              temp_group.set(i);
              childSize = size + 1;
            }
            else {
              temp_group.clear(i);
              childSize = size - 1;
            }

            /* if this subset has been seen before, then it is already
               in the list (or has been fully expanded) */
            tt = (BitSet)temp_group.clone();
            if (lookup.containsKey(tt) == false) {
              merit = ASEvaluator.evaluateSubset(temp_group);
              m_totalEvals++;

              if (m_debug) {
                System.out.print("Group: ");
                printGroup(tt, m_numAttribs);
                System.out.println("Merit: " + merit);
              }

              // is this better than the best?
              if (sd == SELECTION_FORWARD) {
                better = ((merit - best_merit) > 0.00001);
              }
              else {
                // backward: higher merit wins; on equal merit prefer the
                // smaller subset
                if (merit == best_merit) {
                  better = (childSize < best_size);
                }
                else {
                  better = (merit > best_merit);
                }
              }

              if (better) {
                added = true;
                stale = 0;
                best_merit = merit;
                best_size = childSize;
                best_group = (BitSet)(temp_group.clone());
              }

              // insert this one in the list and in the hash table
              bfList.addToList(tt, merit);
              lookup.put(tt, "");
            }

            // unset this addition(deletion)
            if (sd == SELECTION_FORWARD) {
              temp_group.clear(i);
            }
            else {
              temp_group.set(i);
            }
          }
        }

        if (done == 2) {
          sd = SELECTION_BACKWARD;
        }
        done--;
      } while (done > 0);

      /* if we haven't added a new attribute subset then full expansion
         of this node hasn't resulted in anything better */
      if (!added) {
        stale++;
      }
    }

    m_bestMerit = best_merit;
    return attributeList(best_group);
  }

  /**
   * Reset options to default values
   */
  protected void resetOptions () {
    m_maxStale = 5;
    m_searchDirection = SELECTION_FORWARD;
    m_starting = null;
    m_startRange = new Range();
    m_classIndex = -1;
    m_totalEvals = 0;
    m_debug = false;
  }

  /**
   * converts a BitSet into a list of attribute indexes
   * @param group the BitSet to convert
   * @return an array of attribute indexes (in increasing order)
   **/
  private int[] attributeList (BitSet group) {
    int count = 0;

    // count how many were selected
    for (int i = 0; i < m_numAttribs; i++) {
      if (group.get(i)) {
        count++;
      }
    }

    int[] list = new int[count];
    count = 0;
    for (int i = 0; i < m_numAttribs; i++) {
      if (group.get(i)) {
        list[count++] = i;
      }
    }
    return list;
  }
}