/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* RankSearch.java
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
*
*/
package weka.attributeSelection;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Vector;
/**
<!-- globalinfo-start -->
* RankSearch : <br/>
* <br/>
* Uses an attribute/subset evaluator to rank all attributes. If a subset evaluator is specified,
* then a forward selection search is used to generate a ranked list. From the
* ranked list of attributes, subsets of increasing size are evaluated, ie.
* The best attribute, the best attribute plus the next best attribute, etc....
* The best attribute set is reported. RankSearch is linear in the number of
* attributes if a simple attribute evaluator is used such as GainRatioAttributeEval.
* For more information see:<br/>
* <br/>
* Mark Hall, Geoffrey Holmes (2003). Benchmarking attribute selection techniques
* for discrete class data mining. IEEE Transactions on Knowledge and Data Engineering. 15(6):1437-1447.
* <p/>
<!-- globalinfo-end -->
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -A <attribute evaluator>
* class name of attribute evaluator to use for ranking. Place any
* evaluator options LAST on the command line following a "--".
* eg.:
* -A weka.attributeSelection.GainRatioAttributeEval ... -- -M
* (default: weka.attributeSelection.GainRatioAttributeEval)</pre>
*
* <pre> -S <step size>
* number of attributes to be added from the
* ranking in each iteration (default = 1).</pre>
*
* <pre> -R <start point>
* point in the ranking to start evaluating from.
* (default = 0, ie. the head of the ranking).</pre>
*
* <pre>
* Options specific to evaluator weka.attributeSelection.GainRatioAttributeEval:
* </pre>
*
* <pre> -M
* treat missing values as a separate value.</pre>
*
<!-- options-end -->
*
* @author Mark Hall (mhall@cs.waikato.ac.nz)
* @version $Revision: 6253 $
*/
public class RankSearch
extends ASSearch
implements OptionHandler, TechnicalInformationHandler {
/** for serialization */
static final long serialVersionUID = -7992268736874353755L;
/** true if the data has a class attribute (i.e. a supervised evaluator is used) */
private boolean m_hasClass;
/** holds the index of the class attribute (only valid when m_hasClass is true) */
private int m_classIndex;
/** number of attributes in the training data */
private int m_numAttribs;
/** the best subset found */
// NOTE(review): never assigned in the visible code — search() keeps the best
// subset in a local variable instead; confirm whether this field is still needed
private BitSet m_best_group;
/** the attribute evaluator to use for generating the ranking */
private ASEvaluation m_ASEval;
/** the subset evaluator with which to evaluate the ranking */
private ASEvaluation m_SubsetEval;
/** the training instances */
private Instances m_Instances;
/** the merit of the best subset found by the last call to search() */
private double m_bestMerit;
/** will hold the attribute ranking produced by the attribute evaluator */
private int [] m_Ranking;
/** add this many attributes in each iteration from the ranking (option -S) */
protected int m_add = 1;
/** start from this point in the ranking (option -R) */
protected int m_startPoint = 0;
/**
 * Returns a string describing this search method.
 *
 * @return a description of the search method suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
  // keep the description in sync with the class-level javadoc
  return "RankSearch : \n\n"
    +"Uses an attribute/subset evaluator to rank all attributes. "
    +"If a subset evaluator is specified, then a forward selection "
    +"search is used to generate a ranked list. From the ranked "
    +"list of attributes, subsets of increasing size are evaluated, ie. "
    +"The best attribute, the best attribute plus the next best attribute, "
    +"etc.... The best attribute set is reported. RankSearch is linear in "
    +"the number of attributes if a simple attribute evaluator is used "
    +"such as GainRatioAttributeEval. For more information see:\n\n"
    + getTechnicalInformation().toString();
}
/**
 * Returns an instance of a TechnicalInformation object, containing
 * detailed information about the technical background of this class,
 * e.g., paper reference or book this class is based on.
 *
 * @return the technical information about this class
 */
public TechnicalInformation getTechnicalInformation() {
  // reference for the paper describing the RankSearch approach
  TechnicalInformation info = new TechnicalInformation(Type.ARTICLE);
  info.setValue(Field.AUTHOR, "Mark Hall and Geoffrey Holmes");
  info.setValue(Field.YEAR, "2003");
  info.setValue(Field.TITLE,
      "Benchmarking attribute selection techniques for discrete class data mining");
  info.setValue(Field.JOURNAL, "IEEE Transactions on Knowledge and Data Engineering");
  info.setValue(Field.VOLUME, "15");
  info.setValue(Field.NUMBER, "6");
  info.setValue(Field.PAGES, "1437-1447");
  info.setValue(Field.PUBLISHER, "IEEE Computer Society");
  return info;
}
/**
 * Constructor. Initialises the search with default settings
 * (GainRatioAttributeEval as the ranking evaluator, step size 1,
 * start point 0).
 */
public RankSearch () {
  resetOptions();
}
/**
 * Returns the tip text for the attributeEvaluator property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String attributeEvaluatorTipText() {
  return "Attribute evaluator to use for generating a ranking.";
}
/**
 * Set the attribute evaluator to use for generating the ranking.
 *
 * @param newEvaluator the attribute evaluator to use.
 */
public void setAttributeEvaluator(ASEvaluation newEvaluator) {
  m_ASEval = newEvaluator;
}
/**
 * Get the attribute evaluator used to generate the ranking.
 *
 * @return the evaluator used to generate the ranking.
 */
public ASEvaluation getAttributeEvaluator() {
  return m_ASEval;
}
/**
 * Returns the tip text for the stepSize property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String stepSizeTipText() {
  return "Add this many attributes from the ranking in each iteration.";
}
/**
 * Set the number of attributes to add from the ranking
 * in each iteration. Non-positive values are silently ignored
 * and the current step size is retained.
 *
 * @param ss the number of attributes to add.
 */
public void setStepSize(int ss) {
  if (ss <= 0) {
    return; // reject nonsensical step sizes, keep the current value
  }
  m_add = ss;
}
/**
 * Get the number of attributes to add from the ranking
 * in each iteration.
 *
 * @return the number of attributes to add.
 */
public int getStepSize() {
  return m_add;
}
/**
 * Returns the tip text for the startPoint property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String startPointTipText() {
  return "Start evaluating from this point in the ranking.";
}
/**
 * Set the point at which to start evaluating the ranking.
 * Negative values are silently ignored and the current start
 * point is retained.
 *
 * @param sp the position in the ranking to start at
 */
public void setStartPoint(int sp) {
  if (sp < 0) {
    return; // a negative position in the ranking is meaningless
  }
  m_startPoint = sp;
}
/**
 * Get the point at which to start evaluating the ranking.
 *
 * @return the position in the ranking to start at
 */
public int getStartPoint() {
  return m_startPoint;
}
/**
 * Returns an enumeration describing the available options.
 * If the current attribute evaluator is an OptionHandler, its
 * options are appended after the RankSearch-specific ones.
 *
 * @return an enumeration of all the available options.
 **/
public Enumeration listOptions () {
  Vector result = new Vector(4);

  result.addElement(new Option(
      "\tclass name of attribute evaluator to use for ranking. Place any\n"
      + "\tevaluator options LAST on the command line following a \"--\".\n"
      + "\teg.:\n"
      + "\t\t-A weka.attributeSelection.GainRatioAttributeEval ... -- -M\n"
      + "\t(default: weka.attributeSelection.GainRatioAttributeEval)",
      "A", 1, "-A <attribute evaluator>"));
  result.addElement(new Option(
      "\tnumber of attributes to be added from the"
      + "\n\tranking in each iteration (default = 1).",
      "S", 1, "-S <step size>"));
  result.addElement(new Option(
      "\tpoint in the ranking to start evaluating from. "
      + "\n\t(default = 0, ie. the head of the ranking).",
      "R", 1, "-R <start point>"));

  // expose the evaluator's own options as well (instanceof is null-safe)
  if (m_ASEval instanceof OptionHandler) {
    result.addElement(new Option("", "", 0,
        "\nOptions specific to evaluator "
        + m_ASEval.getClass().getName() + ":"));
    for (Enumeration e = ((OptionHandler)m_ASEval).listOptions();
         e.hasMoreElements(); ) {
      result.addElement(e.nextElement());
    }
  }
  return result.elements();
}
/**
* Parses a given list of options. <p/>
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -A <attribute evaluator>
* class name of attribute evaluator to use for ranking. Place any
* evaluator options LAST on the command line following a "--".
* eg.:
* -A weka.attributeSelection.GainRatioAttributeEval ... -- -M
* (default: weka.attributeSelection.GainRatioAttributeEval)</pre>
*
* <pre> -S <step size>
* number of attributes to be added from the
* ranking in each iteration (default = 1).</pre>
*
* <pre> -R <start point>
* point in the ranking to start evaluating from.
* (default = 0, ie. the head of the ranking).</pre>
*
* <pre>
* Options specific to evaluator weka.attributeSelection.GainRatioAttributeEval:
* </pre>
*
* <pre> -M
* treat missing values as a separate value.</pre>
*
<!-- options-end -->
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
public void setOptions (String[] options)
  throws Exception {

  resetOptions();

  // step size through the ranking
  String value = Utils.getOption('S', options);
  if (value.length() > 0) {
    setStepSize(Integer.parseInt(value));
  }

  // where in the ranking to begin evaluating
  value = Utils.getOption('R', options);
  if (value.length() > 0) {
    setStartPoint(Integer.parseInt(value));
  }

  // ranking evaluator; anything after "--" is handed to it
  value = Utils.getOption('A', options);
  if (value.length() == 0) {
    value = GainRatioAttributeEval.class.getName();
  }
  setAttributeEvaluator(ASEvaluation.forName(value,
                                             Utils.partitionOptions(options)));
}
/**
 * Gets the current settings of RankSearch. Unused slots in the
 * returned array are padded with empty strings.
 *
 * @return an array of strings suitable for passing to setOptions()
 */
public String[] getOptions () {
  String[] evalOpts = new String[0];
  if (m_ASEval instanceof OptionHandler) {
    evalOpts = ((OptionHandler)m_ASEval).getOptions();
  }

  String[] result = new String[8 + evalOpts.length];
  int pos = 0;

  result[pos++] = "-S";
  result[pos++] = "" + getStepSize();
  result[pos++] = "-R";
  result[pos++] = "" + getStartPoint();

  if (getAttributeEvaluator() != null) {
    result[pos++] = "-A";
    result[pos++] = getAttributeEvaluator().getClass().getName();
  }

  // evaluator-specific options follow a "--" separator
  if (evalOpts.length > 0) {
    result[pos++] = "--";
    System.arraycopy(evalOpts, 0, result, pos, evalOpts.length);
    pos += evalOpts.length;
  }

  // pad any remaining slots with empty strings
  while (pos < result.length) {
    result[pos++] = "";
  }
  return result;
}
/**
 * Reset the search method to its default state: a
 * GainRatioAttributeEval ranking evaluator and no ranking.
 * Note: m_add and m_startPoint are NOT reset here; they keep
 * their field initialisers until explicitly set.
 */
protected void resetOptions () {
  m_ASEval = new GainRatioAttributeEval();
  m_Ranking = null;
}
/**
 * Ranks attributes using the specified attribute evaluator and then
 * searches the ranking using the supplied subset evaluator: subsets
 * consisting of increasingly long prefixes of the ranking are evaluated
 * and the best one is returned.
 *
 * @param ASEval the subset evaluator to guide the search
 * @param data the training instances.
 * @return an array (not necessarily ordered) of selected attribute indexes
 * @throws Exception if the search can't be completed (e.g. the supplied
 * evaluator is not a subset evaluator, or the start point lies beyond
 * the end of the ranking)
 */
public int[] search (ASEvaluation ASEval, Instances data)
  throws Exception {
  double best_merit = -Double.MAX_VALUE;
  double temp_merit;
  BitSet temp_group, best_group = null;

  if (!(ASEval instanceof SubsetEvaluator)) {
    throw new Exception(ASEval.getClass().getName()
                        + " is not a "
                        + "Subset evaluator!");
  }

  m_SubsetEval = ASEval;
  m_Instances = data;
  m_numAttribs = m_Instances.numAttributes();

  // an unsupervised ranking evaluator implies the data has no class attribute
  if (m_ASEval instanceof UnsupervisedAttributeEvaluator ||
      m_ASEval instanceof UnsupervisedSubsetEvaluator) {
    m_hasClass = false;
  } else {
    m_hasClass = true;
    m_classIndex = m_Instances.classIndex();
  }

  if (m_ASEval instanceof AttributeEvaluator) {
    // single-attribute evaluator: generate the attribute ranking directly
    Ranker ranker = new Ranker();
    m_ASEval.buildEvaluator(m_Instances);
    if (m_ASEval instanceof AttributeTransformer) {
      // get the transformed data and rebuild the subset evaluator on it
      m_Instances = ((AttributeTransformer)m_ASEval).
        transformedData(m_Instances);
      m_SubsetEval.buildEvaluator(m_Instances);
    }
    m_Ranking = ranker.search(m_ASEval, m_Instances);
  } else {
    // subset evaluator: use a forward selection to produce a ranked list
    GreedyStepwise fs = new GreedyStepwise();
    fs.setGenerateRanking(true);
    m_ASEval.buildEvaluator(m_Instances);
    fs.search(m_ASEval, m_Instances);
    double [][] rankres = fs.rankedAttributes();
    m_Ranking = new int[rankres.length];
    for (int i = 0; i < rankres.length; i++) {
      m_Ranking[i] = (int)rankres[i][0];  // column 0 holds the attribute index
    }
  }

  // now evaluate increasingly long prefixes of the attribute ranking
  for (int i = m_startPoint; i < m_Ranking.length; i += m_add) {
    temp_group = new BitSet(m_numAttribs);
    for (int j = 0; j <= i; j++) {
      temp_group.set(m_Ranking[j]);
    }
    temp_merit = ((SubsetEvaluator)m_SubsetEval).evaluateSubset(temp_group);
    if (temp_merit > best_merit) {
      best_merit = temp_merit;
      best_group = temp_group;
    }
  }

  // previously this fell through to a NullPointerException in
  // attributeList(null); report the misconfiguration explicitly instead
  if (best_group == null) {
    throw new Exception("Start point (" + m_startPoint
                        + ") lies beyond the end of the ranking ("
                        + m_Ranking.length + " attributes); no subsets "
                        + "were evaluated.");
  }

  m_bestMerit = best_merit;
  return attributeList(best_group);
}
/**
 * Converts a BitSet into a list of attribute indexes.
 * Uses BitSet.nextSetBit to visit only the set bits instead of
 * scanning every attribute position.
 *
 * @param group the BitSet to convert; only bits below m_numAttribs
 * are considered (matching the original bounded scan)
 * @return an array of the selected attribute indexes in ascending order
 **/
private int[] attributeList (BitSet group) {
  // first pass: count how many attributes were selected
  int count = 0;
  for (int i = group.nextSetBit(0);
       i >= 0 && i < m_numAttribs;
       i = group.nextSetBit(i + 1)) {
    count++;
  }
  // second pass: collect the selected indexes
  int[] list = new int[count];
  count = 0;
  for (int i = group.nextSetBit(0);
       i >= 0 && i < m_numAttribs;
       i = group.nextSetBit(i + 1)) {
    list[count++] = i;
  }
  return list;
}
/**
 * Returns a description of the search as a String: the evaluator used
 * (with its options), the attribute ranking (1-based indices) and the
 * merit of the best subset found.
 *
 * @return a description of the search
 */
public String toString () {
  StringBuffer text = new StringBuffer();
  text.append("\tRankSearch :\n");
  text.append("\tAttribute evaluator : "
              + getAttributeEvaluator().getClass().getName() +" ");
  if (m_ASEval instanceof OptionHandler) {
    String[] evaluatorOptions = new String[0];
    evaluatorOptions = ((OptionHandler)m_ASEval).getOptions();
    for (int i=0;i<evaluatorOptions.length;i++) {
      text.append(evaluatorOptions[i]+' ');
    }
  }
  text.append("\n");
  text.append("\tAttribute ranking : \n");
  // field width for the index column: number of decimal digits needed
  int rlength = (int)(Math.log(m_Ranking.length) / Math.log(10) + 1);
  for (int i=0;i<m_Ranking.length;i++) {
    // indices are displayed 1-based, as elsewhere in Weka output
    text.append("\t "+Utils.doubleToString((double)(m_Ranking[i]+1),
                                           rlength,0)
                +" "+m_Instances.attribute(m_Ranking[i]).name()+'\n');
  }
  text.append("\tMerit of best subset found : ");
  int fieldwidth = 3;
  // derive display precision from the fractional part of the merit;
  // the log10 arithmetic sizes the field to the magnitude of the value
  double precision = (m_bestMerit - (int)m_bestMerit);
  if (Math.abs(m_bestMerit) > 0) {
    fieldwidth = (int)Math.abs((Math.log(Math.abs(m_bestMerit)) / Math.log(10)))+2;
  }
  if (Math.abs(precision) > 0) {
    precision = Math.abs((Math.log(Math.abs(precision)) / Math.log(10)))+3;
  } else {
    precision = 2;
  }
  // NOTE(review): the absolute value of the merit is printed; the sign
  // of a negative merit is deliberately not shown here
  text.append(Utils.doubleToString(Math.abs(m_bestMerit),
                                   fieldwidth+(int)precision,
                                   (int)precision)+"\n");
  return text.toString();
}
/**
 * Returns the revision string.
 *
 * @return the revision
 */
public String getRevision() {
  return RevisionUtils.extract("$Revision: 6253 $");
}
}