RandomProjection.java example

Explorer
weka-master
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    RandomProjection.java
 *    Copyright (C) 2003 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters.unsupervised.attribute;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance; 
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

/** 
 <!-- globalinfo-start -->
 * Reduces the dimensionality of the data by projecting it onto a lower dimensional subspace using a random matrix with columns of unit length (i.e. It will reduce the number of attributes in the data while preserving much of its variation like PCA, but at a much less computational cost).<br/>
 * It first applies the  NominalToBinary filter to convert all attributes to numeric before reducing the dimension. It preserves the class attribute.<br/>
 * <br/>
 * For more information, see:<br/>
 * <br/>
 * Dmitriy Fradkin, David Madigan: Experiments with random projections for machine learning. In: KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining, New York, NY, USA, 517-522, 2003.
 * <p/>
 <!-- globalinfo-end -->
 * 
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * @inproceedings{Fradkin2003,
 *    address = {New York, NY, USA},
 *    author = {Dmitriy Fradkin and David Madigan},
 *    booktitle = {KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining},
 *    pages = {517-522},
 *    publisher = {ACM Press},
 *    title = {Experiments with random projections for machine learning},
 *    year = {2003}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 * 
 * <pre> -N <number>
 *  The number of dimensions (attributes) the data should be reduced to
 *  (default 10; exclusive of the class attribute, if it is set).</pre>
 * 
 * <pre> -D [SPARSE1|SPARSE2|GAUSSIAN]
 *  The distribution to use for calculating the random matrix.
 *  Sparse1 is:
 *    sqrt(3)*{-1 with prob(1/6), 0 with prob(2/3), +1 with prob(1/6)}
 *  Sparse2 is:
 *    {-1 with prob(1/2), +1 with prob(1/2)}
 * </pre>
 * 
 * <pre> -P <percent>
 *  The percentage of dimensions (attributes) the data should
 *  be reduced to (exclusive of the class attribute, if it is set). This -N
 *  option is ignored if this option is present or is greater
 *  than zero.</pre>
 * 
 * <pre> -M
 *  Replace missing values using the ReplaceMissingValues filter</pre>
 * 
 * <pre> -R <num>
 *  The random seed for the random number generator used for
 *  calculating the random matrix (default 42).</pre>
 * 
 <!-- options-end -->
 *
 * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz) 
 * @version $Revision$ [1.0 - 22 July 2003 - Initial version (Ashraf M. Kibriya)]
 */
public class RandomProjection 
  extends Filter 
  implements UnsupervisedFilter, OptionHandler, TechnicalInformationHandler {

  /** Serialization version identifier. */
  static final long serialVersionUID = 4428905532728645880L;

  /**
   * Number of projected (non-class) attributes the data is reduced to.
   * Defaults to 10; recomputed from {@link #m_percent} when the output
   * format is built and the percentage is greater than zero.
   */
  protected int m_k = 10;

  /**
   * Target dimensionality expressed relative to the original number of
   * attributes. A value of 0 means "use {@link #m_k} as given".
   */
  protected double m_percent = 0.0;

  /**
   * Whether the random matrix is drawn from a Gaussian distribution.
   * NOTE(review): not read anywhere in this file; the distribution is
   * selected through {@link #m_distribution} instead.
   */
  protected boolean m_useGaussian = false;

  /** distribution type: sparse 1 */
  public static final int SPARSE1 = 1;
  /** distribution type: sparse 2 */
  public static final int SPARSE2 = 2;
  /** distribution type: gaussian */
  public static final int GAUSSIAN = 3;

  /**
   * The distributions that can be used for calculating the random matrix,
   * as tags pairing each distribution ID with its readable name.
   */
  public static final Tag [] TAGS_DSTRS_TYPE = {
    new Tag(SPARSE1, "Sparse 1"),
    new Tag(SPARSE2, "Sparse 2"),
    new Tag(GAUSSIAN, "Gaussian"),
  };

  /**
   * ID of the distribution used for calculating the random matrix
   * (one of SPARSE1, SPARSE2 or GAUSSIAN; defaults to SPARSE1).
   */
  protected int m_distribution = SPARSE1;
 
  /**
   * Whether missing values should be replaced using the
   * unsupervised ReplaceMissingValues filter.
   */
  protected boolean m_useReplaceMissing = false;

  /** Tracks whether the output format has been defined for the current batch. */
  protected boolean m_OutputFormatDefined = false;

  /**
   * The NominalToBinary filter applied to the data before this filter;
   * null when no such pre-processing is active.
   */
  protected Filter m_ntob; // not created eagerly; assigned when the input format is set

  /** The ReplaceMissingValues filter; null when no such pre-processing is active. */
  protected Filter m_replaceMissing;
    
  /** Random seed used to generate the random matrix (default 42). */
  protected long m_rndmSeed = 42;

  /**
   * The random projection matrix: one row per projected attribute,
   * one column per attribute of the format being projected.
   */
  protected double m_rmatrix[][];

  /** The random number generator used for generating the random matrix. */
  protected Random m_random;

  /**
   * Returns an enumeration describing the available options.
   * <p/>
   * Every option that expects a value (-N, -D, -P and -R) is declared with
   * one argument; only the -M flag takes none.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    Vector<Option> newVector = new Vector<Option>(5);

    newVector.addElement(new Option(
        "\tThe number of dimensions (attributes) the data should be reduced to\n"
        + "\t(default 10; exclusive of the class attribute, if it is set).",
        "N", 1, "-N <number>"));

    newVector.addElement(new Option(
        "\tThe distribution to use for calculating the random matrix.\n"
        + "\tSparse1 is:\n"
        + "\t  sqrt(3)*{-1 with prob(1/6), 0 with prob(2/3), +1 with prob(1/6)}\n"
        + "\tSparse2 is:\n"
        + "\t  {-1 with prob(1/2), +1 with prob(1/2)}\n",
        "D", 1, "-D [SPARSE1|SPARSE2|GAUSSIAN]"));

    newVector.addElement(new Option(
        "\tThe percentage of dimensions (attributes) the data should\n"
        + "\tbe reduced to (exclusive of the class attribute, if it is set). This -N\n"
        + "\toption is ignored if this option is present or is greater\n"
        + "\tthan zero.",
        "P", 1, "-P <percent>"));

    newVector.addElement(new Option(
        "\tReplace missing values using the ReplaceMissingValues filter",
        "M", 0, "-M"));

    // -R consumes a value (<num>), so it must be declared with one argument.
    newVector.addElement(new Option(
        "\tThe random seed for the random number generator used for\n"
        + "\tcalculating the random matrix (default 42).",
        "R", 1, "-R <num>"));

    return newVector.elements();
  }

  /**
   * Parses a given list of options. <p/>
   * 
   <!-- options-start -->
   * Valid options are: <p/>
   * 
   * <pre> -N <number>
   *  The number of dimensions (attributes) the data should be reduced to
   *  (default 10; exclusive of the class attribute, if it is set).</pre>
   * 
   * <pre> -D [SPARSE1|SPARSE2|GAUSSIAN]
   *  The distribution to use for calculating the random matrix.
   *  Sparse1 is:
   *    sqrt(3)*{-1 with prob(1/6), 0 with prob(2/3), +1 with prob(1/6)}
   *  Sparse2 is:
   *    {-1 with prob(1/2), +1 with prob(1/2)}
   * </pre>
   * 
   * <pre> -P <percent>
   *  The percentage of dimensions (attributes) the data should
   *  be reduced to (exclusive of the class attribute, if it is set). This -N
   *  option is ignored if this option is present or is greater
   *  than zero.</pre>
   * 
   * <pre> -M
   *  Replace missing values using the ReplaceMissingValues filter</pre>
   * 
   * <pre> -R <num>
   *  The random seed for the random number generator used for
   *  calculating the random matrix (default 42).</pre>
   * 
   <!-- options-end -->
   *
   * The distribution name is matched case-insensitively and ignoring
   * whitespace, so both the keyword form ("SPARSE1") and the readable tag
   * form ("Sparse 1") are accepted. An unrecognised distribution name leaves
   * the current distribution unchanged, as do absent -R and -D options.
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    // -P takes precedence: -N is only consulted when no percentage is given.
    String value = Utils.getOption('P', options);
    if (value.length() != 0) {
      setPercent(Double.parseDouble(value));
    } else {
      setPercent(0);
      value = Utils.getOption('N', options);
      if (value.length() != 0) {
        setNumberOfAttributes(Integer.parseInt(value));
      } else {
        setNumberOfAttributes(10);
      }
    }

    value = Utils.getOption('R', options);
    if (value.length() != 0) {
      setRandomSeed(Long.parseLong(value));
    }

    value = Utils.getOption('D', options);
    if (value.length() != 0) {
      // Strip whitespace so the readable tag names ("Sparse 1", "Sparse 2")
      // are recognised as well as the documented keywords.
      String distribution = value.replaceAll("\\s+", "");
      if (distribution.equalsIgnoreCase("sparse1")) {
        setDistribution(new SelectedTag(SPARSE1, TAGS_DSTRS_TYPE));
      } else if (distribution.equalsIgnoreCase("sparse2")) {
        setDistribution(new SelectedTag(SPARSE2, TAGS_DSTRS_TYPE));
      } else if (distribution.equalsIgnoreCase("gaussian")) {
        setDistribution(new SelectedTag(GAUSSIAN, TAGS_DSTRS_TYPE));
      }
    }

    setReplaceMissingValues(Utils.getFlag('M', options));
  }

  /**
   * Gets the current settings of the filter.
   * <p/>
   * The distribution is written using the keywords documented for the -D
   * option (SPARSE1, SPARSE2, GAUSSIAN) rather than the readable tag names,
   * because the readable names contain a space ("Sparse 1") that the -D
   * parser in setOptions does not match.
   * Unused trailing slots of the returned array are filled with empty strings.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    String[] options = new String[10];
    int current = 0;

    if (getReplaceMissingValues()) {
      options[current++] = "-M";
    }

    // Either -N or -P is emitted, never both: a non-zero percentage wins.
    if (getPercent() == 0) {
      options[current++] = "-N";
      options[current++] = "" + getNumberOfAttributes();
    } else {
      options[current++] = "-P";
      options[current++] = "" + getPercent();
    }

    options[current++] = "-R";
    options[current++] = "" + getRandomSeed();

    String distribution;
    switch (getDistribution().getSelectedTag().getID()) {
      case SPARSE2:
        distribution = "SPARSE2";
        break;
      case GAUSSIAN:
        distribution = "GAUSSIAN";
        break;
      default:
        distribution = "SPARSE1";
        break;
    }
    options[current++] = "-D";
    options[current++] = distribution;

    while (current < options.length) {
      options[current++] = "";
    }

    return options;
  }
    
   
  /**
   * Describes this filter: what it does, the pre-processing it applies and
   * the publication it is based on (appended from the technical information).
   *
   * @return a description of the filter suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {

    StringBuilder info = new StringBuilder();

    info.append("Reduces the dimensionality of the data by projecting");
    info.append(" it onto a lower dimensional subspace using a random");
    info.append(" matrix with columns of unit length (i.e. It will reduce");
    info.append(" the number of attributes in the data while preserving");
    info.append(" much of its variation like PCA, but at a much less");
    info.append(" computational cost).\n");
    info.append("It first applies the  NominalToBinary filter to");
    info.append(" convert all attributes to numeric before reducing the");
    info.append(" dimension. It preserves the class attribute.\n\n");
    info.append("For more information, see:\n\n");
    info.append(getTechnicalInformation().toString());

    return info.toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing 
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   * <p/>
   * The reference is the KDD '03 paper by Fradkin and Madigan, published
   * in 2003.
   * 
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result = new TechnicalInformation(Type.INPROCEEDINGS);

    result.setValue(Field.AUTHOR, "Dmitriy Fradkin and David Madigan");
    result.setValue(Field.TITLE, "Experiments with random projections for machine learning");
    result.setValue(Field.BOOKTITLE, "KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining");
    // Four-digit year: the conference is KDD '03, i.e. 2003.
    result.setValue(Field.YEAR, "2003");
    result.setValue(Field.PAGES, "517-522");
    result.setValue(Field.PUBLISHER, "ACM Press");
    result.setValue(Field.ADDRESS, "New York, NY, USA");

    return result;
  }

  /**
   * Tip text for the numberOfAttributes property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numberOfAttributesTipText() {
    final String tip =
        "The number of dimensions (attributes) the data should"
        + " be reduced to.";
    return tip;
  }

  /**
   * Stores the target number of attributes (dimensions) for the projection.
   *
   * @param newAttNum the goal for the dimensions
   */
  public void setNumberOfAttributes(int newAttNum) {
    this.m_k = newAttNum;
  }
  
  /**
   * Reports the number of attributes (dimensions) the data is currently
   * configured to be reduced to.
   *
   * @return the number of dimensions
   */
  public int getNumberOfAttributes() {
    final int dimensions = this.m_k;
    return dimensions;
  }

  /**
   * Returns the tip text for the percent property.
   * <p/>
   * The percentage is applied to the attributes excluding the class
   * attribute, which matches the help text of the -P option.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String percentTipText() {

    return " The percentage of dimensions (attributes) the data should"
        + " be reduced to  (exclusive of the class attribute, if it is set). This "
        + " NumberOfAttributes option is ignored if this option is"
        + " present or is greater than zero.";
  }

  /**
   * Sets the percent the attributes (dimensions) of the data should be
   * reduced to.
   * <p/>
   * The value is stored internally as a fraction (percent / 100) for every
   * input, so that getPercent() always returns exactly what was set. Values
   * that are zero or negative disable the percentage, because the output
   * format only uses it when it is greater than zero.
   *
   * @param newPercent the percentage of attributes
   */
  public void setPercent(double newPercent) {
    // Scale unconditionally: scaling only positive values made
    // getPercent() return a negative input multiplied by 100.
    m_percent = newPercent / 100;
  }

  /**
   * Reports the configured percentage, converting the internally stored
   * fraction back by multiplying it by 100.
   *
   * @return the percentage of attributes
   */
  public double getPercent() {
    final double fraction = m_percent;
    return fraction * 100;
  }


  /**
   * Tip text for the randomSeed property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String randomSeedTipText() {
    StringBuilder tip = new StringBuilder("The random seed used by the random");
    tip.append(" number generator used for generating");
    tip.append(" the random matrix ");
    return tip.toString();
  }

  /**
   * Stores the seed used to initialise the random number generator.
   *
   * @param seed the random seed value
   */
  public void setRandomSeed(long seed) {
    this.m_rndmSeed = seed;
  }

  /**
   * Reports the seed currently configured for the random number generator.
   *
   * @return the random seed value
   */
  public long getRandomSeed() {
    final long seed = this.m_rndmSeed;
    return seed;
  }


  /**
   * Tip text for the distribution property, spelling out the two sparse
   * distributions.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String  distributionTipText() {
    final String[] lines = {
      "The distribution to use for calculating the random matrix.\n",
      "Sparse1 is:\n",
      " sqrt(3) * { -1 with prob(1/6), \n",
      "               0 with prob(2/3),  \n",
      "              +1 with prob(1/6) } \n",
      "Sparse2 is:\n",
      " { -1 with prob(1/2), \n",
      "   +1 with prob(1/2) } "
    };

    StringBuilder tip = new StringBuilder();
    for (int i = 0; i < lines.length; i++) {
      tip.append(lines[i]);
    }
    return tip.toString();
  }
  /**
   * Selects the distribution used for calculating the random matrix.
   * The request is ignored unless the tag was built from TAGS_DSTRS_TYPE
   * (checked by reference).
   *
   * @param newDstr the distribution to use
   */
  public void setDistribution(SelectedTag newDstr) {
    if (newDstr.getTags() != TAGS_DSTRS_TYPE) {
      return;
    }
    m_distribution = newDstr.getSelectedTag().getID();
  }

  /**
   * Wraps the currently configured distribution ID in a SelectedTag over
   * TAGS_DSTRS_TYPE.
   *
   * @return the current distribution
   */
  public SelectedTag getDistribution() {
    final SelectedTag current = new SelectedTag(m_distribution, TAGS_DSTRS_TYPE);
    return current;
  }

  /**
   * Tip text for the replaceMissingValues property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String replaceMissingValuesTipText() {
    final String tip =
        "If set the filter uses weka.filters.unsupervised.attribute.ReplaceMissingValues"
        + " to replace the missing values";
    return tip;
  }

  /**
   * Switches the use of the replace missing values filter on or off.
   *
   * @param t if true then the replace missing values is used
   */
  public void setReplaceMissingValues(boolean t) {
    this.m_useReplaceMissing = t;
  }

  /**
   * Reports whether the ReplaceMissingValues filter is requested.
   *
   * @return true if the replace missing values filter is used
   */
  public boolean getReplaceMissingValues() {
    final boolean enabled = this.m_useReplaceMissing;
    return enabled;
  }

  /**
   * Builds the Capabilities of this filter: starting from the inherited
   * capabilities with everything disabled, it enables all attribute types,
   * missing values, all class types, missing class values and data without
   * a class.
   *
   * @return            the capabilities of this object
   * @see               Capabilities
   */
  public Capabilities getCapabilities() {
    final Capabilities caps = super.getCapabilities();
    caps.disableAll();

    // attribute-related capabilities
    caps.enableAllAttributes();
    caps.enable(Capability.MISSING_VALUES);

    // class-related capabilities
    caps.enableAllClasses();
    caps.enable(Capability.MISSING_CLASS_VALUES);
    caps.enable(Capability.NO_CLASS);

    return caps;
  }

  /**
   * Sets the format of the input instances and creates the helper filters
   * that pre-process the data before it is projected.
   * <p/>
   * A NominalToBinary filter is created when at least one non-class
   * attribute is nominal (the supervised variant if a class index is set,
   * the unsupervised one otherwise). A ReplaceMissingValues filter is
   * created when the replaceMissingValues option is switched on. Helper
   * filters and the output-format flag left over from an earlier use of
   * this object are discarded first.
   *
   * @param instanceInfo an Instances object containing the input 
   * instance structure (any instances contained in the object are 
   * ignored - only the structure is required).
   * @return true if the outputFormat may be collected immediately
   * @throws Exception if the input format can't be set 
   * successfully
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {      
    super.setInputFormat(instanceInfo);

    // Forget state from a previous input format; everything below is
    // rebuilt for the new one.
    m_OutputFormatDefined = false;
    m_ntob = null;
    m_replaceMissing = null;

    for (int i = 0; i < instanceInfo.numAttributes(); i++) {
      if (i != instanceInfo.classIndex() && instanceInfo.attribute(i).isNominal()) {
        if (instanceInfo.classIndex() >= 0) {
          m_ntob = new weka.filters.supervised.attribute.NominalToBinary();
        } else {
          m_ntob = new weka.filters.unsupervised.attribute.NominalToBinary();
        }
        break;
      }
    }

    // The helper must be created from the user's setting; testing the
    // (still unassigned) helper itself meant it was never created.
    boolean replaceReady = true;
    if (m_useReplaceMissing) {
      m_replaceMissing = new weka.filters.unsupervised.attribute.ReplaceMissingValues();
      replaceReady = m_replaceMissing.setInputFormat(instanceInfo);
    }

    // The projection's format depends on NominalToBinary's output format,
    // so it cannot be built until that filter has determined its own.
    if (m_ntob != null && !m_ntob.setInputFormat(instanceInfo)) {
      return false;
    }

    setOutputFormat();
    return replaceReady;
  }

   
  /**
   * Input an instance for filtering. The instance is passed through the
   * ReplaceMissingValues and NominalToBinary helpers (where present) and
   * then projected and queued for output.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be
   * collected with output(); false if a helper filter has not yet made
   * the instance available.
   * @throws IllegalStateException if no input format has been set
   */
  public boolean input(Instance instance) throws Exception {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    Instance current = instance;

    if (m_replaceMissing != null) {
      if (!m_replaceMissing.input(current)) {
        // Helper is holding the instance back; it is collected in
        // batchFinished().
        return false;
      }
      current = m_replaceMissing.output();
    }

    if (m_ntob != null) {
      if (!m_ntob.input(current)) {
        return false;
      }
      // Only now is NominalToBinary guaranteed to have an output format.
      if (!m_OutputFormatDefined) {
        setOutputFormat();
      }
      current = m_ntob.output();
    } else if (m_replaceMissing != null && !m_OutputFormatDefined) {
      setOutputFormat();
    }

    push(convertInstance(current));
    return true;
  }


  /**
   * Signify that this batch of input to the filter is finished. Instances
   * still held by the helper filters are collected, projected and queued
   * for output.
   * <p/>
   * NOTE(review): a helper filter that reports pending output here is
   * discarded afterwards (set to null), so later batches are projected
   * without that pre-processing step. This keeps the existing behaviour;
   * confirm whether it is intended.
   *
   * @return true if there are instances pending output
   * @throws NullPointerException if no input structure has been defined,
   * @throws Exception if there was a problem finishing the batch.
   */
  public boolean batchFinished() throws Exception {
    if (getInputFormat() == null) {
      throw new NullPointerException("No input instance format defined");
    }

    boolean conversionDone = false;
    if (m_replaceMissing != null && m_replaceMissing.batchFinished()) {
      Instance replaced;
      while ((replaced = m_replaceMissing.output()) != null) {
        if (m_ntob != null) {
          // Just hand the instance on: the projection's format cannot be
          // built before NominalToBinary has finished its own batch.
          m_ntob.input(replaced);
        } else {
          if (!m_OutputFormatDefined) {
            setOutputFormat();
          }
          push(convertInstance(replaced));
        }
      }

      if (m_ntob != null) {
        flushNominalToBinary();
      }
      m_replaceMissing = null;
      conversionDone = true;
    }

    if (!conversionDone && m_ntob != null) {
      flushNominalToBinary();
    }

    m_OutputFormatDefined = false;
    return super.batchFinished();
  }

  /**
   * Finishes the NominalToBinary helper's batch and, if it reports pending
   * output, projects and queues every instance it returns before
   * discarding the helper. Must only be called while m_ntob is not null.
   *
   * @throws Exception if the helper fails to finish its batch
   */
  private void flushNominalToBinary() throws Exception {
    if (m_ntob.batchFinished()) {
      Instance binarized;
      while ((binarized = m_ntob.output()) != null) {
        if (!m_OutputFormatDefined) {
          setOutputFormat();
        }
        push(convertInstance(binarized));
      }
      m_ntob = null;
    }
  }
    

  /**
   * Sets the output format and generates the random projection matrix.
   * <p/>
   * The output consists of m_k numeric attributes named K1..Km_k, followed
   * by the class attribute if one is set. When a percentage greater than
   * zero is configured, m_k is first recomputed as that fraction of the
   * input attributes, not counting the class attribute if one is set.
   * The matrix has one row per projected attribute and one column per
   * attribute of the format being projected (the NominalToBinary output
   * if that helper is active, the input format otherwise), and is filled
   * from a generator seeded with the configured random seed.
   */
  protected void setOutputFormat() {
      Instances currentFormat;
      if (m_ntob != null) {
        currentFormat = m_ntob.getOutputFormat();
      } else {
        currentFormat = getInputFormat();
      }

      if (m_percent > 0) {
        Instances inputFormat = getInputFormat();
        int numPredictors = inputFormat.numAttributes();
        // Only leave out the class attribute when there actually is one;
        // subtracting unconditionally under-counts data without a class.
        if (inputFormat.classIndex() >= 0) {
          numPredictors--;
        }
        m_k = (int) (numPredictors * m_percent);
      }

      int newClassIndex = -1;
      FastVector attributes = new FastVector();
      for (int i = 0; i < m_k; i++) {
        attributes.addElement(new Attribute("K" + (i + 1)));
      }
      if (currentFormat.classIndex() != -1) {
        // The class attribute is carried over unchanged as the last attribute.
        attributes.addElement(currentFormat.attribute(currentFormat.classIndex()));
        newClassIndex = attributes.size() - 1;
      }

      Instances newFormat = new Instances(currentFormat.relationName(), attributes, 0);
      if (newClassIndex != -1) {
        newFormat.setClassIndex(newClassIndex);
      }
      m_OutputFormatDefined = true;

      m_random = new Random(m_rndmSeed);

      // Columns are indexed by attribute index of the format being
      // projected, so the class column exists but is never read.
      m_rmatrix = new double[m_k][currentFormat.numAttributes()];
      if (m_distribution == GAUSSIAN) {
        for (int i = 0; i < m_rmatrix.length; i++) {
          for (int j = 0; j < m_rmatrix[i].length; j++) {
            m_rmatrix[i][j] = m_random.nextGaussian();
          }
        }
      } else {
        boolean useDstrWithZero = (m_distribution == SPARSE1);
        for (int i = 0; i < m_rmatrix.length; i++) {
          for (int j = 0; j < m_rmatrix[i].length; j++) {
            m_rmatrix[i][j] = rndmNum(useDstrWithZero);
          }
        }
      }

      setOutputFormat(newFormat);
  }

  /**
   * Projects a single instance into the output format: the first m_k
   * values are the random projections, and the class value (if a class
   * index is set) is copied into position m_k. The instance weight is
   * carried over.
   *
   * @param currentInstance     the instance to convert
   * @return                    the converted instance
   */
  protected Instance convertInstance(Instance currentInstance) {

      final Instances header = getOutputFormat();
      final double[] projected = new double[header.numAttributes()];

      // The class index refers to the format the instance arrives in.
      final int classIndex;
      if (m_ntob == null) {
        classIndex = getInputFormat().classIndex();
      } else {
        classIndex = m_ntob.getOutputFormat().classIndex();
      }

      int k = 0;
      while (k < m_k) {
        projected[k] = computeRandomProjection(k, classIndex, currentInstance);
        k++;
      }
      if (classIndex != -1) {
        projected[m_k] = currentInstance.value(classIndex);
      }

      final Instance result = new DenseInstance(currentInstance.weight(), projected);
      result.setDataset(header);
      return result;
  }


  /**
   * Computes one random projection for a given instance: the sum over the
   * instance's stored values of matrix entry times value, leaving out the
   * class attribute and missing values.
   *
   * @param rpIndex     offset the new random projection attribute
   * @param classIndex  classIndex of the input instance
   * @param instance    the instance to convert
   * @return    the random sum
   */

  protected double computeRandomProjection(int rpIndex, int classIndex, Instance instance) {

    double total = 0.0;
    final int stored = instance.numValues();
    for (int pos = 0; pos < instance.numValues() && pos < stored + 1; pos++) {
      final int attIndex = instance.index(pos);
      if (attIndex == classIndex) {
        continue;
      }
      final double v = instance.valueSparse(pos);
      if (Utils.isMissingValue(v)) {
        continue;
      }
      total += m_rmatrix[rpIndex][attIndex] * v;
    }
    return total;
  }

  private static final int weights[] = {1, 1, 4};
  private static final int vals[] = {-1, 1, 0};
  private static final int weights2[] = {1, 1};
  private static final int vals2[] = {-1, 1};
  private static final double sqrt3 = Math.sqrt(3);

  /**
   * Draws one entry of the random matrix. <br/>
   * With the zero-including distribution: <br/>
   *      x = sqrt(3) * { -1 with prob. 1/6, 0 with prob. 2/3, 1 with prob. 1/6 } <br/>
   * otherwise: <br/>
   *      x = { -1 with prob. 1/2, 1 with prob. 1/2 }
   *      
   * @param useDstrWithZero true to use the distribution that includes zero
   * @return the generated number
   */
  protected double rndmNum(boolean useDstrWithZero) {
      if (!useDstrWithZero) {
        final int pick = weightedDistribution(weights2);
        return vals2[pick];
      }
      final int pick = weightedDistribution(weights);
      return sqrt3 * vals[pick];
  }

  /**
   * Picks an index at random, with each index chosen in proportion to its
   * weight. A number is drawn from [0, sum of weights) using the filter's
   * random number generator and the index whose cumulative weight range
   * contains it is returned.
   *
   * @param weights the weights to use
   * @return the chosen index, or -1 if no index was selected
   */
  protected int weightedDistribution(int [] weights) {
      int total = 0;
      for (int w : weights) {
        total += w;
      }

      int remaining = (int) Math.floor(m_random.nextDouble() * total);

      int chosen = -1;
      for (int idx = 0; idx < weights.length; idx++) {
        remaining -= weights[idx];
        if (remaining < 0) {
          chosen = idx;
          break;
        }
      }
      return chosen;
  }  
  
  /**
   * Extracts the revision string from the version-control keyword.
   *
   * @return		the revision
   */
  public String getRevision() {
    final String keyword = "$Revision$";
    return RevisionUtils.extract(keyword);
  }

  /**
   * Command-line entry point: runs a fresh RandomProjection filter with
   * the given arguments.
   *
   * @param argv should contain arguments to the filter: 
   * use -h for help
   */
  public static void main(String [] argv) {
    final RandomProjection filter = new RandomProjection();
    runFilter(filter, argv);
  }
}