/*
 * JointCriterionSelector.java example
 *
 * Explorer
 * ComplexRapidMiner-master
 * - operator
 * - src
 */

package de.tud.inf.operator.mm;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.tools.Ontology;

import de.tud.inf.operator.mm.util.ClusteringInfo;
import de.tud.inf.operator.mm.util.MetaConfig;
import de.tud.inf.operator.mm.util.SortingIndex;


/**
 * This class implements the Joint-Criterion strategy.
 * <p>
 * The operator orders all examples of the input example-set greedily: in every round it picks
 * the not yet selected example that maximizes
 * {@code ALPHA * (sum of SNMI values) + (1 - ALPHA) * (sum of (1 - cluster-column values))},
 * where the second sum runs over the cluster columns that refer to already selected examples.
 * The position in this order is written to the {@code jc_order} column and the first
 * <i>sample size</i> examples are flagged in the {@code jc_selected} column.
 * 
 * @see <a href="http://www.siam.org/proceedings/datamining/2008/dm08_71_fern.pdf">
 *      http://www.siam.org/proceedings/datamining/2008/dm08_71_fern.pdf</a>
 * 
 * @version $Revision$
 * @author Andre Jaehnig
 */
public class JointCriterionSelector extends AbstractSelector {

   /************************************************************************************************
    * FIELDS
    ***********************************************************************************************/

   /** Column name with the indices of the selected order. */
   private static final String JOINT_CRITERION_COLUMN_NAME_ORDER = "jc_order";

   /** Column name with the selected flag. */
   private static final String JOINT_CRITERION_COLUMN_NAME_SELECTED = "jc_selected";

   /** Weighting-factor for the calculation (weight of the SNMI sum; the NMI sum gets 1 - ALPHA). */
   private static final double ALPHA = 0.5d;

   /************************************************************************************************
    * GETTER & SETTER
    ***********************************************************************************************/

   /*
    * none
    */

   /************************************************************************************************
    * CONSTRUCTOR
    ***********************************************************************************************/

   /**
    * Constructor.
    * 
    * @param description
    *           the operator description, passed on to the super class
    */
   public JointCriterionSelector(OperatorDescription description) {
      super(description);
   }

   /************************************************************************************************
    * PUBLIC METHODS
    ***********************************************************************************************/

   /**
    * Computes the joint-criterion order for the input example-set, stores it in the two special
    * attributes {@code jc_order} and {@code jc_selected}, and records the column names and the
    * sample size in the meta configuration file.
    * <p>
    * All input validation happens before the example table is modified, and the temporary working
    * columns are removed even if the selection fails.
    * 
    * @return an array containing the (extended) input example-set
    * @throws OperatorException
    *            a {@link UserError} if the sample size is smaller than 1 or larger than the
    *            example-set, or if the SNMI column named in the meta configuration does not exist
    * @throws NumberFormatException
    *            if a column starting with the cluster column prefix does not end in an integer
    * 
    * @see com.rapidminer.operator.Operator#apply()
    */
   @Override
   public IOObject[] apply() throws OperatorException {
      // get example set
      ExampleSet exampleSet = this.getInput(ExampleSet.class);
      int exampleSetSize = exampleSet.size();
      this.logNote("Input example-set has " + exampleSetSize + " elements.");

      // get parameters
      String metaFileName = this.getParameterAsString(PARAMETER_META_FILENAME);
      String selectorFileName = this.getParameterAsString(PARAMETER_SELECTOR_FILENAME);
      MetaConfig mc = MetaConfig.load(metaFileName);
      String snmiColumnName = mc.getSnmiColumnName();
      String clusterColumnPrefix = mc.getClusteringColumnPrefix();

      int sampleSize = this.getParameterAsInt(PARAMETER_SAMPLE_SIZE);
      if (sampleSize < 1 || sampleSize > exampleSetSize) {
         throw new UserError(this, 116, new Object[] { PARAMETER_SAMPLE_SIZE, sampleSize });
      }
      this.logNote("Requested clustering sample size: " + sampleSize);

      // validate the snmi-column BEFORE touching the example table, so that a failure does not
      // leave half-initialized columns behind
      Attribute snmiAttr = exampleSet.getAttributes().get(snmiColumnName);
      if (snmiAttr == null) {
         throw new UserError(this, 111, snmiColumnName);
      }

      // Collect the cluster columns once: the attribute and the example index encoded in the
      // suffix of its name. This is loop-invariant, so it must not be re-parsed per row.
      // A non-numeric suffix raises a NumberFormatException here, i.e. before any modification.
      // NOTE(review): assumes iterating the attribute view skips the special attributes that are
      // registered below, so collecting before the registration is equivalent - confirm.
      List<Attribute> clusterAttrs = new ArrayList<Attribute>();
      List<Integer> clusterTargets = new ArrayList<Integer>();
      for (Attribute attr : exampleSet.getAttributes()) {
         String attrName = attr.getName();
         if (attrName.startsWith(clusterColumnPrefix)) {
            clusterAttrs.add(attr);
            clusterTargets.add(Integer.valueOf(attrName.substring(clusterColumnPrefix.length())));
         }
      }

      // create attributes for the selection flag and for a general order of selection
      Attribute jcOrderAttr = AttributeFactory.createAttribute(JOINT_CRITERION_COLUMN_NAME_ORDER, Ontology.INTEGER);
      Attribute jcSelectedAttr = AttributeFactory.createAttribute(JOINT_CRITERION_COLUMN_NAME_SELECTED,
            Ontology.NOMINAL);
      exampleSet.getExampleTable().addAttribute(jcOrderAttr);
      exampleSet.getExampleTable().addAttribute(jcSelectedAttr);

      // add attribute to view
      exampleSet.getAttributes().setSpecialAttribute(jcOrderAttr, JOINT_CRITERION_COLUMN_NAME_ORDER);
      exampleSet.getAttributes().setSpecialAttribute(jcSelectedAttr, JOINT_CRITERION_COLUMN_NAME_SELECTED);

      // add dummy attribute-columns (only to the table, never to the view)
      Attribute workingSNMIAttr = AttributeFactory.createAttribute(WORKING_COLUMN_NAME + "1", Ontology.REAL);
      Attribute workingNMIAttr = AttributeFactory.createAttribute(WORKING_COLUMN_NAME + "2", Ontology.REAL);
      Attribute workingSumAttr = AttributeFactory.createAttribute(WORKING_COLUMN_NAME + "3", Ontology.REAL);
      exampleSet.getExampleTable().addAttribute(workingSNMIAttr);
      exampleSet.getExampleTable().addAttribute(workingNMIAttr);
      exampleSet.getExampleTable().addAttribute(workingSumAttr);

      try {
         // selected[k] is true as soon as example k got its position in the order
         boolean[] selected = new boolean[exampleSetSize];

         // running sums over all examples selected so far
         double sumSNMIs = 0.0d;
         double sumNMIs = 0.0d;

         // every round selects exactly one example; the round number is its order index
         for (int order = 0; order < exampleSetSize; order++) {

            // score every remaining candidate as if it were selected next
            // NOTE(review): assumes data-row index j of the table corresponds to the j-th example
            // of the example-set iteration used for the maximum search below - confirm.
            for (int j = 0; j < exampleSetSize; j++) {
               if (selected[j]) {
                  // not relevant anymore (its working values stay at negative infinity)
                  continue;
               }

               DataRow curRow = exampleSet.getExampleTable().getDataRow(j);
               double curSNMI = curRow.get(snmiAttr);

               double curNMI = 0.0d;
               for (int k = 0; k < clusterAttrs.size(); k++) {
                  int target = clusterTargets.get(k).intValue();
                  // a suffix outside the index range can never refer to a selected example
                  if (target >= 0 && target < exampleSetSize && selected[target]) {
                     curNMI += 1.0d - curRow.get(clusterAttrs.get(k));
                  }
               }

               double newSumSNMIs = sumSNMIs + curSNMI;
               double newSumNMIs = sumNMIs + curNMI;
               curRow.set(workingSNMIAttr, newSumSNMIs);
               curRow.set(workingNMIAttr, newSumNMIs);
               curRow.set(workingSumAttr, ALPHA * newSumSNMIs + (1.0d - ALPHA) * newSumNMIs);
            }

            // find maximum and mark it as selected
            int selectedIndex = indexOfMaximum(exampleSet, workingSumAttr);
            selected[selectedIndex] = true;

            // get new basic values for the calculation
            DataRow selectedRow = exampleSet.getExampleTable().getDataRow(selectedIndex);
            sumSNMIs = selectedRow.get(workingSNMIAttr);
            sumNMIs = selectedRow.get(workingNMIAttr);

            // reset working values of this selected index so that they are no longer relevant
            selectedRow.set(workingSNMIAttr, Double.NEGATIVE_INFINITY);
            selectedRow.set(workingNMIAttr, Double.NEGATIVE_INFINITY);
            selectedRow.set(workingSumAttr, Double.NEGATIVE_INFINITY);

            // set ordering-index
            selectedRow.set(jcOrderAttr, order);

            // set selected flag: only the first 'sampleSize' examples of the order are selected
            exampleSet.getExample(selectedIndex).setValue(jcSelectedAttr, order < sampleSize ? "true" : "false");
         }
      }
      finally {
         // remove working attributes, also when the selection was aborted by an exception
         exampleSet.getExampleTable().removeAttribute(workingSNMIAttr);
         exampleSet.getExampleTable().removeAttribute(workingNMIAttr);
         exampleSet.getExampleTable().removeAttribute(workingSumAttr);
      }

      // write meta config
      mc.setSelectorFileName(selectorFileName);
      ClusteringInfo ci = new ClusteringInfo();
      ci.setInfoColumnName(JOINT_CRITERION_COLUMN_NAME_ORDER);
      ci.setSelectedColumnName(JOINT_CRITERION_COLUMN_NAME_SELECTED);
      ci.setSampleSize(sampleSize);
      mc.getClusteringInfo().put(this.getClass().getName(), ci);
      mc.save(metaFileName);

      return new IOObject[] { exampleSet };
   }

   /************************************************************************************************
    * PRIVATE METHODS
    ***********************************************************************************************/

   /**
    * Returns the position (in iteration order of the example-set) of the example that sorts last
    * with respect to the given attribute.
    * <p>
    * The search deliberately sorts {@link SortingIndex} entries instead of scanning for the
    * maximum, so that ties and special values are resolved exactly as defined by
    * {@link SortingIndex}'s ordering.
    * 
    * @param exampleSet
    *           the example-set to search; must not be empty
    * @param attribute
    *           the numerical attribute to compare
    * @return the index stored in the last entry after sorting
    */
   private static int indexOfMaximum(ExampleSet exampleSet, Attribute attribute) {
      List<SortingIndex> sortedIndex = new ArrayList<SortingIndex>(exampleSet.size());
      int position = 0;
      Iterator<Example> it = exampleSet.iterator();
      while (it.hasNext()) {
         Example example = it.next();
         sortedIndex.add(new SortingIndex(Double.valueOf(example.getNumericalValue(attribute)), position));
         position++;
      }
      Collections.sort(sortedIndex);
      return sortedIndex.get(sortedIndex.size() - 1).getIndex();
   }

}