package de.tud.inf.operator.mm; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.operator.IOObject; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.tools.Ontology; import de.tud.inf.operator.mm.util.ClusteringInfo; import de.tud.inf.operator.mm.util.MetaConfig; import de.tud.inf.operator.mm.util.SortingIndex; /** * This class implements the Quality strategy. * * {@link http://www.siam.org/proceedings/datamining/2008/dm08_71_fern.pdf} * * @version $Revision$ * @author Andre Jaehnig */ public class QualitySelector extends AbstractSelector { /************************************************************************************************ * FIELDS ***********************************************************************************************/ /** Column name with the selected flag. */ private static final String QUALITY_COLUMN_NAME_SELECTED = "quality_selected"; /** Column name with the order of the selection. */ private static final String QUALITY_COLUMN_NAME_ORDER = "quality_order"; /************************************************************************************************ * GETTER & SETTER ***********************************************************************************************/ /* * none */ /************************************************************************************************ * CONSTRUCTOR ***********************************************************************************************/ /** * Constructor. * * @param description */ public QualitySelector(OperatorDescription description) { super(description); } /************************************************************************************************ * PUBLIC METHODS ***********************************************************************************************/ /* * (non-Javadoc) * * @see com.rapidminer.operator.Operator#apply() */ @Override public IOObject[] apply() throws OperatorException { // get example set ExampleSet exampleSet = this.getInput(ExampleSet.class); int exampleSetSize = exampleSet.size(); this.logNote("Input example-set has " + exampleSetSize + " elements."); // get parameters String metaFileName = this.getParameterAsString(PARAMETER_META_FILENAME); String selectorFileName = this.getParameterAsString(PARAMETER_SELECTOR_FILENAME); MetaConfig mc = MetaConfig.load(metaFileName); String snmiColumnName = mc.getSnmiColumnName(); int sampleSize = this.getParameterAsInt(PARAMETER_SAMPLE_SIZE); if (sampleSize < 1 || sampleSize > exampleSetSize) { throw new UserError(this, 116, new Object[] { PARAMETER_SAMPLE_SIZE, sampleSize }); } this.logNote("Requested clustering sample size: " + sampleSize); // create attributes for the selection flag and for a general order of selection Attribute qualityOrderAttr = AttributeFactory.createAttribute(QUALITY_COLUMN_NAME_ORDER, Ontology.INTEGER); Attribute qualitySelectedAttr = AttributeFactory.createAttribute(QUALITY_COLUMN_NAME_SELECTED, Ontology.NOMINAL); exampleSet.getExampleTable().addAttribute(qualityOrderAttr); exampleSet.getExampleTable().addAttribute(qualitySelectedAttr); // add attribute to view exampleSet.getAttributes().setSpecialAttribute(qualityOrderAttr, QUALITY_COLUMN_NAME_ORDER); exampleSet.getAttributes().setSpecialAttribute(qualitySelectedAttr, QUALITY_COLUMN_NAME_SELECTED); // get a sorted iterator over the snmi-column of the nmi-csv-file Attribute snmiAttr = exampleSet.getAttributes().get(snmiColumnName); if (snmiAttr == null) { throw new UserError(this, 111, snmiColumnName); } List<SortingIndex> sortedIndex = new ArrayList<SortingIndex>(exampleSetSize); int counter = 0; Iterator<Example> it = exampleSet.iterator(); Example example = null; while (it.hasNext()) { example = it.next(); sortedIndex.add(new SortingIndex(Double.valueOf(example.getNumericalValue(snmiAttr)), counter)); counter++; } Collections.sort(sortedIndex); // fill selection attributes counter = 0; Iterator<SortingIndex> sortedIt = sortedIndex.iterator(); int index, orderIndex; while (sortedIt.hasNext()) { index = sortedIt.next().getIndex(); orderIndex = exampleSetSize - counter - 1; example = exampleSet.getExample(index); // set the order attribute example.setValue(qualityOrderAttr, orderIndex); // set the selection attribute if (orderIndex < sampleSize) { example.setValue(qualitySelectedAttr, "true"); } else { example.setValue(qualitySelectedAttr, "false"); } counter++; } // write meta config mc.setSelectorFileName(selectorFileName); ClusteringInfo ci = new ClusteringInfo(); ci.setInfoColumnName(QUALITY_COLUMN_NAME_ORDER); ci.setSelectedColumnName(QUALITY_COLUMN_NAME_SELECTED); ci.setSampleSize(sampleSize); mc.getClusteringInfo().put(this.getClass().getName(), ci); mc.save(metaFileName); return new IOObject[] { exampleSet }; } /************************************************************************************************ * PRIVATE METHODS ***********************************************************************************************/ /* * none */ }