package de.tud.inf.operator.mm; import java.util.HashSet; import java.util.Iterator; import java.util.Set; import com.rapidminer.example.Attribute; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.example.table.DataRow; import com.rapidminer.example.table.DataRowFactory; import com.rapidminer.example.table.MemoryExampleTable; import com.rapidminer.operator.IOContainer; import com.rapidminer.operator.IOObject; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorCreationException; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.tools.Ontology; import com.rapidminer.tools.OperatorService; import com.rapidminer.tools.att.AttributeSet; import de.tud.inf.operator.mm.util.ClusteringInfo; import de.tud.inf.operator.mm.util.MetaConfig; /** * This class implements the Convex-Hull strategy. * * {@link http://www.siam.org/proceedings/datamining/2008/dm08_71_fern.pdf} * * @version $Revision$ * @author Andre Jaehnig */ public class ConvexHullSelector extends AbstractSelector { /************************************************************************************************ * FIELDS ***********************************************************************************************/ /** Column name for the indicator which clustering is selected. */ private static final String CH_COLUMN_NAME_SELECTED = "ch_selected"; /************************************************************************************************ * GETTER & SETTER ***********************************************************************************************/ /* * none */ /************************************************************************************************ * CONSTRUCTOR ***********************************************************************************************/ /** * Constructor. * * @param description */ public ConvexHullSelector(OperatorDescription description) { super(description); } /************************************************************************************************ * PUBLIC METHODS ***********************************************************************************************/ /* * (non-Javadoc) * * @see com.rapidminer.operator.Operator#apply() */ @Override public IOObject[] apply() throws OperatorException { // get example set ExampleSet exampleSet = this.getInput(ExampleSet.class); int exampleSetSize = exampleSet.size(); this.logNote("Input example-set has " + exampleSetSize + " elements."); // get parameters String metaFileName = this.getParameterAsString(PARAMETER_META_FILENAME); String selectorFileName = this.getParameterAsString(PARAMETER_SELECTOR_FILENAME); MetaConfig mc = MetaConfig.load(metaFileName); String snmiColumnName = mc.getSnmiColumnName(); String clusterColumnPrefix = mc.getClusteringColumnPrefix(); // create attribute for the order of clusterings Attribute chSelectedAttr = AttributeFactory.createAttribute(CH_COLUMN_NAME_SELECTED, Ontology.NOMINAL); exampleSet.getExampleTable().addAttribute(chSelectedAttr); // add attribute to view exampleSet.getAttributes().setSpecialAttribute(chSelectedAttr, CH_COLUMN_NAME_SELECTED); /* * create new example set with the points for the convex hull computation */ // create attributes for each clustering AttributeSet attributeSet = new AttributeSet(); Attribute attrCi = AttributeFactory.createAttribute("Ci", Ontology.STRING); Attribute attrCj = AttributeFactory.createAttribute("Cj", Ontology.STRING); Attribute attrNMI = AttributeFactory.createAttribute("NMI", Ontology.REAL); Attribute attrAvgSNMI = AttributeFactory.createAttribute("AvgSNMI", Ontology.REAL); attributeSet.setSpecialAttribute("Ci", attrCi); attributeSet.setSpecialAttribute("Cj", attrCj); attributeSet.addAttribute(attrNMI); attributeSet.addAttribute(attrAvgSNMI); // get snmi-column-attribute of the nmi-csv-file Attribute snmiAttr = exampleSet.getAttributes().get(snmiColumnName); if (snmiAttr == null) { throw new UserError(this, 111, snmiColumnName); } // create table for the output MemoryExampleTable table = new MemoryExampleTable(attributeSet.getAllAttributes()); DataRowFactory drf = new DataRowFactory(DataRowFactory.TYPE_DOUBLE_ARRAY, '.'); // create convex hull calculator example set ExampleSet convexHullExampleSet = table.createExampleSet(attributeSet); Object[] values = new Object[4]; int counter; int rowNum = 0; Example exampleI; Example exampleJ; Example example; double snmiI, snmiJ, nmi; DataRow row = null; for (int i = 0; i < exampleSetSize; i++) { for (int j = 0; j < exampleSetSize; j++) { if (j >= i) { break; } counter = 0; // get NMI and SNMI values exampleI = exampleSet.getExample(i); exampleJ = exampleSet.getExample(j); snmiI = exampleI.getValue(snmiAttr); snmiJ = exampleJ.getValue(snmiAttr); nmi = exampleI.getValue(exampleSet.getAttributes().get(clusterColumnPrefix + j)); values[counter++] = nmi; values[counter++] = (snmiI + snmiJ) / 2.0d; // add values to the table row = drf.create(values, table.getAttributes()); table.addDataRow(row); // add special values example = convexHullExampleSet.getExample(rowNum++); example.setValue(attrCi, clusterColumnPrefix + i); example.setValue(attrCj, clusterColumnPrefix + j); } } // run convex hull calculator Operator chCalculator = null; try { chCalculator = OperatorService.createOperator(ConvexHullCalculator.class); } catch (OperatorCreationException e) { throw new UserError(this, 109); } chCalculator.apply(new IOContainer(convexHullExampleSet)); // extract those clustering names/ids that are part of the convex hull Attribute selectedAttr = convexHullExampleSet.getAttributes().getSpecial( ConvexHullCalculator.CONVEX_HULL_MEMBER_COLUMN_NAME); Iterator<Example> it = convexHullExampleSet.iterator(); Set<Integer> selectedClusteringIds = new HashSet<Integer>(); String clusteringName1, clusteringName2; while (it.hasNext()) { example = it.next(); if (example.getNominalValue(selectedAttr).equalsIgnoreCase("true")) { clusteringName1 = example.getNominalValue(attrCi); clusteringName2 = example.getNominalValue(attrCj); selectedClusteringIds.add(Integer.valueOf(clusteringName1.substring(clusterColumnPrefix.length()))); selectedClusteringIds.add(Integer.valueOf(clusteringName2.substring(clusterColumnPrefix.length()))); } } this.logNote("Sample size will be: " + selectedClusteringIds.size()); // mark the selected clusterings at the the output example set it = exampleSet.iterator(); counter = 0; while (it.hasNext()) { example = it.next(); if (selectedClusteringIds.contains(counter)) { // selected example.setValue(chSelectedAttr, "true"); } else { // not selected example.setValue(chSelectedAttr, "false"); } counter++; } // write meta config mc.setSelectorFileName(selectorFileName); ClusteringInfo ci = new ClusteringInfo(); ci.setSelectedColumnName(CH_COLUMN_NAME_SELECTED); ci.setSampleSize(selectedClusteringIds.size()); mc.getClusteringInfo().put(this.getClass().getName(), ci); mc.save(metaFileName); return new IOObject[] { exampleSet }; } /************************************************************************************************ * PRIVATE METHODS ***********************************************************************************************/ /* * none */ }