package de.tud.inf.operator.mm;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.tools.Ontology;
import de.tud.inf.operator.mm.util.ClusteringInfo;
import de.tud.inf.operator.mm.util.MetaConfig;
import de.tud.inf.operator.mm.util.SortingIndex;
/**
 * Selector operator implementing the Diversity strategy.
 * <p>
 * The operator expects an example set that holds one row per clustering, a column with the SNMI
 * value of each clustering and one NMI column per clustering (recognised by the column prefix
 * configured in the meta config). It starts with the clustering that sorts last by its SNMI value
 * (the one with the highest SNMI, assuming {@code SortingIndex} orders ascending by value) and then
 * repeatedly picks the clustering whose accumulated NMI to all clusterings picked so far is
 * minimal. The position in this picking order and a "selected" flag for the first
 * {@code sampleSize} picks are written to two new special attributes.
 *
 * @see <a href="http://www.siam.org/proceedings/datamining/2008/dm08_71_fern.pdf">Paper describing
 *      the Diversity strategy (SIAM Data Mining 2008)</a>
 *
 * @version $Revision$
 * @author Andre Jaehnig
 */
public class DiversitySelector extends AbstractSelector {

    /************************************************************************************************
     * FIELDS
     ***********************************************************************************************/

    /** Column name with the selected flag. */
    private static final String DIVERSITY_COLUMN_NAME_SELECTED = "diversity_selected";

    /** Column name with the order of the selection. */
    private static final String DIVERSITY_COLUMN_NAME_ORDER = "diversity_order";

    /************************************************************************************************
     * GETTER & SETTER
     ***********************************************************************************************/

    /*
     * none
     */

    /************************************************************************************************
     * CONSTRUCTOR
     ***********************************************************************************************/

    /**
     * Constructor.
     *
     * @param description
     *            the operator description handed on to the super class
     */
    public DiversitySelector(OperatorDescription description) {
        super(description);
    }

    /************************************************************************************************
     * PUBLIC METHODS
     ***********************************************************************************************/

    /**
     * Orders all examples of the input example set by the Diversity strategy, flags the first
     * {@code sampleSize} of them as selected and records the used columns in the meta config file.
     * <p>
     * Two special attributes are added to the example set: {@value #DIVERSITY_COLUMN_NAME_ORDER}
     * (zero-based picking order) and {@value #DIVERSITY_COLUMN_NAME_SELECTED} ({@code "true"} for
     * the first {@code sampleSize} picks, {@code "false"} otherwise). A temporary working column
     * is added to the example table for the duration of the computation and is always removed
     * again, even when the computation fails.
     *
     * @return an array holding the (modified) input example set as its only element
     * @throws OperatorException
     *             a {@link UserError} with code 116 if the sample size parameter is smaller than 1
     *             or larger than the number of examples, a {@link UserError} with code 111 if the
     *             SNMI column named in the meta config is missing, or whatever the used RapidMiner
     *             and meta config calls raise
     *
     * @see com.rapidminer.operator.Operator#apply()
     */
    @Override
    public IOObject[] apply() throws OperatorException {
        // get example set
        ExampleSet exampleSet = this.getInput(ExampleSet.class);
        int exampleSetSize = exampleSet.size();
        this.logNote("Input example-set has " + exampleSetSize + " elements.");

        // get parameters
        String metaFileName = this.getParameterAsString(PARAMETER_META_FILENAME);
        String selectorFileName = this.getParameterAsString(PARAMETER_SELECTOR_FILENAME);
        MetaConfig mc = MetaConfig.load(metaFileName);
        String snmiColumnName = mc.getSnmiColumnName();
        String clusterColumnPrefix = mc.getClusteringColumnPrefix();
        int sampleSize = this.getParameterAsInt(PARAMETER_SAMPLE_SIZE);
        if (sampleSize < 1 || sampleSize > exampleSetSize) {
            throw new UserError(this, 116, new Object[] { PARAMETER_SAMPLE_SIZE, sampleSize });
        }
        this.logNote("Requested clustering sample size: " + sampleSize);

        // Validate the snmi-column BEFORE the example table is modified, so that a wrong
        // configuration does not leave half-initialised helper columns behind.
        Attribute snmiAttr = exampleSet.getAttributes().get(snmiColumnName);
        if (snmiAttr == null) {
            throw new UserError(this, 111, snmiColumnName);
        }

        // create attributes for the selection flag and for a general order of selection
        Attribute diversityOrderAttr = AttributeFactory.createAttribute(DIVERSITY_COLUMN_NAME_ORDER,
                Ontology.INTEGER);
        Attribute diversitySelectedAttr = AttributeFactory.createAttribute(DIVERSITY_COLUMN_NAME_SELECTED,
                Ontology.NOMINAL);
        exampleSet.getExampleTable().addAttribute(diversityOrderAttr);
        exampleSet.getExampleTable().addAttribute(diversitySelectedAttr);

        // add attributes to view
        exampleSet.getAttributes().setSpecialAttribute(diversityOrderAttr, DIVERSITY_COLUMN_NAME_ORDER);
        exampleSet.getAttributes().setSpecialAttribute(diversitySelectedAttr, DIVERSITY_COLUMN_NAME_SELECTED);

        // add dummy attribute-column (only to the table, it never becomes part of the view)
        Attribute workingAttr = AttributeFactory.createAttribute(WORKING_COLUMN_NAME, Ontology.REAL);
        exampleSet.getExampleTable().addAttribute(workingAttr);

        try {
            // first selected element: the last entry of the index sorted by snmi
            // (the one with the highest snmi)
            int selectedIndex = buildSortedIndex(exampleSet, snmiAttr).get(exampleSetSize - 1).getIndex();

            for (int order = 0; order < exampleSetSize; order++) {
                Example selectedExample = exampleSet.getExample(selectedIndex);

                // set ordering-index and selected flag
                selectedExample.setValue(diversityOrderAttr, order);
                selectedExample.setValue(diversitySelectedAttr, order < sampleSize ? "true" : "false");

                /*
                 * Add each nmi-value of the selected row to the working column (the 1st nmi-value
                 * of this row to the 1st value of the working-column, the 2nd nmi-value to the 2nd
                 * value of the working-column, ...). The working column is addressed through the
                 * example set, i.e. with the same positions that are used for reading it back and
                 * for selectedIndex.
                 */
                int workingColumnPos = 0;
                for (Attribute attr : exampleSet.getAttributes()) {
                    if (!attr.getName().startsWith(clusterColumnPrefix)) {
                        // not an nmi-column
                        continue;
                    }
                    Example workingExample = exampleSet.getExample(workingColumnPos);
                    if (workingColumnPos == selectedIndex) {
                        // already selected: must never be the minimum again
                        workingExample.setValue(workingAttr, Double.POSITIVE_INFINITY);
                    }
                    else {
                        double oldValue = workingExample.getNumericalValue(workingAttr);
                        double addValue = selectedExample.getNumericalValue(attr);
                        workingExample.setValue(workingAttr, oldValue + addValue);
                    }
                    workingColumnPos++;
                }

                // the minimum of the working column is the next element to select; after the
                // last element has been ordered there is nothing left to look for
                if (order < exampleSetSize - 1) {
                    selectedIndex = buildSortedIndex(exampleSet, workingAttr).get(0).getIndex();
                }
            }
        }
        finally {
            // remove working attribute, also when the selection failed
            exampleSet.getExampleTable().removeAttribute(workingAttr);
        }

        // write meta config
        mc.setSelectorFileName(selectorFileName);
        ClusteringInfo ci = new ClusteringInfo();
        ci.setInfoColumnName(DIVERSITY_COLUMN_NAME_ORDER);
        ci.setSelectedColumnName(DIVERSITY_COLUMN_NAME_SELECTED);
        ci.setSampleSize(sampleSize);
        mc.getClusteringInfo().put(this.getClass().getName(), ci);
        mc.save(metaFileName);

        return new IOObject[] { exampleSet };
    }

    /************************************************************************************************
     * PRIVATE METHODS
     ***********************************************************************************************/

    /**
     * Builds one {@link SortingIndex} per example from the numerical value of the given attribute
     * and the position of the example in iteration order, and sorts the resulting list with the
     * natural ordering of {@link SortingIndex}.
     *
     * @param exampleSet
     *            the example set to read the values from
     * @param attribute
     *            the numerical attribute whose values are indexed
     * @return the sorted list, one entry per example
     */
    private static List<SortingIndex> buildSortedIndex(ExampleSet exampleSet, Attribute attribute) {
        List<SortingIndex> index = new ArrayList<SortingIndex>(exampleSet.size());
        int position = 0;
        for (Iterator<Example> it = exampleSet.iterator(); it.hasNext(); position++) {
            Example example = it.next();
            index.add(new SortingIndex(Double.valueOf(example.getNumericalValue(attribute)), position));
        }
        Collections.sort(index);
        return index;
    }
}