/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.validation.clustering.itemdistribution;
import java.util.List;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.InputDescription;
import com.rapidminer.operator.MissingIOObjectException;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.learner.clustering.FlatClusterModel;
import com.rapidminer.operator.performance.EstimatedPerformance;
import com.rapidminer.operator.performance.PerformanceCriterion;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeStringCategory;
import com.rapidminer.tools.ClassNameMapper;
/**
 * Evaluates flat cluster models on how well the items are distributed over the clusters.
 * <p>
 * The operator reads a {@link FlatClusterModel} from its input, counts the objects in every
 * cluster and passes these counts to the {@link ItemDistributionMeasure} selected through the
 * {@link #PARAMETER_MEASURE} parameter. The resulting value is added as a criterion to a
 * {@link PerformanceVector}: the one found in the input if there is one, a new one otherwise.
 *
 * @author Michael Wurst
 * @version $Id: ItemDistributionEvaluator.java,v 1.8 2008/09/12 10:30:06 tobiasmalbrecht Exp $
 *
 */
public class ItemDistributionEvaluator extends Operator {

    /** Name of the parameter that selects the item distribution measure. */
    public static final String PARAMETER_MEASURE = "measure";

    /** Fully qualified class names of the measures offered by this operator. */
    private static final String[] DEFAULT_MEASURES = {
        "com.rapidminer.operator.validation.clustering.itemdistribution.SumOfSquares",
        "com.rapidminer.operator.validation.clustering.itemdistribution.GiniCoefficient"
    };

    /**
     * Mapper built from {@link #DEFAULT_MEASURES}. Created on first use by
     * {@link #getMeasureMap()}; never access this field directly.
     */
    private ClassNameMapper measureMap;

    /** Result of the most recent evaluation, exposed as the "item_distribution" value. */
    private double itemDistribution = 0;

    /**
     * Constructor for ItemDistributionEvaluator. Registers the "item_distribution" value,
     * which reports the result of the most recent call to {@link #apply()}.
     *
     * @param description the operator description handed on to the superclass
     */
    public ItemDistributionEvaluator(OperatorDescription description) {
        super(description);
        addValue(new ValueDouble("item_distribution", "The distribution of items over clusters.", false) {
            public double getDoubleValue() {
                return itemDistribution;
            }
        });
    }

    /**
     * Returns a dedicated input description for flat cluster models and defers to the
     * superclass for every other class.
     *
     * @param cls the input class to describe
     * @return an {@link InputDescription} created with both boolean flags set to {@code true}
     *         if {@code cls} is a {@link FlatClusterModel} type, the superclass result otherwise
     */
    public InputDescription getInputDescription(Class cls) {
        if (FlatClusterModel.class.isAssignableFrom(cls)) {
            return new InputDescription(cls, true, true);
        }
        return super.getInputDescription(cls);
    }

    /**
     * Counts the objects per cluster, evaluates the selected measure on these counts and
     * adds the result as an "Item distribution" criterion to a performance vector.
     *
     * @return an array holding the single (new or extended) {@link PerformanceVector}
     * @throws OperatorException if the cluster model input is missing or the measure
     *         cannot be obtained
     */
    public IOObject[] apply() throws OperatorException {
        FlatClusterModel model = getInput(FlatClusterModel.class);
        // Go through the accessor: the mapper may not have been created yet if
        // getParameterTypes() was never invoked on this instance.
        ItemDistributionMeasure distrMeasure = (ItemDistributionMeasure) getMeasureMap().getInstantiation(getParameterAsString(PARAMETER_MEASURE));

        int numberOfClusters = model.getNumberOfClusters();
        int[] count = new int[numberOfClusters];
        int totalNumberOfItems = 0;
        for (int i = 0; i < numberOfClusters; i++) {
            int numItemsInCluster = model.getClusterAt(i).getNumberOfObjects();
            count[i] = numItemsInCluster;
            totalNumberOfItems += numItemsInCluster;
        }

        PerformanceVector performance = null;
        try {
            performance = getInput(PerformanceVector.class);
        } catch (MissingIOObjectException ignored) {
            // Deliberately ignored: if no performance vector is available a new one is created below.
        }
        if (performance == null) {
            performance = new PerformanceVector();
        }

        itemDistribution = distrMeasure.evaluate(count, totalNumberOfItems);
        PerformanceCriterion pc = new EstimatedPerformance("Item distribution", itemDistribution, 1, false);
        performance.addCriterion(pc);
        return new IOObject[] { performance };
    }

    /**
     * @return the required input classes: a {@link FlatClusterModel}
     */
    public Class<?>[] getInputClasses() {
        return new Class[] { FlatClusterModel.class };
    }

    /**
     * @return the delivered output classes: a {@link PerformanceVector}
     */
    public Class<?>[] getOutputClasses() {
        return new Class[] { PerformanceVector.class };
    }

    /**
     * Extends the parameters of the superclass by the non-expert string category parameter
     * {@link #PARAMETER_MEASURE}, whose categories are the short class names of the
     * available measures.
     *
     * @return the parameter types of the superclass plus the measure parameter
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        ParameterType type = new ParameterTypeStringCategory(PARAMETER_MEASURE, "the item distribution measure to apply", getMeasureMap().getShortClassNames());
        type.setExpert(false);
        types.add(type);
        return types;
    }

    /**
     * Returns the mapper for the default measures, creating it on first use so that it is
     * available no matter whether {@link #getParameterTypes()} or {@link #apply()} runs first.
     */
    private ClassNameMapper getMeasureMap() {
        if (measureMap == null) {
            measureMap = new ClassNameMapper(DEFAULT_MEASURES);
        }
        return measureMap;
    }
}