/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.validation.clustering;

import java.util.List;

import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.InputDescription;
import com.rapidminer.operator.MissingIOObjectException;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.learner.clustering.ClusterModel;
import com.rapidminer.operator.learner.clustering.ClustererPreconditions;
import com.rapidminer.operator.learner.clustering.FlatClusterModel;
import com.rapidminer.operator.performance.EstimatedPerformance;
import com.rapidminer.operator.performance.PerformanceCriterion;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.operator.similarity.SimilarityMeasure;
import com.rapidminer.tools.IterationArrayList;

/**
 * Evaluates a flat cluster model by means of density measures. At present the only
 * measure computed is the average within cluster similarity (or distance, depending on
 * the kind of {@link SimilarityMeasure} delivered as input): once over all clusters and
 * once for every single cluster.
 *
 * @author Michael Wurst, Ingo Mierswa
 * @version $Id: ClusterDensityEvaluator.java,v 1.6 2008/07/07 07:06:45 ingomierswa Exp $
 */
public class ClusterDensityEvaluator extends Operator {

    /**
     * Overall average within cluster similarity/distance determined by the most recent
     * call of {@link #apply()}; 0.0 until then. Published as the "clusterdensity" value.
     */
    private double avgClusterSim = 0.0;

    /**
     * Creates the operator and registers the "clusterdensity" value, which reports the
     * current content of {@link #avgClusterSim}.
     *
     * @param description the operator description handed on to the super class
     */
    public ClusterDensityEvaluator(OperatorDescription description) {
        super(description);
        addValue(new ValueDouble("clusterdensity", "Avg. within cluster similarity/distance", false) {
            public double getDoubleValue() {
                return avgClusterSim;
            }
        });
    }

    /**
     * Returns a dedicated input description for similarity measures and cluster models
     * and defers to the super class for every other class.
     *
     * @param cls the input class a description is requested for
     * @return an {@link InputDescription} built with the flags (false, true) if
     *         {@code cls} is a {@link SimilarityMeasure} or {@link ClusterModel} type,
     *         otherwise the description of the super class
     */
    public InputDescription getInputDescription(Class cls) {
        boolean handledHere = SimilarityMeasure.class.isAssignableFrom(cls)
                || ClusterModel.class.isAssignableFrom(cls);
        return handledHere ? new InputDescription(cls, false, true) : super.getInputDescription(cls);
    }

    /**
     * @return the required inputs: a {@link ClusterModel} and a {@link SimilarityMeasure}
     */
    public Class<?>[] getInputClasses() {
        return new Class[] { ClusterModel.class, SimilarityMeasure.class };
    }

    /**
     * @return the delivered output: a {@link PerformanceVector}
     */
    public Class<?>[] getOutputClasses() {
        return new Class[] { PerformanceVector.class };
    }

    /**
     * Computes the average within cluster similarity/distance for the given flat cluster
     * model and appends the results to a performance vector. The overall average is
     * added first, followed by one criterion per cluster. An already available
     * {@link PerformanceVector} input is extended; if none is available a new one is
     * created.
     *
     * @return an array holding only the performance vector
     * @throws OperatorException if an input is missing, or (as {@link UserError} 122) if
     *         the cluster model is not a {@link FlatClusterModel}; the
     *         {@link ClustererPreconditions} checks are invoked as well
     */
    public IOObject[] apply() throws OperatorException {
        SimilarityMeasure sim = getInput(SimilarityMeasure.class);
        ClusterModel clusterModel = getInput(ClusterModel.class);
        if (!(clusterModel instanceof FlatClusterModel)) {
            throw new UserError(this, 122, "flat cluster model");
        }

        FlatClusterModel cm = (FlatClusterModel) clusterModel;
        ClustererPreconditions.hasClusters(cm);
        ClustererPreconditions.isNonEmpty(cm);

        PerformanceVector performance = null;
        try {
            performance = getInput(PerformanceVector.class);
        } catch (MissingIOObjectException ignored) {
            // Deliberately ignored: without an incoming performance vector a fresh one is used.
        }
        if (performance == null) {
            performance = new PerformanceVector();
        }

        // Slots 0..n-1 hold the per-cluster averages, slot n the average over all clusters.
        double[] avgWithinClusterSims = withinClusterAvgSim(cm, sim);
        final int clusterCount = cm.getNumberOfClusters();
        avgClusterSim = avgWithinClusterSims[clusterCount];

        // The trailing boolean is true for distances and false for similarities
        // (presumably "smaller is better" -- confirm against EstimatedPerformance).
        boolean overallIsDistance = sim.isDistance();
        String overallName = overallIsDistance
                ? "Avg. within cluster distance"
                : "Avg. within cluster similarity";
        PerformanceCriterion withinClusterSim =
                new EstimatedPerformance(overallName, avgClusterSim, 1, overallIsDistance);
        performance.addCriterion(withinClusterSim);

        for (int clusterIndex = 0; clusterIndex < clusterCount; clusterIndex++) {
            boolean isDistance = sim.isDistance();
            String namePrefix = isDistance
                    ? "Avg. within cluster distance for cluster "
                    : "Avg. within cluster similarity for cluster ";
            PerformanceCriterion withinSingleClusterSim = new EstimatedPerformance(
                    namePrefix + cm.getClusterAt(clusterIndex).getId(),
                    avgWithinClusterSims[clusterIndex],
                    1,
                    isDistance);
            performance.addCriterion(withinSingleClusterSim);
        }

        return new IOObject[] { performance };
    }

    /**
     * Averages the similarity values of all object pairs that share a cluster.
     * Each unordered pair is visited once; the pairing of an object with itself is
     * visited as well. Pairs for which the measure reports no defined similarity are
     * left out of both the sum and the count.
     *
     * NOTE(review): whether the self pairs are meant to contribute cannot be told from
     * this class alone -- confirm before changing the inner loop start.
     *
     * @param cm the flat cluster model whose clusters are examined
     * @param sim the measure delivering the pairwise values
     * @return an array of length (number of clusters + 1): index i holds the average of
     *         cluster i, the last index holds the average over all clusters. An average
     *         without any contributing pair is a floating point division 0.0 / 0 and
     *         therefore NaN.
     */
    private double[] withinClusterAvgSim(FlatClusterModel cm, SimilarityMeasure sim) {
        final int clusterCount = cm.getNumberOfClusters();
        double[] averages = new double[clusterCount + 1];

        double totalSum = 0.0;
        int totalPairs = 0;

        for (int clusterIndex = 0; clusterIndex < clusterCount; clusterIndex++) {
            List<String> members = new IterationArrayList<String>(cm.getClusterAt(clusterIndex).getObjects());
            final int memberCount = members.size();

            double clusterSum = 0;
            int clusterPairs = 0;

            for (int first = 0; first < memberCount; first++) {
                String firstId = members.get(first);
                // Starting at 'first' covers every unordered pair once, including (x, x).
                for (int second = first; second < memberCount; second++) {
                    String secondId = members.get(second);
                    if (!sim.isSimilarityDefined(firstId, secondId)) {
                        continue;
                    }
                    double value = sim.similarity(firstId, secondId);
                    totalSum += value;
                    clusterSum += value;
                    totalPairs++;
                    clusterPairs++;
                }
            }

            averages[clusterIndex] = clusterSum / clusterPairs;
        }

        averages[clusterCount] = totalSum / totalPairs;
        return averages;
    }
}