/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.clustering;

import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;

import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.MissingIOObjectException;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.performance.EstimatedPerformance;
import com.rapidminer.operator.performance.PerformanceCriterion;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.WeightedObject;

/**
 * Creates a flat cluster model from a hierarchical one by expanding nodes in the order of their
 * weight until the desired number of clusters is reached. In each step the lowest-weighted
 * selected node that still has sub nodes is replaced by its sub nodes; nodes without sub nodes
 * are kept as they are.
 *
 * @author Michael Wurst, Ingo Mierswa
 * @version $Id: FlattenClusterModel.java,v 1.10 2008/09/12 10:30:42 tobiasmalbrecht Exp $
 */
public class FlattenClusterModel extends Operator {

	/** The parameter name for "the maximal number of clusters" */
	public static final String PARAMETER_K = "k";

	/** The parameter name for "return the highest cluster similarity as performance" */
	public static final String PARAMETER_PERFORMANCE = "performance";

	/**
	 * Orders weighted cluster nodes by ascending weight and breaks ties by comparing the node ids,
	 * so that two different nodes with the same weight can both be stored in a sorted set.
	 */
	private static final class WeightThenIdComparator implements Comparator<WeightedObject<ClusterNode>> {

		public int compare(WeightedObject<ClusterNode> left, WeightedObject<ClusterNode> right) {
			if (left.getWeight() > right.getWeight()) {
				return 1;
			}
			if (left.getWeight() < right.getWeight()) {
				return -1;
			}
			return left.getObject().getId().compareTo(right.getObject().getId());
		}
	}

	public FlattenClusterModel(OperatorDescription description) {
		super(description);
	}

	/**
	 * Flattens the hierarchical cluster model delivered as input.
	 *
	 * @return the flat cluster model, followed by a performance vector if the parameter
	 *         {@link #PARAMETER_PERFORMANCE} is switched on
	 * @throws OperatorException
	 *             a {@link UserError} (code 122) if the input cluster model is not a
	 *             {@link HierarchicalClusterModel}; other operator exceptions are passed through
	 */
	public IOObject[] apply() throws OperatorException {
		ClusterModel cm = getInput(ClusterModel.class);
		if (!(cm instanceof HierarchicalClusterModel)) {
			throw new UserError(this, 122, "hierarchical cluster model");
		}
		HierarchicalClusterModel hcm = (HierarchicalClusterModel) cm;
		int maxK = getParameterAsInt(PARAMETER_K);

		TreeSet<WeightedObject<ClusterNode>> selectedNodes = selectClusterNodes(hcm.getRootNode(), maxK);

		// Turn every selected node into one flat cluster holding all objects of its subtree and
		// remember the smallest node weight on the way.
		FlatCrispClusterModel result = new FlatCrispClusterModel();
		double minSimilarity = Double.POSITIVE_INFINITY;
		for (WeightedObject<ClusterNode> selected : selectedNodes) {
			ClusterNode node = selected.getObject();
			if (node.getWeight() < minSimilarity) {
				minSimilarity = node.getWeight();
			}
			DefaultCluster cluster = new DefaultCluster(node.getId());
			Iterator<String> items = node.getObjectsInSubtree();
			while (items.hasNext()) {
				cluster.addObject(items.next());
			}
			result.addCluster(cluster);
		}

		if (!getParameterAsBoolean(PARAMETER_PERFORMANCE)) {
			return new IOObject[] { result };
		}

		PerformanceVector performance = null;
		try {
			performance = getInput(PerformanceVector.class);
		} catch (MissingIOObjectException ignored) {
			// If no performance vector is available, create a new one
		}
		if (performance == null) {
			performance = new PerformanceVector();
		}
		PerformanceCriterion pc = new EstimatedPerformance("Maximal distance withing a cluster", 1 - (1 / minSimilarity), 1, false);
		performance.addCriterion(pc);
		return new IOObject[] { result, performance };
	}

	/**
	 * Starts with the root node and repeatedly replaces the lowest-weighted node that has sub
	 * nodes by its sub nodes. Stops when at least <code>maxK</code> nodes are selected or when
	 * none of the selected nodes can be expanded any further.
	 *
	 * NOTE: the size is only checked before a node is expanded, so expanding a node with more
	 * than two sub nodes can yield more than <code>maxK</code> nodes.
	 */
	private static TreeSet<WeightedObject<ClusterNode>> selectClusterNodes(ClusterNode root, int maxK) {
		TreeSet<WeightedObject<ClusterNode>> selectedNodes = new TreeSet<WeightedObject<ClusterNode>>(new WeightThenIdComparator());
		selectedNodes.add(new WeightedObject<ClusterNode>(root, root.getWeight()));

		while (selectedNodes.size() < maxK) {
			WeightedObject<ClusterNode> expandable = findLowestExpandable(selectedNodes);
			if (expandable == null) {
				// only nodes without sub nodes are left, nothing more to split
				break;
			}
			selectedNodes.remove(expandable);
			addSubNodes(expandable.getObject(), selectedNodes);
		}
		return selectedNodes;
	}

	/**
	 * Returns the first entry (in ascending weight order) whose node has at least one sub node,
	 * or <code>null</code> if no such entry exists. Nodes without sub nodes are skipped so that a
	 * single leaf among the selected nodes does not stop the expansion while other nodes can
	 * still be split.
	 */
	private static WeightedObject<ClusterNode> findLowestExpandable(TreeSet<WeightedObject<ClusterNode>> selectedNodes) {
		for (WeightedObject<ClusterNode> candidate : selectedNodes) {
			if (candidate.getObject().getNumberOfSubNodes() > 0) {
				return candidate;
			}
		}
		return null;
	}

	/** Adds all direct sub nodes of the given node, each together with its own weight, to the given set. */
	private static void addSubNodes(ClusterNode parent, TreeSet<WeightedObject<ClusterNode>> selectedNodes) {
		Iterator<ClusterNode> subNodes = parent.getSubNodes();
		while (subNodes.hasNext()) {
			ClusterNode child = subNodes.next();
			selectedNodes.add(new WeightedObject<ClusterNode>(child, child.getWeight()));
		}
	}

	public Class<?>[] getInputClasses() {
		return new Class[] { ClusterModel.class };
	}

	public Class<?>[] getOutputClasses() {
		return new Class[] { ClusterModel.class };
	}

	/**
	 * Extends the parameters of the super class by the maximal number of clusters and the switch
	 * for delivering a performance vector.
	 */
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();
		types.add(new ParameterTypeInt(PARAMETER_K, "the maximal number of clusters", 2, Integer.MAX_VALUE, 2));
		types.add(new ParameterTypeBoolean(PARAMETER_PERFORMANCE, "return the highest cluster similarity as performance", false));
		return types;
	}
}