/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.clustering; import java.util.Comparator; import java.util.LinkedList; import java.util.List; import java.util.PriorityQueue; import com.rapidminer.example.ExampleSet; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.ports.InputPort; import com.rapidminer.operator.ports.OutputPort; import com.rapidminer.operator.ports.metadata.GenerateNewMDRule; import com.rapidminer.operator.ports.metadata.PassThroughRule; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.parameter.ParameterTypeInt; /** * Creates a flat cluster model from a hierarchical one by expanding nodes in the order of their distance until the desired number of clusters is * reached. * * @author Sebastian Land */ public class FlattenClusterModel extends Operator { public static final String PARAMETER_NUMBER_OF_CLUSTER = "number_of_clusters"; public static final String PARAMETER_REMOVE_UNLABELED = "remove_unlabeled"; public static final String PARAMETER_ADD_AS_LABEL = "add_as_label"; private InputPort hierarchicalInput = getInputPorts().createPort("hierarchical", HierarchicalClusterModel.class); private InputPort exampleSetInput = getInputPorts().createPort("example set", ExampleSet.class); private OutputPort flatOutput = getOutputPorts().createPort("flat"); private OutputPort exampleSetOutput = getOutputPorts().createPort("example set"); public FlattenClusterModel(OperatorDescription description) { super(description); getTransformer().addRule(new GenerateNewMDRule(flatOutput, ClusterModel.class)); getTransformer().addRule(new PassThroughRule(exampleSetInput, exampleSetOutput, true)); } @Override public void doWork() throws OperatorException { HierarchicalClusterModel hierarchicalModel = hierarchicalInput.getData(); ExampleSet exampleSet = exampleSetInput.getData(); flatOutput.deliver(flatten(hierarchicalModel, exampleSet)); } public ClusterModel flatten(HierarchicalClusterModel model, ExampleSet exampleSet) throws OperatorException { HierarchicalClusterNode root = model.getRootNode(); int numberOfClusters = getParameterAsInt(PARAMETER_NUMBER_OF_CLUSTER); // creating priorityQueue using reversing comparator PriorityQueue<HierarchicalClusterNode> queue = new PriorityQueue<HierarchicalClusterNode>(numberOfClusters, new Comparator<HierarchicalClusterNode>() { public int compare(HierarchicalClusterNode o1, HierarchicalClusterNode o2) { int value = -1 * Double.compare(o1.getDistance(), o2.getDistance()); if (value != 0) return value; else return -1 * Double.compare(o1.getNumberOfExamplesInSubtree(), o2.getNumberOfExamplesInSubtree()); } }); // Iteratively descend within graph by splitting at greatest node until queue is full or enough leafs are collected LinkedList<HierarchicalClusterNode> leafs = new LinkedList<HierarchicalClusterNode>(); queue.add(root); while (queue.size() < numberOfClusters - leafs.size()) { HierarchicalClusterNode topNode = queue.poll(); if (topNode.getSubNodes().size() > 0) queue.addAll(topNode.getSubNodes()); else leafs.add(topNode); } queue.addAll(leafs); // construct flat cluster model from nodes ClusterModel flatModel = new ClusterModel(exampleSet, numberOfClusters, getParameterAsBoolean(PARAMETER_ADD_AS_LABEL), getParameterAsBoolean(PARAMETER_REMOVE_UNLABELED)); int i = 0; for (HierarchicalClusterNode node: queue) { Cluster flatCluster = flatModel.getCluster(i); for (Object exampleId: node.getExampleIdsInSubtree()) { flatCluster.assignExample(exampleId); } i++; } // delivering adapted example set if (exampleSetOutput.isConnected()) { exampleSetOutput.deliver(flatModel.apply((ExampleSet)exampleSet.clone())); } return flatModel; } @Override public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); ParameterType type = new ParameterTypeInt(PARAMETER_NUMBER_OF_CLUSTER, "Specifies how many flat clusters should be created.", 1, Integer.MAX_VALUE, 3); type.setExpert(false); types.add(type); type = new ParameterTypeBoolean(PARAMETER_ADD_AS_LABEL, "Should the cluster values be added as label.", false); type.setExpert(false); types.add(type); type = new ParameterTypeBoolean(PARAMETER_REMOVE_UNLABELED, "Delete the unlabeled examples.", false); type.setExpert(false); types.add(type); return types; } }