/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.learner.clustering; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import com.rapidminer.example.Attribute; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.Tools; import com.rapidminer.example.set.Condition; import com.rapidminer.example.set.ConditionedExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.operator.IOObject; import com.rapidminer.operator.InputDescription; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.tools.Ontology; /** * Labels an example set with the cluster ids from a given cluster model. * * @author Michael Wurst, Ingo Mierswa * @version $Id: FlatClusterModel2ExampleSet.java,v 1.12 2008/09/12 10:30:43 tobiasmalbrecht Exp $ */ public class FlatClusterModel2ExampleSet extends Operator { /** The parameter name for "should the cluster values be added as label as well" */ public static final String PARAMETER_ADD_LABEL = "add_label"; /** The parameter name for "delete the unlabelled examples" */ public static final String PARAMETER_DELETE_UNLABELED = "delete_unlabeled"; public FlatClusterModel2ExampleSet(OperatorDescription description) { super(description); } public IOObject[] apply() throws OperatorException { ExampleSet es = getInput(ExampleSet.class); Tools.checkAndCreateIds(es); ClusterModel clusterModel = getInput(ClusterModel.class); if(clusterModel instanceof HierarchicalClusterModel) { clusterModel = new FlattendClusterModel((HierarchicalClusterModel)clusterModel); } FlatClusterModel cm = (FlatClusterModel)clusterModel; boolean labelAttribute = getParameterAsBoolean(PARAMETER_ADD_LABEL); String[] uniqueLabels = new String[cm.getNumberOfClusters()]; Map<String, Integer> duplicatesCounter = new HashMap<String, Integer>(); for (int i = 0; i < cm.getNumberOfClusters(); i++) { String description = cm.getClusterAt(i).getDescription(); if (description.length() == 0) description = "cl"; if (duplicatesCounter.get(description) == null) { duplicatesCounter.put(description, 1); uniqueLabels[i] = description; } else { int count = duplicatesCounter.get(description); String baseDescription = (description + "_" + count); String newDescription = baseDescription; int secondaryCounter = 0; while (duplicatesCounter.get(newDescription) != null) { newDescription = baseDescription + "_" + secondaryCounter; secondaryCounter++; } uniqueLabels[i] = newDescription; if (newDescription.equals(baseDescription)) { duplicatesCounter.put(description, count + 1); } else { duplicatesCounter.put(newDescription, 1); } } } Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL); Attribute label = AttributeFactory.createAttribute("label", Ontology.NOMINAL); es.getExampleTable().addAttribute(cluster); es.getAttributes().setCluster(cluster); if (labelAttribute) { es.getExampleTable().addAttribute(label); es.getAttributes().setLabel(label); } Iterator<Example> r = es.iterator(); int numExamples = 0; while (r.hasNext()) { Example e = r.next(); int index = getBestIndex(cm, IdUtils.getIdFromExample(e)); log("Index of id " + IdUtils.getIdFromExample(e) + ":" + index); if (index >= 0) { e.setValue(cluster, cluster.getMapping().mapString(uniqueLabels[index])); } else { e.setValue(cluster, Double.NaN); } if (labelAttribute) { if (index >= 0) { e.setLabel(label.getMapping().mapString(uniqueLabels[index])); numExamples++; } else { e.setLabel(Double.NaN); } } } if (getParameterAsBoolean(PARAMETER_DELETE_UNLABELED)) { return new IOObject[] { new ConditionedExampleSet(es, new Condition() { private static final long serialVersionUID = -305063040412493813L; public boolean conditionOk(Example example) { return !Double.isNaN(example.getLabel()); } /** * @deprecated Conditions should not be able to be changed dynamically and hence there is no need for a copy */ @Deprecated public Condition duplicate() { return this; } }) }; } else { return new IOObject[] { es}; } } private int getBestIndex(FlatClusterModel cm, String id) { int result = -1; for (int i = 0; (i < cm.getNumberOfClusters()) && (result < 0); i++) { if (cm.getClusterAt(i).contains(id)) { result = i; } } return result; } public InputDescription getInputDescription(Class cls) { if (ClusterModel.class.isAssignableFrom(cls)) { return new InputDescription(cls, true, true); } else { return super.getInputDescription(cls); } } public Class<?>[] getInputClasses() { return new Class[] { ClusterModel.class, ExampleSet.class }; } public Class<?>[] getOutputClasses() { return new Class[] { ExampleSet.class }; } public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); types.add(new ParameterTypeBoolean(PARAMETER_ADD_LABEL, "should the cluster values be added as label as well", false)); types.add(new ParameterTypeBoolean(PARAMETER_DELETE_UNLABELED, "delete the unlabelled examples", false)); return types; } }