/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.learner.clustering.characterization; import com.rapidminer.example.Attribute; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.operator.Model; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.learner.clustering.FlatClusterModel; import com.rapidminer.operator.learner.clustering.IdUtils; import com.rapidminer.operator.learner.clustering.MutableCluster; import com.rapidminer.operator.learner.clustering.clusterer.AbstractClustering; import com.rapidminer.tools.Ontology; /** * This is the abstract superclass for all cluster characterizers * which uses some classification model as a base for its * characterization. * * @author Michael Wurst, Ingo Mierswa * @version $Id: AbstractModelBasedCharacterizer.java,v 1.6 2008/09/12 10:32:22 tobiasmalbrecht Exp $ */ public abstract class AbstractModelBasedCharacterizer { /** * Train the model for a single cluster. * * @param es the example set * @return a model */ public abstract Model trainModel(ExampleSet es); /** * Extract a string representation well suited for a tooltip. * * @param m * the model * @return a String */ public abstract String stringRepresentation(Model m, String desiredLabel); public void addCharacterization(FlatClusterModel cm, ExampleSet es) throws OperatorException { Attribute originalLabel = es.getAttributes().getLabel(); Attribute clusterLabel = AttributeFactory.createAttribute("label", Ontology.NOMINAL); es.getExampleTable().addAttribute(clusterLabel); es.getAttributes().setLabel(clusterLabel); for (int i = 0; i < cm.getNumberOfClusters(); i++) { MutableCluster cl = (MutableCluster) cm.getClusterAt(i); if (!AbstractClustering.NOISE_CLUSTER_DESCRIPTION.equals(cl.getDescription())) { for (Example e : es) { String id = IdUtils.getIdFromExample(e); if (cl.contains(id)) { e.setLabel(clusterLabel.getMapping().mapString("yes")); } else { e.setLabel(clusterLabel.getMapping().mapString("no")); } } Model characteristicModel = trainModel(es); cl.setDescription(stringRepresentation(characteristicModel, "yes")); } } es.getExampleTable().removeAttribute(clusterLabel); es.getAttributes().remove(clusterLabel); if (originalLabel != null) es.getAttributes().setLabel(originalLabel); } }