/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.clustering.clusterer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Tools;
import com.rapidminer.gui.ExampleVisualizer;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.clustering.DendogramHierarchicalClusterModel;
import com.rapidminer.operator.clustering.HierarchicalClusterLeafNode;
import com.rapidminer.operator.clustering.HierarchicalClusterModel;
import com.rapidminer.operator.clustering.HierarchicalClusterNode;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.DistanceMeasurePrecondition;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetPassThroughRule;
import com.rapidminer.operator.ports.metadata.GenerateNewMDRule;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeStringCategory;
import com.rapidminer.tools.ObjectVisualizerService;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.math.similarity.DistanceMeasure;
import com.rapidminer.tools.math.similarity.DistanceMeasureHelper;
import com.rapidminer.tools.math.similarity.DistanceMeasures;
/**
 * This operator implements agglomerative (bottom-up hierarchical) clustering. It offers the three
 * linkage strategies SingleLink, CompleteLink and AverageLink; the last one is also called UPGMA.
 * The result is a hierarchical cluster model that carries the merge distances, so that it can be
 * plotted as a dendrogram.
 *
 * @author Sebastian Land
 */
public class AgglomerativeClustering extends Operator {

    private InputPort exampleSetInput = getInputPorts().createPort("example set", new ExampleSetMetaData());
    private OutputPort modelOutput = getOutputPorts().createPort("cluster model");
    private OutputPort exampleSetOutput = getOutputPorts().createPort("example set");

    private DistanceMeasureHelper measureHelper = new DistanceMeasureHelper(this);

    /** Key of the parameter that selects the linkage strategy. */
    public static final String PARAMETER_MODE = "mode";

    /** Allowed values of {@link #PARAMETER_MODE}; the first entry is the default. */
    public static final String[] modes = new String[] {
        "SingleLink",
        "CompleteLink",
        "AverageLink"};

    /**
     * Creates the operator and registers its meta data rules: the model output announces a
     * {@link HierarchicalClusterModel}, and the example set is passed through with an additional
     * integer attribute named {@link Attributes#ID_NAME}.
     *
     * @param description the operator description handed on to the super class
     */
    public AgglomerativeClustering(OperatorDescription description) {
        super(description);
        exampleSetInput.addPrecondition(new DistanceMeasurePrecondition(exampleSetInput, this));
        getTransformer().addRule(new GenerateNewMDRule(modelOutput, new MetaData(HierarchicalClusterModel.class)));
        getTransformer().addRule(new ExampleSetPassThroughRule(exampleSetInput, exampleSetOutput, SetRelation.EQUAL) {
            @Override
            public ExampleSetMetaData modifyExampleSet(ExampleSetMetaData metaData) {
                // doWork() calls Tools.checkAndCreateIds, so the delivered set always has an id
                metaData.addAttribute(new AttributeMetaData(Attributes.ID_NAME, Ontology.INTEGER, Attributes.ID_NAME));
                return metaData;
            }
        });
    }

    /**
     * Builds the cluster hierarchy bottom up: every example starts as its own leaf cluster, the
     * pairwise distances are stored in a distance matrix, and the selected linkage method then
     * repeatedly picks the next two clusters to merge until only the root node is left. The
     * resulting model is delivered at the model port and the example set at the example set port.
     *
     * @throws OperatorException if the input is empty or otherwise fails the checks, if a
     *         parameter cannot be read, or if the process is stopped while the operator is running
     */
    @Override
    public void doWork() throws OperatorException {
        ExampleSet exampleSet = exampleSetInput.getData();
        DistanceMeasure measure = measureHelper.getInitializedMeasure(exampleSet);

        // additional checks
        Tools.onlyNonMissingValues(exampleSet, "AgglomerativeClustering");
        Tools.checkAndCreateIds(exampleSet);

        // Without examples there is no root node to build the model from. Fail with a clear
        // message instead of a NoSuchElementException when fetching the root further below.
        int numberOfExamples = exampleSet.size();
        if (numberOfExamples == 0) {
            throw new OperatorException("AgglomerativeClustering: the example set is empty, no cluster hierarchy can be built.");
        }

        Attribute idAttribute = exampleSet.getAttributes().getId();
        boolean idAttributeIsNominal = idAttribute.isNominal();

        DistanceMatrix matrix = new DistanceMatrix(numberOfExamples);
        Map<Integer, HierarchicalClusterNode> clusterMap = new HashMap<Integer, HierarchicalClusterNode>(numberOfExamples);
        int[] clusterIds = new int[numberOfExamples];

        // filling the distance matrix and creating one leaf cluster per example
        int nextClusterId = 0;
        for (Example example1 : exampleSet) {
            checkForStop();
            clusterIds[nextClusterId] = nextClusterId;
            int y = 0;
            for (Example example2 : exampleSet) {
                // only pairs with y > x are stored, i.e. each unordered pair exactly once
                if (y > nextClusterId) {
                    matrix.set(nextClusterId, y, measure.calculateDistance(example1, example2));
                }
                y++;
            }
            // the leaf keeps the example's id, as a string for nominal ids and as a number otherwise
            if (idAttributeIsNominal) {
                clusterMap.put(nextClusterId, new HierarchicalClusterLeafNode(nextClusterId, example1.getValueAsString(idAttribute)));
            } else {
                clusterMap.put(nextClusterId, new HierarchicalClusterLeafNode(nextClusterId, example1.getValue(idAttribute)));
            }
            nextClusterId++;
        }

        // creating linkage method
        AbstractLinkageMethod linkage = createLinkageMethod(matrix, clusterIds);

        // now building agglomerative tree bottom up
        while (clusterMap.size() > 1) {
            // keep the operator responsive to stop requests during the merge phase as well
            checkForStop();
            Agglomeration agglomeration = linkage.getNextAgglomeration(nextClusterId, clusterMap);
            HierarchicalClusterNode newNode = new HierarchicalClusterNode(nextClusterId, agglomeration.getDistance());
            // the two merged clusters become children of the new node and leave the working set
            newNode.addSubNode(clusterMap.remove(agglomeration.getClusterId1()));
            newNode.addSubNode(clusterMap.remove(agglomeration.getClusterId2()));
            clusterMap.put(nextClusterId, newNode);
            nextClusterId++;
        }

        // creating model: the single remaining entry is the root of the hierarchy
        HierarchicalClusterModel model = new DendogramHierarchicalClusterModel(clusterMap.values().iterator().next());

        // registering visualizer
        ObjectVisualizerService.addObjectVisualizer(model, new ExampleVisualizer((ExampleSet) exampleSet.clone()));

        modelOutput.deliver(model);
        exampleSetOutput.deliver(exampleSet);
    }

    /**
     * Creates the linkage method selected by {@link #PARAMETER_MODE}. Any value other than
     * "CompleteLink" or "AverageLink" results in single linkage.
     *
     * @param matrix the pairwise distance matrix of the examples
     * @param clusterIds the initial cluster ids, one per example
     * @return the linkage method working on the given matrix and cluster ids
     * @throws OperatorException if the mode parameter cannot be read
     */
    private AbstractLinkageMethod createLinkageMethod(DistanceMatrix matrix, int[] clusterIds) throws OperatorException {
        String mode = getParameterAsString(PARAMETER_MODE);
        if (modes[1].equals(mode)) {
            return new CompleteLinkageMethod(matrix, clusterIds);
        }
        if (modes[2].equals(mode)) {
            return new AverageLinkageMethod(matrix, clusterIds);
        }
        return new SingleLinkageMethod(matrix, clusterIds);
    }

    /**
     * Decides whether the given output port should be connected automatically. For the example
     * set output this depends on the boolean parameter "keep_example_set"; all other ports use
     * the default behavior of the super class.
     *
     * @param port the output port in question
     * @return true if the port should be auto-connected
     */
    @Override
    public boolean shouldAutoConnect(OutputPort port) {
        if (port == exampleSetOutput) {
            // NOTE(review): "keep_example_set" is not added by getParameterTypes() of this class;
            // presumably a legacy parameter -- confirm where it is defined.
            return getParameterAsBoolean("keep_example_set");
        } else {
            return super.shouldAutoConnect(port);
        }
    }

    /**
     * Returns the parameters of the super class, followed by the non-expert linkage mode
     * parameter and the parameters needed to configure the distance measure.
     *
     * @return the list of parameter types of this operator
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterTypeStringCategory type = new ParameterTypeStringCategory(PARAMETER_MODE, "Specifies the cluster mode.", modes, modes[0], false);
        type.setExpert(false);
        types.add(type);

        types.addAll(DistanceMeasures.getParameterTypes(this));
        return types;
    }
}