/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.clustering;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.tools.math.similarity.DistanceMeasure;
/**
* This is the superclass for all centroid based cluster models and supports assigning unseen
* examples to the nearest centroid.
*
* @author Sebastian Land
*/
public class CentroidClusterModel extends ClusterModel {
private static final long serialVersionUID = 3780908886210272852L;
private Collection<String> dimensionNames;
private ArrayList<Centroid> centroids;
private DistanceMeasure distanceMeasure;
public CentroidClusterModel(ExampleSet exampleSet, int k, Collection<String> dimensionNames, DistanceMeasure distanceMeasure, boolean addClusterAsLabel, boolean removeUnknown) {
super(exampleSet, k, addClusterAsLabel, removeUnknown);
this.distanceMeasure = distanceMeasure;
this.dimensionNames = dimensionNames;
centroids = new ArrayList<Centroid>(k);
for (int i = 0; i < k; i++) {
centroids.add(new Centroid(dimensionNames.size()));
}
}
public CentroidClusterModel(ExampleSet exampleSet, int k, Attributes attributes, boolean addClusterAsLabel, boolean removeUnknown) {
super(exampleSet, k, addClusterAsLabel, removeUnknown);
List<String> dimensionNames = new LinkedList<String>();
for (Attribute attribute: attributes)
dimensionNames.add(attribute.getName());
this.dimensionNames = dimensionNames;
centroids = new ArrayList<Centroid>(k);
for (int i = 0; i < k; i++) {
centroids.add(new Centroid(dimensionNames.size()));
}
}
@Override
public int[] getClusterAssignments(ExampleSet exampleSet) {
int[] clusterAssignments = new int[exampleSet.size()];
Attribute[] attributes = new Attribute[dimensionNames.size()];
int i = 0;
for (String attributeName: dimensionNames) {
attributes[i] = exampleSet.getAttributes().get(attributeName);
i++;
}
double[] exampleValues = new double[attributes.length];
int exampleIndex = 0;
for (Example example: exampleSet) {
// copying examplevalues into double array
for (i = 0; i < attributes.length; i++)
exampleValues[i] = example.getValue(attributes[i]);
// searching for nearest centroid
int centroidIndex = 0;
int bestIndex = 0;
double minimalDistance = Double.POSITIVE_INFINITY;
for (Centroid centroid: centroids) {
double distance = distanceMeasure.calculateDistance(exampleValues, centroid.getCentroid());
if (distance < minimalDistance) {
bestIndex = centroidIndex;
minimalDistance = distance;
}
centroidIndex++;
}
clusterAssignments[exampleIndex] = bestIndex;
exampleIndex++;
}
return clusterAssignments;
}
/* This model does not need ids*/
@Override
public void checkCapabilities(ExampleSet exampleSet) throws OperatorException {
}
public String[] getAttributeNames() {
return dimensionNames.toArray(new String[dimensionNames.size()]);
}
/**
* Returns the List of all defined centroids.
*/
public List<Centroid> getCentroids() {
return centroids;
}
public double[] getCentroidCoordinates(int i) {
return centroids.get(i).getCentroid();
}
public Centroid getCentroid(int i) {
return centroids.get(i);
}
/**
* This method assigns the given doubleArray to the cluster with the given index.
* Centroids are calculated over all assigned arrays.
*/
public void assignExample(int clusterIndex, double[] example) {
centroids.get(clusterIndex).assignExample(example);
}
public boolean finishAssign() {
boolean stable = true;
for (Centroid centroid: centroids)
stable &= centroid.finishAssign();
return stable;
}
public DistanceMeasure getDistanceMeasure() {
return distanceMeasure;
}
@Override
public String getExtension() {
return "ccm";
}
@Override
public String getFileDescription() {
return "Centroid based cluster model";
}
}