/*
* RapidMiner
*
* Copyright (C) 2001-2007 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA.
*/
package com.rapidminer.operator.learner.clustering.clusterer;
import java.text.DecimalFormat;
import java.util.LinkedList;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.learner.clustering.CentroidBasedClusterModel;
import com.rapidminer.operator.learner.clustering.Cluster;
import com.rapidminer.operator.learner.clustering.FlatCrispClusterModel;
import com.rapidminer.tools.Tools;
/**
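* A cluster model used for fuzzy k-means clustering. Besides the cluster
* centroids, its textual representation lists a membership value for every
* example and cluster.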
*
* @author Michael Wurst, Ingo Mierswa
* @version $Id: KMeansClusterModel.java,v 1.3 2007/06/30 23:24:35 ingomierswa Exp $
*/
public class FuzzyKMeansClusterModel extends FlatCrispClusterModel implements CentroidBasedClusterModel {
private static final long serialVersionUID = 3162433985759604081L;
private double[][] centroids;
private ExampleSet es;
private int m;
public FuzzyKMeansClusterModel(double[][] centroids, ExampleSet es, int m) {
// Call copy constructor
super();
// Assign centroids
this.centroids = centroids;
this.es = es;
this.m = m;
}
public double getCentroidDistance(int index1, int index2) {
double sum = 0.0;
for (int i = 0; i < centroids[0].length; i++)
sum = sum + (centroids[index1][i] - centroids[index2][i]) * (centroids[index1][i] - centroids[index2][i]);
return Math.sqrt(sum);
}
public double getDistanceFromCentroid(int index, Example e) {
double sum = 0.0;
int i = 0;
for (Attribute att : e.getAttributes()) {
sum = sum + (centroids[index][i] - e.getValue(att)) * (centroids[index][i] - e.getValue(att));
i++;
}
return Math.sqrt(sum);
}
public void setCentroid(int index, double[] values) {
centroids[index] = values;
}
public double[] getCentroid(int index) {
return centroids[index];
}
public String toString() {
StringBuffer result = new StringBuffer(super.toString() + Tools.getLineSeparator());
result.append("Cluster centroids:" + Tools.getLineSeparator());
for (int i = 0; i < getNumberOfClusters(); i++) {
Cluster cl = getClusterAt(i);
result.append("Cluster " + cl.getId() + ":\t" + centroidToString(i) + Tools.getLineSeparator());
}
result.append(Tools.getLineSeparator());
result.append(getMemberships());
return result.toString();
}
public String toResultString() {
return toString();
}
public StringBuffer getMemberships(){
StringBuffer res = new StringBuffer();
res.append("Membership Values:"+Tools.getLineSeparator());
int numCl = getNumberOfClusters();
double minCdist = getMinD();
double[] avgC = getAvgC();
StringBuffer head = new StringBuffer();
head.append("Id\t");
for (int i = 0; i < numCl; i++) {
Cluster cl = getClusterAt(i);
head.append("Cluster "+cl.getId()+"\t");
}
head.append("PC\t\tXB\t\tFS\t");
head.append("\n");
res.append(head+Tools.getLineSeparator());
float exp = 2/(this.m-1);
DecimalFormat df = new DecimalFormat("0.00");
double globalSumPC = 0;
double rowSumPC = 0;
double globalSumXB = 0;
double rowSumXB = 0;
double globalSumFS = 0;
double rowSumFS = 0;
for (int i = 0; i < es.size(); i++) {
Example e = es.getExample(i);
StringBuffer entry = new StringBuffer();
entry.append(e.getId()+"\t");
rowSumPC = 0;
rowSumXB = 0;
rowSumFS = 0;
LinkedList<Double> dists = new LinkedList<Double>();
for (int j = 0; j < getNumberOfClusters(); j++) {
dists.add(getDistanceFromCentroid(j, e));
}
for (int k = 0; k < getNumberOfClusters(); k++) {
double sum = 0;
for (int l = 0; l < getNumberOfClusters(); l++) {
sum+=Math.pow(dists.get(k)/dists.get(l),exp);
}
double mem = 1/sum;
entry.append(df.format(mem)+"\t\t");
rowSumPC+=Math.pow(mem,2);
rowSumXB+=Math.pow(mem,2)*Math.pow(dists.get(k),2);
rowSumFS+=Math.pow(mem, this.m)*(Math.pow(dists.get(k),2)-Math.pow(getCentroidToAvgDistance(k, avgC), 2));
}
globalSumPC+=rowSumPC;
globalSumXB+=rowSumXB;
globalSumFS+=rowSumFS;
entry.append(df.format(rowSumPC)+"\t\t");
entry.append(df.format(rowSumXB/Math.pow(minCdist, 2))+"\t\t");
entry.append(df.format(rowSumFS)+"\t\t");
entry.append("\n");
res.append(entry);
}
res.append("\n\n"+Tools.getLineSeparator());
res.append("Global Partition Coefficient: "+df.format(globalSumPC/es.size())+Tools.getLineSeparator());
res.append("Global Sum Xie Beni: "+globalSumXB+Tools.getLineSeparator());
res.append("Global Xie Beni: "+df.format(globalSumXB/(es.size()*Math.pow(minCdist, 2)))+Tools.getLineSeparator());
res.append("Global Fukuyama Sugeno: "+df.format(globalSumFS)+Tools.getLineSeparator());
res.append("min centroid distance: "+df.format(minCdist)+Tools.getLineSeparator());
res.append("avgCentroid: "+avgToString(avgC)+Tools.getLineSeparator());
res.append("|Examples|: "+es.size()+Tools.getLineSeparator());
return res;
}
private double[] getAvgC() {
double[] avg = new double[this.centroids[0].length];
for (int i = 0; i < getNumberOfClusters(); i++) {
double[] cent = this.centroids[i];
for (int j = 0; j < cent.length; j++) {
avg[j]+=cent[j];
}
}
for (int i = 0; i < avg.length; i++) {
avg[i]=avg[i]/getNumberOfClusters();
}
return avg;
}
private double getMinD() {
double min = Integer.MAX_VALUE;
for (int i = 0; i < getNumberOfClusters(); i++) {
for (int j = 0; j < getNumberOfClusters(); j++) {
double tmp = getCentroidDistance(i, j);
if (tmp<min&&tmp!=0) {
min=tmp;
}
}
}
return min;
}
public double getCentroidToAvgDistance(int index1, double[] avg ) {
double sum = 0.0;
for (int i = 0; i < centroids[0].length; i++)
sum = sum + (centroids[index1][i] - avg[i]) * (centroids[index1][i] - avg[i]);
return Math.sqrt(sum);
}
private String avgToString(double[] avg) {
StringBuffer s = new StringBuffer();
for (int j = 0; j < avg.length; j++) {
s.append(Tools.formatNumber(avg[j]) + " ");
}
return s.toString();
}
private String centroidToString(int index) {
StringBuffer s = new StringBuffer();
int i = 0;
for (Attribute att : es.getAttributes()) {
s.append(att.getName() + " = " + Tools.formatNumber(centroids[index][i++]) + " ");
}
return s.toString();
}
public String[] getDimensionNames() {
return com.rapidminer.example.Tools.getRegularAttributeNames(es);
}
}