/* * To change this template, choose Tools | Templates * and open the template in the editor. */ package tr.gov.ulakbim.jDenetX.evaluation; import tr.gov.ulakbim.jDenetX.cluster.Cluster; import tr.gov.ulakbim.jDenetX.cluster.Clustering; import tr.gov.ulakbim.jDenetX.gui.visualization.DataPoint; import java.util.ArrayList; /** * @author jansen */ public class SilhouetteCoefficient extends MeasureCollection { private static final long serialVersionUID = 1L; private double pointInclusionProbThreshold = 0.8; public SilhouetteCoefficient() { super(); } @Override protected boolean[] getDefaultEnabled() { boolean[] defaults = {false}; return defaults; } @Override public String[] getNames() { String[] names = {"SilhCoeff"}; return names; } public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) { int numFCluster = clustering.size(); double[][] pointInclusionProbFC = new double[points.size()][numFCluster]; for (int p = 0; p < points.size(); p++) { DataPoint point = points.get(p); for (int fc = 0; fc < numFCluster; fc++) { Cluster cl = clustering.get(fc); pointInclusionProbFC[p][fc] = cl.getInclusionProbability(point); } } double silhCoeff = 0.0; int totalCount = 0; for (int p = 0; p < points.size(); p++) { DataPoint point = points.get(p); ArrayList<Integer> ownClusters = new ArrayList<Integer>(); for (int fc = 0; fc < numFCluster; fc++) { if (pointInclusionProbFC[p][fc] > pointInclusionProbThreshold) { ownClusters.add(fc); } } if (ownClusters.size() > 0) { double[] distanceByClusters = new double[numFCluster]; int[] countsByClusters = new int[numFCluster]; //calculate averageDistance of p to all cluster for (int p1 = 0; p1 < points.size(); p1++) { DataPoint point1 = points.get(p1); if (p1 != p && point1.classValue() != -1) { for (int fc = 0; fc < numFCluster; fc++) { double distance = distance(point, point1); if (pointInclusionProbFC[p1][fc] > pointInclusionProbThreshold) { distanceByClusters[fc] += distance; countsByClusters[fc]++; } } } } //find closest OWN cluster as clusters might overlap double minAvgDistanceOwn = Double.MAX_VALUE; int minOwnIndex = -1; for (int fc : ownClusters) { double normDist = distanceByClusters[fc] / (double) countsByClusters[fc]; if (normDist < minAvgDistanceOwn && pointInclusionProbFC[p][fc] > pointInclusionProbThreshold) { minAvgDistanceOwn = normDist; minOwnIndex = fc; } } //find closest other (or other own) cluster double minAvgDistanceOther = Double.MAX_VALUE; for (int fc = 0; fc < numFCluster; fc++) { if (fc != minOwnIndex) { double normDist = distanceByClusters[fc] / (double) countsByClusters[fc]; if (normDist < minAvgDistanceOther) { minAvgDistanceOther = normDist; } } } double silhP = (minAvgDistanceOther - minAvgDistanceOwn) / Math.max(minAvgDistanceOther, minAvgDistanceOwn); point.setMeasureValue("SC - own", minAvgDistanceOwn); point.setMeasureValue("SC - other", minAvgDistanceOther); point.setMeasureValue("SC", silhP); silhCoeff += silhP; totalCount++; //System.out.println(point.getTimestamp()+" Silh "+silhP+" / "+avgDistanceOwn+" "+minAvgDistanceOther+" (C"+minIndex+")"); } } if (totalCount > 0) silhCoeff /= (double) totalCount; //normalize from -1, 1 to 0,1 silhCoeff = (silhCoeff + 1) / 2.0; addValue(0, silhCoeff); } private double distance(DataPoint inst1, DataPoint inst2) { double distance = 0.0; int numDims = inst1.numAttributes(); for (int i = 0; i < numDims; i++) { double d = inst1.value(i) - inst2.value(i); distance += d * d; } return Math.sqrt(distance); } }