package org.apache.samoa.evaluation.measures;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2014 - 2015 Apache Software Foundation
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.util.ArrayList;

import org.apache.samoa.moa.cluster.Cluster;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.core.DataPoint;
import org.apache.samoa.moa.evaluation.MeasureCollection;

/**
 * Measure collection that records a single value, "SilhCoeff": the mean silhouette value of all points that belong
 * to at least one found cluster, rescaled from [-1, 1] to [0, 1].
 *
 * <p>
 * A point is treated as a member of a cluster when the cluster's inclusion probability for that point is strictly
 * greater than {@link #POINT_INCLUSION_PROB_THRESHOLD}. Clusters may overlap, so a point can have several "own"
 * clusters.
 */
public class SilhouetteCoefficient extends MeasureCollection {

  /** Inclusion probability a point must exceed to be counted as a member of a cluster. */
  private static final double POINT_INCLUSION_PROB_THRESHOLD = 0.8;

  public SilhouetteCoefficient() {
    super();
  }

  /**
   * @return a one-element array holding {@code false}, one entry for the single measure of this collection
   */
  @Override
  protected boolean[] getDefaultEnabled() {
    return new boolean[] { false };
  }

  /**
   * @return the names of the measures recorded by this collection; only "SilhCoeff"
   */
  @Override
  public String[] getNames() {
    return new String[] { "SilhCoeff" };
  }

  /**
   * Computes the silhouette value of every point that belongs to at least one cluster of {@code clustering}, stores
   * the per-point results on the point under the keys "SC - own", "SC - other" and "SC", and records the rescaled
   * mean as measure 0.
   *
   * <p>
   * For a point, the average distance to the members of each cluster is computed over all other points whose class
   * value is not -1 (presumably the marker for noise points; confirm against {@code DataPoint}). The "own" distance
   * is the smallest such average among the clusters the point belongs to; the "other" distance is the smallest
   * average among all remaining clusters. Clusters that contain no counted neighbour are ignored; if no candidate
   * remains, the respective distance stays at {@link Double#MAX_VALUE}.
   *
   * <p>
   * If no point belongs to any cluster the mean is taken as 0, so the recorded value is 0.5.
   *
   * @param clustering
   *          the found clustering to evaluate
   * @param trueClustering
   *          not used by this measure
   * @param points
   *          the points the clustering is evaluated on
   */
  public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) {
    int numFCluster = clustering.size();
    int numPoints = points.size();

    // member[p][fc] is true when point p is considered part of found cluster fc.
    boolean[][] member = new boolean[numPoints][numFCluster];
    for (int p = 0; p < numPoints; p++) {
      DataPoint point = points.get(p);
      for (int fc = 0; fc < numFCluster; fc++) {
        Cluster cl = clustering.get(fc);
        member[p][fc] = cl.getInclusionProbability(point) > POINT_INCLUSION_PROB_THRESHOLD;
      }
    }

    double silhCoeff = 0.0;
    int totalCount = 0;
    for (int p = 0; p < numPoints; p++) {
      if (!belongsToAnyCluster(member[p])) {
        continue; // points outside every cluster do not contribute
      }
      DataPoint point = points.get(p);

      // Sum of distances from p to the counted members of each cluster, and how many were summed.
      double[] distanceByClusters = new double[numFCluster];
      int[] countsByClusters = new int[numFCluster];
      for (int p1 = 0; p1 < numPoints; p1++) {
        if (p1 == p) {
          continue;
        }
        DataPoint point1 = points.get(p1);
        if (point1.classValue() == -1) {
          continue;
        }
        // The distance does not depend on the cluster, so compute it at most once per pair,
        // and only if point1 actually belongs to some cluster.
        boolean distanceKnown = false;
        double distance = 0.0;
        for (int fc = 0; fc < numFCluster; fc++) {
          if (member[p1][fc]) {
            if (!distanceKnown) {
              distance = point.getDistance(point1);
              distanceKnown = true;
            }
            distanceByClusters[fc] += distance;
            countsByClusters[fc]++;
          }
        }
      }

      // Find the closest OWN cluster, as clusters might overlap.
      double minAvgDistanceOwn = Double.MAX_VALUE;
      int minOwnIndex = -1;
      for (int fc = 0; fc < numFCluster; fc++) {
        // A cluster without counted neighbours has no defined average distance.
        if (member[p][fc] && countsByClusters[fc] > 0) {
          double normDist = distanceByClusters[fc] / countsByClusters[fc];
          if (normDist < minAvgDistanceOwn) {
            minAvgDistanceOwn = normDist;
            minOwnIndex = fc;
          }
        }
      }

      // Find the closest other cluster; further own clusters of p count as "other" here.
      double minAvgDistanceOther = Double.MAX_VALUE;
      for (int fc = 0; fc < numFCluster; fc++) {
        if (fc != minOwnIndex && countsByClusters[fc] > 0) {
          double normDist = distanceByClusters[fc] / countsByClusters[fc];
          if (normDist < minAvgDistanceOther) {
            minAvgDistanceOther = normDist;
          }
        }
      }

      double silhP = silhouette(minAvgDistanceOwn, minAvgDistanceOther);
      point.setMeasureValue("SC - own", minAvgDistanceOwn);
      point.setMeasureValue("SC - other", minAvgDistanceOther);
      point.setMeasureValue("SC", silhP);

      silhCoeff += silhP;
      totalCount++;
    }

    if (totalCount > 0) {
      silhCoeff /= totalCount;
    }
    // normalize from [-1, 1] to [0, 1]
    silhCoeff = (silhCoeff + 1) / 2.0;
    addValue(0, silhCoeff);
  }

  /** Returns whether at least one entry of the membership row is set. */
  private static boolean belongsToAnyCluster(boolean[] memberships) {
    for (boolean inCluster : memberships) {
      if (inCluster) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns {@code (other - own) / max(other, own)}, or 0 when that maximum is exactly 0. Without the guard the
   * quotient would be 0/0 = NaN and would turn the whole averaged measure into NaN.
   */
  private static double silhouette(double avgDistanceOwn, double avgDistanceOther) {
    double denominator = Math.max(avgDistanceOther, avgDistanceOwn);
    if (denominator == 0.0) {
      return 0.0;
    }
    return (avgDistanceOther - avgDistanceOwn) / denominator;
  }
}