package tr.gov.ulakbim.jDenetX.evaluation;

import tr.gov.ulakbim.jDenetX.cluster.Clustering;
import tr.gov.ulakbim.jDenetX.cluster.SphereCluster;
import tr.gov.ulakbim.jDenetX.gui.visualization.DataPoint;
import weka.core.Instance;

import java.util.ArrayList;

/**
 * General clustering measures: coverage-based precision/recall, redundancy,
 * compactness and overlap of a found clustering.
 *
 * @author jansen
 */
public class General extends MeasureCollection {

    private static final long serialVersionUID = 1L;

    private int numPoints;
    private int numFClusters;
    private int numDims;
    private double pointInclusionProbThreshold = 0.8;
    private Clustering clustering;
    private ArrayList<DataPoint> points;

    public General() {
        super();
    }

    @Override
    protected String[] getNames() {
        String[] names = {"GPrecision", "GRecall", "Redundancy", "Compactness", "Overlap"};
        return names;
    }

    @Override
    public void evaluateClustering(Clustering clustering, Clustering trueClustering,
                                   ArrayList<DataPoint> points) throws Exception {
        this.points = points;
        this.clustering = clustering;
        numPoints = points.size();
        numFClusters = clustering.size();
        numDims = points.get(0).numAttributes() - 1;

        int totalRedundancy = 0;
        int trueCoverage = 0;
        int totalCoverage = 0;
        int numNoise = 0;

        for (int p = 0; p < numPoints; p++) {
            int coverage = 0;
            for (int c = 0; c < numFClusters; c++) {
                // is point p contained in cluster c?
                if (clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold) {
                    coverage++;
                }
            }
            if (points.get(p).classValue() == -1) {
                numNoise++;
            } else {
                if (coverage > 0) trueCoverage++;
            }
            if (coverage > 0) totalCoverage++;   // points covered by the clustering (incl. noise)
            if (coverage > 1) totalRedundancy++; // points covered by more than one cluster (incl. noise)
        }

        addValue("Redundancy", ((double) totalRedundancy / (double) numPoints));
        addValue("GPrecision", (totalCoverage == 0 ? 0 : ((double) trueCoverage / (double) totalCoverage)));
        addValue("GRecall", ((double) trueCoverage / (double) (numPoints - numNoise)));
        addValue("Compactness", computeCompactness());
        addValue("Overlap", computeOverlap());
    }

    private double computeOverlap() {
        for (int c = 0; c < numFClusters; c++) {
            if (!(clustering.get(c) instanceof SphereCluster)) {
                System.out.println("Overlap only supports SphereCluster. Found: "
                        + clustering.get(c).getClass());
                return Double.NaN;
            }
        }

        // mark every cluster that overlaps with at least one other cluster
        boolean[] overlap = new boolean[numFClusters];
        for (int c0 = 0; c0 < numFClusters; c0++) {
            if (overlap[c0]) continue;
            SphereCluster s0 = (SphereCluster) clustering.get(c0);
            for (int c1 = c0 + 1; c1 < numFClusters; c1++) {
                SphereCluster s1 = (SphereCluster) clustering.get(c1);
                if (s0.overlapRadiusDegree(s1) > 0) {
                    overlap[c0] = overlap[c1] = true;
                }
            }
        }

        double totalOverlap = 0;
        for (int c0 = 0; c0 < numFClusters; c0++) {
            if (overlap[c0]) totalOverlap++;
        }
        // if (totalOverlap / (double) numFClusters > .8) RunVisualizer.pause();

        return totalOverlap / (double) numFClusters;
    }

    private double computeCompactness() {
        if (numFClusters == 0) return 0;

        for (int c = 0; c < numFClusters; c++) {
            if (!(clustering.get(c) instanceof SphereCluster)) {
                System.out.println("Compactness only supports SphereCluster. Found: "
                        + clustering.get(c).getClass());
                return Double.NaN;
            }
        }

        // TODO weight radius by number of dimensions
        double totalCompactness = 0;
        for (int c = 0; c < numFClusters; c++) {
            ArrayList<Instance> containedPoints = new ArrayList<Instance>();
            for (int p = 0; p < numPoints; p++) {
                // is point p contained in cluster c?
                if (clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold) {
                    containedPoints.add(points.get(p));
                }
            }

            double compactness = 0;
            if (containedPoints.size() > 1) {
                // cluster is not empty: compare its radius to the radius of the
                // minimal enclosing sphere of the points it covers
                SphereCluster minEnclosingCluster = new SphereCluster(containedPoints, numDims);
                double minRadius = minEnclosingCluster.getRadius();
                double cfRadius = ((SphereCluster) clustering.get(c)).getRadius();
                if (Math.abs(minRadius - cfRadius) < 0.1e-10) {
                    compactness = 1;
                } else if (minRadius < cfRadius) {
                    compactness = minRadius / cfRadius;
                } else {
                    System.out.println("Optimal radius bigger than real one ("
                            + (cfRadius - minRadius) + "), this is really wrong");
                    compactness = 1;
                }
            } else {
                double cfRadius = ((SphereCluster) clustering.get(c)).getRadius();
                if (cfRadius == 0) compactness = 1;
            }
            // weight by weight of cluster???
            totalCompactness += compactness;
            clustering.get(c).setMeasureValue("Compactness", Double.toString(compactness));
        }

        return (totalCompactness / numFClusters);
    }
}