/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package tr.gov.ulakbim.jDenetX.evaluation;
import tr.gov.ulakbim.jDenetX.cluster.Cluster;
import tr.gov.ulakbim.jDenetX.cluster.Clustering;
import tr.gov.ulakbim.jDenetX.gui.visualization.DataPoint;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
/**
 * Measure collection that compares a found clustering with a ground-truth
 * clustering through a contingency table of point memberships.
 *
 * <p>Three values are reported on every call to
 * {@link #evaluateClustering(Clustering, Clustering, ArrayList)}:
 * <ul>
 * <li>"VarInformation" - computed as {@code 2 * I / (H_found + H_true)}, where
 * {@code I} is the mutual information and {@code H_*} the entropies of the two
 * clusterings;</li>
 * <li>"van Dongen" - a normalized van Dongen criterion (1 means identical);</li>
 * <li>"Rand statistic" - the chance-corrected (adjusted) Rand index.</li>
 * </ul>
 *
 * <p>Points that belong to no cluster of a clustering are collected in one
 * extra "noise" slot (the last row/column of the contingency table).
 *
 * @author jansen
 */
public class StatisticalCollection extends MeasureCollection {
    private static final long serialVersionUID = 1L;

    /** Below this magnitude the entropy terms are treated as degenerate (zero). */
    private static final double DEGENERATE_TOLERANCE = 1e-10;

    protected Random instanceRandom;

    /** When set, intermediate tables and values are printed to standard out. */
    private boolean debug = false;

    public StatisticalCollection() {
        super();
        instanceRandom = new Random(117);
    }

    /**
     * @return the display names of the measures produced by this collection
     */
    @Override
    protected String[] getNames() {
        String[] names = {"van Dongen", "Rand statistic", "VarInformation"};
        return names;
    }

    /**
     * @return one flag per measure name, all disabled by default
     */
    @Override
    protected boolean[] getDefaultEnabled() {
        // Derived from getNames() so both arrays always have the same length
        // (the former hard-coded array had four entries for three measures).
        return new boolean[getNames().length];
    }

    /**
     * Builds the contingency table between {@code trueClustering} and
     * {@code clustering} over {@code points} and records the three measures.
     *
     * <p>A point counts as member of a cluster when the cluster's inclusion
     * probability for it is at least 1. A point contained in several clusters
     * contributes one count per (true cluster, found cluster) combination.
     *
     * @param clustering     the clustering under evaluation
     * @param trueClustering the ground-truth clustering
     * @param points         the points used for the comparison
     * @throws Exception declared by the overridden method
     */
    @Override
    public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) throws Exception {
        final int numTrue = trueClustering.size();
        final int numFound = clustering.size();

        // Index numTrue / numFound is the noise slot of the respective clustering.
        int[][] counts = new int[numTrue + 1][numFound + 1];
        int[] sumsHC = new int[numTrue + 1];
        int[] sumsFC = new int[numFound + 1];
        int n = 0;

        for (DataPoint point : points) {
            boolean hcNoise = true;
            for (int i = 0; i <= numTrue; i++) {
                boolean inTrueSlot;
                if (i < numTrue) {
                    Cluster hc = trueClustering.get(i);
                    inTrueSlot = hc.getInclusionProbability(point) >= 1;
                    if (inTrueSlot) {
                        hcNoise = false;
                    }
                } else {
                    // Noise slot: only taken when no true cluster contained the point.
                    inTrueSlot = hcNoise;
                }
                if (!inTrueSlot) {
                    continue;
                }

                boolean fcNoise = true;
                for (int j = 0; j <= numFound; j++) {
                    boolean inFoundSlot;
                    if (j < numFound) {
                        Cluster fc = clustering.get(j);
                        inFoundSlot = fc.getInclusionProbability(point) >= 1;
                        if (inFoundSlot) {
                            fcNoise = false;
                        }
                    } else {
                        inFoundSlot = fcNoise;
                    }
                    if (inFoundSlot) {
                        counts[i][j]++;
                        sumsFC[j]++;
                        sumsHC[i]++;
                        n++;
                    }
                }
            }
        }

        if (debug) {
            for (int i = 0; i < counts.length; i++) {
                System.out.println("Con " + i + ": " + Arrays.toString(counts[i]));
            }
            System.out.println("Sum FC" + Arrays.toString(sumsFC));
            System.out.println("Sum HC" + Arrays.toString(sumsHC));
        }

        addValue("VarInformation", normalizedMutualInformation(counts, sumsHC, sumsFC, n));
        addValue("van Dongen", vanDongen(counts, sumsHC, sumsFC, n));
        addValue("Rand statistic", adjustedRand(counts, sumsHC, sumsFC, n));
    }

    /**
     * Computes {@code 2 * I / (H_found + H_true)} from the contingency table;
     * returns 1 when both entropies vanish (including the empty table).
     */
    private double normalizedMutualInformation(int[][] counts, int[] sumsHC, int[] sumsFC, int n) {
        final double total = n;

        double mutual = 0;
        for (int j = 0; j < sumsFC.length; j++) {
            for (int i = 0; i < sumsHC.length; i++) {
                if (counts[i][j] == 0) {
                    continue;
                }
                double joint = counts[i][j];
                double term = joint / total * Math.log(joint / sumsFC[j] / sumsHC[i] * total);
                if (debug) {
                    System.out.println("(" + i + "/" + j + "): " + term);
                }
                mutual += term;
            }
        }

        // Both sums below are negated entropies (sum of p * log p <= 0).
        double negEntropyFC = 0;
        for (int j = 0; j < sumsFC.length; j++) {
            if (sumsFC[j] == 0) {
                continue;
            }
            negEntropyFC += sumsFC[j] / total * Math.log(sumsFC[j] / total);
        }
        double negEntropyHC = 0;
        for (int i = 0; i < sumsHC.length; i++) {
            if (sumsHC[i] == 0) {
                continue;
            }
            negEntropyHC += sumsHC[i] / total * Math.log(sumsHC[i] / total);
        }

        if (debug) {
            System.out.println("FC " + negEntropyFC + " / HC " + negEntropyHC + " / mutual " + mutual);
        }

        if (Math.abs(mutual + negEntropyFC + negEntropyHC) < DEGENERATE_TOLERANCE) {
            return 1;
        }
        return 2 * mutual / (-negEntropyFC - negEntropyHC);
    }

    /**
     * Computes the normalized van Dongen criterion; returns 1 when the
     * normalizer is zero, i.e. when each clustering puts everything into a
     * single slot (or there are no counts at all), instead of producing NaN.
     */
    private double vanDongen(int[][] counts, int[] sumsHC, int[] sumsFC, int n) {
        double maxOverlapFC = 0;
        double largestFC = 0;
        for (int j = 0; j < sumsFC.length; j++) {
            double best = 0;
            for (int i = 0; i < sumsHC.length; i++) {
                if (counts[i][j] > best) {
                    best = counts[i][j];
                }
            }
            maxOverlapFC += best;
            if (sumsFC[j] > largestFC) {
                largestFC = sumsFC[j];
            }
        }

        double maxOverlapHC = 0;
        double largestHC = 0;
        for (int i = 0; i < sumsHC.length; i++) {
            double best = 0;
            for (int j = 0; j < sumsFC.length; j++) {
                if (counts[i][j] > best) {
                    best = counts[i][j];
                }
            }
            maxOverlapHC += best;
            if (sumsHC[i] > largestHC) {
                largestHC = sumsHC[i];
            }
        }

        // 2.0 * n keeps the arithmetic in double so large n cannot overflow int.
        final double twiceN = 2.0 * n;
        final double normalizer = twiceN - largestFC - largestHC;
        // All operands are exact integers in double, so == 0 is a safe test.
        final double dongen = normalizer == 0
                ? 1
                : 1 - (twiceN - maxOverlapFC - maxOverlapHC) / normalizer;

        if (debug) {
            System.out.println("Dongen HC:" + maxOverlapHC + " FC:" + maxOverlapFC + " Total:" + dongen + " n " + n);
        }
        return dongen;
    }

    /**
     * Computes the chance-corrected Rand index
     * (see http://www.cais.ntu.edu.sg/~qihe/menu4.html).
     *
     * <p>Returns 1 for the degenerate tables where the formula's denominator
     * is zero: fewer than two counted memberships, both clusterings consisting
     * of one slot, or both consisting of singletons only.
     */
    private double adjustedRand(int[][] counts, int[] sumsHC, int[] sumsFC, int n) {
        double pairsHC = 0;
        for (int i = 0; i < sumsHC.length; i++) {
            pairsHC += pairs(sumsHC[i]);
        }
        double pairsFC = 0;
        for (int j = 0; j < sumsFC.length; j++) {
            pairsFC += pairs(sumsFC[j]);
        }
        double pairsJoint = 0;
        for (int i = 0; i < sumsHC.length; i++) {
            for (int j = 0; j < sumsFC.length; j++) {
                pairsJoint += pairs(counts[i][j]);
            }
        }
        // pairs() works in double; the previous int product n * (n - 1)
        // overflowed for n > 46341.
        final double pairsTotal = pairs(n);

        boolean degenerate = pairsTotal == 0
                || (pairsHC == pairsTotal && pairsFC == pairsTotal)
                || (pairsHC == 0 && pairsFC == 0);
        if (degenerate) {
            return 1;
        }

        final double expected = pairsHC * pairsFC / pairsTotal;
        return (pairsJoint - expected) / (pairsHC / 2.0 + pairsFC / 2.0 - expected);
    }

    /** Number of unordered pairs among {@code size} items, computed in double. */
    private static double pairs(int size) {
        double v = size;
        return v * (v - 1) / 2.0;
    }
}