/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package tr.gov.ulakbim.jDenetX.evaluation;
import tr.gov.ulakbim.jDenetX.cluster.Cluster;
import tr.gov.ulakbim.jDenetX.cluster.Clustering;
import tr.gov.ulakbim.jDenetX.gui.visualization.DataPoint;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
/**
 * Entropy-based external measures that compare a found clustering with a
 * ground-truth clustering: the two normalized conditional ("cross") entropies,
 * homogeneity, completeness and the V-Measure.
 *
 * <p>Notation used in the comments below: {@code C} is the ground-truth class
 * of a point, {@code K} the found cluster of a point. Both get one extra
 * "noise" group for points that are not fully included in any cluster.
 *
 * @author jansen
 */
public class StatisticalCollection2 extends MeasureCollection {
    private static final long serialVersionUID = 1L;

    /** Random generator seeded with 117 in the constructor; not used by the code in this class. */
    protected Random instanceRandom;

    /** When true, the contingency table and intermediate entropies are printed to stdout. */
    private boolean debug = false;

    /** The beta of the V-Measure formula {@code (1 + beta) * h * c / (beta * h + c)}. */
    private final double beta = 0.5;

    public StatisticalCollection2() {
        super();
        instanceRandom = new Random(117);
    }

    /**
     * @return the names of the five measures this collection reports
     */
    @Override
    protected String[] getNames() {
        String[] names = {"GT cross entropy", "FC cross entropy", "Homogeneity", "Completeness", "V-Measure"};
        return names;
    }

    /**
     * @return one flag per measure (presumably aligned with {@link #getNames()});
     *         every measure is disabled by default
     */
    @Override
    protected boolean[] getDefaultEnabled() {
        boolean[] defaults = {false, false, false, false, false};
        return defaults;
    }

    /**
     * Builds the class/cluster contingency table for {@code points} and reports
     * the five measures through {@code addValue}.
     *
     * <p>A point counts as a member of a cluster when its inclusion probability
     * is {@code >= 1}. A point that belongs to several clusters contributes one
     * count per (class, cluster) pair, so the total {@code n} is a number of
     * pairs, not of points.
     *
     * <p>Reported values:
     * <ul>
     *   <li>"FC cross entropy": {@code 1 - H(C|K) / log10(#classes)}</li>
     *   <li>"GT cross entropy": {@code 1 - H(K|C) / log10(#clusters)}</li>
     *   <li>"Homogeneity": {@code 1 - H(C|K) / H(C)}, or 1 when {@code H(C) == 0}</li>
     *   <li>"Completeness": {@code 1 - H(K|C) / H(K)}, or 1 when {@code H(K) == 0}</li>
     *   <li>"V-Measure": {@code (1 + beta) * h * c / (beta * h + c)}, or 0 when the denominator is 0</li>
     * </ul>
     * Homogeneity, completeness and V-Measure are clamped to {@code [0, 1]} to
     * absorb floating-point rounding; no {@code -1} error sentinel is emitted.
     *
     * @param clustering     the found clustering (K)
     * @param trueClustering the ground-truth clustering (C)
     * @param points         the points to evaluate; may be empty
     * @throws Exception declared by the overridden signature
     */
    @Override
    public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) throws Exception {
        // One extra row/column at the end collects noise points.
        final int numClasses = trueClustering.size() + 1;
        final int numClusters = clustering.size() + 1;
        final int noiseClass = numClasses - 1;
        final int noiseCluster = numClusters - 1;

        int[][] counts = new int[numClasses][numClusters];
        int[] sumsHC = new int[numClasses];
        int[] sumsFC = new int[numClusters];
        int n = 0;

        for (DataPoint point : points) {
            boolean hcNoise = true;
            for (int i = 0; i < numClasses; i++) {
                boolean inClass;
                if (i < noiseClass) {
                    Cluster hc = trueClustering.get(i);
                    inClass = hc.getInclusionProbability(point) >= 1;
                    if (inClass) {
                        hcNoise = false;
                    }
                } else {
                    // Noise row: only for points that matched no true cluster.
                    inClass = hcNoise;
                }
                if (!inClass) {
                    continue;
                }

                boolean fcNoise = true;
                for (int j = 0; j < numClusters; j++) {
                    boolean inCluster;
                    if (j < noiseCluster) {
                        Cluster fc = clustering.get(j);
                        inCluster = fc.getInclusionProbability(point) >= 1;
                        if (inCluster) {
                            fcNoise = false;
                        }
                    } else {
                        // Noise column: only for points that matched no found cluster.
                        inCluster = fcNoise;
                    }
                    if (inCluster) {
                        counts[i][j]++;
                        sumsFC[j]++;
                        sumsHC[i]++;
                        n++;
                    }
                }
            }
        }

        if (debug) {
            for (int i = 0; i < counts.length; i++) {
                System.out.println("Con " + i + ": " + Arrays.toString(counts[i]));
            }
            System.out.println("Sum FC " + Arrays.toString(sumsFC));
            System.out.println("Sum HC " + Arrays.toString(sumsHC));
        }

        // Raw (un-normalized, base-10) entropies.
        double clusterEntropy = marginalEntropy(sumsFC, n);                        // H(K)
        double classEntropy = marginalEntropy(sumsHC, n);                          // H(C)
        double classGivenCluster = conditionalEntropy(transpose(counts), sumsFC, n); // H(C|K)
        double clusterGivenClass = conditionalEntropy(counts, sumsHC, n);          // H(K|C)

        if (debug) {
            System.out.println("FC entropy " + normalize(clusterEntropy, numClusters));
            System.out.println("GT entropy " + normalize(classEntropy, numClasses));
        }

        // Cluster-based entropy: distribution over classes inside each found
        // cluster, so its maximum is log10(#classes).
        double fcCrossEntropy = normalize(classGivenCluster, numClasses);
        addValue("FC cross entropy", 1 - fcCrossEntropy);
        if (debug) {
            System.out.println("FC cross entropy " + (1 - fcCrossEntropy));
        }

        // Class-based entropy: distribution over found clusters inside each
        // class, so its maximum is log10(#clusters) (not #classes).
        double gtCrossEntropy = normalize(clusterGivenClass, numClusters);
        addValue("GT cross entropy", 1 - gtCrossEntropy);
        if (debug) {
            System.out.println("GT cross entropy " + (1 - gtCrossEntropy));
        }

        // h = 1 - H(C|K)/H(C). Since H(C|K) <= H(C) the ratio is in [0, 1];
        // the clamp only guards against rounding.
        double homogeneity = classEntropy == 0
                ? 1
                : clamp01(1 - classGivenCluster / classEntropy);
        addValue("Homogeneity", homogeneity);

        // c = 1 - H(K|C)/H(K), symmetric to homogeneity.
        double completeness = clusterEntropy == 0
                ? 1
                : clamp01(1 - clusterGivenClass / clusterEntropy);
        addValue("Completeness", completeness);

        // Weighted harmonic mean; defined as 0 when both parts are 0 (0/0 otherwise).
        double denominator = beta * homogeneity + completeness;
        double vmeasure = denominator == 0
                ? 0
                : clamp01((1 + beta) * homogeneity * completeness / denominator);
        addValue("V-Measure", vmeasure);
    }

    /** Returns the base-10 entropy of the distribution {@code sums[i] / total}; 0 when {@code total <= 0}. */
    private static double marginalEntropy(int[] sums, int total) {
        if (total <= 0) {
            return 0;
        }
        double acc = 0;
        for (int s : sums) {
            if (s > 0) {
                double weight = s / (double) total;
                acc += weight * Math.log10(weight);
            }
        }
        return acc == 0 ? 0 : -acc;
    }

    /**
     * Returns the base-10 conditional entropy of the column variable given the
     * row variable of {@code joint}, where {@code rowSums[r]} is the sum of row
     * {@code r} and {@code total} the sum of all cells; 0 when {@code total <= 0}.
     */
    private static double conditionalEntropy(int[][] joint, int[] rowSums, int total) {
        if (total <= 0) {
            return 0;
        }
        double acc = 0;
        for (int r = 0; r < joint.length; r++) {
            if (rowSums[r] <= 0) {
                continue; // empty group contributes nothing
            }
            double e = 0;
            for (int cell : joint[r]) {
                if (cell == 0) {
                    continue; // 0 * log(0) is taken as 0
                }
                double prob = cell / (double) rowSums[r];
                e += prob * Math.log10(prob);
            }
            acc += (rowSums[r] / (double) total) * e;
        }
        return acc == 0 ? 0 : -acc;
    }

    /**
     * Divides {@code entropy} by its maximum {@code log10(outcomes)}. With a
     * single outcome the entropy is necessarily 0, so 0 is returned instead of 0/0.
     */
    private static double normalize(double entropy, int outcomes) {
        return outcomes > 1 ? entropy / Math.log10(outcomes) : 0;
    }

    /** Limits {@code value} to the closed interval [0, 1]. */
    private static double clamp01(double value) {
        return Math.max(0, Math.min(1, value));
    }

    /** Returns a transposed copy of a rectangular, non-empty matrix. */
    private static int[][] transpose(int[][] m) {
        int[][] t = new int[m[0].length][m.length];
        for (int r = 0; r < m.length; r++) {
            for (int c = 0; c < m[r].length; c++) {
                t[c][r] = m[r][c];
            }
        }
        return t;
    }
}