package com.hkorte.elasticsearch.significance.measures;
/**
 * Significance measure that scores a word by the Kullback-Leibler style term
 * {@code p * log2(p / q)}, where {@code p} and {@code q} are the add-one
 * smoothed relative frequencies of the word in the positive and the negative
 * class respectively.
 *
 * @author hkorte
 */
public class KullbackLeiblerDivergence extends SignificanceMeasure {

    /** Natural logarithm of 2, used to convert natural logarithms to base 2. */
    private static final double LN_2 = Math.log(2d);

    /**
     * @return the short identifier of this measure, {@code "kld"}
     */
    @Override
    public String shortName() {
        return "kld";
    }

    /**
     * Computes {@code p * log2(p / q)} from a 2x2 contingency table of
     * document counts.
     * <p>
     * {@code p = (n11 + 1) / (n11 + n01 + 1)} is the smoothed relative
     * frequency of the word within the positive class and
     * {@code q = (n10 + 1) / (n10 + n00 + 1)} the one within the negative
     * class. Only the term for the "word is present" outcome is evaluated.
     *
     * @param n00 docs which do not contain word with negative class
     * @param n01 docs which do not contain word with positive class
     * @param n10 docs which contain word with negative class
     * @param n11 docs which contain word with positive class
     * @return the value of {@code p * log2(p / q)} for the given counts
     */
    @Override
    public double compute(long n00, long n01, long n10, long n11) {
        final double positiveFreq = smoothedFrequency(n11, n01);
        final double negativeFreq = smoothedFrequency(n10, n00);
        final double ratioLog2 = Math.log(positiveFreq / negativeFreq) / LN_2;
        return positiveFreq * ratioLog2;
    }

    /**
     * Relative frequency of the word within one class, with 1 added to both
     * the numerator and the denominator.
     *
     * @param withWord    number of docs of the class that contain the word
     * @param withoutWord number of docs of the class that do not contain the word
     * @return {@code (withWord + 1) / (withWord + withoutWord + 1)}
     */
    private static double smoothedFrequency(long withWord, long withoutWord) {
        // The class total is summed as a long first and only then widened to
        // double, so the division below is carried out in floating point.
        final double classTotal = withWord + withoutWord;
        return (withWord + 1) / (classTotal + 1);
    }
}