package com.hkorte.elasticsearch.significance.measures;
/**
 * Significance measure that scores a term by the mutual information between the term's presence in a
 * document and the document's class membership, computed from a 2x2 contingency table of document counts.
 *
 * Created by hkorte on 25.04.14.
 */
public class MutualInformation extends SignificanceMeasure {

    /**
     * @return the short identifier of this measure, {@code "mi"}
     */
    @Override
    public String shortName() {
        return "mi";
    }

    /**
     * Computes the mutual information of a 2x2 contingency table, following Christopher D. Manning,
     * <em>Introduction to Information Retrieval</em> (2009), section 13.5.1, equation 13.17.
     *
     * Every cell is incremented by one before the computation, which smooths the distribution and keeps
     * all quotients and logarithms well defined when a cell is zero.
     *
     * @param n00 docs which do not contain word with negative class
     * @param n01 docs which do not contain word with positive class
     * @param n10 docs which contain word with negative class
     * @param n11 docs which contain word with positive class
     * @return The mutual information of the given (smoothed) distribution, using base-2 logarithms
     * @author hkorte
     */
    @Override
    public double compute(long n00, long n01, long n10, long n11) {
        // Add-one smoothing of every cell; also avoids division by zero and log(0).
        final long absentNegative = n00 + 1;
        final long absentPositive = n01 + 1;
        final long presentNegative = n10 + 1;
        final long presentPositive = n11 + 1;

        // Marginal totals: sums are formed in integer arithmetic and only then widened to double.
        final double withWord = presentPositive + presentNegative;
        final double positiveClass = presentPositive + absentPositive;
        final double withoutWord = absentPositive + absentNegative;
        final double negativeClass = presentNegative + absentNegative;
        final double total = presentPositive + presentNegative + absentPositive + absentNegative;

        // Accumulate the four cell contributions in the order n11, n01, n10, n00.
        double information = cellContribution(presentPositive, withWord, positiveClass, total);
        information += cellContribution(absentPositive, withoutWord, positiveClass, total);
        information += cellContribution(presentNegative, withWord, negativeClass, total);
        information += cellContribution(absentNegative, withoutWord, negativeClass, total);
        return information;
    }

    /**
     * Contribution of a single contingency-table cell to the mutual information sum:
     * {@code cell/total * log2(total * cell / (wordMarginal * classMarginal))}.
     *
     * @param cell          smoothed count of the cell
     * @param wordMarginal  total of the cell's word-presence row
     * @param classMarginal total of the cell's class column
     * @param total         sum of all four smoothed cells
     * @return the cell's summand
     */
    private static double cellContribution(long cell, double wordMarginal, double classMarginal,
                                           double total) {
        final double jointProbability = cell / total;
        final double ratio = total * cell / (wordMarginal * classMarginal);
        return jointProbability * log2(ratio);
    }

    /**
     * @param value the argument of the logarithm
     * @return the base-2 logarithm of {@code value}, obtained by change of base from the natural logarithm
     */
    private static double log2(double value) {
        return Math.log(value) / Math.log(2d);
    }
}