package func.dtree;
/**
* A splitting criterion that uses information gain as the basis
* for scoring the quality of a split
* @author Andrew Guillory gtg008g@mail.gatech.edu
* @version 1.0
*/
public class InformationGainSplitEvaluator extends SplitEvaluator {
/**
* The log of 2
*/
private static final double LOG2 = Math.log(2);
/**
* Calculate the entropy of an array of class probabilites
* @param classProbabilities the probabilites
* @return the entropy
*/
private double entropy(double[] classProbabilities) {
double entropy = 0;
for (int i = 0; i < classProbabilities.length; i++) {
if (classProbabilities[i] != 0)
entropy -= classProbabilities[i]
* Math.log(classProbabilities[i]) / LOG2;
}
return entropy;
}
/**
* @see dtrees.SplitEvaluator#splitValue(dtrees.DecisionTreeSplitStatistics)
*/
public double splitValue(DecisionTreeSplitStatistics stats) {
// the entropy before splitting
double initialEntropy = entropy(stats.getClassProbabilities());
// and now after
double conditionalEntropy = 0;
for (int i = 0; i < stats.getBranchCount(); i++) {
conditionalEntropy += stats.getBranchProbability(i) *
entropy(stats.getConditionalClassProbabilities(i));
}
// the information gain is just initial minus conditional
return initialEntropy - conditionalEntropy;
}
}