package func.dtree;
/**
 * A pruning criteria that keeps a decision tree split only when a
 * chi square test finds the split statistically significant at the
 * configured significance level.
 *
 * @author Andrew Guillory gtg008g@mail.gatech.edu
 * @version 1.0
 */
public class ChiSquarePruningCriteria extends PruningCriteria {
    /**
     * A value of 0, 1, 2, 3, 4
     * representing a confidence of .1, .05, .025, .01, .001.
     * Used as the column index into {@link #CHI_SQUARE_TABLE}.
     */
    private final int confidence;

    /**
     * Create a new chi square pruning criteria
     * @param confidence the confidence (a number [0-4]), selecting a
     * significance level of .1, .05, .025, .01 or .001 respectively
     * @throws IllegalArgumentException if confidence is outside [0-4];
     * such a value would otherwise silently select a critical value
     * belonging to a different degree of freedom
     */
    public ChiSquarePruningCriteria(int confidence) {
        if (confidence < 0 || confidence >= CONFIDENCE_LEVELS) {
            throw new IllegalArgumentException(
                "confidence must be in [0, " + (CONFIDENCE_LEVELS - 1)
                + "] but was " + confidence);
        }
        this.confidence = confidence;
    }

    /**
     * Decide whether a split should be pruned by comparing the chi square
     * deviance of the split against the tabulated critical value.
     * @param stats the statistics of the split under consideration
     * @return true if the split is not significant and should be pruned,
     * false if it should be kept
     * @see func.dtree.PruningCriteria#shouldPrune(func.dtree.DecisionTreeSplitStatistics)
     */
    public boolean shouldPrune(DecisionTreeSplitStatistics stats) {
        int branchCount = stats.getBranchCount();
        int classCount = stats.getClassCount();
        // the degrees of freedom
        int dof = (branchCount - 1) * (classCount - 1);
        if (dof > MAX_DOF) {
            // beyond the range of the table, never prune
            return false;
        }
        if (dof <= 0) {
            // a single branch or a single class: the observed counts equal
            // the expected counts, so the deviance is zero and the split
            // can never be significant (and the table has no row for it)
            return true;
        }
        // calculate the deviance
        double deviance = 0;
        for (int i = 0; i < branchCount; i++) {
            double instanceCount = stats.getInstanceCount(i);
            if (instanceCount <= 0) {
                // an empty branch contributes nothing
                continue;
            }
            double[] conditional = stats.getConditionalClassProbabilities(i);
            for (int j = 0; j < classCount; j++) {
                // the expected count (under a null hypothesis)
                double expected = instanceCount * stats.getClassProbability(j);
                if (expected <= 0) {
                    // a class that does not occur has no expected or actual
                    // instances; dividing by zero here would turn the whole
                    // deviance into NaN and force the split to be pruned
                    continue;
                }
                // the actual count
                double actual = instanceCount * conditional[j];
                double difference = actual - expected;
                deviance += difference * difference / expected;
            }
        }
        // the chi square critical value
        double chisquare =
            CHI_SQUARE_TABLE[CONFIDENCE_LEVELS * (dof - 1) + confidence];
        // if the deviance is big enough, don't prune; written as a negated
        // comparison so that a NaN deviance still results in pruning
        return !(deviance > chisquare);
    }

    /**
     * The maximum degree of freedom covered by the table
     */
    private static final int MAX_DOF = 100;

    /**
     * The number of significance levels (columns) in each table row
     */
    private static final int CONFIDENCE_LEVELS = 5;

    /**
     * Chi square table values for degrees of freedom
     * of 1-100 and significance levels of .1, .05, .025, .01, .001.
     * Stored row by row, one row of five values per degree of freedom.
     */
    private static final double[] CHI_SQUARE_TABLE = {
        2.706, 3.841, 5.024, 6.635, 10.828,
        4.605, 5.991, 7.378, 9.210, 13.816,
        6.251, 7.815, 9.348, 11.345, 16.266,
        7.779, 9.488, 11.143, 13.277, 18.467,
        9.236, 11.070, 12.833, 15.086, 20.515,
        10.645, 12.592, 14.449, 16.812, 22.458,
        12.017, 14.067, 16.013, 18.475, 24.322,
        13.362, 15.507, 17.535, 20.090, 26.125,
        14.684, 16.919, 19.023, 21.666, 27.877,
        15.987, 18.307, 20.483, 23.209, 29.588,
        17.275, 19.675, 21.920, 24.725, 31.264,
        18.549, 21.026, 23.337, 26.217, 32.910,
        19.812, 22.362, 24.736, 27.688, 34.528,
        21.064, 23.685, 26.119, 29.141, 36.123,
        22.307, 24.996, 27.488, 30.578, 37.697,
        23.542, 26.296, 28.845, 32.000, 39.252,
        24.769, 27.587, 30.191, 33.409, 40.790,
        25.989, 28.869, 31.526, 34.805, 42.312,
        27.204, 30.144, 32.852, 36.191, 43.820,
        28.412, 31.410, 34.170, 37.566, 45.315,
        29.615, 32.671, 35.479, 38.932, 46.797,
        30.813, 33.924, 36.781, 40.289, 48.268,
        32.007, 35.172, 38.076, 41.638, 49.728,
        33.196, 36.415, 39.364, 42.980, 51.179,
        34.382, 37.652, 40.646, 44.314, 52.620,
        35.563, 38.885, 41.923, 45.642, 54.052,
        36.741, 40.113, 43.195, 46.963, 55.476,
        37.916, 41.337, 44.461, 48.278, 56.892,
        39.087, 42.557, 45.722, 49.588, 58.301,
        40.256, 43.773, 46.979, 50.892, 59.703,
        41.422, 44.985, 48.232, 52.191, 61.098,
        42.585, 46.194, 49.480, 53.486, 62.487,
        43.745, 47.400, 50.725, 54.776, 63.870,
        44.903, 48.602, 51.966, 56.061, 65.247,
        46.059, 49.802, 53.203, 57.342, 66.619,
        47.212, 50.998, 54.437, 58.619, 67.985,
        48.363, 52.192, 55.668, 59.893, 69.347,
        49.513, 53.384, 56.896, 61.162, 70.703,
        50.660, 54.572, 58.120, 62.428, 72.055,
        51.805, 55.758, 59.342, 63.691, 73.402,
        52.949, 56.942, 60.561, 64.950, 74.745,
        54.090, 58.124, 61.777, 66.206, 76.084,
        55.230, 59.304, 62.990, 67.459, 77.419,
        56.369, 60.481, 64.201, 68.710, 78.750,
        57.505, 61.656, 65.410, 69.957, 80.077,
        58.641, 62.830, 66.617, 71.201, 81.400,
        59.774, 64.001, 67.821, 72.443, 82.720,
        60.907, 65.171, 69.023, 73.683, 84.037,
        62.038, 66.339, 70.222, 74.919, 85.351,
        63.167, 67.505, 71.420, 76.154, 86.661,
        64.295, 68.669, 72.616, 77.386, 87.968,
        65.422, 69.832, 73.810, 78.616, 89.272,
        66.548, 70.993, 75.002, 79.843, 90.573,
        67.673, 72.153, 76.192, 81.069, 91.872,
        68.796, 73.311, 77.380, 82.292, 93.168,
        69.919, 74.468, 78.567, 83.513, 94.461,
        71.040, 75.624, 79.752, 84.733, 95.751,
        72.160, 76.778, 80.936, 85.950, 97.039,
        73.279, 77.931, 82.117, 87.166, 98.324,
        74.397, 79.082, 83.298, 88.379, 99.607,
        75.514, 80.232, 84.476, 89.591, 100.888,
        76.630, 81.381, 85.654, 90.802, 102.166,
        77.745, 82.529, 86.830, 92.010, 103.442,
        78.860, 83.675, 88.004, 93.217, 104.716,
        79.973, 84.821, 89.177, 94.422, 105.988,
        81.085, 85.965, 90.349, 95.626, 107.258,
        82.197, 87.108, 91.519, 96.828, 108.526,
        83.308, 88.250, 92.689, 98.028, 109.791,
        84.418, 89.391, 93.856, 99.228, 111.055,
        85.527, 90.531, 95.023, 100.425, 112.317,
        86.635, 91.670, 96.189, 101.621, 113.577,
        87.743, 92.808, 97.353, 102.816, 114.835,
        88.850, 93.945, 98.516, 104.010, 116.092,
        89.956, 95.081, 99.678, 105.202, 117.346,
        91.061, 96.217, 100.839, 106.393, 118.599,
        92.166, 97.351, 101.999, 107.583, 119.850,
        93.270, 98.484, 103.158, 108.771, 121.100,
        94.374, 99.617, 104.316, 109.958, 122.348,
        95.476, 100.749, 105.473, 111.144, 123.594,
        96.578, 101.879, 106.629, 112.329, 124.839,
        97.680, 103.010, 107.783, 113.512, 126.083,
        98.780, 104.139, 108.937, 114.695, 127.324,
        99.880, 105.267, 110.090, 115.876, 128.565,
        100.980, 106.395, 111.242, 117.057, 129.804,
        102.079, 107.522, 112.393, 118.236, 131.041,
        103.177, 108.648, 113.544, 119.414, 132.277,
        104.275, 109.773, 114.693, 120.591, 133.512,
        105.372, 110.898, 115.841, 121.767, 134.746,
        106.469, 112.022, 116.989, 122.942, 135.978,
        107.565, 113.145, 118.136, 124.116, 137.208,
        108.661, 114.268, 119.282, 125.289, 138.438,
        109.756, 115.390, 120.427, 126.462, 139.666,
        110.850, 116.511, 121.571, 127.633, 140.893,
        111.944, 117.632, 122.715, 128.803, 142.119,
        113.038, 118.752, 123.858, 129.973, 143.344,
        114.131, 119.871, 125.000, 131.141, 144.567,
        115.223, 120.990, 126.141, 132.309, 145.789,
        116.315, 122.108, 127.282, 133.476, 147.010,
        117.407, 123.225, 128.422, 134.642, 148.230,
        118.498, 124.342, 129.561, 135.807, 149.449,
    };
}