package func.dtree; /** * * @author Andrew Guillory gtg008g@mail.gatech.edu * @version 1.0 */ public class ChiSquarePruningCriteria extends PruningCriteria { /** * A value of 0, 1, 2, 3, 4 * representing a confidence of .1, .05, .025, .01, .001. */ private int confidence; /** * Create a new chi square pruning criteria * @param confidence the confidence (a number [0-4]) */ public ChiSquarePruningCriteria(int confidence) { this.confidence = confidence; } /** * @see dtrees.PruningCriteria#shouldPrune(dtrees.DecisionTreeSplitStatistics) */ public boolean shouldPrune(DecisionTreeSplitStatistics stats) { // the degrees of freedom int dof = (stats.getBranchCount() - 1) * (stats.getClassCount() - 1); if (dof > MAX_DOF) { return false; } // calculate the deviance double deviance = 0; for (int i = 0; i < stats.getBranchCount(); i++) { // for each for (int j = 0; j < stats.getClassCount(); j++) { // the expected (under a null hypothesis) double expected = stats.getInstanceCount(i) * stats.getClassProbability(j); // the actual count double actual = stats.getInstanceCount(i) * stats.getConditionalClassProbabilities(i)[j]; deviance += (actual - expected) * (actual - expected) / expected; } } // the chi square value double chisquare = CHI_SQUARE_TABLE[5*(dof - 1) + confidence]; // if the deviance is big enough, don't prune if (deviance > chisquare) { return false; } else { return true; } } /** * The maximum degree of freedom */ private static final int MAX_DOF = 100; /** * Chi square table values for degrees of freedom * of 1-50 and signifigance levels of .1, .05, .025, .01, .001 */ private static final double[] CHI_SQUARE_TABLE = { 2.706, 3.841, 5.024, 6.635, 10.828, 4.605, 5.991, 7.378, 9.210, 13.816, 6.251, 7.815, 9.348, 11.345, 16.266, 7.779, 9.488, 11.143, 13.277, 18.467, 9.236, 11.070, 12.833, 15.086, 20.515, 10.645, 12.592, 14.449, 16.812, 22.458, 12.017, 14.067, 16.013, 18.475, 24.322, 13.362, 15.507, 17.535, 20.090, 26.125, 14.684, 16.919, 19.023, 21.666, 27.877, 15.987, 18.307, 20.483, 23.209, 29.588, 17.275, 19.675, 21.920, 24.725, 31.264, 18.549, 21.026, 23.337, 26.217, 32.910, 19.812, 22.362, 24.736, 27.688, 34.528, 21.064, 23.685, 26.119, 29.141, 36.123, 22.307, 24.996, 27.488, 30.578, 37.697, 23.542, 26.296, 28.845, 32.000, 39.252, 24.769, 27.587, 30.191, 33.409, 40.790, 25.989, 28.869, 31.526, 34.805, 42.312, 27.204, 30.144, 32.852, 36.191, 43.820, 28.412, 31.410, 34.170, 37.566, 45.315, 29.615, 32.671, 35.479, 38.932, 46.797, 30.813, 33.924, 36.781, 40.289, 48.268, 32.007, 35.172, 38.076, 41.638, 49.728, 33.196, 36.415, 39.364, 42.980, 51.179, 34.382, 37.652, 40.646, 44.314, 52.620, 35.563, 38.885, 41.923, 45.642, 54.052, 36.741, 40.113, 43.195, 46.963, 55.476, 37.916, 41.337, 44.461, 48.278, 56.892, 39.087, 42.557, 45.722, 49.588, 58.301, 40.256, 43.773, 46.979, 50.892, 59.703, 41.422, 44.985, 48.232, 52.191, 61.098, 42.585, 46.194, 49.480, 53.486, 62.487, 43.745, 47.400, 50.725, 54.776, 63.870, 44.903, 48.602, 51.966, 56.061, 65.247, 46.059, 49.802, 53.203, 57.342, 66.619, 47.212, 50.998, 54.437, 58.619, 67.985, 48.363, 52.192, 55.668, 59.893, 69.347, 49.513, 53.384, 56.896, 61.162, 70.703, 50.660, 54.572, 58.120, 62.428, 72.055, 51.805, 55.758, 59.342, 63.691, 73.402, 52.949, 56.942, 60.561, 64.950, 74.745, 54.090, 58.124, 61.777, 66.206, 76.084, 55.230, 59.304, 62.990, 67.459, 77.419, 56.369, 60.481, 64.201, 68.710, 78.750, 57.505, 61.656, 65.410, 69.957, 80.077, 58.641, 62.830, 66.617, 71.201, 81.400, 59.774, 64.001, 67.821, 72.443, 82.720, 60.907, 65.171, 69.023, 73.683, 84.037, 62.038, 66.339, 70.222, 74.919, 85.351, 63.167, 67.505, 71.420, 76.154, 86.661, 64.295, 68.669, 72.616, 77.386, 87.968, 65.422, 69.832, 73.810, 78.616, 89.272, 66.548, 70.993, 75.002, 79.843, 90.573, 67.673, 72.153, 76.192, 81.069, 91.872, 68.796, 73.311, 77.380, 82.292, 93.168, 69.919, 74.468, 78.567, 83.513, 94.461, 71.040, 75.624, 79.752, 84.733, 95.751, 72.160, 76.778, 80.936, 85.950, 97.039, 73.279, 77.931, 82.117, 87.166, 98.324, 74.397, 79.082, 83.298, 88.379, 99.607, 75.514, 80.232, 84.476, 89.591, 100.888, 76.630, 81.381, 85.654, 90.802, 102.166, 77.745, 82.529, 86.830, 92.010, 103.442, 78.860, 83.675, 88.004, 93.217, 104.716, 79.973, 84.821, 89.177, 94.422, 105.988, 81.085, 85.965, 90.349, 95.626, 107.258, 82.197, 87.108, 91.519, 96.828, 108.526, 83.308, 88.250, 92.689, 98.028, 109.791, 84.418, 89.391, 93.856, 99.228, 111.055, 85.527, 90.531, 95.023, 100.425, 112.317, 86.635, 91.670, 96.189, 101.621, 113.577, 87.743, 92.808, 97.353, 102.816, 114.835, 88.850, 93.945, 98.516, 104.010, 116.092, 89.956, 95.081, 99.678, 105.202, 117.346, 91.061, 96.217, 100.839, 106.393, 118.599, 92.166, 97.351, 101.999, 107.583, 119.850, 93.270, 98.484, 103.158, 108.771, 121.100, 94.374, 99.617, 104.316, 109.958, 122.348, 95.476, 100.749, 105.473, 111.144, 123.594, 96.578, 101.879, 106.629, 112.329, 124.839, 97.680, 103.010, 107.783, 113.512, 126.083, 98.780, 104.139, 108.937, 114.695, 127.324, 99.880, 105.267, 110.090, 115.876, 128.565, 100.980, 106.395, 111.242, 117.057, 129.804, 102.079, 107.522, 112.393, 118.236, 131.041, 103.177, 108.648, 113.544, 119.414, 132.277, 104.275, 109.773, 114.693, 120.591, 133.512, 105.372, 110.898, 115.841, 121.767, 134.746, 106.469, 112.022, 116.989, 122.942, 135.978, 107.565, 113.145, 118.136, 124.116, 137.208, 108.661, 114.268, 119.282, 125.289, 138.438, 109.756, 115.390, 120.427, 126.462, 139.666, 110.850, 116.511, 121.571, 127.633, 140.893, 111.944, 117.632, 122.715, 128.803, 142.119, 113.038, 118.752, 123.858, 129.973, 143.344, 114.131, 119.871, 125.000, 131.141, 144.567, 115.223, 120.990, 126.141, 132.309, 145.789, 116.315, 122.108, 127.282, 133.476, 147.010, 117.407, 123.225, 128.422, 134.642, 148.230, 118.498, 124.342, 129.561, 135.807, 149.449, 118.498, 124.342, 129.561, 135.807, 149.449, }; }