/******************************************************************************* * Copyright (c) 2010 Haifeng Li * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ package smile.classification; import smile.sort.QuickSort; import smile.data.Attribute; import smile.data.NominalAttribute; import smile.data.parser.DelimitedTextParser; import smile.validation.LOOCV; import smile.data.AttributeDataset; import smile.data.parser.ArffParser; import smile.math.Math; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.*; /** * * @author Haifeng */ public class DecisionTreeTest { public DecisionTreeTest() { } @BeforeClass public static void setUpClass() throws Exception { } @AfterClass public static void tearDownClass() throws Exception { } @Before public void setUp() { } @After public void tearDown() { } /** * Test of learn method, of class DecisionTree. */ @Test public void testWeather() { System.out.println("Weather"); ArffParser arffParser = new ArffParser(); arffParser.setResponseIndex(4); try { AttributeDataset weather = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/weather.nominal.arff")); double[][] x = weather.toArray(new double[weather.size()][]); int[] y = weather.toArray(new int[weather.size()]); int n = x.length; LOOCV loocv = new LOOCV(n); int error = 0; for (int i = 0; i < n; i++) { double[][] trainx = Math.slice(x, loocv.train[i]); int[] trainy = Math.slice(y, loocv.train[i]); DecisionTree tree = new DecisionTree(weather.attributes(), trainx, trainy, 3); if (y[loocv.test[i]] != tree.predict(x[loocv.test[i]])) error++; } System.out.println("Decision Tree error = " + error); assertEquals(5, error); } catch (Exception ex) { System.err.println(ex); } } /** * Test of learn method, of class DecisionTree. */ @Test public void testIris() { System.out.println("Iris"); ArffParser arffParser = new ArffParser(); arffParser.setResponseIndex(4); try { AttributeDataset iris = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/iris.arff")); double[][] x = iris.toArray(new double[iris.size()][]); int[] y = iris.toArray(new int[iris.size()]); int n = x.length; LOOCV loocv = new LOOCV(n); int error = 0; for (int i = 0; i < n; i++) { double[][] trainx = Math.slice(x, loocv.train[i]); int[] trainy = Math.slice(y, loocv.train[i]); DecisionTree tree = new DecisionTree(iris.attributes(), trainx, trainy, 4); if (y[loocv.test[i]] != tree.predict(x[loocv.test[i]])) error++; } System.out.println("Decision Tree error = " + error); assertEquals(7, error); } catch (Exception ex) { System.err.println(ex); } } /** * Test of learn method, of class DecisionTree. */ @Test public void testUSPS() { System.out.println("USPS"); DelimitedTextParser parser = new DelimitedTextParser(); parser.setResponseIndex(new NominalAttribute("class"), 0); try { AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train")); AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test")); double[][] x = train.toArray(new double[train.size()][]); int[] y = train.toArray(new int[train.size()]); double[][] testx = test.toArray(new double[test.size()][]); int[] testy = test.toArray(new int[test.size()]); DecisionTree tree = new DecisionTree(x, y, 350, DecisionTree.SplitRule.ENTROPY); int error = 0; for (int i = 0; i < testx.length; i++) { if (tree.predict(testx[i]) != testy[i]) { error++; } } System.out.format("USPS error rate = %.2f%%%n", 100.0 * error / testx.length); assertEquals(328, error); } catch (Exception ex) { System.err.println(ex); } } /** * Test of learn method, of class DecisionTree. */ @Test public void testUSPSNominal() { System.out.println("USPS nominal"); DelimitedTextParser parser = new DelimitedTextParser(); parser.setResponseIndex(new NominalAttribute("class"), 0); try { AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train")); AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test")); double[][] x = train.toArray(new double[train.size()][]); int[] y = train.toArray(new int[train.size()]); double[][] testx = test.toArray(new double[test.size()][]); int[] testy = test.toArray(new int[test.size()]); for (double[] xi : x) { for (int i = 0; i < xi.length; i++) { xi[i] = Math.round(255*(xi[i]+1)/2); } } for (double[] xi : testx) { for (int i = 0; i < xi.length; i++) { xi[i] = Math.round(127 + 127*xi[i]); } } Attribute[] attributes = new Attribute[256]; String[] values = new String[attributes.length]; for (int i = 0; i < attributes.length; i++) { values[i] = String.valueOf(i); } for (int i = 0; i < attributes.length; i++) { attributes[i] = new NominalAttribute("V"+i, values); } DecisionTree tree = new DecisionTree(attributes, x, y, 350, 2, DecisionTree.SplitRule.ENTROPY); int error = 0; for (int i = 0; i < testx.length; i++) { if (tree.predict(testx[i]) != testy[i]) { error++; } } System.out.format("USPS error rate = %.2f%%%n", 100.0 * error / testx.length); double[] importance = tree.importance(); int[] index = QuickSort.sort(importance); for (int i = importance.length; i-- > 0; ) { System.out.format("%s importance is %.4f%n", train.attributes()[index[i]], importance[i]); } assertEquals(324, error); } catch (Exception ex) { System.err.println(ex); } } }