/*******************************************************************************
* Copyright (c) 2010 Haifeng Li
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package smile.regression;
import smile.sort.QuickSort;
import smile.validation.Validation;
import smile.validation.CrossValidation;
import smile.data.AttributeDataset;
import smile.data.parser.ArffParser;
import smile.math.Math;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
/**
*
* @author Haifeng Li
*/
public class GradientTreeBoostTest {
public GradientTreeBoostTest() {
}
@BeforeClass
public static void setUpClass() throws Exception {
}
@AfterClass
public static void tearDownClass() throws Exception {
}
@Before
public void setUp() {
}
@After
public void tearDown() {
}
public void test(GradientTreeBoost.Loss loss, String dataset, String url, int response) {
System.out.println(dataset + "\t" + loss);
ArffParser parser = new ArffParser();
parser.setResponseIndex(response);
try {
AttributeDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile(url));
double[] datay = data.toArray(new double[data.size()]);
double[][] datax = data.toArray(new double[data.size()][]);
int n = datax.length;
int k = 10;
CrossValidation cv = new CrossValidation(n, k);
double rss = 0.0;
double ad = 0.0;
for (int i = 0; i < k; i++) {
double[][] trainx = Math.slice(datax, cv.train[i]);
double[] trainy = Math.slice(datay, cv.train[i]);
double[][] testx = Math.slice(datax, cv.test[i]);
double[] testy = Math.slice(datay, cv.test[i]);
GradientTreeBoost boost = new GradientTreeBoost(data.attributes(), trainx, trainy, loss, 100, 6, 0.05, 0.7);
for (int j = 0; j < testx.length; j++) {
double r = testy[j] - boost.predict(testx[j]);
ad += Math.abs(r);
rss += r * r;
}
}
System.out.format("10-CV RMSE = %.4f \t AbsoluteDeviation = %.4f%n", Math.sqrt(rss/n), ad/n);
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of learn method, of class RegressionTree.
*/
@Test
public void testLS() {
test(GradientTreeBoost.Loss.LeastSquares, "CPU", "weka/cpu.arff", 6);
//test(GradientTreeBoost.Loss.LeastSquares, "2dplanes", "weka/regression/2dplanes.arff", 6);
//test(GradientTreeBoost.Loss.LeastSquares, "abalone", "weka/regression/abalone.arff", 8);
//test(GradientTreeBoost.Loss.LeastSquares, "ailerons", "weka/regression/ailerons.arff", 40);
//test(GradientTreeBoost.Loss.LeastSquares, "bank32nh", "weka/regression/bank32nh.arff", 32);
test(GradientTreeBoost.Loss.LeastSquares, "autoMPG", "weka/regression/autoMpg.arff", 7);
test(GradientTreeBoost.Loss.LeastSquares, "cal_housing", "weka/regression/cal_housing.arff", 8);
//test(GradientTreeBoost.Loss.LeastSquares, "puma8nh", "weka/regression/puma8nh.arff", 8);
//test(GradientTreeBoost.Loss.LeastSquares, "kin8nm", "weka/regression/kin8nm.arff", 8);
}
/**
* Test of learn method, of class RegressionTree.
*/
@Test
public void testLAD() {
test(GradientTreeBoost.Loss.LeastAbsoluteDeviation, "CPU", "weka/cpu.arff", 6);
//test(GradientTreeBoost.Loss.LeastAbsoluteDeviation, "2dplanes", "weka/regression/2dplanes.arff", 6);
//test(GradientTreeBoost.Loss.LeastAbsoluteDeviation, "abalone", "weka/regression/abalone.arff", 8);
//test(GradientTreeBoost.Loss.LeastAbsoluteDeviation, "ailerons", "weka/regression/ailerons.arff", 40);
//test(GradientTreeBoost.Loss.LeastAbsoluteDeviation, "bank32nh", "weka/regression/bank32nh.arff", 32);
test(GradientTreeBoost.Loss.LeastAbsoluteDeviation, "autoMPG", "weka/regression/autoMpg.arff", 7);
test(GradientTreeBoost.Loss.LeastAbsoluteDeviation, "cal_housing", "weka/regression/cal_housing.arff", 8);
//test(GradientTreeBoost.Loss.LeastAbsoluteDeviation, "puma8nh", "weka/regression/puma8nh.arff", 8);
//test(GradientTreeBoost.Loss.LeastAbsoluteDeviation, "kin8nm", "weka/regression/kin8nm.arff", 8);
}
/**
* Test of learn method, of class RegressionTree.
*/
@Test
public void testHuber() {
test(GradientTreeBoost.Loss.Huber, "CPU", "weka/cpu.arff", 6);
//test(GradientTreeBoost.Loss.Huber, "2dplanes", "weka/regression/2dplanes.arff", 6);
//test(GradientTreeBoost.Loss.Huber, "abalone", "weka/regression/abalone.arff", 8);
//test(GradientTreeBoost.Loss.Huber, "ailerons", "weka/regression/ailerons.arff", 40);
//test(GradientTreeBoost.Loss.Huber, "bank32nh", "weka/regression/bank32nh.arff", 32);
test(GradientTreeBoost.Loss.Huber, "autoMPG", "weka/regression/autoMpg.arff", 7);
test(GradientTreeBoost.Loss.Huber, "cal_housing", "weka/regression/cal_housing.arff", 8);
//test(GradientTreeBoost.Loss.Huber, "puma8nh", "weka/regression/puma8nh.arff", 8);
//test(GradientTreeBoost.Loss.Huber, "kin8nm", "weka/regression/kin8nm.arff", 8);
}
/**
* Test of learn method, of class GradientTreeBoost.
*/
@Test
public void testCPU() {
System.out.println("CPU");
ArffParser parser = new ArffParser();
parser.setResponseIndex(6);
try {
AttributeDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/cpu.arff"));
double[] datay = data.toArray(new double[data.size()]);
double[][] datax = data.toArray(new double[data.size()][]);
int n = datax.length;
int m = 3 * n / 4;
int[] index = Math.permutate(n);
double[][] trainx = new double[m][];
double[] trainy = new double[m];
for (int i = 0; i < m; i++) {
trainx[i] = datax[index[i]];
trainy[i] = datay[index[i]];
}
double[][] testx = new double[n-m][];
double[] testy = new double[n-m];
for (int i = m; i < n; i++) {
testx[i-m] = datax[index[i]];
testy[i-m] = datay[index[i]];
}
GradientTreeBoost boost = new GradientTreeBoost(data.attributes(), trainx, trainy, 100);
System.out.format("RMSE = %.4f%n", Validation.test(boost, testx, testy));
double[] rmse = boost.test(testx, testy);
for (int i = 1; i <= rmse.length; i++) {
System.out.format("%d trees RMSE = %.4f%n", i, rmse[i-1]);
}
double[] importance = boost.importance();
index = QuickSort.sort(importance);
for (int i = importance.length; i-- > 0; ) {
System.out.format("%s importance is %.4f%n", data.attributes()[index[i]], importance[i]);
}
} catch (Exception ex) {
System.err.println(ex);
}
}
}