/*******************************************************************************
* Copyright (c) 2010 Haifeng Li
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package smile.validation;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;
import smile.classification.ClassifierTrainer;
import smile.classification.DecisionTree;
import smile.classification.LDA;
import smile.regression.RBFNetwork;
import smile.util.SmileUtils;
import smile.data.AttributeDataset;
import smile.data.NominalAttribute;
import smile.data.parser.ArffParser;
import smile.data.parser.DelimitedTextParser;
import smile.math.Math;
import smile.math.distance.EuclideanDistance;
import smile.math.rbf.RadialBasisFunction;
/**
*
* @author Haifeng
*/
public class ValidationTest {
public ValidationTest() {
}
@BeforeClass
public static void setUpClass() throws Exception {
}
@AfterClass
public static void tearDownClass() throws Exception {
}
@Before
public void setUp() {
}
@After
public void tearDown() {
}
/**
* Test of test method, of class Validation.
*/
@Test
public void testTest_3args_1() {
System.out.println("test");
DelimitedTextParser parser = new DelimitedTextParser();
parser.setResponseIndex(new NominalAttribute("class"), 0);
try {
AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
double[][] x = train.toArray(new double[train.size()][]);
int[] y = train.toArray(new int[train.size()]);
double[][] testx = test.toArray(new double[test.size()][]);
int[] testy = test.toArray(new int[test.size()]);
LDA lda = new LDA(x, y);
double accuracy = Validation.test(lda, testx, testy);
System.out.println("accuracy = " + accuracy);
assertEquals(0.8724, accuracy, 1E-4);
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of test method, of class Validation.
*/
@Test
public void testTest_3args_2() {
System.out.println("test");
ArffParser parser = new ArffParser();
parser.setResponseIndex(6);
try {
AttributeDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/cpu.arff"));
double[] datay = data.toArray(new double[data.size()]);
double[][] datax = data.toArray(new double[data.size()][]);
Math.standardize(datax);
int n = datax.length;
int m = 3 * n / 4;
double[][] x = new double[m][];
double[] y = new double[m];
double[][] testx = new double[n-m][];
double[] testy = new double[n-m];
int[] index = Math.permutate(n);
for (int i = 0; i < m; i++) {
x[i] = datax[index[i]];
y[i] = datay[index[i]];
}
for (int i = m; i < n; i++) {
testx[i-m] = datax[index[i]];
testy[i-m] = datay[index[i]];
}
double[][] centers = new double[20][];
RadialBasisFunction[] rbf = SmileUtils.learnGaussianRadialBasis(x, centers, 2);
RBFNetwork<double[]> rkhs = new RBFNetwork<>(x, y, new EuclideanDistance(), rbf, centers);
double rmse = Validation.test(rkhs, testx, testy);
System.out.println("RMSE = " + rmse);
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of test method, of class Validation.
*/
@Test
public void testTest_4args_1() {
System.out.println("test");
DelimitedTextParser parser = new DelimitedTextParser();
parser.setResponseIndex(new NominalAttribute("class"), 0);
try {
AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
double[][] x = train.toArray(new double[train.size()][]);
int[] y = train.toArray(new int[train.size()]);
double[][] testx = test.toArray(new double[test.size()][]);
int[] testy = test.toArray(new int[test.size()]);
LDA lda = new LDA(x, y);
ClassificationMeasure[] measures = {new Accuracy()};
double[] accuracy = Validation.test(lda, testx, testy, measures);
System.out.println("accuracy = " + accuracy[0]);
assertEquals(0.8724, accuracy[0], 1E-4);
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of test method, of class Validation.
*/
@Test
public void testTest_4args_2() {
System.out.println("test");
ArffParser parser = new ArffParser();
parser.setResponseIndex(6);
try {
AttributeDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/cpu.arff"));
double[] datay = data.toArray(new double[data.size()]);
double[][] datax = data.toArray(new double[data.size()][]);
Math.standardize(datax);
int n = datax.length;
int m = 3 * n / 4;
double[][] x = new double[m][];
double[] y = new double[m];
double[][] testx = new double[n-m][];
double[] testy = new double[n-m];
int[] index = Math.permutate(n);
for (int i = 0; i < m; i++) {
x[i] = datax[index[i]];
y[i] = datay[index[i]];
}
for (int i = m; i < n; i++) {
testx[i-m] = datax[index[i]];
testy[i-m] = datay[index[i]];
}
double[][] centers = new double[20][];
RadialBasisFunction[] rbf = SmileUtils.learnGaussianRadialBasis(x, centers, 2);
RBFNetwork<double[]> rkhs = new RBFNetwork<>(x, y, new EuclideanDistance(), rbf, centers);
RegressionMeasure[] measures = {new RMSE(), new AbsoluteDeviation()};
double[] results = Validation.test(rkhs, testx, testy, measures);
System.out.println("RMSE = " + results[0]);
System.out.println("Absolute Deviation = " + results[1]);
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of loocv method, of class Validation.
*/
@Test
public void testLoocv_3args_1() {
System.out.println("loocv");
ArffParser arffParser = new ArffParser();
arffParser.setResponseIndex(4);
try {
AttributeDataset iris = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/iris.arff"));
double[][] x = iris.toArray(new double[iris.size()][]);
int[] y = iris.toArray(new int[iris.size()]);
ClassifierTrainer<double[]> trainer = new LDA.Trainer();
double accuracy = Validation.loocv(trainer, x, y);
System.out.println("LOOCV accuracy = " + accuracy);
assertEquals(0.8533, accuracy, 1E-4);
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of loocv method, of class Validation.
*/
@Test
public void testLoocv_3args_2() {
System.out.println("loocv");
ArffParser parser = new ArffParser();
parser.setResponseIndex(6);
try {
AttributeDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/cpu.arff"));
double[] y = data.toArray(new double[data.size()]);
double[][] x = data.toArray(new double[data.size()][]);
Math.standardize(x);
RBFNetwork.Trainer<double[]> trainer = new RBFNetwork.Trainer<>(new EuclideanDistance());
trainer.setNumCenters(20);
double rmse = Validation.loocv(trainer, x, y);
System.out.println("RMSE = " + rmse);
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of loocv method, of class Validation.
*/
@Test
public void testLoocv_4args_1() {
System.out.println("loocv");
ArffParser arffParser = new ArffParser();
arffParser.setResponseIndex(4);
try {
AttributeDataset weather = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/weather.nominal.arff"));
double[][] x = weather.toArray(new double[weather.size()][]);
int[] y = weather.toArray(new int[weather.size()]);
DecisionTree.Trainer trainer = new DecisionTree.Trainer(3);
trainer.setAttributes(weather.attributes());
ClassificationMeasure[] measures = {new Accuracy(), new Recall(), new Precision()};
double[] results = Validation.loocv(trainer, x, y, measures);
for (int i = 0; i < measures.length; i++) {
System.out.println(measures[i] + " = " + results[i]);
}
assertEquals(0.6429, results[0], 1E-4);
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of loocv method, of class Validation.
*/
@Test
public void testLoocv_4args_2() {
System.out.println("loocv");
ArffParser parser = new ArffParser();
parser.setResponseIndex(6);
try {
AttributeDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/cpu.arff"));
double[] y = data.toArray(new double[data.size()]);
double[][] x = data.toArray(new double[data.size()][]);
Math.standardize(x);
RBFNetwork.Trainer<double[]> trainer = new RBFNetwork.Trainer<>(new EuclideanDistance());
trainer.setNumCenters(20);
RegressionMeasure[] measures = {new RMSE(), new AbsoluteDeviation()};
double[] results = Validation.loocv(trainer, x, y, measures);
System.out.println("RMSE = " + results[0]);
System.out.println("Absolute Deviation = " + results[1]);
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of cv method, of class Validation.
*/
@Test
public void testCv_4args_1() {
System.out.println("cv");
ArffParser arffParser = new ArffParser();
arffParser.setResponseIndex(4);
try {
AttributeDataset iris = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/iris.arff"));
double[][] x = iris.toArray(new double[iris.size()][]);
int[] y = iris.toArray(new int[iris.size()]);
ClassifierTrainer<double[]> trainer = new LDA.Trainer();
double accuracy = Validation.cv(10, trainer, x, y);
System.out.println("10-fold CV accuracy = " + accuracy);
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of cv method, of class Validation.
*/
@Test
public void testCv_4args_2() {
System.out.println("cv");
ArffParser parser = new ArffParser();
parser.setResponseIndex(6);
try {
AttributeDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/cpu.arff"));
double[] y = data.toArray(new double[data.size()]);
double[][] x = data.toArray(new double[data.size()][]);
Math.standardize(x);
RBFNetwork.Trainer<double[]> trainer = new RBFNetwork.Trainer<>(new EuclideanDistance());
trainer.setNumCenters(20);
double rmse = Validation.cv(10, trainer, x, y);
System.out.println("RMSE = " + rmse);
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of cv method, of class Validation.
*/
@Test
public void testCv_5args_1() {
System.out.println("cv");
ArffParser arffParser = new ArffParser();
arffParser.setResponseIndex(4);
try {
AttributeDataset iris = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/iris.arff"));
double[][] x = iris.toArray(new double[iris.size()][]);
int[] y = iris.toArray(new int[iris.size()]);
ClassifierTrainer<double[]> trainer = new LDA.Trainer();
ClassificationMeasure[] measures = {new Accuracy()};
double[] results = Validation.cv(10, trainer, x, y, measures);
for (int i = 0; i < measures.length; i++) {
System.out.println(measures[i] + " = " + results[i]);
}
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of cv method, of class Validation.
*/
@Test
public void testCv_5args_2() {
System.out.println("cv");
ArffParser parser = new ArffParser();
parser.setResponseIndex(6);
try {
AttributeDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/cpu.arff"));
double[] y = data.toArray(new double[data.size()]);
double[][] x = data.toArray(new double[data.size()][]);
Math.standardize(x);
RBFNetwork.Trainer<double[]> trainer = new RBFNetwork.Trainer<>(new EuclideanDistance());
trainer.setNumCenters(20);
RegressionMeasure[] measures = {new RMSE(), new AbsoluteDeviation()};
double[] results = Validation.cv(10, trainer, x, y, measures);
System.out.println("RMSE = " + results[0]);
System.out.println("Absolute Deviation = " + results[1]);
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of bootstrap method, of class Validation.
*/
@Test
public void testBootstrap_4args_1() {
System.out.println("bootstrap");
ArffParser arffParser = new ArffParser();
arffParser.setResponseIndex(4);
try {
AttributeDataset iris = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/iris.arff"));
double[][] x = iris.toArray(new double[iris.size()][]);
int[] y = iris.toArray(new int[iris.size()]);
ClassifierTrainer<double[]> trainer = new LDA.Trainer();
double[] accuracy = Validation.bootstrap(100, trainer, x, y);
System.out.println("100-fold bootstrap accuracy average = " + Math.mean(accuracy));
System.out.println("100-fold bootstrap accuracy std.dev = " + Math.sd(accuracy));
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of bootstrap method, of class Validation.
*/
@Test
public void testBootstrap_4args_2() {
System.out.println("bootstrap");
ArffParser parser = new ArffParser();
parser.setResponseIndex(6);
try {
AttributeDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/cpu.arff"));
double[] y = data.toArray(new double[data.size()]);
double[][] x = data.toArray(new double[data.size()][]);
Math.standardize(x);
RBFNetwork.Trainer<double[]> trainer = new RBFNetwork.Trainer<>(new EuclideanDistance());
trainer.setNumCenters(20);
double[] rmse = Validation.bootstrap(100, trainer, x, y);
System.out.println("100-fold bootstrap RMSE average = " + Math.mean(rmse));
System.out.println("100-fold bootstrap RMSE std.dev = " + Math.sd(rmse));
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of bootstrap method, of class Validation.
*/
@Test
public void testBootstrap_5args_1() {
System.out.println("bootstrap");
ArffParser arffParser = new ArffParser();
arffParser.setResponseIndex(4);
try {
AttributeDataset weather = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/weather.nominal.arff"));
double[][] x = weather.toArray(new double[weather.size()][]);
int[] y = weather.toArray(new int[weather.size()]);
DecisionTree.Trainer trainer = new DecisionTree.Trainer(3);
trainer.setAttributes(weather.attributes());
ClassificationMeasure[] measures = {new Accuracy(), new Recall(), new Precision()};
double[][] results = Validation.bootstrap(100, trainer, x, y, measures);
for (int i = 0; i < 100; i++) {
for (int j = 0; j < measures.length; j++) {
System.out.format("%s = %.4f\t", measures[j], results[i][j]);
}
System.out.println();
}
System.out.println("On average:");
double[] avg = Math.colMean(results);
for (int j = 0; j < measures.length; j++) {
System.out.format("%s = %.4f\t", measures[j], avg[j]);
}
} catch (Exception ex) {
System.err.println(ex);
}
}
/**
* Test of bootstrap method, of class Validation.
*/
@Test
public void testBootstrap_5args_2() {
System.out.println("bootstrap");
ArffParser parser = new ArffParser();
parser.setResponseIndex(6);
try {
AttributeDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/cpu.arff"));
double[] y = data.toArray(new double[data.size()]);
double[][] x = data.toArray(new double[data.size()][]);
Math.standardize(x);
RBFNetwork.Trainer<double[]> trainer = new RBFNetwork.Trainer<>(new EuclideanDistance());
trainer.setNumCenters(20);
RegressionMeasure[] measures = {new RMSE(), new AbsoluteDeviation()};
double[][] results = Validation.bootstrap(100, trainer, x, y, measures);
System.out.println("100-fold bootstrap RMSE average = " + Math.mean(results[0]));
System.out.println("100-fold bootstrap RMSE std.dev = " + Math.sd(results[0]));
System.out.println("100-fold bootstrap AbsoluteDeviation average = " + Math.mean(results[1]));
System.out.println("100-fold bootstrap AbsoluteDeviation std.dev = " + Math.sd(results[1]));
} catch (Exception ex) {
System.err.println(ex);
}
}
}