/* * File: LearningExperimentExample.java * Authors: Kevin R. Dixon * Company: Sandia National Laboratories * Project: Cognitive Foundry * * Copyright Dec 19, 2007, Sandia Corporation. Under the terms of Contract * DE-AC04-94AL85000, there is a non-exclusive license for use of this work by * or on behalf of the U.S. Government. Export of this program may require a * license from the United States Government. See CopyrightHistory.txt for * complete details. * */ package examples; import gov.sandia.cognition.learning.algorithm.minimization.FunctionMinimizerLiuStorey; import gov.sandia.cognition.learning.algorithm.regression.LinearRegression; import gov.sandia.cognition.learning.algorithm.regression.ParameterDifferentiableCostMinimizer; import gov.sandia.cognition.learning.data.InputOutputPair; import gov.sandia.cognition.learning.experiment.CrossFoldCreator; import gov.sandia.cognition.learning.experiment.SupervisedLearnerValidationExperiment; import gov.sandia.cognition.learning.algorithm.nearest.KNearestNeighborExhaustive; import gov.sandia.cognition.learning.data.DefaultInputOutputPair; import gov.sandia.cognition.learning.experiment.SupervisedLearnerComparisonExperiment; import gov.sandia.cognition.learning.function.cost.MeanSquaredErrorCostFunction; import gov.sandia.cognition.learning.function.distance.EuclideanDistanceMetric; import gov.sandia.cognition.learning.function.scalar.AtanFunction; import gov.sandia.cognition.learning.function.scalar.VectorFunctionToScalarFunction; import gov.sandia.cognition.learning.function.vector.ThreeLayerFeedforwardNeuralNetwork; import gov.sandia.cognition.learning.performance.RootMeanSquaredErrorEvaluator; import gov.sandia.cognition.math.NumberAverager; import gov.sandia.cognition.math.matrix.Vector; import gov.sandia.cognition.math.matrix.VectorFactory; import gov.sandia.cognition.statistics.method.ConfidenceInterval; import gov.sandia.cognition.statistics.method.StudentTConfidence; import gov.sandia.cognition.util.ObjectUtil; import java.util.ArrayList; import java.util.Collection; import java.util.Random; /** * This tutorial demonstrates how to construct some sophisticated supervised * learning algorithms and compare their performance using * statistical-validation techniques. * * @author Kevin R. Dixon * @since 2.0 */ public class LearningExperimentExample { /** * Random-number generator, created with the same seed, used to generate * random, but repeatable, numbers for this example */ private Random randomNumberGenerator = new Random( 1 ); /** * Main method * @param argv * We don't take any command-line arguments */ public static void main( String[] argv ) { new LearningExperimentExample(); } /** * This is where the tutorial takes place */ public LearningExperimentExample() { final int num = 100; final int dimensionality = 3; // Step 1.0: create the dataset // Step 1.1: create the (random) inputs. We're not interested in any // particular data, so just create some random stuff. Collection<Vector> inputs = this.createInputData( num, dimensionality ); // Step 1.2: create the corresponding targets using a random neural // network. Collection<Double> targets = this.createTargetData( dimensionality, inputs ); // Step 1.3: merge the inputs and targets to create a supervised // (labeled) training set ArrayList<DefaultInputOutputPair<Vector, Double>> labeledDataset = DefaultInputOutputPair.mergeCollections( inputs, targets ); // Step 2.0: set up the learning experiment parameters // Step 2.1: We need to choose an objective manner to evaluate the // performance of a learning algorithm. A very common measure is // Root Mean Squared (RMS) Error RootMeanSquaredErrorEvaluator<Vector> rms = new RootMeanSquaredErrorEvaluator<Vector>(); // Step 2.2: We need to capture a notion about the confidence interval // about the performance of the algorithm on cross-validation data. // This will tell us, for example, that with 95% confidence the // RMS Error is between 0.1 and 0.2. // A very common measure for computing confidence intervals is // a Student-t distribution and a 95% confidence interval is the // standard amongst social scientists. final double confidence = 0.95; StudentTConfidence.Summary tdistribution = new StudentTConfidence.Summary( confidence ); // Step 2.3: We need to chop up our data to create training sets, // that the learning algorithm uses to tune its parameters, and // cross-validation sets that are withheld during training, but used to // evaluate its performance. This cross-validation performance is // supposed to shed light on how well the learning algorithm will // generalize to a larger universe of data. // A typical strategy for creating cross-validation sets from a large // dataset is to chop the data into n "folds", and train on n-1 folds // and use the remaining fold to test the data. This process is // repeated for each of the n folds. We'll create 10 folds in this // example. final int numFolds = 10; CrossFoldCreator<InputOutputPair<Vector, Double>> foldCreator = new CrossFoldCreator<InputOutputPair<Vector, Double>>( numFolds, this.randomNumberGenerator ); // Step 3.0: Create the experiment framework. Since we have labeled // input-output pairs, this is a Supervised Learner experiment. // The generic parameters mean // - The inputs are Vectors // - The ouputs (labels, targets, etc.) are Doubles // - The performance measure is a Double (RMS) // - The experiment statistic is a ConfidenceInterval // (output of the t-test) SupervisedLearnerValidationExperiment<Vector, Double, Double, ConfidenceInterval> experiment = new SupervisedLearnerValidationExperiment<Vector, Double, Double, ConfidenceInterval>( foldCreator, rms, tdistribution ); // Let's try the simplest learning algorithm first: k-nearest neighbor. // This learning algorithm just returns the average of the outputs // to the nearest "k" inputs we've seen. // We're going to use k=3 neighbors, Euclidean distance being used // to determine "closeness", and a simple arithmetic mean (average) // to average the outputs together final int numNeighbors = 3; KNearestNeighborExhaustive.Learner<Vector, Double> knn = new KNearestNeighborExhaustive.Learner<Vector, Double>( numNeighbors, new EuclideanDistanceMetric(), new NumberAverager() ); ConfidenceInterval knnResult = experiment.evaluatePerformance( knn, labeledDataset ); // This print statement will read: // "Pr{0.3908356360206158<=x(0.46258811033058195)<=0.5343405846405481} >= 0.95, Based on 10 samples" // This means that, with 95% confidence, the RMS error on the interval // [0.391, 0.534] and we used 10 samples (folds) to determine this result // Not bad, eh? System.out.println( ObjectUtil.getShortClassName(knn) + " Result: " + knnResult ); // Let's use a (slightly) more sophisticated learning algorithm and // compare. // Linear regression determines the best (linear) fit to the data. // In this case, we're just fitting a single vector of weights to // the input set using the regression algorithm. LinearRegression regression = new LinearRegression(); ConfidenceInterval regressionResult = experiment.evaluatePerformance( regression, labeledDataset ); // The print statement will read: // "Pr{0.7094902733851925<=x(0.7692147911561242)<=0.8289393089270559} >= 0.95, Based on 10 samples" // We can see that, with 95% confidence, the RMS error for regression is // [0.709, 0.829]. This is statistically significantly worse than // just using 3-nearest neighbor. System.out.println( ObjectUtil.getShortClassName(regression) + " Result: " + regressionResult ); // Let's create a neural network and see how well that performs // This is a neural network with a differentiable with "dimensionality" // inputs, "dimensionality*2" hidden units, and one output ThreeLayerFeedforwardNeuralNetwork ann = new ThreeLayerFeedforwardNeuralNetwork( dimensionality, 2*dimensionality, 1 ); // Let's use the conjugate gradient learning algorithm to optimize the // weights of the neural net. Use a mean-squared error cost function // to evaluate the performance of the neural network. // We have to use "ParameterDifferentiableCostMinimizer" to use // conjugate gradient because CG is a minimization algorithm, not // a parameter-optimization algorithm. The // "ParameterDifferentiableCostMinimizer" acts as a bridge between // minmization algorithms like CG and BFGS for the purpose of finding // minimum-cost parameters for functions like neural nets. ParameterDifferentiableCostMinimizer conjugateGradient = new ParameterDifferentiableCostMinimizer( new FunctionMinimizerLiuStorey() ); conjugateGradient.setObjectToOptimize( ann ); conjugateGradient.setCostFunction( new MeanSquaredErrorCostFunction() ); // However, a neural network maps Vectors to Vectors, whereas our // training data are Vectors to Doubles. So, let's create an adapter // that maps a Vector->Vector function to a Vector->Double function VectorFunctionToScalarFunction.Learner<Vector> adapterLeaner = new VectorFunctionToScalarFunction.Learner<Vector>( conjugateGradient ); ConfidenceInterval annResult = experiment.evaluatePerformance(adapterLeaner, labeledDataset ); // This print statement will read: // "Pr{0.04863023231715411<=x(0.1053300627328921)<=0.1620298931486301} >= 0.95, Based on 10 sampels" // We can see that, with 95% confidence, the RMS error for ANN with CG // is [0.049, 0.162]. This is statistically significantly better than // any other learner we've tried (not surprising, since this is the // functional form used to generate the targets!). System.out.println( ObjectUtil.getShortClassName(ann) + " Result: " + annResult ); // However, this comparison of learners is somewhat ad hoc. Wouldn't // it be great to run an experiment to determine if one of two // learners was significantly better than another? // As luck would have it, we've created this class for you!! StudentTConfidence ttest = new StudentTConfidence(); SupervisedLearnerComparisonExperiment<Vector,Double, Number, ConfidenceInterval> comparison = new SupervisedLearnerComparisonExperiment<Vector, Double, Number, ConfidenceInterval>( foldCreator, rms, ttest, tdistribution ); comparison.evaluate(adapterLeaner, knn, labeledDataset); // We can see from this experiment that the chance that the neural net // performs (statistically) identically to the k-nearest neighbor // learner on this data is is given by the statement // "nullHypothesisProbability = 1.798046487768712E-4" // In other words, the p-value is p<1.8e-4 (0.018%), which is tiny. // Therefore, we can confidently say that using the neural net for this // problem is significantly better than k-nearest neighbor (again, not // surprising since a neural net was used to generate the dataset!). // If you're interested, the ObjectUtil print statement also contains // all the associated information for reporting the results of a // t-test, such as "t = 6.097428929046153" and "degreesOfFreedom = 9.0" // All the confidence statistics from our statistical package will // contain all their necessary information as well. System.out.println( "Confidence Statistic:\n" + ObjectUtil.toString(comparison.getConfidence()) ); } /** * Creates "num" Vectors of equal dimension "dimensionality" * @param num * Number of Vectors to create * @param dimensionality * Dimensionality of each Vector * @return * Collection of "num" Vectors each of dimension "dimensionality" */ public Collection<Vector> createInputData( final int num, final int dimensionality ) { // All we're doing here is creating "num" random Vectors of // equal dimenension "dimensionality" final double randomRange = 10.0; ArrayList<Vector> inputs = new ArrayList<Vector>( num ); for (int n = 0; n < num; n++) { inputs.add( VectorFactory.getDefault().createUniformRandom( dimensionality, -randomRange, randomRange, this.randomNumberGenerator ) ); } return inputs; } /** * Creates "num" random Double targets as the response to a random * neural network * * @param dimensionality * Dimensionality of the input Vectors * @param inputs * Input data * @return * Collection of Doubles that are the response of a random neural net to * the inputs */ public Collection<Double> createTargetData( int dimensionality, Collection<Vector> inputs ) { // Let's just create a neural network to create targets. // This neural net has "dimensionality" inputs, "dimensionality*2" // hidden units, and one output node. It uses an atan (Arctangent) // function on each unit // Let's set all the parameters to random numbers on [-1,+1] ThreeLayerFeedforwardNeuralNetwork ann = new ThreeLayerFeedforwardNeuralNetwork( dimensionality, dimensionality*2, 1, new AtanFunction(), 2,1.0); ArrayList<Double> targets = new ArrayList<Double>( inputs.size() ); for (Vector input : inputs) { // Let the output be the neural net's response to each input Vector output = ann.evaluate( input ); // But we're looking for a Double, not a Vector, so just snarf // the zeroth element from the Vector and add it to the targets targets.add( output.getElement( 0 ) ); } return targets; } }