LearningExperimentExample.java example

Explorer
Foundry-master
- Components
/*
 * File:                LearningExperimentExample.java
 * Authors:             Kevin R. Dixon
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright Dec 19, 2007, Sandia Corporation.  Under the terms of Contract
 * DE-AC04-94AL85000, there is a non-exclusive license for use of this work by
 * or on behalf of the U.S. Government. Export of this program may require a
 * license from the United States Government. See CopyrightHistory.txt for
 * complete details.
 * 
 */

package examples;

import gov.sandia.cognition.learning.algorithm.minimization.FunctionMinimizerLiuStorey;
import gov.sandia.cognition.learning.algorithm.regression.LinearRegression;
import gov.sandia.cognition.learning.algorithm.regression.ParameterDifferentiableCostMinimizer;
import gov.sandia.cognition.learning.data.InputOutputPair;
import gov.sandia.cognition.learning.experiment.CrossFoldCreator;
import gov.sandia.cognition.learning.experiment.SupervisedLearnerValidationExperiment;
import gov.sandia.cognition.learning.algorithm.nearest.KNearestNeighborExhaustive;
import gov.sandia.cognition.learning.data.DefaultInputOutputPair;
import gov.sandia.cognition.learning.experiment.SupervisedLearnerComparisonExperiment;
import gov.sandia.cognition.learning.function.cost.MeanSquaredErrorCostFunction;
import gov.sandia.cognition.learning.function.distance.EuclideanDistanceMetric;
import gov.sandia.cognition.learning.function.scalar.AtanFunction;
import gov.sandia.cognition.learning.function.scalar.VectorFunctionToScalarFunction;
import gov.sandia.cognition.learning.function.vector.ThreeLayerFeedforwardNeuralNetwork;
import gov.sandia.cognition.learning.performance.RootMeanSquaredErrorEvaluator;
import gov.sandia.cognition.math.NumberAverager;
import gov.sandia.cognition.math.matrix.Vector;
import gov.sandia.cognition.math.matrix.VectorFactory;
import gov.sandia.cognition.statistics.method.ConfidenceInterval;
import gov.sandia.cognition.statistics.method.StudentTConfidence;
import gov.sandia.cognition.util.ObjectUtil;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Random;

/**
 * This tutorial demonstrates how to construct some sophisticated supervised 
 * learning algorithms and compare their performance using 
 * statistical-validation techniques.
 * 
 * @author Kevin R. Dixon
 * @since 2.0
 */
public class LearningExperimentExample
{

    /**
     * Random-number generator created from a fixed seed, so that the random
     * numbers drawn in this example are repeatable from run to run.  It is
     * used to generate the input data and is handed to the fold creator.
     */
    private final Random randomNumberGenerator = new Random( 1 );

    /**
     * Entry point: runs the tutorial by constructing the example object.
     * @param argv
     * Ignored, we don't take any command-line arguments
     */
    public static void main( String[] argv )
    {

        new LearningExperimentExample();

    }

    /**
     * This is where the tutorial takes place.  It builds a synthetic labeled
     * dataset, evaluates three learners (k-nearest neighbor, linear
     * regression, and a neural network trained by conjugate gradient) with
     * cross-validation, prints a confidence interval for each, and finally
     * prints the confidence statistic from comparing the neural network
     * against k-nearest neighbor.
     */
    public LearningExperimentExample()
    {

        final int numSamples = 100;
        final int dimensionality = 3;

        // Step 1.0: create the dataset

        // Step 1.1: create the (random) inputs. We're not interested in any
        // particular data, so just create some random stuff.
        // NOTE(review): createInputData and createTargetData are public and
        // overridable, yet are called from this constructor.  A subclass
        // override would run before the subclass's own fields are
        // initialized.  Left as-is to keep the public interface unchanged.
        Collection<Vector> inputs =
            this.createInputData( numSamples, dimensionality );

        // Step 1.2: create the corresponding targets using a random neural 
        // network.
        Collection<Double> targets =
            this.createTargetData( dimensionality, inputs );

        // Step 1.3: merge the inputs and targets to create a supervised 
        // (labeled) training set
        ArrayList<DefaultInputOutputPair<Vector, Double>> labeledDataset =
            DefaultInputOutputPair.mergeCollections( inputs, targets );

        // Step 2.0: set up the learning experiment parameters

        // Step 2.1: We need to choose an objective manner to evaluate the 
        // performance of a learning algorithm.  A very common measure is
        // Root Mean Squared (RMS) Error
        RootMeanSquaredErrorEvaluator<Vector> rms =
            new RootMeanSquaredErrorEvaluator<Vector>();

        // Step 2.2: We need to capture a notion of the confidence interval
        // around the performance of the algorithm on cross-validation data.
        // This will tell us, for example, that with 95% confidence the
        // RMS Error is between 0.1 and 0.2.
        // A very common way of computing confidence intervals is to use
        // a Student-t distribution, and a 95% confidence interval is the
        // standard amongst social scientists.
        final double confidence = 0.95;
        StudentTConfidence.Summary tdistribution =
            new StudentTConfidence.Summary( confidence );

        // Step 2.3: We need to chop up our data to create training sets,
        // which the learning algorithm uses to tune its parameters, and 
        // cross-validation sets, which are withheld during training but used
        // to evaluate its performance.  This cross-validation performance is
        // supposed to shed light on how well the learning algorithm will
        // generalize to a larger universe of data.
        // A typical strategy for creating cross-validation sets from a large
        // dataset is to chop the data into n "folds", train on n-1 folds,
        // and use the remaining fold to test.  This process is repeated for
        // each of the n folds.  We'll create 10 folds in this example.
        final int numFolds = 10;
        CrossFoldCreator<InputOutputPair<Vector, Double>> foldCreator =
            new CrossFoldCreator<InputOutputPair<Vector, Double>>(
            numFolds, this.randomNumberGenerator );

        // Step 3.0: Create the experiment framework.  Since we have labeled
        // input-output pairs, this is a Supervised Learner experiment.
        // The generic parameters mean
        //      - The inputs are Vectors
        //      - The outputs (labels, targets, etc.) are Doubles
        //      - The performance measure is a Double (RMS)
        //      - The experiment statistic is a ConfidenceInterval 
        // (output of the t-test)
        SupervisedLearnerValidationExperiment<Vector, Double, Double, ConfidenceInterval> experiment =
            new SupervisedLearnerValidationExperiment<Vector, Double, Double, ConfidenceInterval>(
                foldCreator, rms, tdistribution  );

        // Let's try the simplest learning algorithm first: k-nearest neighbor.
        // This learning algorithm just returns the average of the outputs
        // of the nearest "k" inputs we've seen.
        // We're going to use k=3 neighbors, with Euclidean distance used
        // to determine "closeness", and a simple arithmetic mean (average)
        // to average the outputs together
        final int numNeighbors = 3;
        KNearestNeighborExhaustive.Learner<Vector, Double> knn =
            new KNearestNeighborExhaustive.Learner<Vector, Double>(
            numNeighbors, new EuclideanDistanceMetric(), new NumberAverager() );
        ConfidenceInterval knnResult =
            experiment.evaluatePerformance( knn, labeledDataset );

        // This print statement will read:
        // "Pr{0.3908356360206158<=x(0.46258811033058195)<=0.5343405846405481} >= 0.95, Based on 10 samples"
        // This means that, with 95% confidence, the RMS error is on the
        // interval [0.391, 0.534], and we used 10 samples (folds) to
        // determine this result.  Not bad, eh?
        System.out.println( ObjectUtil.getShortClassName(knn) + " Result: " + knnResult );

        // Let's use a (slightly) more sophisticated learning algorithm and 
        // compare.
        // Linear regression determines the best (linear) fit to the data.
        // In this case, we're just fitting a single vector of weights to
        // the input set using the regression algorithm.
        LinearRegression regression = new LinearRegression();
        ConfidenceInterval regressionResult = 
            experiment.evaluatePerformance( regression, labeledDataset );

        // The print statement will read:
        // "Pr{0.7094902733851925<=x(0.7692147911561242)<=0.8289393089270559} >= 0.95, Based on 10 samples"
        // We can see that, with 95% confidence, the RMS error for regression is
        // on [0.709, 0.829].  This is statistically significantly worse than
        // just using 3-nearest neighbor.
        System.out.println( ObjectUtil.getShortClassName(regression) + " Result: " + regressionResult );
        
        // Let's create a neural network and see how well that performs.
        // This is a neural network with "dimensionality" inputs,
        // "dimensionality*2" hidden units, and one output
        ThreeLayerFeedforwardNeuralNetwork ann =
            new ThreeLayerFeedforwardNeuralNetwork( dimensionality, 2*dimensionality, 1 );

        // Let's use the conjugate gradient learning algorithm to optimize the
        // weights of the neural net.  Use a mean-squared error cost function
        // to evaluate the performance of the neural network.
        // We have to use "ParameterDifferentiableCostMinimizer" to use
        // conjugate gradient because CG is a minimization algorithm, not
        // a parameter-optimization algorithm.  The
        // "ParameterDifferentiableCostMinimizer" acts as a bridge between
        // minimization algorithms like CG and BFGS for the purpose of finding
        // minimum-cost parameters for functions like neural nets.
        ParameterDifferentiableCostMinimizer conjugateGradient =
            new ParameterDifferentiableCostMinimizer(
                new FunctionMinimizerLiuStorey() );
        conjugateGradient.setObjectToOptimize( ann );
        conjugateGradient.setCostFunction( new MeanSquaredErrorCostFunction() );
        
        // However, a neural network maps Vectors to Vectors, whereas our
        // training data are Vectors to Doubles.  So, let's create an adapter
        // that maps a Vector->Vector function to a Vector->Double function
        VectorFunctionToScalarFunction.Learner<Vector> adapterLearner =  
            new VectorFunctionToScalarFunction.Learner<Vector>( conjugateGradient ); 
        
        ConfidenceInterval annResult =
            experiment.evaluatePerformance( adapterLearner, labeledDataset );

        // This print statement will read:
        // "Pr{0.04863023231715411<=x(0.1053300627328921)<=0.1620298931486301} >= 0.95, Based on 10 samples"
        // We can see that, with 95% confidence, the RMS error for ANN with CG
        // is on [0.049, 0.162].  This is statistically significantly better
        // than any other learner we've tried (not surprising, since this is
        // the functional form used to generate the targets!).
        System.out.println( ObjectUtil.getShortClassName(ann) + " Result: " + annResult );
        
        // However, this comparison of learners is somewhat ad hoc.  Wouldn't
        // it be great to run an experiment to determine if one of two
        // learners was significantly better than another?
        // As luck would have it, we've created this class for you!!
        StudentTConfidence ttest = new StudentTConfidence();

        SupervisedLearnerComparisonExperiment<Vector,Double, Number, ConfidenceInterval> comparison =
            new SupervisedLearnerComparisonExperiment<Vector, Double, Number, ConfidenceInterval>(
                foldCreator, rms, ttest, tdistribution );

        comparison.evaluate( adapterLearner, knn, labeledDataset );

        // We can see from this experiment that the chance that the neural net
        // performs (statistically) identically to the k-nearest neighbor
        // learner on this data is given by the statement
        // "nullHypothesisProbability = 1.798046487768712E-4"
        // In other words, the p-value is p<1.8e-4 (0.018%), which is tiny.
        // Therefore, we can confidently say that using the neural net for this
        // problem is significantly better than k-nearest neighbor (again, not
        // surprising since a neural net was used to generate the dataset!).
        // If you're interested, the ObjectUtil print statement also contains
        // all the associated information for reporting the results of a 
        // t-test, such as "t = 6.097428929046153" and "degreesOfFreedom = 9.0"
        // All the confidence statistics from our statistical package will 
        // contain all their necessary information as well.
        System.out.println( "Confidence Statistic:\n" + 
            ObjectUtil.toString(comparison.getConfidence()) );
        
    }

    /**
     * Creates "num" random Vectors of equal dimension "dimensionality", with
     * each Vector drawn from the default VectorFactory's uniform-random
     * creator over the range -10.0 to +10.0 using this example's
     * random-number generator.
     * @param num
     * Number of Vectors to create
     * @param dimensionality
     * Dimensionality of each Vector
     * @return
     * Collection of "num" Vectors each of dimension "dimensionality"
     */
    public Collection<Vector> createInputData(
        final int num,
        final int dimensionality )
    {
        // All we're doing here is creating "num" random Vectors of
        // equal dimension "dimensionality"
        final double randomRange = 10.0;
        ArrayList<Vector> inputs = new ArrayList<Vector>( num );
        for (int n = 0; n < num; n++)
        {
            inputs.add( VectorFactory.getDefault().createUniformRandom(
                dimensionality, -randomRange, randomRange, this.randomNumberGenerator ) );
        }
        return inputs;
    }

    /**
     * Creates one Double target per input Vector, where each target is the
     * response of a random neural network to that input.  The targets are
     * returned in the iteration order of "inputs".
     * 
     * @param dimensionality
     * Dimensionality of the input Vectors
     * @param inputs
     * Input data
     * @return
     * Collection of Doubles that are the response of a random neural net to
     * the inputs, one per input
     */
    public Collection<Double> createTargetData(
        final int dimensionality,
        final Collection<Vector> inputs )
    {

        // Let's just create a neural network to create targets.
        // This neural net has "dimensionality" inputs, "dimensionality*2" 
        // hidden units, and one output node.  It uses an atan (Arctangent)
        // function on each unit.
        // The intent is to set all the parameters to random numbers on
        // [-1,+1].
        // NOTE(review): the trailing arguments (2, 1.0) are presumably the
        // random seed and the initialization range -- confirm against the
        // ThreeLayerFeedforwardNeuralNetwork constructor documentation.
        ThreeLayerFeedforwardNeuralNetwork ann =
            new ThreeLayerFeedforwardNeuralNetwork(
                dimensionality, dimensionality*2, 1, new AtanFunction(), 2,1.0);

        ArrayList<Double> targets = new ArrayList<Double>( inputs.size() );
        for (Vector input : inputs)
        {
            // Let the output be the neural net's response to each input
            Vector output = ann.evaluate( input );

            // But we're looking for a Double, not a Vector, so just take
            // the zeroth element from the Vector and add it to the targets
            targets.add( output.getElement( 0 ) );
        }
        return targets;
    }

}