/*
* File: BayesianRegressionTestHarness.java
* Authors: Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright Apr 1, 2010, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government.
* Export of this program may require a license from the United States
* Government. See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.statistics.bayesian;
import gov.sandia.cognition.evaluator.Evaluator;
import gov.sandia.cognition.learning.algorithm.IncrementalLearner;
import gov.sandia.cognition.learning.algorithm.regression.LinearRegression;
import gov.sandia.cognition.learning.data.DefaultInputOutputPair;
import gov.sandia.cognition.learning.data.InputOutputPair;
import gov.sandia.cognition.learning.function.scalar.LinearDiscriminantWithBias;
import gov.sandia.cognition.math.matrix.Vector;
import gov.sandia.cognition.math.matrix.VectorFactory;
import gov.sandia.cognition.math.matrix.Vectorizable;
import gov.sandia.cognition.statistics.ClosedFormComputableDistribution;
import gov.sandia.cognition.statistics.ClosedFormDistribution;
import gov.sandia.cognition.statistics.Distribution;
import gov.sandia.cognition.statistics.UnivariateDistribution;
import gov.sandia.cognition.statistics.SufficientStatistic;
import gov.sandia.cognition.statistics.distribution.UnivariateGaussian;
import gov.sandia.cognition.statistics.method.ConfidenceInterval;
import gov.sandia.cognition.statistics.method.GaussianConfidence;
import gov.sandia.cognition.statistics.method.KolmogorovSmirnovConfidence;
import gov.sandia.cognition.util.AbstractCloneableSerializable;
import java.util.ArrayList;
import junit.framework.TestCase;
import java.util.Random;
/**
 * Test harness for implementations of {@code BayesianRegression}.
 * @param <PosteriorType> Posterior type
 * @author krdixon
 */
public abstract class BayesianRegressionTestHarness<PosteriorType extends ClosedFormDistribution<Vector>>
extends TestCase
{
/**
* Random number generator to use for a fixed random seed.
*/
public Random RANDOM = new Random( 1 );
/**
* Default tolerance of the regression tests, {@value}.
*/
public double TOLERANCE = 1e-5;
/**
* Default number of samples, {@value}.
*/
public static int NUM_SAMPLES = 10;
/**
* Default dim, {@value}.
*/
public static int DEFAULT_DIM = 10;
/**
* Default confidence, {@value}.
*/
public double CONFIDENCE = 0.95;
/**
* Tests for class BayesianRegressionTestHarness.
* @param testName Name of the test.
*/
public BayesianRegressionTestHarness(
String testName)
{
super(testName);
}
/**
* Creates an instance
* @return
* Instance
*/
abstract public BayesianRegression<Double,PosteriorType> createInstance();
/**
* Model
*/
public static class Model
extends AbstractCloneableSerializable
implements Evaluator<Double,UnivariateGaussian>
{
/**
* Variance
*/
double variance;
/**
* Model
* @param variance
* Variance
*/
public Model(
double variance)
{
this.variance = variance;
}
@Override
public UnivariateGaussian evaluate(
Double input)
{
double mean = Math.sin( 2.0*Math.PI * input );
return new UnivariateGaussian( mean, variance );
}
}
/**
* Creates the model of the data.
* @return
* Model
*/
public Model createModel()
{
return new Model( RANDOM.nextDouble() );
}
/**
* Creates inputs for the model.
* @param random Random
* @return
* Inputs for the model
*/
public static ArrayList<Double> createInputs(
Random random )
{
ArrayList<Double> samples = new ArrayList<Double>( NUM_SAMPLES );
for( int n = 0; n < NUM_SAMPLES; n++ )
{
samples.add( random.nextDouble() );
}
return samples;
}
/**
* RBF
*/
public static class RadialBasisVectorFunction
extends AbstractCloneableSerializable
implements Evaluator<Number,Vector>
{
/**
* Number of RBFs
*/
int num;
/**
* RBF
* @param num
* Number of RBFs
*/
public RadialBasisVectorFunction(
int num)
{
this.num = num;
}
@Override
public Vector evaluate(
Number input)
{
Vector x = VectorFactory.getDefault().createVector(num+1);
x.setElement(num-1, 1.0);
for( int i = 0; i < num; i++ )
{
double mean = i*(2.0/num)-1.0;
double variance = 0.01;
x.setElement(i, evaluate(input.doubleValue(), mean, variance));
}
return x;
}
/**
* Evaluates
* @param input Input
* @param mean mean
* @param variance Variance
* @return
* Output
*/
public static double evaluate(
double input,
double mean,
double variance )
{
double delta = input - mean;
return Math.exp( delta*delta / (-2.0*variance) );
}
}
/**
* Creates data
* @param inputs Inputs
* @param model Model
* @param random Random
* @return
* Data
*/
public static ArrayList<InputOutputPair<Vector,Double>> createData(
ArrayList<Double> inputs,
Evaluator<? super Double,? extends UnivariateDistribution<Double>> model,
Random random )
{
ArrayList<InputOutputPair<Vector,Double>> samples =
new ArrayList<InputOutputPair<Vector, Double>>( inputs.size() );
RadialBasisVectorFunction f = new RadialBasisVectorFunction(DEFAULT_DIM);
for( int n = 0; n < inputs.size(); n++ )
{
Double input = inputs.get(n);
Vector x = f.evaluate(input);
UnivariateDistribution<Double> outputDistribution = model.evaluate( input );
samples.add( DefaultInputOutputPair.create(
x, outputDistribution.sample(random) ) );
}
return samples;
}
/**
* Tests the constructors of class BayesianRegressionTestHarness.
*/
abstract public void testConstructors();
/**
* Clone
*/
public void testClone()
{
System.out.println( "Clone" );
BayesianRegression<Double,? extends Distribution<Vector>> instance =
this.createInstance();
@SuppressWarnings("unchecked")
BayesianRegression<Double,? extends Distribution<Vector>> clone =
(BayesianRegression<Double,? extends Distribution<Vector>>) instance.clone();
assertNotSame( instance, clone );
}
/**
* Learn
*/
public void testLearn()
{
System.out.println( "learn" );
System.out.println("createConditionalDistribution");
BayesianRegression<Double,? extends ClosedFormDistribution<Vector>> instance =
this.createInstance();
ArrayList<Double> inputs = createInputs(RANDOM);
Evaluator<? super Double,? extends UnivariateDistribution<Double>> target = this.createModel();
ArrayList<InputOutputPair<Vector,Double>> data = createData(inputs, target, RANDOM);
ClosedFormDistribution<Vector> posterior = instance.learn(data);
Vector mean = posterior.getMean();
LinearRegression linearRegression = new LinearRegression();
LinearDiscriminantWithBias result = linearRegression.learn(data);
System.out.println( "Mean: " + mean );
System.out.println( "Result: " + result.convertToVector() );
// System.out.println( "=====================" );
// System.out.println( "Estimates!" );
// for( double x = 0.0; x <= 1.0; x += 0.1 )
// {
// UnivariateGaussian y = f.evaluate(x);
// System.out.println( "x = " + x + ", y = " + y );
// }
}
/**
* Test of createConditionalDistribution method, of class BayesianRegression.
*/
public void testCreateConditionalDistribution()
{
System.out.println("createConditionalDistribution");
// This is similar to Bishop's example on p. 157
ArrayList<Double> inputs = createInputs(RANDOM);
Evaluator<? super Double,? extends UnivariateDistribution<Double>> target = this.createModel();
ArrayList<InputOutputPair<Vector,Double>> samples =
createData(inputs, target, RANDOM);
// System.out.println( "Targets:" );
// for( InputOutputPair<Double,Double> sample : samples )
// {
// System.out.println( "x = " + sample.getInput() + ", y = " + sample.getOutput() );
// }
BayesianRegression<Double,PosteriorType> instance = this.createInstance();
PosteriorType posterior = instance.learn(samples);
Vector weights = posterior.getMean();
UnivariateDistribution<Double> conditional = (UnivariateDistribution<Double>)
instance.createConditionalDistribution(samples.get(1).getFirst(), weights );
System.out.println( "Result: " + conditional );
System.out.println( "Target: " + samples.get(1).getSecond() );
ConfidenceInterval interval = GaussianConfidence.computeConfidenceInterval(
conditional, 1, 0.95);
System.out.println( "Interval: " + interval );
assertTrue( interval.withinInterval(samples.get(1).getSecond()) );
}
public static void compareMethods(
Evaluator<? super Vectorizable, ? extends ClosedFormDistribution<Double>> predictive,
LinearDiscriminantWithBias mle,
Model target )
{
System.out.println( "=====================" );
double logMLE = 0.0;
double logBayesian = 0.0;
double logTarget = 0.0;
RadialBasisVectorFunction f = new RadialBasisVectorFunction(DEFAULT_DIM);
for( double x = 0.0; x <= 1.0; x += 0.1 )
{
ClosedFormComputableDistribution<Double> y = target.evaluate(x).getProbabilityFunction();
Vector vx = f.evaluate(x);
ClosedFormDistribution<Double> ybayes = predictive.evaluate( vx );
Double ymle = mle.evaluate( vx );
System.out.printf( "x = %.1f", x );
System.out.println( ", target = " + y + ", Estimate: " + ybayes + ", MLE: " + ymle);
logTarget = y.getProbabilityFunction().logEvaluate( y.getMean() );
logBayesian += y.getProbabilityFunction().logEvaluate( ybayes.getMean() );
logMLE += y.getProbabilityFunction().logEvaluate(ymle);
}
System.out.println( "Log-Likelihood Results: " );
System.out.println( "Target: " + logTarget );
System.out.println( "Bayes: " + logBayesian );
System.out.println( "MLE: " + logMLE );
assertTrue( logTarget > logBayesian );
assertTrue( logBayesian > logMLE );
}
/**
* Test of createPredictiveDistribution method, of class BayesianRegression.
*/
public void testCreatePredictiveDistribution10()
{
System.out.println("createPredictiveDistribution(10)");
NUM_SAMPLES = 10;
ArrayList<Double> inputs = createInputs(RANDOM);
Model target = new Model(0.25);
ArrayList<InputOutputPair<Vector,Double>> data = createData(inputs, target,RANDOM);
BayesianRegression<Double,PosteriorType> instance =
this.createInstance();
Evaluator<? super Vectorizable, ? extends ClosedFormDistribution<Double>> predictive =
instance.createPredictiveDistribution( instance.learn(data) );
LinearRegression regression = new LinearRegression();
LinearDiscriminantWithBias mle = regression.learn(data);
compareMethods(predictive, mle, target);
}
/**
* Test of createPredictiveDistribution method, of class BayesianRegression.
*/
public void testCreatePredictiveDistribution100()
{
System.out.println("createPredictiveDistribution(100)");
NUM_SAMPLES = 100;
ArrayList<Double> inputs = createInputs(RANDOM);
Model target = new Model(0.25);
ArrayList<InputOutputPair<Vector,Double>> data = createData(inputs, target,RANDOM);
BayesianRegression<Double,PosteriorType> instance =
this.createInstance();
Evaluator<? super Vectorizable, ? extends ClosedFormDistribution<Double>> predictive =
instance.createPredictiveDistribution( instance.learn(data) );
LinearRegression regression = new LinearRegression();
LinearDiscriminantWithBias mle = regression.learn(data);
compareMethods(predictive, mle, target);
}
/**
* Test of createPredictiveDistribution method, of class BayesianRegression.
*/
public void testCreatePredictiveDistribution5()
{
System.out.println("createPredictiveDistribution(5)");
NUM_SAMPLES = 5;
ArrayList<Double> inputs = createInputs(RANDOM);
Model target = new Model(0.25);
ArrayList<InputOutputPair<Vector,Double>> data = createData(inputs, target,RANDOM);
BayesianRegression<Double,PosteriorType> instance =
this.createInstance();
Evaluator<? super Vectorizable, ? extends ClosedFormDistribution<Double>> predictive =
instance.createPredictiveDistribution( instance.learn(data) );
LinearRegression regression = new LinearRegression();
LinearDiscriminantWithBias mle = regression.learn(data);
compareMethods(predictive, mle, target);
}
/**
* Test of createPredictiveDistribution method, of class BayesianRegression.
*/
public void testCreatePredictiveDistribution1000()
{
System.out.println("createPredictiveDistribution(100)");
NUM_SAMPLES = 100;
ArrayList<Double> inputs = createInputs(RANDOM);
Model target = new Model(1.0);
ArrayList<InputOutputPair<Vector,Double>> data = createData(inputs, target,RANDOM);
BayesianRegression<Double,PosteriorType> instance =
this.createInstance();
Evaluator<? super Vectorizable, ? extends ClosedFormDistribution<Double>> predictive =
instance.createPredictiveDistribution( instance.learn(data) );
LinearRegression regression = new LinearRegression();
LinearDiscriminantWithBias mle = regression.learn(data);
compareMethods(predictive, mle, target);
}
public <SufficientStatisticType extends SufficientStatistic<InputOutputPair<? extends Vectorizable, Double>,PosteriorType>> void testIncrementalAndBatch(
IncrementalLearner<InputOutputPair<? extends Vectorizable,Double>,SufficientStatisticType> incremental )
{
System.out.println( "Incremental And Batch" );
ArrayList<Double> inputs = createInputs(RANDOM);
NUM_SAMPLES = 100;
Model target = new Model(1.0);
ArrayList<InputOutputPair<Vector,Double>> data = createData(inputs, target,RANDOM);
BayesianRegression<Double,PosteriorType> instance =
this.createInstance();
Evaluator<? super Vectorizable, ? extends Distribution<Double>> batch =
instance.createPredictiveDistribution( instance.learn(data) );
SufficientStatisticType posterior = incremental.createInitialLearnedObject();
for( InputOutputPair<Vector,Double> pair : data )
{
incremental.update(posterior, pair);
}
Evaluator<? super Vectorizable, ? extends Distribution<Double>> incrementalPredictive =
instance.createPredictiveDistribution( posterior.create() );
// Now run some K-S tests to see if they're almost the same
for( InputOutputPair<Vector,Double> pair : data )
{
Distribution<Double> b = batch.evaluate(pair.getInput());
ArrayList<? extends Double> sb = b.sample(RANDOM,NUM_SAMPLES);
Distribution<Double> i = incrementalPredictive.evaluate(pair.getInput());
System.out.println( "Batch: " + b );
System.out.println( "Incre: " + i );
System.out.println( "Target: " + target.evaluate(pair.getInput().getElement(0)) );
ArrayList<? extends Double> si = i.sample(RANDOM, NUM_SAMPLES);
KolmogorovSmirnovConfidence.Statistic kstest =
KolmogorovSmirnovConfidence.INSTANCE.evaluateNullHypothesis(sb,si);
System.out.println( "K-S test: " + kstest );
assertEquals( 1.0, kstest.getNullHypothesisProbability(), 0.99 );
}
}
}