/*
* File: UnivariateDistributionTestHarness.java
* Authors: Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright Feb 2, 2009, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government.
* Export of this program may require a license from the United States
* Government. See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.statistics;
import gov.sandia.cognition.learning.algorithm.BatchLearner;
import gov.sandia.cognition.math.UnivariateStatisticsUtil;
import gov.sandia.cognition.math.matrix.Vector;
import gov.sandia.cognition.statistics.method.ChiSquareConfidence;
import gov.sandia.cognition.statistics.method.ConfidenceStatistic;
import gov.sandia.cognition.statistics.method.GaussianConfidence;
import gov.sandia.cognition.statistics.method.KolmogorovSmirnovConfidence;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Random;
import junit.framework.TestCase;
/**
* Unit tests for UnivariateDistribution.
*
* @param <NumberType> Number type
* @author krdixon
*/
public abstract class UnivariateDistributionTestHarness<NumberType extends Number>
extends TestCase
{
/**
* Random number generator to use for a fixed random seed.
*/
public Random RANDOM = new Random( 1 );
/**
* Default tolerance of the regression tests, {@value}.
*/
public double TOLERANCE = 1e-5;
/**
* Confidence for sampling
*/
public double CONFIDENCE = 0.95;
/**
* Number of samples to draw
*/
public int NUM_SAMPLES = 1000;
/**
* Monte Carlo error fudge factor.
*/
public double MONTE_CARLO_FACTOR = 2.0;
/**
* Tests for class ScalarDistributionTestHarness2.
* @param testName Name of the test.
*/
public UnivariateDistributionTestHarness(
String testName)
{
super(testName);
}
/**
* Creates a new Distribution
* @return
* New Distribution
*/
public abstract UnivariateDistribution<NumberType> createInstance();
/**
* createInstance
*/
public void testCreateInstance()
{
System.out.println( "createInstance" );
UnivariateDistribution<? extends Number> instance = this.createInstance();
assertNotNull( instance );
assertFalse( instance instanceof CumulativeDistributionFunction );
assertFalse( instance instanceof ProbabilityDensityFunction );
}
/**
* Tests the constructors of class ScalarDistributionTestHarness2.
*/
public abstract void testDistributionConstructors();
/**
* Tests the CDF constructors
*/
public abstract void testCDFConstructors();
/**
* CDF known values
*/
public abstract void testCDFKnownValues();
/**
* Samples
* @param s1
* @param s2
* @return
*/
public boolean distributionSamplesEqual(
List<? extends Number> s1,
List<? extends Number> s2 )
{
if( s1.size() != s2.size() )
{
return false;
}
for( int n = 0; n < s1.size(); n++ )
{
if( s1.get(n) == null )
{
return false;
}
if( s2.get(n) == null )
{
return false;
}
if( s1 == s2 )
{
return false;
}
if( Math.abs(s1.get(n).doubleValue() - s2.get(n).doubleValue()) > TOLERANCE )
{
return false;
}
}
return true;
}
/**
* Clone
*/
public void testDistributionClone()
{
System.out.println( "Distribution.clone" );
Distribution<? extends Number> instance = this.createInstance();
@SuppressWarnings("unchecked")
Distribution<? extends Number> clone =
(Distribution<? extends Number>) instance.clone();
assertNotNull( clone );
assertNotSame( instance, clone );
Random r11 = new Random(1);
Random r12 = new Random(1);
ArrayList<? extends Number> s1 = instance.sample(r11, NUM_SAMPLES);
ArrayList<? extends Number> s2 = clone.sample(r12, NUM_SAMPLES);
assertTrue( this.distributionSamplesEqual(s1, s2) );
}
/**
* CDF.clone
*/
public void testCDFClone()
{
UnivariateDistribution<? extends Number> distribution = this.createInstance();
CumulativeDistributionFunction<? extends Number> instance = distribution.getCDF();
@SuppressWarnings("unchecked")
CumulativeDistributionFunction<? extends Number> clone =
(CumulativeDistributionFunction<? extends Number>) instance.clone();
assertNotNull( clone );
assertNotSame( instance, clone );
Random r11 = new Random(1);
Random r12 = new Random(1);
ArrayList<? extends Number> s1 = instance.sample(r11, NUM_SAMPLES);
ArrayList<? extends Number> s2 = clone.sample(r12, NUM_SAMPLES);
assertTrue( this.distributionSamplesEqual(s1, s2) );
}
/**
* Test of getMean method, of class Distribution.
*/
public void testDistributionGetMean()
{
System.out.println( "Distribution.getMean" );
UnivariateDistribution<NumberType> instance = this.createInstance();
// Ask the distribution for its mean
Number mean = instance.getMean();
// Sample from the distribution
ArrayList<? extends Number> samples =
instance.sample( RANDOM, NUM_SAMPLES );
ArrayList<Double> doubleSamples = new ArrayList<Double>( samples.size() );
for( Number sample : samples )
{
doubleSamples.add( sample.doubleValue() );
}
// Here is the confidence that the sample mean could have been
// sampled from the hypothesis distribution. This should be 1.0.
// If it's less than 0.05, then we've got a problem.
ConfidenceStatistic confidence =
GaussianConfidence.evaluateNullHypothesis( doubleSamples, mean.doubleValue() );
System.out.println( "Distribution: " + instance );
System.out.println( "Mean Confidence: " + confidence );
System.out.println( "Sample mean: " + UnivariateStatisticsUtil.computeMean( samples ) + ", Mean: " + mean );
assertEquals( 1.0, confidence.getNullHypothesisProbability(), CONFIDENCE );
}
/**
* Test of getMean method, of class Distribution.
*/
public void testDistributionGetMeanAsDouble()
{
System.out.println( "Distribution.getMeanAsDouble" );
UnivariateDistribution<NumberType> instance = this.createInstance();
// Ask the distribution for its mean
double mean = instance.getMeanAsDouble();
assertEquals(mean, instance.getMean().doubleValue(), TOLERANCE);
}
/**
* CDF.getMean
*/
public void testCDFGetMean()
{
System.out.println( "CDF.getMean" );
UnivariateDistribution<? extends Number> instance = this.createInstance();
CumulativeDistributionFunction<? extends Number> cdf = instance.getCDF();
Number m1 = instance.getMean();
Number m2 = cdf.getMean();
assertEquals( m1.doubleValue(), m2.doubleValue() );
}
/**
* Test of sample method, of class Distribution.
*/
public void testDistributionSample_Random()
{
System.out.println( "Distribution.sample(random)" );
// Make sure that when we re-feed an identical RANDOM seed, then
// we get an equal sample from the distribution. But different seeds
// should give us different results... maybe.
Random random1a = new Random( 1 );
Distribution<? extends Number> instance = this.createInstance();
Number r11 = instance.sample( random1a );
Number rx2 = instance.sample( random1a );
assertNotNull( r11 );
assertNotNull( rx2 );
// assertNotSame( r11, rx2 );
Random random1b = new Random( 1 );
Number r13 = instance.sample( random1b );
assertNotNull( r13 );
// assertNotSame( r11, r13 );
assertEquals( r11.doubleValue(), r13.doubleValue(), TOLERANCE );
Random random2 = new Random( 2 );
Number r21 = instance.sample( random2 );
assertNotNull( r21 );
// assertNotSame( r13, r21 );
// assertNotSame( r11, r21 );
Random random1c = new Random( 1 );
Number r14 = instance.sample( random1c );
assertNotNull( r14 );
// assertNotSame( r11, r14 );
assertEquals( r11.doubleValue(), r14.doubleValue(), TOLERANCE );
// assertNotSame( r13, r14 );
assertEquals( r13.doubleValue(), r14.doubleValue(), TOLERANCE );
}
/**
* Test of sample method, of class Distribution.
*/
public void testDistributionSample_Random_int()
{
System.out.println( "Distribution.sample(random,int)" );
Distribution<? extends Number> instance = this.createInstance();
// Identical RANDOM seeds should produce equal squences.
// (Can't say anything about different seeds because deterministic
// distributions always return the same result, regardless of seed.)
Random r1a = new Random( 1 );
ArrayList<? extends Number> s1a = instance.sample( r1a, NUM_SAMPLES );
assertEquals( NUM_SAMPLES, s1a.size() );
Random r1b = new Random( 1 );
ArrayList<? extends Number> s1b = instance.sample( r1b, NUM_SAMPLES );
assertEquals( NUM_SAMPLES, s1b.size() );
assertEquals( s1a.size(), s1b.size() );
assertTrue( this.distributionSamplesEqual(s1a, s1b) );
}
/**
* Test of sampleInto method, of class Distribution.
*/
public void testDistributionSampleInto()
{
System.out.println( "Distribution.sampleInto(random,int,Collection)" );
Distribution<? extends Number> instance = this.createInstance();
// Identical RANDOM seeds should produce equal squences.
// (Can't say anything about different seeds because deterministic
// distributions always return the same result, regardless of seed.)
Random r1a = new Random( 1 );
ArrayList<Number> s1a = new ArrayList<Number>();
instance.sampleInto( r1a, NUM_SAMPLES, s1a);
assertEquals( NUM_SAMPLES, s1a.size() );
Random r1b = new Random( 1 );
ArrayList<Number> s1b = new ArrayList<Number>();
instance.sampleInto( r1b, NUM_SAMPLES, s1b );
assertEquals( NUM_SAMPLES, s1b.size() );
assertEquals( s1a.size(), s1b.size() );
assertTrue( this.distributionSamplesEqual(s1a, s1b) );
}
/**
* Test of sample method, of class Distribution.
*/
public void testCDFSample_Random_int()
{
System.out.println( "CDF.sample(random,int)" );
UnivariateDistribution<NumberType> instance = this.createInstance();
CumulativeDistributionFunction<NumberType> cdf = instance.getCDF();
Random r11 = new Random(1);
Random r12 = new Random(1);
ArrayList<? extends NumberType> s1 = instance.sample(r11, NUM_SAMPLES);
ArrayList<? extends NumberType> s2 = cdf.sample(r12, NUM_SAMPLES);
assertTrue( this.distributionSamplesEqual(s1, s2) );
KolmogorovSmirnovConfidence.Statistic kstest =
KolmogorovSmirnovConfidence.evaluateNullHypothesis(s1,cdf);
System.out.println( "K-S test: " + kstest );
assertEquals( 1.0, kstest.getNullHypothesisProbability(), CONFIDENCE );
}
/**
* Tests getEstimator
*/
public void testEstimableDistributionGetEstimator()
{
System.out.println( "EstimableDistribution.getEstimator" );
UnivariateDistribution<NumberType> instance = this.createInstance();
if( instance instanceof EstimableDistribution )
{
EstimableDistribution<NumberType,? extends EstimableDistribution<NumberType,? extends UnivariateDistribution<NumberType>>> estimable =
(EstimableDistribution<NumberType, ? extends EstimableDistribution<NumberType, ? extends UnivariateDistribution<NumberType>>>) instance;
@SuppressWarnings("unchecked")
DistributionEstimator<NumberType, ? extends UnivariateDistribution<NumberType>> estimator =
(DistributionEstimator<NumberType, ? extends UnivariateDistribution<NumberType>>) estimable.getEstimator();
this.distributionEstimatorTest(estimator);
}
}
/**
* Tests the ability of the learner to estimate a distribution from its
* samples.
* @param learner
*/
@SuppressWarnings("unchecked")
public void distributionEstimatorTest(
BatchLearner<Collection<? extends NumberType>, ? extends UnivariateDistribution<NumberType>> learner )
{
System.out.println( "Test learner" );
UnivariateDistribution<? extends NumberType> distribution = this.createInstance();
if( distribution instanceof ClosedFormDistribution )
{
Vector parameters = ((ClosedFormDistribution) distribution).convertToVector();
System.out.println( "Target: " + distribution.getClass().getCanonicalName() + ", Parameters: " + parameters );
}
else
{
System.out.println( "Target distribution:\n" + distribution.toString() );
}
Random r1 = new Random(1);
ArrayList<? extends NumberType> samples =
distribution.sample(r1, NUM_SAMPLES);
UnivariateDistribution<? extends NumberType> estimate = learner.learn(samples);
if( distribution instanceof ClosedFormDistribution )
{
Vector parameters = ((ClosedFormDistribution) estimate).convertToVector();
System.out.println( "Estimate: " + distribution.getClass().getCanonicalName() + ", Parameters: " + parameters );
}
else
{
System.out.println( "Estimated distribution:\n" + estimate.toString() );
}
if( estimate instanceof DiscreteDistribution )
{
ProbabilityMassFunction<NumberType> pmf =
((DiscreteDistribution<NumberType>) estimate).getProbabilityFunction();
ChiSquareConfidence.Statistic chisquare =
ChiSquareConfidence.evaluateNullHypothesis( samples, pmf );
System.out.println( "Chi-Square Test Results:\n" + chisquare );
assertEquals( 1.0, chisquare.getNullHypothesisProbability(), CONFIDENCE );
}
else
{
CumulativeDistributionFunction<NumberType> cdf =
(CumulativeDistributionFunction<NumberType>) estimate.getCDF();
KolmogorovSmirnovConfidence.Statistic kstest =
KolmogorovSmirnovConfidence.evaluateNullHypothesis(samples, cdf);
System.out.println( "K-S Test Results:\n" + kstest );
assertEquals( 1.0, kstest.getNullHypothesisProbability(), CONFIDENCE );
}
}
/**
* Test of getCDF method, of class UnivariateDistribution.
*/
public void testDistributionGetCDF()
{
System.out.println("Distribution.getCDF");
UnivariateDistribution<? extends Number> instance = this.createInstance();
CumulativeDistributionFunction<? extends Number> cdf = instance.getCDF();
assertNotNull( cdf );
assertNotSame( instance, cdf );
assertEquals( instance.getMean().doubleValue(), cdf.getMean().doubleValue() );
Random r11 = new Random( 1 );
Random r12 = new Random( 1 );
ArrayList<? extends Number> s1 = instance.sample(r11, NUM_SAMPLES);
ArrayList<? extends Number> s2 = cdf.sample(r12, NUM_SAMPLES);
assertTrue( this.distributionSamplesEqual(s1, s2) );
}
/**
* CDF.getCDF
*/
public void testCDFGetCDF()
{
System.out.println( "CDF.getCDF" );
UnivariateDistribution<? extends Number> instance = this.createInstance();
CumulativeDistributionFunction<? extends Number> cdf = instance.getCDF();
CumulativeDistributionFunction<? extends Number> cdf2 = cdf.getCDF();
assertNotNull( cdf2 );
assertNotSame( instance, cdf );
assertSame( cdf, cdf2 );
}
/**
* Test of getVariance method, of class UnivariateDistribution.
*/
public void testDistributionGetVariance()
{
System.out.println("getVariance");
UnivariateDistribution<? extends Number> instance = this.createInstance();
ArrayList<? extends Number> s1 = instance.sample(RANDOM, NUM_SAMPLES);
double sampleVariance = UnivariateStatisticsUtil.computeVariance(s1);
double estimatedVariance = instance.getVariance();
System.out.println( "Sample Variance: " + sampleVariance );
System.out.println( "Stated Variance: " + estimatedVariance );
double max = Math.max(sampleVariance,estimatedVariance);
assertEquals( sampleVariance, instance.getVariance(),
MONTE_CARLO_FACTOR*max/Math.sqrt(NUM_SAMPLES) );
}
/**
* CDF.getVariance
*/
public void testCDFGetVariance()
{
System.out.println( "CDF.getVariance" );
UnivariateDistribution<? extends Number> instance = this.createInstance();
CumulativeDistributionFunction<? extends Number> cdf = instance.getCDF();
assertEquals( instance.getVariance(), cdf.getVariance(), TOLERANCE );
}
/**
* Test of evaluate method, of class gov.sandia.cognition.learning.util.statistics.CumulativeDistributionFunction.
*/
public void testCDFBounded()
{
System.out.println("CDF.bounded");
UnivariateDistribution<NumberType> instance = this.createInstance();
CumulativeDistributionFunction<NumberType> cdf = instance.getCDF();
ArrayList<? extends NumberType> samples = cdf.sample( RANDOM, NUM_SAMPLES );
for( NumberType sample : samples )
{
double p = cdf.evaluate( sample );
assertTrue( 0.0 <= p );
assertTrue( p <= 1.0 );
}
}
/**
* Test of evaluate method, of class gov.sandia.cognition.learning.util.statistics.CumulativeDistributionFunction.
*/
@SuppressWarnings("unchecked")
public void testCDFBoundaryConditions()
{
System.out.println("testCDFBoundaryConditions");
UnivariateDistribution<NumberType> instance = this.createInstance();
CumulativeDistributionFunction<NumberType> cdf = instance.getCDF();
NumberType min = cdf.getMinSupport();
NumberType max = cdf.getMaxSupport();
// If it's discrete, then the CDF won't be zero at the support bounds,
// but the value of the PMF at the support... ugh.
// So, let's just go to infinity (or max int) and check there.
if( cdf instanceof DiscreteDistribution )
{
if( min instanceof Integer )
{
min = (NumberType) new Integer( Integer.MIN_VALUE+1 );
max = (NumberType) new Integer( Integer.MAX_VALUE-1 );
}
else
{
min = (NumberType) new Double( Double.NEGATIVE_INFINITY );
max = (NumberType) new Double( Double.POSITIVE_INFINITY );
}
}
assertEquals( 0.0, cdf.evaluate( min ), TOLERANCE );
assertEquals( 1.0, cdf.evaluate( max ), TOLERANCE );
}
/**
* Test of evaluate method, of class gov.sandia.cognition.learning.util.statistics.CumulativeDistributionFunction.
*/
public void testCDFNonDecreasing()
{
System.out.println("CDF.nondecreasing");
UnivariateDistribution<NumberType> instance = this.createInstance();
CumulativeDistributionFunction<NumberType> cdf = instance.getCDF();
ArrayList<? extends NumberType> s1 = cdf.sample(RANDOM, NUM_SAMPLES);
ArrayList<? extends NumberType> s2 = cdf.sample(RANDOM, NUM_SAMPLES);
for( int n = 0; n < NUM_SAMPLES; n++ )
{
NumberType x1 = s1.get(n);
NumberType x2 = s2.get(n);
double v1 = cdf.evaluate(x1);
double v2 = cdf.evaluate(x2);
if( x1.doubleValue() < x2.doubleValue() )
{
assertTrue( v1 <= v2 );
}
else
{
assertTrue( v1 >= v2 );
}
}
}
/**
* Tests the support bound
*/
public void testDistributionSupport()
{
System.out.println( "Distribution.Support" );
UnivariateDistribution<NumberType> instance = this.createInstance();
ArrayList<? extends NumberType> samples = instance.sample(RANDOM, NUM_SAMPLES);
for( NumberType sample : samples )
{
assertTrue( instance.getMinSupport().doubleValue() <= sample.doubleValue() );
assertTrue( sample.doubleValue() <= instance.getMaxSupport().doubleValue() );
}
}
/**
* Tests the support bound
*/
public void testCDFSupport()
{
System.out.println( "CDF.Support" );
UnivariateDistribution<NumberType> instance = this.createInstance();
CumulativeDistributionFunction<NumberType> cdf = instance.getCDF();
assertEquals( instance.getMinSupport(), cdf.getMinSupport() );
assertEquals( instance.getMaxSupport(), cdf.getMaxSupport() );
}
}