StudentTConfidence.java example

/*
 * File:                StudentTConfidence.java
 * Authors:             Kevin R. Dixon
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 *
 * Copyright August 16, 2007, Sandia Corporation.  Under the terms of Contract
 * DE-AC04-94AL85000, there is a non-exclusive license for use of this work by
 * or on behalf of the U.S. Government. Export of this program may require a
 * license from the United States Government. See CopyrightHistory.txt for
 * complete details.
 *
 */

package gov.sandia.cognition.statistics.method;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.math.ProbabilityUtil;
import gov.sandia.cognition.math.UnivariateStatisticsUtil;
import gov.sandia.cognition.util.Summarizer;
import gov.sandia.cognition.statistics.distribution.StudentTDistribution;
import gov.sandia.cognition.statistics.distribution.UnivariateGaussian;
import gov.sandia.cognition.util.AbstractCloneableSerializable;
import gov.sandia.cognition.util.Pair;
import java.util.Collection;
import java.util.Iterator;

/**
 * This class implements Student's t-tests for different uses.  The confidence
 * test is the paired Student's t-test, which determines whether the
 * differences between the pairs have zero mean.  The confidence interval
 * calculation computes the Student-t confidence interval for the mean of the
 * given data.
 * @author Kevin R. Dixon
 * @since  2.0
 */
@ConfidenceTestAssumptions(
    name="Paired Student's t-test",
    description="Computes the value of the null hypothesis that the differences between paired samples have zero mean and that the data are sampled from a Gaussian distributions with equal variances.",
    alsoKnownAs="Dependent t-test for paired samples",
    nullHypothesis="The means of the groups are equal.",
    assumptions={
        "The data for the pairs are iid samples from a Gaussian distribution with equal variances.",
        "The common variances times the degrees of freedom is a chi-square distribution.",
        "The data pairs should be sampled independently from each other."
    },
    distribution=StudentTDistribution.CDF.class,
    dataPaired=true,
    dataSameSize=true,
    reference=@PublicationReference(
        author="Wikipedia",
        title="Student's t-test, Dependent t-test for paired samples",
        type=PublicationType.WebPage,
        year=2009,
        url="http://en.wikipedia.org/wiki/Student_t_test#Dependent_t-test_for_paired_samples"
    )
)
public class StudentTConfidence
    extends AbstractCloneableSerializable
    implements NullHypothesisEvaluator<Collection<? extends Number>>,
    ConfidenceIntervalEvaluator<Collection<? extends Number>>
{

    /**
     * Shared, pre-built instance.  This class declares no instance fields of
     * its own, so callers can reuse this object instead of constructing a
     * new one.
     */
    public static final StudentTConfidence INSTANCE =
        new StudentTConfidence();

    /**
     * Default tolerance for the standard deviation, {@value}.  The paired
     * t-test uses it as a lower bound on the standard error of the mean
     * difference, so that the t-statistic is never computed by dividing by
     * a (nearly) zero value.
     */
    public static final double DEFAULT_TOLERANCE = 1e-10;

    /**
     * Creates a new instance of StudentTConfidence.  There is nothing to
     * configure; {@link #INSTANCE} can be used instead.
     */
    public StudentTConfidence()
    {
    }

    /**
     * Computes a paired Student-t test for the given data.  The datasets must
     * be the same size and contain at least two elements; values are paired
     * up in iteration order.
     * <p>
     * The t-statistic is the absolute difference of the two sample means
     * divided by the standard error of the mean difference,
     * {@code sqrt((var1 + var2 - 2*cov) / N)}, with {@code N - 1} degrees of
     * freedom.  The standard error is bounded below by
     * {@link #DEFAULT_TOLERANCE} so that (nearly) constant differences do not
     * cause a division by zero.
     * @param data1 First dataset to consider
     * @param data2 Second dataset to consider
     * @return 
     * ConfidenceStatistic for a Student-t test
     * @throws IllegalArgumentException
     * If the collections have different sizes or fewer than two elements
     */
    @PublicationReference(
        author={
            "William H. Press",
            "Saul A. Teukolsky",
            "William T. Vetterling",
            "Brian P. Flannery"
        },
        title="Numerical Recipes in C, Second Edition",
        type=PublicationType.Book,
        year=1992,
        pages=618,
        notes="Function tptest()",
        url="http://www.nrbook.com/a/bookcpdf.php"
    )
    @Override
    public StudentTConfidence.Statistic evaluateNullHypothesis(
        Collection<? extends Number> data1,
        Collection<? extends Number> data2 )
    {

        if (data1.size() != data2.size())
        {
            throw new IllegalArgumentException(
                "Data collections must have same number of elements" );
        }

        final int N = data1.size();
        if (N < 2)
        {
            // With fewer than two pairs there are no degrees of freedom and
            // the covariance below would be divided by a non-positive
            // number.  Fail early with a clear message instead of relying on
            // Statistic to reject the non-positive degrees of freedom.
            throw new IllegalArgumentException(
                "Paired t-test requires at least two pairs, got " + N );
        }

        Pair<Double,Double> g1 = UnivariateStatisticsUtil.computeMeanAndVariance(data1);
        Pair<Double,Double> g2 = UnivariateStatisticsUtil.computeMeanAndVariance(data2);

        double mean1 = g1.getFirst();
        double var1 = g1.getSecond();

        double mean2 = g2.getFirst();
        double var2 = g2.getSecond();

        // Sample covariance of the pairs, normalized by the degrees of
        // freedom (N - 1).
        double dof = N - 1;
        Iterator<? extends Number> i1 = data1.iterator();
        Iterator<? extends Number> i2 = data2.iterator();
        double cov = 0.0;
        for (int n = 0; n < N; n++)
        {
            double v1 = i1.next().doubleValue();
            double v2 = i2.next().doubleValue();
            cov += (v1 - mean1) * (v2 - mean2);
        }
        cov /= dof;

        // Variance of the mean difference.  Mathematically this is
        // non-negative, but the variances and the covariance are computed
        // separately, so rounding can leave a tiny negative value when the
        // differences are (nearly) constant.  Math.sqrt would turn that into
        // NaN, which the tolerance check below does not catch because every
        // comparison with NaN is false.  A NaN caused by NaN input is left
        // alone so that it still propagates.
        double varianceOfMeanDifference = (var1 + var2 - 2 * cov) / N;
        if (varianceOfMeanDifference < 0.0)
        {
            varianceOfMeanDifference = 0.0;
        }

        // Standard error of the mean difference, bounded away from zero.
        double sd = Math.sqrt( varianceOfMeanDifference );
        if( sd < DEFAULT_TOLERANCE )
        {
            sd = DEFAULT_TOLERANCE;
        }
        double t = Math.abs( (mean1 - mean2) / sd );

        return new StudentTConfidence.Statistic( t, dof );
    }

    /**
     * {@inheritDoc}
     * <p>
     * Computes the sample mean and variance of the data and delegates to the
     * overload that takes the mean, variance and number of samples.
     */
    @Override
    public ConfidenceInterval computeConfidenceInterval(
        Collection<? extends Number> data,
        double confidence )
    {
        final Pair<Double,Double> stats =
            UnivariateStatisticsUtil.computeMeanAndVariance( data );
        final double sampleMean = stats.getFirst();
        final double sampleVariance = stats.getSecond();
        final int numSamples = data.size();

        return this.computeConfidenceInterval(
            sampleMean, sampleVariance, numSamples, confidence );
    }

    /**
     * {@inheritDoc}
     * <p>
     * The interval is {@code mean +/- z * sqrt(variance / numSamples)}, where
     * {@code z} is the negated value of the inverse Student-t CDF evaluated
     * at {@code (1 - confidence) / 2}, using {@code numSamples - 1} degrees
     * of freedom.  A negative half-width is clamped to zero.
     * @throws IllegalArgumentException
     * If confidence is NaN or not on the interval (0,1]
     */
    @Override
    public ConfidenceInterval computeConfidenceInterval(
        double mean,
        double variance,
        int numSamples,
        double confidence)
    {

        // Written as a negated "in range" test so that NaN, for which every
        // comparison is false, is rejected together with out-of-range values.
        if (!((confidence > 0.0) && (confidence <= 1.0)))
        {
            throw new IllegalArgumentException(
                "Confidence must be on the interval (0,1]" );
        }

        double alpha = 1.0 - confidence;
        // NOTE(review): numSamples < 2 gives non-positive degrees of freedom;
        // what happens then depends on StudentTDistribution.CDF -- confirm.
        int dof = numSamples - 1;

        StudentTDistribution.CDF cdf = new StudentTDistribution.CDF( dof );
        // Half of alpha goes into each tail; negate the lower-tail value to
        // get the multiplier for the half-width of the interval.
        double z = -cdf.inverse( 0.5 * alpha );
        double delta = z * Math.sqrt( variance / numSamples );
        if (delta < 0.0)
        {
            delta = 0.0;
        }

        return new ConfidenceInterval(
            mean, mean - delta, mean + delta, confidence, numSamples );

    }

    /**
     * Confidence statistics for a Student-t test.  Holds the t-statistic and
     * the degrees of freedom; the two-tailed probability computed from them
     * is handed to the superclass on construction.
     */
    public static class Statistic
        extends AbstractConfidenceStatistic
    {

        /**
         * Value that is used in the Student-t CDF to compute the probability.
         * Usually just called the "t-statistic"
         */
        private double t;

        /**
         * Number of degrees of freedom in the Student-t distribution, usually
         * the number of data points - 1
         */
        private double degreesOfFreedom;

        /**
         * Creates a new instance of Statistic
         * @param t 
         * Value that is used in the Student-t CDF to compute the probability.
         * Usually just called the "t-statistic"
         * @param degreesOfFreedom 
         * Number of degrees of freedom in the Student-t distribution, usually
         * the number of data points - 1, must be greater than zero
         * @throws IllegalArgumentException
         * If degreesOfFreedom is not greater than zero
         */
        public Statistic(
            double t,
            double degreesOfFreedom )
        {
            super( Statistic.twoTailTStatistic( t, degreesOfFreedom ) );
            this.setT( t );
            this.setDegreesOfFreedom( degreesOfFreedom );
        }

        /**
         * Copy Constructor
         * @param other Statistic to copy
         */
        public Statistic(
            Statistic other )
        {
            this( other.getT(), other.getDegreesOfFreedom() );
        }

        @Override
        public Statistic clone()
        {
            return (Statistic) super.clone();
        }

        /**
         * Computes the likelihood that a StudentTDistribution would generate
         * a LESS LIKELY sample than "t", given the degrees of freedom.  This
         * is a two-tailed test, thus, we're computing the probability that a
         * Student-t distribution would be at least as far away from zero as
         * "t" (both tails).  Only the magnitude of "t" matters, so a negative
         * "t" gives the same result as its absolute value.
         * @param t 
         * Sample to determine how likely a worse sample is than "t"
         * @param degreesOfFreedom 
         * Number of degrees of freedom in the Student-t distribution
         * @return 
         * Probability that a Student-t distribution would generate as bad of
         * a sample as "t"
         */
        public static double twoTailTStatistic(
            double t,
            double degreesOfFreedom )
        {
            StudentTDistribution.CDF cdf =
                new StudentTDistribution.CDF( degreesOfFreedom );
            // Evaluate the lower tail at -|t| and double it.  Using -t
            // directly would evaluate the CDF above zero for a negative t
            // and return a "probability" greater than one.
            return 2.0 * cdf.evaluate( -Math.abs( t ) );
        }

        /**
         * Getter for t
         * @return 
         * Value that is used in the Student-t CDF to compute the probability.
         * Usually just called the "t-statistic"
         */
        public double getT()
        {
            return this.t;
        }

        /**
         * Setter for t
         * @param t 
         * Value that is used in the Student-t CDF to compute the probability.
         * Usually just called the "t-statistic"
         */
        protected void setT(
            double t )
        {
            this.t = t;
        }

        /**
         * Getter for degreesOfFreedom
         * @return 
         * Number of degrees of freedom in the Student-t distribution, usually
         * the number of data points - 1
         */
        public double getDegreesOfFreedom()
        {
            return this.degreesOfFreedom;
        }

        /**
         * Setter for degreesOfFreedom
         * @param degreesOfFreedom 
         * Number of degrees of freedom in the Student-t distribution, usually
         * the number of data points - 1, must be greater than zero
         * @throws IllegalArgumentException
         * If degreesOfFreedom is not greater than zero
         */
        protected void setDegreesOfFreedom(
            double degreesOfFreedom )
        {
            if (degreesOfFreedom <= 0.0)
            {
                throw new IllegalArgumentException( "degreesOfFreedom > 0.0" );
            }
            this.degreesOfFreedom = degreesOfFreedom;
        }

        /**
         * Returns the t-statistic as the test statistic.
         * @return
         * Same value as {@link #getT()}
         */
        @Override
        public double getTestStatistic()
        {
            return this.getT();
        }

    }

    /**
     * An implementation of the {@code Summarizer} interface for creating a
     * {@code ConfidenceInterval}
     */
    public static class Summary
        extends AbstractCloneableSerializable
        implements Summarizer<Number, ConfidenceInterval>
    {

        /** The confidence for the created interval. */
        private double confidence;

        /**
         * Creates a new Summarizer.
         *
         * @param  confidence The confidence for the interval.
         */
        public Summary(
            final double confidence )
        {
            super();

            this.setConfidence( confidence );
        }

        @Override
        public ConfidenceInterval summarize(
            final Collection<? extends Number> data )
        {
            return new StudentTConfidence().computeConfidenceInterval(
                data, this.getConfidence() );
        }

        /** 
         * Gets the confidence for created the interval. 
         *
         * @return The confidence for the created interval.
         */
        public double getConfidence()
        {
            return this.confidence;
        }

        /** 
         * Sets the confidence for created the interval. 
         *
         * @param  confidence The confidence for the created interval.
         */
        public void setConfidence(
            final double confidence )
        {
            ProbabilityUtil.assertIsProbability( confidence );
            this.confidence = confidence;
        }

    }

}