/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.math.stat.inference; import java.util.Collection; import org.apache.commons.math.MathException; import org.apache.commons.math.MathRuntimeException; import org.apache.commons.math.distribution.FDistribution; import org.apache.commons.math.distribution.FDistributionImpl; import org.apache.commons.math.exception.util.LocalizedFormats; import org.apache.commons.math.stat.descriptive.summary.Sum; import org.apache.commons.math.stat.descriptive.summary.SumOfSquares; /** * Implements one-way ANOVA statistics defined in the {@link OneWayAnovaImpl} * interface. * * <p>Uses the * {@link org.apache.commons.math.distribution.FDistribution * commons-math F Distribution implementation} to estimate exact p-values.</p> * * <p>This implementation is based on a description at * http://faculty.vassar.edu/lowry/ch13pt1.html</p> * <pre> * Abbreviations: bg = between groups, * wg = within groups, * ss = sum squared deviations * </pre> * * @since 1.2 * @version $Id: OneWayAnovaImpl.java 1131229 2011-06-03 20:49:25Z luc $ */ public class OneWayAnovaImpl implements OneWayAnova { /** * Default constructor. */ public OneWayAnovaImpl() { } /** * {@inheritDoc}<p> * This implementation computes the F statistic using the definitional * formula<pre> * F = msbg/mswg</pre> * where<pre> * msbg = between group mean square * mswg = within group mean square</pre> * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html"> * here</a></p> */ public double anovaFValue(Collection<double[]> categoryData) throws IllegalArgumentException, MathException { AnovaStats a = anovaStats(categoryData); return a.F; } /** * {@inheritDoc}<p> * This implementation uses the * {@link org.apache.commons.math.distribution.FDistribution * commons-math F Distribution implementation} to estimate the exact * p-value, using the formula<pre> * p = 1 - cumulativeProbability(F)</pre> * where <code>F</code> is the F value and <code>cumulativeProbability</code> * is the commons-math implementation of the F distribution.</p> */ public double anovaPValue(Collection<double[]> categoryData) throws IllegalArgumentException, MathException { AnovaStats a = anovaStats(categoryData); FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg); return 1.0 - fdist.cumulativeProbability(a.F); } /** * {@inheritDoc}<p> * This implementation uses the * {@link org.apache.commons.math.distribution.FDistribution * commons-math F Distribution implementation} to estimate the exact * p-value, using the formula<pre> * p = 1 - cumulativeProbability(F)</pre> * where <code>F</code> is the F value and <code>cumulativeProbability</code> * is the commons-math implementation of the F distribution.</p> * <p>True is returned iff the estimated p-value is less than alpha.</p> */ public boolean anovaTest(Collection<double[]> categoryData, double alpha) throws IllegalArgumentException, MathException { if ((alpha <= 0) || (alpha > 0.5)) { throw MathRuntimeException.createIllegalArgumentException( LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL, alpha, 0, 0.5); } return anovaPValue(categoryData) < alpha; } /** * This method actually does the calculations (except P-value). * * @param categoryData <code>Collection</code> of <code>double[]</code> * arrays each containing data for one category * @return computed AnovaStats * @throws IllegalArgumentException if categoryData does not meet * preconditions specified in the interface definition * @throws MathException if an error occurs computing the Anova stats */ private AnovaStats anovaStats(Collection<double[]> categoryData) throws IllegalArgumentException, MathException { // check if we have enough categories if (categoryData.size() < 2) { throw MathRuntimeException.createIllegalArgumentException( LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED, categoryData.size()); } // check if each category has enough data and all is double[] for (double[] array : categoryData) { if (array.length <= 1) { throw MathRuntimeException.createIllegalArgumentException( LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED, array.length); } } int dfwg = 0; double sswg = 0; Sum totsum = new Sum(); SumOfSquares totsumsq = new SumOfSquares(); int totnum = 0; for (double[] data : categoryData) { Sum sum = new Sum(); SumOfSquares sumsq = new SumOfSquares(); int num = 0; for (int i = 0; i < data.length; i++) { double val = data[i]; // within category num++; sum.increment(val); sumsq.increment(val); // for all categories totnum++; totsum.increment(val); totsumsq.increment(val); } dfwg += num - 1; double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num; sswg += ss; } double sst = totsumsq.getResult() - totsum.getResult() * totsum.getResult()/totnum; double ssbg = sst - sswg; int dfbg = categoryData.size() - 1; double msbg = ssbg/dfbg; double mswg = sswg/dfwg; double F = msbg/mswg; return new AnovaStats(dfbg, dfwg, F); } /** Convenience class to pass dfbg,dfwg,F values around within AnovaImpl. No get/set methods provided. */ private static class AnovaStats { /** Degrees of freedom in numerator (between groups). */ private final int dfbg; /** Degrees of freedom in denominator (within groups). */ private final int dfwg; /** Statistic. */ private final double F; /** * Constructor * @param dfbg degrees of freedom in numerator (between groups) * @param dfwg degrees of freedom in denominator (within groups) * @param F statistic */ private AnovaStats(int dfbg, int dfwg, double F) { this.dfbg = dfbg; this.dfwg = dfwg; this.F = F; } } }