/* * File: ShafferStaticCorrection.java * Authors: Kevin R. Dixon * Company: Sandia National Laboratories * Project: Cognitive Foundry * * Copyright May 18, 2011, Sandia Corporation. * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive * license for use of this work by or on behalf of the U.S. Government. * Export of this program may require a license from the United States * Government. See CopyrightHistory.txt for complete details. * */ package gov.sandia.cognition.statistics.method; import gov.sandia.cognition.annotation.PublicationReference; import gov.sandia.cognition.annotation.PublicationReferences; import gov.sandia.cognition.annotation.PublicationType; import gov.sandia.cognition.collection.CollectionUtil; import gov.sandia.cognition.math.MathUtil; import gov.sandia.cognition.math.matrix.Matrix; import gov.sandia.cognition.math.matrix.MatrixFactory; import gov.sandia.cognition.util.ArrayIndexSorter; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.LinkedHashSet; /** * The Shaffer Static Correction uses logical relationships to tighten up the * Bonferroni/Sidak corrections when performing pairwise multiple hypothesis * comparisons. This is uniformly tighter bound than the Bonferroni/Sidak * values and also uniformly tighter than the Holm correction. The original * algorithm proposed by Shaffer appears to grow super exponentially as * O(2^(N^2)), where N is the number of treatments. We have pre-computed * various quantities and used caching to minimize the amount of repeated * recursion and it appears that the runtime grows as O(N^4), where N is the * number of treatments. Since there are N(N-1)/2 comparisons, this quantity * is quadratic in the number of comparisons. This means the computation is * reasonable for N=90 (4005 comparisons). However, the algorithm seems * to slow down significantly above N=100 (4950 comparisons) or so. * This implementation uses the slightly tighter Sidak correction, * as opposed to the standard Bonferroni correction. * <BR><BR> * For example, if you have three hypothesis you are testing, * then there is no way that there can be 2 true hypotheses. (Because u1=u2 and * u2=u3 and therefore u1=u3.) With this logic, Shaffer Static correction * can greatly reduce the false-negative rate while not impacting the * false-discovery rate. * @author Kevin R. Dixon * @since 3.3.0 */ @PublicationReferences( references={ @PublicationReference( author="Juliet Popper Shaffer", title="Modified Sequentially Rejective Multiple Test Procedures", type=PublicationType.Journal, publication="Journal of the American Statistical Association", year=1986, url="http://www.jstor.org/stable/2289016" ) , @PublicationReference( author="Juliet Popper Shaffer", title="Multiple Hypothesis Testing", type=PublicationType.Journal, publication="Annual Review of Psychology", year=1995, url="http://www.dm.uba.ar/materias/optativas/aspectos_estadisticos_de_microarreglos/2010/1/teoricas/Shaffer%201995%20Multiple%20hypothesis%20testing.pdf" ) , @PublicationReference( author={ "Salvador Garcia", "Francisco Herrera" }, title="An Extension on \"Statistical Comparisons of Classifiers over Multiple Data Sets\" for all Pairwise Comparisons", type=PublicationType.Journal, publication="Journal of Machine Learning Research", year=2008, url="http://sci2s.ugr.es/publications/ficheros/2008-Garcia-JMLR.pdf" ) } ) public class ShafferStaticCorrection extends AbstractPairwiseMultipleHypothesisComparison<ShafferStaticCorrection.Statistic> { /** * Default constructor */ public ShafferStaticCorrection() { this( DEFAULT_PAIRWISE_TEST ); } /** * Creates a new instance of BonferroniCorrection * @param pairwiseTest * Confidence test used for pair-wise null-hypothesis tests. */ public ShafferStaticCorrection( final NullHypothesisEvaluator<Collection<? extends Number>> pairwiseTest) { super( pairwiseTest ); } @Override public ShafferStaticCorrection clone() { return (ShafferStaticCorrection) super.clone(); } @Override public ShafferStaticCorrection.Statistic evaluateNullHypotheses( final Collection<? extends Collection<? extends Number>> data, final double uncompensatedAlpha) { return new ShafferStaticCorrection.Statistic( data, uncompensatedAlpha, this.getPairwiseTest() ); } /** * Test statistic from the Shaffer static multiple-comparison test */ public static class Statistic extends AbstractPairwiseMultipleHypothesisComparison.Statistic { /** * Matrix of adjusted alphas (p-value thresholds) for each comparison */ protected Matrix adjustedAlphas; /** * Creates a new instance of Statistic * @param data * Data from each treatment to consider * @param uncompensatedAlpha * Uncompensated alpha (p-value threshold) for the multiple comparison * test * @param pairwiseTest * Confidence test used for pair-wise null-hypothesis tests. */ public Statistic( final Collection<? extends Collection<? extends Number>> data, final double uncompensatedAlpha, final NullHypothesisEvaluator<Collection<? extends Number>> pairwiseTest ) { super( data, uncompensatedAlpha, pairwiseTest ); final int numComparisons = this.treatmentCount * (this.treatmentCount-1) / 2; this.computePairwiseTestResults(data, pairwiseTest ); ArrayList<Integer> possibleTruths = possibleTruthsSorted( this.treatmentCount ); double[] pvalues = new double[ numComparisons ]; int index = 0; for( int i = 0; i < this.treatmentCount; i++ ) { for( int j = i+1; j < this.treatmentCount; j++ ) { pvalues[index] = this.nullHypothesisProbabilities.getElement(i, j); index++; } } // Sort the p-values from smallest to largest int[] sortedIndices = ArrayIndexSorter.sortArrayAscending(pvalues); double[] adjustedAlpha = new double[ numComparisons ]; int maxPossibleTruthIndex = possibleTruths.size()-1; int hypothesesRemaining = numComparisons; for( int i = 0; i < numComparisons; i++ ) { int maxPossibleTrueHypotheses = possibleTruths.get(maxPossibleTruthIndex); // adjustedAlpha[sortedIndices[i]] = BonferroniCorrection.adjust( // this.uncompensatedAlpha, maxPossibleTrueHypotheses ); adjustedAlpha[sortedIndices[i]] = SidakCorrection.adjust( this.uncompensatedAlpha, maxPossibleTrueHypotheses ); if( pvalues[sortedIndices[i]] >= adjustedAlpha[sortedIndices[i]] ) { // This is the critical value, there are no more remaining // false hypotheses, so the remaining ones must be true break; } else { hypothesesRemaining--; if( hypothesesRemaining < maxPossibleTrueHypotheses ) { maxPossibleTruthIndex--; } } } // Stuff the adjusted alphas back into the Matrix alphaMatrix = MatrixFactory.getDefault().createMatrix( this.treatmentCount,this.treatmentCount); index = 0; for( int i = 0; i < this.treatmentCount; i++ ) { for( int j = i+1; j < this.treatmentCount; j++ ) { alphaMatrix.setElement(i, j, adjustedAlpha[index]); alphaMatrix.setElement(j, i, adjustedAlpha[index]); index++; } } this.adjustedAlphas = alphaMatrix; } /** * Returns the sorted-ascending set of the possible set of true * hypothesis given the number of treatments being considered. * @param treatmentCount * Number of treatments to consider * @return * Set of possible true hypotheses given the number of treatments. */ public static ArrayList<Integer> possibleTruthsSorted( int treatmentCount ) { int[] jchoose2 = new int[ treatmentCount+1 ]; for( int j = 2; j <= treatmentCount; j++ ) { jchoose2[j] = (int) (Math.round(Math.exp(MathUtil.logBinomialCoefficient(j, 2)))); } ArrayList<LinkedHashSet<Integer>> recusionCaches = new ArrayList<LinkedHashSet<Integer>>( treatmentCount ); for( int j = 0; j < treatmentCount; j++ ) { recusionCaches.add( null ); } LinkedHashSet<Integer> set = possibleTruthsSet( treatmentCount, jchoose2, recusionCaches ); ArrayList<Integer> sortedSet = CollectionUtil.asArrayList(set); Collections.sort(sortedSet); return sortedSet; } /** * Recursive algorithm to compute the possible set of true hypotheses * @param treatmentCount * Number of treatments to consider * @param jchoose2 * Pre-computed cache of all "j choose 2" values to the max treatment * @param recursionCaches * Dynamic cache of recursion results for various treatmentCounts. * If entry j is null, then we have not recursed to a * treatmentCount of j yet. * @return * Unsorted set of all possible true hypothesis counts. */ private static LinkedHashSet<Integer> possibleTruthsSet( final int treatmentCount, final int[] jchoose2, ArrayList<LinkedHashSet<Integer>> recursionCaches ) { LinkedHashSet<Integer> set = new LinkedHashSet<Integer>( treatmentCount ); if( treatmentCount <= 1 ) { set.add( 0 ); } else if( treatmentCount == 2 ) { set.add( 1 ); } else { for( int j = 1; j <= treatmentCount; j++ ) { // If we've already recursed using the give index (tcmj), // then don't do it again... It leads to a super-polynomial // explosion in the recursion. (Shaffer's original This caching keeps it // polynomial. final int tcmj = treatmentCount-j; LinkedHashSet<Integer> recursion = recursionCaches.get( tcmj ); if( recursion == null ) { recursion = possibleTruthsSet( tcmj, jchoose2, recursionCaches ); recursionCaches.set( tcmj, recursion ); } // Uncomment the line below to run Shaffer's original // algorithm without caching... but it's superpolynomial, // so I would suggest only doing it if you're insane. //LinkedHashSet<Integer> recursion = possibleTruthsSet( treatmentCount - j, jchoose2, recursionCaches ); for( Integer value : recursion ) { int choose; if( j < 2 ) { choose = 0; } else { choose = jchoose2[j]; } set.add( value + choose ); } } } return set; } @Override public double getAdjustedAlpha( int i, int j) { return this.adjustedAlphas.getElement(i, j); } } }