package uk.ac.rhul.cs.stats.independentsamples;

import java.util.Arrays;

import uk.ac.rhul.cs.stats.StatsUtils;
import uk.ac.rhul.cs.stats.tests.H1;
import uk.ac.rhul.cs.stats.tests.SignificanceTest;
import uk.ac.rhul.cs.utils.ArrayUtils;

/**
 * Implementation of the Mann-Whitney U test.
 *
 * Given two samples A and B, the test tests the null hypothesis that A and B
 * have the same distribution against one of the following alternative hypotheses:
 *
 * <ul>
 * <li>A and B are different ({@link H1#NOT_EQUAL})</li>
 * <li>A is stochastically less than B ({@link H1#LESS_THAN})</li>
 * <li>A is stochastically greater than B ({@link H1#GREATER_THAN})</li>
 * </ul>
 *
 * The p-value is obtained from a normal approximation of the test statistic
 * with a continuity correction and a correction for ties.
 *
 * This class is similar to the eponymous class in the Java Statistical Classes
 * library, but it is reimplemented from scratch to avoid license restrictions.
 * (JSC is not licensed under the GNU GPL).
 *
 * @author tamas
 */
public class MannWhitneyTest implements SignificanceTest {
    /**
     * Tolerance used by the constructors that do not take an explicit tolerance
     */
    private static final double DEFAULT_TOLERANCE = 1e-7;

    /**
     * Size of sample A
     */
    private final int nA;

    /**
     * Size of sample B
     */
    private final int nB;

    /**
     * The value of the test statistic
     */
    private final double U;

    /**
     * The tie correction that was applied
     */
    private final double tieCorrection;

    /**
     * The alternative hypothesis
     */
    private final H1 alternative;

    /**
     * Constructs a two-sample Mann-Whitney test with the given samples and
     * the two-sided alternative hypothesis ({@link H1#NOT_EQUAL}).
     *
     * The tolerance level is 1e-7.
     *
     * @param xA the first sample
     * @param xB the second sample
     */
    public MannWhitneyTest(double[] xA, double[] xB) {
        this(xA, xB, H1.NOT_EQUAL, DEFAULT_TOLERANCE);
    }

    /**
     * Constructs a two-sample Mann-Whitney test with the given samples and
     * the given alternative hypothesis.
     *
     * The tolerance level is 1e-7.
     *
     * @param xA the first sample
     * @param xB the second sample
     * @param alternative the alternative hypothesis
     */
    public MannWhitneyTest(double[] xA, double[] xB, H1 alternative) {
        this(xA, xB, alternative, DEFAULT_TOLERANCE);
    }

    /**
     * Constructs a two-sample Mann-Whitney test with the given samples,
     * the given alternative hypothesis and the given tolerance.
     *
     * The samples are pooled and ranked; the test statistic and the tie
     * correction are computed eagerly and stored. All counting arithmetic is
     * carried out in double precision so that large samples or large tie
     * groups cannot overflow 32-bit integers.
     *
     * @param xA the first sample
     * @param xB the second sample
     * @param alternative the alternative hypothesis
     * @param tolerance tolerance value within which two values are considered equal
     */
    public MannWhitneyTest(double[] xA, double[] xB, H1 alternative, double tolerance) {
        nA = xA.length;
        nB = xB.length;
        int n = nA + nB;

        /* Join the two arrays; sample A occupies the first nA slots */
        double[] joined = new double[n];
        System.arraycopy(xA, 0, joined, 0, nA);
        System.arraycopy(xB, 0, joined, nA, nB);

        /* Get the rank vector of the array */
        double[] ranks = ArrayUtils.getRanks(joined, tolerance);

        /* uA = nA*nB + nA*(nA+1)/2 - (rank sum of sample A).
         * The products are formed in double: nA * nB as an int wraps around
         * as soon as the product exceeds Integer.MAX_VALUE. */
        double product = (double) nA * nB;
        double uA = product;
        for (int i = 0; i < nA; i++) {
            uA -= ranks[i];
        }
        uA += (double) nA * (nA + 1.0) / 2.0;

        if (alternative == H1.NOT_EQUAL) {
            /* Two-sided test: use the smaller of uA and uB = nA*nB - uA */
            U = Math.min(uA, product - uA);
        } else {
            U = uA;
        }

        /* Tied observations become adjacent once the ranks are sorted */
        Arrays.sort(ranks);
        tieCorrection = computeTieCorrection(ranks, n);

        this.alternative = alternative;
    }

    /**
     * Computes the tie correction factor 1 - sum(t^3 - t) / (n^3 - n), where
     * t runs over the sizes of the groups of equal ranks.
     *
     * @param sortedRanks the pooled ranks in ascending order
     * @param n the number of pooled observations
     * @return the correction factor; 1.0 when there are fewer than two observations
     */
    private static double computeTieCorrection(double[] sortedRanks, int n) {
        if (n < 2) {
            return 1.0;
        }

        double tieSum = 0.0;
        int start = 0;
        while (start < n - 1) {
            /* Advance 'end' to the last element of the run starting at 'start' */
            int end = start;
            while (end < n - 1 && sortedRanks[end] == sortedRanks[end + 1]) {
                end++;
            }
            /* A run of length 1 contributes t^3 - t = 0. Evaluated in double
             * because t^3 exceeds the int range for t > 1290. */
            double t = end - start + 1;
            tieSum += t * (t * t - 1.0);
            start = end + 1;
        }

        /* n * n would overflow an int for n > 46340, hence the double copy */
        double total = n;
        return 1.0 - tieSum / total / (total * total - 1.0);
    }

    /**
     * Returns the tie correction that was applied to the p-value
     */
    public double getCorrectionFactor() {
        return tieCorrection;
    }

    /**
     * Returns the p-value of the test, based on a normal approximation of the
     * test statistic with a continuity correction of 0.5 and the tie-corrected
     * standard deviation.
     *
     * There is no guard against a zero standard deviation (for instance when
     * all pooled observations are tied or one of the samples is empty); the
     * division then yields NaN or an infinite z-score.
     *
     * NOTE(review): this assumes that {@code StatsUtils.getZProbability(z)}
     * is the cumulative distribution function of the standard normal
     * distribution -- confirm against its implementation.
     *
     * @return the p-value for the alternative hypothesis given at construction
     */
    public double getSP() {
        int n = nA + nB;
        /* As in the constructor, form nA * nB in double to avoid int overflow */
        double product = (double) nA * nB;
        double z = U - product / 2.0;
        double sd = Math.sqrt((product / 12.0) * (n + 1) * tieCorrection);
        double continuityCorrection = 0.0;

        /* Shift the statistic half a unit towards its expected value */
        switch (alternative) {
            case NOT_EQUAL:
                continuityCorrection = Math.signum(z) * 0.5;
                break;
            case LESS_THAN:
                continuityCorrection = 0.5;
                break;
            case GREATER_THAN:
                continuityCorrection = -0.5;
                break;
        }

        z = (z - continuityCorrection) / sd;

        switch (alternative) {
            case NOT_EQUAL:
                z = Math.abs(z);
                return 2 * StatsUtils.getZProbability(-z);
            case LESS_THAN:
                return 1.0 - StatsUtils.getZProbability(z);
            case GREATER_THAN:
                return StatsUtils.getZProbability(z);
        }

        return Double.NaN;
    }

    /**
     * Returns the value of the test statistic U. For the two-sided
     * alternative this is the smaller of the two U values; otherwise it is
     * the U value computed from the rank sum of the first sample.
     */
    public double getTestStatistic() {
        return U;
    }

    /**
     * Returns the size of the first sample
     */
    public int sizeA() {
        return nA;
    }

    /**
     * Returns the size of the second sample
     */
    public int sizeB() {
        return nB;
    }
}