/* * Apache License * Version 2.0, January 2004 * http://www.apache.org/licenses/ * * Copyright 2013 Aurelian Tutuianu * Copyright 2014 Aurelian Tutuianu * Copyright 2015 Aurelian Tutuianu * Copyright 2016 Aurelian Tutuianu * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package rapaio.core.tests; import rapaio.core.distributions.StudentT; import rapaio.data.Var; import rapaio.printer.Printable; import static rapaio.core.CoreTools.*; import static rapaio.sys.WS.formatFlex; /** * t test for checking if two samples have the same mean * <p> * Created by <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a> on 6/14/16. */ public class TTestTwoSamples implements HTest, Printable { /** * Two unpaired samples with equal variances t test for difference of the means with default values * for significance level (0.05) and alternative (two tails) * * @param x first given sample * @param y second given sample * @param mean null hypothesis mean * @return an object containing hypothesis testing analysis */ public static TTestTwoSamples test(Var x, Var y, double mean) { return new TTestTwoSamples(x, y, mean, true, 0.05, HTest.Alternative.TWO_TAILS); } /** * Two unpaired samples with equal variances t test for difference of the means * * @param x first given sample * @param y second given sample * @param mean null hypothesis mean * @param sl significance level (usual value 0.05) * @param alt alternative hypothesis (usual value two tails) * @return an object containing hypothesis testing analysis */ public static TTestTwoSamples test(Var x, Var y, double mean, double sl, HTest.Alternative alt) { return new TTestTwoSamples(x, y, mean, true, sl, alt); } /** * Two unpaired samples with unequal variances Welch t test for difference of the means with default values * for significance level (0.05) and alternative (two tails) * * @param x first given sample * @param y second given sample * @param mean null hypothesis mean * @return an object containing hypothesis testing analysis */ public static TTestTwoSamples welchTest(Var x, Var y, double mean) { return new TTestTwoSamples(x, y, mean, false, 0.05, HTest.Alternative.TWO_TAILS); } /** * Two unpaired samples with unequal variances Welch t test for difference of the means * * @param x first given sample * @param y second given sample * @param mean null hypothesis mean * @param sl significance level (usual value 0.05) * @param alt alternative hypothesis (usual value two tails) * @return an object containing hypothesis testing analysis */ public static TTestTwoSamples welchTest(Var x, Var y, double mean, double sl, HTest.Alternative alt) { return new TTestTwoSamples(x, y, mean, false, sl, alt); } // parameters private final double sampleMean; private final double xSampleMean; private final int xSampleSize; private final double xSampleSd; private final double ySampleMean; private final int ySampleSize; private final double ySampleSd; private final boolean equalVars; private double df; private final double mu; private final double sl; private final HTest.Alternative alt; // computed private double t; private double pValue; private double ciLow; private double ciHigh; private TTestTwoSamples(Var x, Var y, double mu, boolean equalVars, double sl, HTest.Alternative alt) { this.mu = mu; this.sl = sl; this.alt = alt; this.equalVars = equalVars; Var xComplete = x.stream().complete().toMappedVar(); Var yComplete = y.stream().complete().toMappedVar(); if (xComplete.rowCount() < 1 || yComplete.rowCount() < 1) { // nothing to do sampleMean = Double.NaN; xSampleMean = Double.NaN; ySampleMean = Double.NaN; xSampleSize = -1; ySampleSize = -1; xSampleSd = Double.NaN; ySampleSd = Double.NaN; df = -1; t = Double.NaN; pValue = Double.NaN; ciLow = Double.NaN; ciHigh = Double.NaN; return; } xSampleMean = mean(xComplete).value(); xSampleSize = xComplete.rowCount(); xSampleSd = var(xComplete).sdValue(); ySampleMean = mean(yComplete).value(); ySampleSize = yComplete.rowCount(); ySampleSd = var(yComplete).sdValue(); sampleMean = xSampleMean - ySampleMean; compute(); } public boolean equalVars() { return equalVars; } public double sampleMean() { return sampleMean; } public double xSampleMean() { return xSampleMean; } public int xSampleSize() { return xSampleSize; } public double ySampleMean() { return ySampleMean; } public int ySampleSize() { return ySampleSize; } public double xSampleSd() { return xSampleSd; } public double ySampleSd() { return ySampleSd; } public double mu() { return mu; } public double sl() { return sl; } public HTest.Alternative alt() { return alt; } public double t() { return t; } public double df() { return df; } public double pValue() { return pValue; } public double ciLow() { return ciLow; } public double ciHigh() { return ciHigh; } private void compute() { double pv; if (equalVars) { df = xSampleSize + ySampleSize - 2; double xv = xSampleSd * xSampleSd * (xSampleSize - 1); double yv = ySampleSd * ySampleSd * (ySampleSize - 1); pv = Math.sqrt((xv + yv) / df) * Math.sqrt(1.0 / xSampleSize + 1.0 / ySampleSize); t = (xSampleMean - ySampleMean - mu) / pv; } else { double xv = xSampleSd * xSampleSd / xSampleSize; double yv = ySampleSd * ySampleSd / ySampleSize; t = (xSampleMean - ySampleMean - mu) / Math.sqrt(xv + yv); df = Math.pow(xv + yv, 2) / (xv * xv / (xSampleSize - 1) + yv * yv / (ySampleSize - 1)); pv = Math.sqrt(xv + yv); } StudentT st = new StudentT(df); switch (alt) { case GREATER_THAN: pValue = 1 - st.cdf(t); break; case LESS_THAN: pValue = st.cdf(t); break; default: pValue = st.cdf(-Math.abs(t)) * 2; } ciLow = new StudentT(df, xSampleMean - ySampleMean, pv).quantile(sl / 2); ciHigh = new StudentT(df, xSampleMean - ySampleMean, pv).quantile(1 - sl / 2); } @Override public String summary() { StringBuilder sb = new StringBuilder(); sb.append("\n"); sb.append("> HTTools.tTestTwoSamples\n"); sb.append("\n"); if (equalVars) { sb.append(" Two Samples t-test\n"); sb.append(" (equal variances)\n"); sb.append("\n"); } else { sb.append(" Welch's Two Samples t-test\n"); sb.append(" (unequal variances)\n"); sb.append("\n"); } sb.append("mean: ").append(formatFlex(mu)).append("\n"); sb.append("\nsample estimates:\n"); sb.append("x mean: ").append(formatFlex(xSampleMean)).append("\n"); sb.append("x size: ").append(xSampleSize).append("\n"); sb.append("x sd: ").append(formatFlex(xSampleSd)).append("\n"); sb.append("y mean: ").append(formatFlex(ySampleMean)).append("\n"); sb.append("y size: ").append(ySampleSize).append("\n"); sb.append("y sd: ").append(formatFlex(ySampleSd)).append("\n"); sb.append("\ntest results:\n"); sb.append("df: ").append(df).append("\n"); sb.append("significance level: ").append(formatFlex(sl)).append("\n"); sb.append("alternative hypothesis: ").append(alt == HTest.Alternative.TWO_TAILS ? "two tails " : "one tail ").append(alt.pCondition()).append("\n"); sb.append("t: ").append(formatFlex(t)).append("\n"); sb.append("p-value: ").append(pValue).append("\n"); sb.append("conf int: [").append(formatFlex(ciLow)).append(",").append(formatFlex(ciHigh)).append("]\n"); return sb.toString(); } }