/* * Apache License * Version 2.0, January 2004 * http://www.apache.org/licenses/ * * Copyright 2013 Aurelian Tutuianu * Copyright 2014 Aurelian Tutuianu * Copyright 2015 Aurelian Tutuianu * Copyright 2016 Aurelian Tutuianu * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package rapaio.core.tests; import rapaio.core.distributions.ChiSquare; import rapaio.core.tools.DTable; import rapaio.core.tools.DVector; import rapaio.data.Numeric; import rapaio.data.Var; import rapaio.printer.Printable; import rapaio.sys.WS; import java.util.Arrays; /** * Offers tools for chi-square based hypothesis testing. * <p> * Created by <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a> on 11/10/15. */ public abstract class ChiSquareTest implements Printable { public static ChiSquareTest goodnessOfFitTest(Var x, double... p) { DVector dv = buildDv(x); Numeric expected = Arrays.stream(p).map(pi -> pi * dv.sum()).boxed().collect(Numeric.collector()); return new GoodnessOfFit(dv, expected); } public static ChiSquareTest goodnessOfFitTest(DVector dv, double... p) { Numeric expected = Arrays.stream(p).map(pi -> pi * dv.sum()).boxed().collect(Numeric.collector()); return new GoodnessOfFit(dv, expected); } /** * Tests the independence of given discrete random variables * * @param x first random variable * @param y second random variable * @return result object */ public static ChiSquareTest independenceTest(Var x, Var y) { return new Independence(DTable.fromCounts(x, y, false)); } public static ChiSquareTest independenceTest(DTable dt) { return new Independence(dt); } public abstract int df(); public abstract double pValue(); public abstract double chiValue(); private static DVector buildDv(Var x) { switch (x.type()) { case BINARY: case NOMINAL: case ORDINAL: return DVector.fromCount(false, x); case NUMERIC: case INDEX: DVector dv = DVector.empty(true, x.rowCount()); for (int i = 0; i < x.rowCount(); i++) { dv.set(i, x.value(i)); } return dv; default: throw new IllegalArgumentException("variable of give type could not be " + "used to build discrete observed counts"); } } } class GoodnessOfFit extends ChiSquareTest { public static GoodnessOfFit fromCountAndExpected(Var o, Numeric e) { // degrees of freedom DVector dv = DVector.fromCount(false, o); if (e.rowCount() != dv.rowCount()) { throw new IllegalArgumentException("number of expected value elements is not the same as number of levels"); } return new GoodnessOfFit(dv, e); } private DVector dv; private final int df; // degrees of freedom private final double chiValue; // chi-square statistic's value private final double pValue; public GoodnessOfFit(DVector dv, Numeric expected) { if (dv.rowCount() - dv.start() != expected.rowCount()) { throw new IllegalArgumentException("Different degrees of freedom!"); } this.dv = dv; this.df = expected.rowCount() - 1; if (df <= 0) { throw new IllegalArgumentException("should be over 0"); } double sum = 0; for (int i = dv.start(); i < dv.rowCount(); i++) { double o = dv.get(i); double e = expected.value(i - dv.start()); if (Math.abs(e) < 1e-50) { sum += Double.POSITIVE_INFINITY; break; } if (Math.abs(e) < 1e-50 && Math.abs(o - e) < 1e50) { continue; } sum += Math.pow(o - e, 2) / expected.value(i - dv.start()); } chiValue = sum; pValue = 1.0 - new ChiSquare(df).cdf(chiValue); } public int df() { return df; } public double chiValue() { return chiValue; } public double pValue() { return pValue; } @Override public String summary() { StringBuilder sb = new StringBuilder(); sb.append("\n> ChiSquareTest.goodness\n"); sb.append("\n"); sb.append("Chi-squared test for given probabilities (goodness of fit)\n"); sb.append("\n"); sb.append("data: \n"); sb.append(dv.summary()); sb.append("TODO\n\n"); // sb.append(dv.summary()).append("\n"); sb.append("X-squared = ").append(WS.formatFlex(chiValue)) .append(", df = ").append(df) .append(", p-value = ").append(pValue) .append("\n"); return sb.toString(); } } /** * Implements goodness of fit test with chi-square distribution. * <p> * Created by <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a> on 11/9/15. */ class Independence extends ChiSquareTest { private final DTable dt; private final int df; // degrees of freedom private final double chiValue; // chi-square statistic's value private final double pValue; public Independence(DTable dt) { this.dt = dt; df = (dt.rowCount() - 1 - dt.start()) * (dt.colCount() - 1 - dt.start()); double[] rowTotals = dt.rowTotals(); double[] colTotals = dt.colTotals(); double total = Arrays.stream(rowTotals).sum(); double sum = 0.0; for (int i = dt.start(); i < dt.rowCount(); i++) { for (int j = dt.start(); j < dt.colCount(); j++) { double expected = rowTotals[i] * colTotals[j] / total; sum += Math.pow(dt.get(i, j) - expected, 2) / expected; } } chiValue = sum; pValue = 1.0 - new ChiSquare(df).cdf(sum); } public int df() { return df; } public double chiValue() { return chiValue; } public double pValue() { return pValue; } @Override public String summary() { StringBuilder sb = new StringBuilder(); sb.append("\n> ChiSquareTest.independence \n"); sb.append("\n"); sb.append(" Pearson’s Chi-squared test \n"); sb.append("\n"); sb.append("data: \n"); sb.append(dt.summary()).append("\n"); sb.append("X-squared = ").append(WS.formatFlex(chiValue)) .append(", df = ").append(df) .append(", p-value = ").append(pValue) .append("\n"); return sb.toString(); } } /* Pearson’s Chi-squared test data: ctbl X-squared = 3.2328, df = 3, p-value = 0.3571 */