/* * Copyright (C) 2011-2013 Dr. John Lindsay <jlindsay@uoguelph.ca> * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package whitebox.stats; import java.util.Arrays; /** * The following code is based on Press et al. Numerical Recipes in C. * * @author johnlindsay */ public class TwoSampleKSTest { double dmax = -1.0; double pvalue = -1.0; double[] data1; double[] data2; long n1 = 0; long n2 = 0; final double EPS1 = 0.001; final double EPS2 = 1.0e-8; public TwoSampleKSTest() { } public TwoSampleKSTest(double[] data1, double[] data2) { this.data1 = data1.clone(); this.data2 = data2.clone(); this.n1 = this.data1.length; this.n2 = this.data2.length; } // properties' getters and setters public double[] getData1() { return data1.clone(); } public void setData1(double[] data1) { dmax = -1.0; pvalue = -1.0; this.data1 = data1.clone(); } public double[] getData2() { return data2.clone(); } public void setData2(double[] data2) { dmax = -1.0; pvalue = -1.0; this.data2 = data2.clone(); } public double getDmax() { if (dmax < 0) { calculateDMax(); } return dmax; } public double getPvalue() { if (dmax < 0) { calculateDMax(); } return pvalue; } public long getN1() { return n1; } public long getN2() { return n2; } // private methods private void calculateDMax() { try { int j1 = 0; int j2 = 0; double d1, d2, dt, en1, en2, en, fn1 = 0.0, fn2 = 0.0; // sort data1 and data2 Arrays.sort(data1); Arrays.sort(data2); en1 = n1; en2 = n2; while (j1 < n1 && j2 < n2) { d1 = data1[j1]; d2 = data2[j2]; if (d1 <= d2) { j1++; fn1 = j1 / en1; } if (d2 <= d1) { j2++; fn2 = j2 / en2; } dt = Math.abs(fn2 - fn1); if (dt > dmax) { dmax = dt; } } en = Math.sqrt(en1 * en2 / (en1 + en2)); calculatePValue((en + 0.12 + 0.11 / en) * dmax); } catch (Exception e) { System.out.println(e.getMessage()); } } private void calculatePValue(double alam) { int j; double a2, fac = 2.0, sum = 0.0, term, termbf = 0.0; a2 = -2.0 * alam * alam; for (j = 1; j <= 100; j++) { term = fac * Math.exp(a2 * j * j); sum += term; if (Math.abs(term) <= EPS1 * termbf || Math.abs(term) <= EPS2 * sum) { pvalue = sum; return; } fac = -fac; termbf = Math.abs(term); } pvalue = 1.0; } public static void main(String[] args) { /* This is used for testing purposes. * */ try { double[] x = {-0.87399622, -0.06073305, -0.82809841, 0.36246144, 0.61187679, -0.36278161, 2.65692271, -0.04878119, -0.29685874, 0.09778020, -0.79740043, 0.86220642, -0.08187849, -0.49417868, -0.68428830, 0.50215073, -0.02778265, -1.13114516, -0.30488283, -0.47912706, 1.10121522, 0.72200371, -0.12419619, 0.88308067, 1.24170482}; double[] y = {2.64614212, 2.40133975, -0.24951630, -1.05281579, 0.60464690, 0.42801624, 0.06603241, 1.82728020, 2.05485682, 1.71798776, 1.34008775, 1.52282631, 1.11934889, 0.34031629, 0.76826312, -0.20036927, 0.87902700, 0.77086493, 1.29494406, 0.07522084, -1.10084977, -0.12663182, 0.66229069, 0.44319635, 0.62638824}; // D = 0.4, p-value = 0.026 (notice that R calculates a p-value of 0.03561 // but other online k-s test functions calculate a p-value of 0.026) TwoSampleKSTest ks = new TwoSampleKSTest(x, y); double Dmax = ks.getDmax(); double pValue = ks.getPvalue(); System.out.println("Dmax = " + Dmax); System.out.println("p-value = " + pValue); assert ((int)Math.round(Dmax * 100) == 40) : "Test failed based on Dmax"; assert ((int)Math.round(pValue * 1000) == 26) : "Test failed based on p-value"; System.out.println("Tests passed"); } catch (AssertionError ae) { System.out.println(ae.getMessage()); } } }