package tests;
/*
Algorithms implemented from descriptions in Neave and Worthington
*
**/
import java.util.*;
import distributions.*;
public class TwoSampleTests extends Tests{
/**
 * Runs the unequal-variance t-test, the Mann-Whitney test and the robust rank order
 * test on the same pair of samples and reports the outcomes as text.
 *
 * @param a observations of the first sample (wrapped as sample 0)
 * @param b observations of the second sample (wrapped as sample 1)
 * @return three newline-terminated lines, one per test, each of the form
 *         {@code TestName,testStatistic,pValue}; the p-value is whatever
 *         {@code TestResults.findPValue()} stores in {@code pValue}
 */
public String performTests(double[] a, double[] b) {
    // Wrap each raw value in a DataPoint that remembers its sample index and position.
    DataPoint[][] samples = { new DataPoint[a.length], new DataPoint[b.length] };
    for (int k = 0; k < a.length; k++) {
        samples[0][k] = new DataPoint(a[k], 0, k);
    }
    for (int k = 0; k < b.length; k++) {
        samples[1][k] = new DataPoint(b[k], 1, k);
    }

    // t-test
    TestResults tResult = new TestResults("T_Test");
    studentT_Test(tResult, samples);
    tResult.findPValue();
    String tLine = "T_Test," + tResult.testStat + "," + tResult.pValue + "\n";

    // Mann-Whitney
    TestResults mwResult = new TestResults("Mann_Whitney");
    mannWhitney(mwResult, samples);
    mwResult.findPValue();
    String mwLine = "Mann_Whitney," + mwResult.testStat + "," + mwResult.pValue + "\n";

    // Robust rank order
    TestResults rrsResult = new TestResults("robustRankSum");
    robustRankSum(rrsResult, samples);
    rrsResult.findPValue();
    String rrsLine = "robustRankSum," + rrsResult.testStat + "," + rrsResult.pValue + "\n";

    return tLine + mwLine + rrsLine;
}
/**
 * Runs one two-sample test, chosen by {@code testType}, on the given samples.
 *
 * <p>The returned {@code TestResults} is labelled with the name of the test that was
 * actually run (the same labels the dedicated {@code *_PValue} / {@code *_TestStat}
 * helpers use). {@code findPValue()} is NOT called here; callers that need the
 * p-value must call it themselves.
 *
 * @param a        observations of the first sample (wrapped as sample 0)
 * @param b        observations of the second sample (wrapped as sample 1)
 * @param testType 0 = t-test ({@code studentT_Test}), 1 = Mann-Whitney
 *                 ({@code mannWhitney}), 2 = robust rank order ({@code robustRankSum})
 * @return the results object filled in by the selected test
 * @throws IllegalArgumentException if {@code testType} is not 0, 1 or 2
 */
public static TestResults performTwoSampleTest(double[] a, double[] b, int testType) {
    // Resolve the label first so an unsupported type fails before any work is done,
    // and so the result is not mislabelled as a t-test for the rank-based tests.
    final String testName;
    switch (testType) {
        case 0:
            testName = "T_Test";
            break;
        case 1:
            testName = "Mann_Whitney";
            break;
        case 2:
            testName = "robustRankSum";
            break;
        default:
            // A library method must not terminate the JVM (and certainly not with a
            // success status); report the bad argument to the caller instead.
            throw new IllegalArgumentException("Two-sample test not implemented for testType=" + testType);
    }

    // Wrap each raw value in a DataPoint that remembers its sample index and position.
    DataPoint[][] d = new DataPoint[2][];
    d[0] = new DataPoint[a.length];
    d[1] = new DataPoint[b.length];
    for (int i = 0; i < a.length; i++) {
        d[0][i] = new DataPoint(a[i], 0, i);
    }
    for (int i = 0; i < b.length; i++) {
        d[1][i] = new DataPoint(b[i], 1, i);
    }

    TestResults testR = new TestResults(testName);
    switch (testType) {
        case 0:
            studentT_Test(testR, d);
            break;
        case 1:
            mannWhitney(testR, d);
            break;
        default: // only 2 can reach here; other values were rejected above
            robustRankSum(testR, d);
            break;
    }
    return testR;
}
/**
 * Convenience wrapper around {@code performTwoSampleTest} that returns a single number.
 *
 * @param a          observations of the first sample
 * @param b          observations of the second sample
 * @param testType   test selector, passed straight to {@code performTwoSampleTest}
 * @param returnPVal {@code true} to compute and return the p-value,
 *                   {@code false} to return the raw test statistic
 * @return the p-value or the test statistic, depending on {@code returnPVal}
 */
public static double performTest(double[] a, double[] b, int testType, boolean returnPVal) {
    TestResults outcome = performTwoSampleTest(a, b, testType);
    if (!returnPVal) {
        return outcome.testStat;
    }
    // The p-value is only computed on demand.
    outcome.findPValue();
    return outcome.pValue;
}
/**
 * Runs the unequal-variance t-test on two raw samples and returns its p-value.
 *
 * @param a observations of the first sample (wrapped as sample 0)
 * @param b observations of the second sample (wrapped as sample 1)
 * @return the {@code pValue} field after {@code studentT_Test} and {@code findPValue()}
 */
public static double studentT_PValue(double[] a, double[] b) {
    DataPoint[] first = new DataPoint[a.length];
    DataPoint[] second = new DataPoint[b.length];
    for (int k = 0; k < first.length; k++) {
        first[k] = new DataPoint(a[k], 0, k);
    }
    for (int k = 0; k < second.length; k++) {
        second[k] = new DataPoint(b[k], 1, k);
    }
    DataPoint[][] samples = { first, second };

    TestResults result = new TestResults("T_Test");
    studentT_Test(result, samples);
    result.findPValue();
    return result.pValue;
}
/**
 * Runs the Mann-Whitney test on two raw samples and returns its p-value.
 *
 * @param a observations of the first sample (wrapped as sample 0)
 * @param b observations of the second sample (wrapped as sample 1)
 * @return the {@code pValue} field after {@code mannWhitney} and {@code findPValue()}
 */
public static double mw_PValue(double[] a, double[] b) {
    DataPoint[] first = new DataPoint[a.length];
    DataPoint[] second = new DataPoint[b.length];
    for (int k = 0; k < first.length; k++) {
        first[k] = new DataPoint(a[k], 0, k);
    }
    for (int k = 0; k < second.length; k++) {
        second[k] = new DataPoint(b[k], 1, k);
    }
    DataPoint[][] samples = { first, second };

    TestResults result = new TestResults("Mann_Whitney");
    mannWhitney(result, samples);
    result.findPValue();
    return result.pValue;
}
/**
 * Runs the robust rank order test on two raw samples and returns its p-value.
 *
 * @param a observations of the first sample (wrapped as sample 0)
 * @param b observations of the second sample (wrapped as sample 1)
 * @return the {@code pValue} field after {@code robustRankSum} and {@code findPValue()}
 */
public static double rrs_PValue(double[] a, double[] b) {
    DataPoint[] first = new DataPoint[a.length];
    DataPoint[] second = new DataPoint[b.length];
    for (int k = 0; k < first.length; k++) {
        first[k] = new DataPoint(a[k], 0, k);
    }
    for (int k = 0; k < second.length; k++) {
        second[k] = new DataPoint(b[k], 1, k);
    }
    DataPoint[][] samples = { first, second };

    TestResults result = new TestResults("robustRankSum");
    robustRankSum(result, samples);
    result.findPValue();
    return result.pValue;
}
/**
 * Runs the unequal-variance t-test on two raw samples and returns the test statistic only.
 *
 * @param a observations of the first sample (wrapped as sample 0)
 * @param b observations of the second sample (wrapped as sample 1)
 * @return the {@code testStat} field set by {@code studentT_Test}
 */
public static double studentT_TestStat(double[] a, double[] b) {
    DataPoint[] first = new DataPoint[a.length];
    DataPoint[] second = new DataPoint[b.length];
    for (int k = 0; k < first.length; k++) {
        first[k] = new DataPoint(a[k], 0, k);
    }
    for (int k = 0; k < second.length; k++) {
        second[k] = new DataPoint(b[k], 1, k);
    }
    DataPoint[][] samples = { first, second };

    TestResults result = new TestResults("T_Test");
    studentT_Test(result, samples);
    return result.testStat;
}
/**
 * Runs the Mann-Whitney test on two raw samples and returns the test statistic only.
 *
 * @param a observations of the first sample (wrapped as sample 0)
 * @param b observations of the second sample (wrapped as sample 1)
 * @return the {@code testStat} field set by {@code mannWhitney}
 */
public static double mw_TestStat(double[] a, double[] b) {
    DataPoint[] first = new DataPoint[a.length];
    DataPoint[] second = new DataPoint[b.length];
    for (int k = 0; k < first.length; k++) {
        first[k] = new DataPoint(a[k], 0, k);
    }
    for (int k = 0; k < second.length; k++) {
        second[k] = new DataPoint(b[k], 1, k);
    }
    DataPoint[][] samples = { first, second };

    TestResults result = new TestResults("Mann_Whitney");
    mannWhitney(result, samples);
    return result.testStat;
}
/**
 * Runs the robust rank order test on two raw samples and returns the test statistic only.
 *
 * @param a observations of the first sample (wrapped as sample 0)
 * @param b observations of the second sample (wrapped as sample 1)
 * @return the {@code testStat} field set by {@code robustRankSum}
 */
public static double rrs_TestStat(double[] a, double[] b) {
    DataPoint[] first = new DataPoint[a.length];
    DataPoint[] second = new DataPoint[b.length];
    for (int k = 0; k < first.length; k++) {
        first[k] = new DataPoint(a[k], 0, k);
    }
    for (int k = 0; k < second.length; k++) {
        second[k] = new DataPoint(b[k], 1, k);
    }
    DataPoint[][] samples = { first, second };

    TestResults result = new TestResults("robustRankSum");
    robustRankSum(result, samples);
    return result.testStat;
}
/**
 * Robust rank order test for two samples.
 *
 * <p>The placements are found as for the Mann-Whitney U statistic, but in both
 * directions: for every point of sample 0 the number of sample-1 points that precede
 * it in the pooled sorted order, and vice versa. With placement sums {@code S_YX},
 * {@code S_XY}, placement means {@code u_YX}, {@code u_XY} and the sums of squared
 * deviations of the placements about their means {@code Va}, {@code Vb}, the statistic is
 * <pre>
 *   U = (S_YX - S_XY) / (2 * sqrt(Va + Vb + u_YX * u_XY))
 * </pre>
 * It is stored in {@code t.testStat}, and {@code t.dist} is set to
 * {@code new NormalDistribution(0,1)}.
 *
 * <p>Notes:
 * <ul>
 *   <li>No special treatment of ties: the order of equal values is whatever
 *       {@code Arrays.sort} yields for {@code DataPoint}'s ordering.</li>
 *   <li>A point is attributed to sample 0 when {@code sampleNumber()==0}, otherwise
 *       to sample 1.</li>
 *   <li>Diagnostic values are printed to {@code System.out}.</li>
 *   <li>An empty sample makes a placement mean 0/0, so the statistic becomes NaN.</li>
 * </ul>
 *
 * @param t results object that receives the statistic and its null distribution
 * @param d the two samples, {@code d[0]} and {@code d[1]}
 */
public static void robustRankSum(TestResults t, DataPoint[][] d) {
    // 1. Find placement arrays for both directions.
    // 1.1 Pool the two samples into one array.
    DataPoint[] mergedD = new DataPoint[d[0].length + d[1].length];
    System.arraycopy(d[0], 0, mergedD, 0, d[0].length);
    System.arraycopy(d[1], 0, mergedD, d[0].length, d[1].length);
    // 1.2 Sort the pooled data.
    Arrays.sort(mergedD);

    int m = d[0].length;
    int n = d[1].length;
    int[] p_YX = new int[m]; // per sample-0 point: sample-1 points seen before it
    int[] p_XY = new int[n]; // per sample-1 point: sample-0 points seen before it
    int countA = 0;
    int countB = 0;
    double u_YX = 0, u_XY = 0;
    // 1.3 Single pass over the sorted pool, recording each placement and its running sum.
    for (int i = 0; i < mergedD.length; i++) {
        if (mergedD[i].sampleNumber() == 0) {
            p_YX[countA] = countB;
            u_YX += p_YX[countA];
            countA++;
        } else {
            p_XY[countB] = countA;
            u_XY += p_XY[countB];
            countB++;
        }
    }

    // Diagnostics (labels kept exactly as before).
    System.out.println(" Series A positions = ");
    for (int i = 0; i < p_XY.length; i++)
        System.out.print(p_XY[i] + ",");
    System.out.println(" Series B positions = ");
    for (int i = 0; i < p_YX.length; i++)
        System.out.print(p_YX[i] + ",");
    System.out.println(" u1 = " + u_XY + " u2 = " + u_YX);
    System.out.println(" m*n = " + p_XY.length * p_YX.length + " u1+u2 = " + (u_XY + u_YX));

    // 2. Calculate the test statistic.
    System.out.println(" U_A statistic = " + u_YX / m);
    System.out.println(" U_B statistic = " + u_XY / n);
    // Turn the placement sums into placement means.
    u_YX /= m;
    u_XY /= n;
    double Va = 0, Vb = 0;
    for (int i = 0; i < m; i++)
        Va += (p_YX[i] - u_YX) * (p_YX[i] - u_YX);
    // The second sample's spread belongs in Vb (it used to be added to Va, leaving Vb
    // permanently zero; the statistic only uses Va+Vb so its value is unchanged).
    for (int i = 0; i < n; i++)
        Vb += (p_XY[i] - u_XY) * (p_XY[i] - u_XY);
    // m*u_YX and n*u_XY recover the placement sums.
    double U = (m * u_YX - n * u_XY) / (2 * Math.sqrt(Va + Vb + u_YX * u_XY));
    t.dist = new NormalDistribution(0, 1);
    t.testStat = U;
    System.out.println(" U New = " + t.testStat);
}
/**
 * Wilcoxon rank sum statistic, derived from the Mann-Whitney U count.
 *
 * <p>Stores {@code U + nA*(nA+1)/2} in {@code t.testStat}, where {@code U} comes from
 * {@code findU(d)} and {@code nA} is the size of {@code d[0]}, and prints the value.
 * No null distribution is assigned to {@code t} here.
 *
 * @param t results object that receives the statistic
 * @param d the two samples, {@code d[0]} and {@code d[1]}
 */
public static void wilcoxonRankSum(TestResults t, DataPoint[][] d) {
    double uCount = (double) findU(d);
    int sizeA = d[0].length;
    t.testStat = uCount + 0.5 * sizeA * (sizeA + 1);
    System.out.println(" R statistic = " + t.testStat);
}
/**
 * Mann-Whitney two-sample test.
 *
 * <p>Reference: H. B. Mann and D. R. Whitney, "On a test of whether one of two random
 * variables is stochastically larger than the other", Ann. Math. Statist. 18, 50-60 (1947).
 *
 * <p>The statistic U is obtained from {@code findU(d)}: for each element of sample 0 it
 * counts the elements of sample 1 that come before it in the pooled sorted order, and
 * sums those counts. U is stored in {@code t.testStat}. The null distribution assigned
 * to {@code t.dist} is the normal approximation with
 * <pre>
 *   mean = nA*nB/2,   sd = sqrt(nA*nB*(nA+nB+1)/12)
 * </pre>
 * where {@code nA}, {@code nB} are the two sample sizes. The null mean and standard
 * deviation are also printed to {@code System.out}.
 *
 * <p>Notes: works with unequal sample sizes; related to the Wilcoxon rank sum test;
 * the approximation applies no correction for ties.
 *
 * @param t results object that receives the statistic and its null distribution
 * @param d the two samples, {@code d[0]} and {@code d[1]}
 */
public static void mannWhitney(TestResults t, DataPoint[][] d) {
    // 1. Find U.
    t.testStat = (double) findU(d);

    // 2. Parameters of the normal approximation under the null hypothesis.
    double sizeA = (double) d[0].length;
    double sizeB = (double) d[1].length;
    double nullMean = 0.5 * sizeA * sizeB;
    double nullVariance = sizeA * sizeB * (sizeA + sizeB + 1) / 12.0;
    double nullStDev = Math.sqrt(nullVariance);

    t.dist = new NormalDistribution(nullMean, nullStDev);
    System.out.println(" Null Mean =" + nullMean + " Null SD = " + nullStDev);
}
/**
 * Two-sample t-test that does not assume equal variances.
 *
 * <p>Computes each sample's mean and unbiased variance (divisor {@code n-1}), then
 * <pre>
 *   t  = (mean1 - mean2) / sqrt(var1/n1 + var2/n2)
 *   df = ceil( (var1/n1 + var2/n2)^2 / ( (var1/n1)^2/(n1-1) + (var2/n2)^2/(n2-1) ) )
 * </pre>
 * The statistic is stored in {@code t.testStat}, the degrees of freedom in
 * {@code t.df1}, and {@code t.dist} becomes a {@code StudentDistribution} with that df.
 *
 * @param t results object that receives the statistic, df and null distribution
 * @param d the two samples, {@code d[0]} and {@code d[1]}
 */
public static void studentT_Test(TestResults t, DataPoint[][] d) {
    DataPoint[] sampleA = d[0];
    DataPoint[] sampleB = d[1];
    int n1 = sampleA.length;
    int n2 = sampleB.length;

    // Sample means.
    double meanA = 0;
    for (DataPoint p : sampleA) {
        meanA += p.d;
    }
    double meanB = 0;
    for (DataPoint p : sampleB) {
        meanB += p.d;
    }
    meanA /= n1;
    meanB /= n2;

    // Unbiased sample variances.
    double varA = 0;
    for (DataPoint p : sampleA) {
        varA += (p.d - meanA) * (p.d - meanA);
    }
    double varB = 0;
    for (DataPoint p : sampleB) {
        varB += (p.d - meanB) * (p.d - meanB);
    }
    varA /= (n1 - 1);
    varB /= (n2 - 1);

    // Per-sample contribution to the variance of the difference in means.
    double termA = varA / n1;
    double termB = varB / n2;

    // Test statistic.
    t.testStat = (meanA - meanB) / Math.sqrt((termA + termB));

    // Degrees of freedom, rounded up to an integer.
    double dfNumerator = (termA + termB) * (termA + termB);
    double dfDenominator = termA * termA / (n1 - 1) + termB * termB / (n2 - 1);
    t.df1 = (int) Math.ceil(dfNumerator / dfDenominator);
    t.dist = new StudentDistribution(t.df1);
}
/**
 * Concatenates the two samples into one new array: all of {@code d[0]} followed by
 * all of {@code d[1]}. The elements themselves are shared, not copied.
 *
 * @param d the two samples, {@code d[0]} and {@code d[1]}
 * @return a new array of length {@code d[0].length + d[1].length}
 */
private static DataPoint[] mergeData(DataPoint[][] d) {
    int firstLen = d[0].length;
    int secondLen = d[1].length;
    DataPoint[] pooled = new DataPoint[firstLen + secondLen];
    System.arraycopy(d[0], 0, pooled, 0, firstLen);
    System.arraycopy(d[1], 0, pooled, firstLen, secondLen);
    return pooled;
}
/**
 * Counts, for every element of sample {@code a}, how many elements of sample {@code b}
 * come before it in the pooled sorted order, and returns the total.
 *
 * <p>Equal values get no special handling; their relative order is whatever
 * {@code Arrays.sort} produces for {@code DataPoint}'s ordering. A pooled element is
 * treated as belonging to sample {@code a} exactly when {@code sampleNumber()==a}.
 *
 * @param d the samples
 * @param a index into {@code d} of the sample whose elements are scored
 * @param b index into {@code d} of the sample whose elements are counted
 * @return the U count
 */
static private int findU(DataPoint[][] d, int a, int b) {
    DataPoint[] scored = d[a];
    DataPoint[] counted = d[b];

    // 1. Pool the two samples.
    DataPoint[] pooled = new DataPoint[scored.length + counted.length];
    System.arraycopy(scored, 0, pooled, 0, scored.length);
    System.arraycopy(counted, 0, pooled, scored.length, counted.length);

    // 2. Sort the pooled data.
    Arrays.sort(pooled);

    // 3. Walk the sorted pool; stop early once every element of sample a has been scored.
    int scoredSeen = 0;
    int countedSeen = 0;
    int total = 0;
    int pos = 0;
    while (pos < pooled.length && scoredSeen < scored.length) {
        if (pooled[pos].sampleNumber() != a) {
            // From the other sample: it precedes every sample-a element still to come.
            countedSeen++;
        } else {
            total += countedSeen;
            scoredSeen++;
        }
        pos++;
    }
    return total;
}
/**
 * Two-sample convenience form of {@code findU(d,0,1)}: scores the elements of
 * {@code d[0]} against those of {@code d[1]}. Ties are not handled.
 *
 * <p>If {@code d} does not hold exactly two samples, an error line is printed and the
 * JVM is terminated via {@code System.exit(0)}.
 *
 * @param d the two samples
 * @return the U count for sample 0 relative to sample 1
 */
static private int findU(DataPoint[][] d) {
    boolean exactlyTwoSamples = (d.length == 2);
    if (!exactlyTwoSamples) {
        System.out.println("Error, cannot use this for k!=2");
        System.exit(0);
    }
    return findU(d, 0, 1);
}
/**
 * Manual smoke test: runs the two-sample tests on two hard-coded data sets, printing
 * the results, and then terminates the JVM with {@code System.exit(0)}.
 *
 * <p>Data set 1 (noted in the original comments as page 110), sizes 8 and 12:
 * <pre>
 *   Sample A: 3,7,15,10,4,6,4,7
 *   Sample B: 19,11,36,8,25,23,38,14,17,41,25,21
 *   Expected Mann-Whitney statistic: U = 4
 * </pre>
 * Data set 2 (attributed in the original comments to the Feltovich paper
 * "Nonparametric tests of differences in medians"), sizes 7 and 8 as coded here:
 * <pre>
 *   Sample A: 5.025,6.7,6.725,6.75,7.05,7.25,8.375
 *   Sample B: 4.875,5.125,5.225,5.425,5.55,5.75,5.925,6.125
 * </pre>
 * NOTE(review): the earlier comment listed only 7 values for sample B (without 5.425)
 * and quoted "U=4" for this set as well; neither has been checked against the paper.
 */
public static void testTwoSamples() {
    // ---- Data set 1 ----
    double[] firstA = {3, 7, 15, 10, 4, 6, 4, 7};
    double[] firstB = {19, 11, 36, 8, 25, 23, 38, 14, 17, 41, 25, 21};
    int sizeA = 8;
    int sizeB = 12;

    DataPoint[][] samples = new DataPoint[2][];
    samples[0] = new DataPoint[sizeA];
    samples[1] = new DataPoint[sizeB];
    for (int k = 0; k < sizeA; k++) {
        samples[0][k] = new DataPoint(firstA[k], 0, k);
    }

    // Text report straight from the raw arrays.
    TwoSampleTests runner = new TwoSampleTests();
    String report = runner.performTests(firstA, firstB);
    System.out.println(report + "\n");

    for (int k = 0; k < sizeB; k++) {
        samples[1][k] = new DataPoint(firstB[k], 1, k);
    }

    // Same data again through the individual test routines (they print diagnostics);
    // one results object is reused for all of them.
    TestResults shared = new TestResults("Mann Whittley");
    studentT_Test(shared, samples);
    mannWhitney(shared, samples);
    robustRankSum(shared, samples);

    // ---- Data set 2 ----
    int m = 7;
    int n = 8;
    double[] secondA = {5.025, 6.7, 6.725, 6.75, 7.05, 7.25, 8.375};
    double[] secondB = {4.875, 5.125, 5.225, 5.425, 5.55, 5.75, 5.925, 6.125};

    samples = new DataPoint[2][];
    samples[0] = new DataPoint[m];
    samples[1] = new DataPoint[n];
    for (int k = 0; k < m; k++) {
        samples[0][k] = new DataPoint(secondA[k], 0, k);
    }
    for (int k = 0; k < n; k++) {
        samples[1][k] = new DataPoint(secondB[k], 1, k);
    }

    studentT_Test(shared, samples);
    mannWhitney(shared, samples);
    robustRankSum(shared, samples);

    report = runner.performTests(secondA, secondB);
    System.out.println(report + "\n");
    System.exit(0);
}
/**
 * Wilcoxon matched-pairs test: forms the pairwise differences {@code d[0][i]-d[1][i]},
 * ranks them with {@code Tests.rank} and hands them to
 * {@code OneSampleTests.wilcoxonSignRank}.
 *
 * <p>Each difference is printed to {@code System.out}. If {@code d} does not hold
 * exactly two samples, or the two samples differ in length, an error line is printed
 * and the JVM is terminated via {@code System.exit(0)}.
 *
 * @param t results object passed on to the one-sample signed-rank routine
 * @param d the two matched samples, {@code d[0]} and {@code d[1]}
 */
public static void wilcoxonMatchedPairs(TestResults t, DataPoint[][] d) {
    if (d.length != 2) {
        System.out.println("Error, cannot use this for k!=2");
        System.exit(0);
    }
    if (d[0].length != d[1].length) {
        System.out.println("Error, cannot use this for unequal samples, they should be matched");
        System.exit(0);
    }

    int pairCount = d[0].length;
    DataPoint[] ranked = new DataPoint[pairCount];
    for (int k = 0; k < pairCount; k++) {
        DataPoint left = d[0][k];
        DataPoint right = d[1][k];
        System.out.println(" Difference =" + (left.d - right.d));
        // The difference keeps the position of the first sample's observation.
        ranked[k] = new DataPoint(left.d - right.d, 0, left.position);
    }

    Tests.rank(ranked);
    OneSampleTests.wilcoxonSignRank(t, ranked);
}
/**
 * Entry point: runs {@code testTwoSamples()} and exits.
 *
 * <p>Both {@code testTwoSamples()} and the explicit {@code System.exit(0)} terminate
 * the JVM, so the matched-pairs run on the hard-coded data file below is not executed
 * as the code stands.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String args[]) {
    testTwoSamples();
    System.exit(0);

    // ---- Not executed while the exit above is in place ----
    String fileName = "C:/JavaSource/Clustering/Clustering_Data/Java_Application_Release/Experiment3/Exp3NoRandomShocks.txt";
    TestResults pairedResults = new TestResults("SSS");
    pairedResults.h0 = 0;
    pairedResults.level = 0.05;
    pairedResults.type = 0;

    loadData(fileName);
    wilcoxonMatchedPairs(pairedResults, dataByLevel);
    System.out.println(pairedResults);

    // An unfinished, commented-out sketch used to follow here: load the data with
    // getData(fileName) and dispatch on a test-type code
    // (0 = T_Test, 1 = MannWhitney, 2 = Wilcoxon, 3 = Tukey, 4 = dependent samples).
}
}