package ca.pfv.spmf.algorithms.sequentialpatterns.goKrimp;
/**
* SignTest class implements the standard Sign Test to compare two populations and test if
* they are sampled from the distributions with the same mean value
* <br/><br/>
*
* For more information, please refer to the paper "Mining Compressing Sequential Patterns" in the journal Statistical Analysis and Data Mining.
* <br/><br/>
*
* Copyright (c) 2014 Hoang Thanh Lam (TU Eindhoven and IBM Research)
* Toon Calders (Université Libre de Bruxelles), Fabian Moerchen (Amazon.com inc)
* and Dmitriy Fradkin (Siemens Corporate Research)
* <br/><br/>
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
* <br/><br/>
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
* <br/><br/>
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* <br/><br/>
*
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*
* @see DataReader
* @see Event
* @see MyPattern
* @see AlgoGoKrimp
* @author Hoang Thanh Lam (TU Eindhoven and IBM Research)
*/
public class SignTest {
    /** The significance level: the test passes when the tail probability is below this value. */
    static final double alpha = 0.01;

    /** The minimum number of pairs; with fewer pairs {@link #sign_test()} always returns false. */
    static final int N = 25;

    // Coefficients of the degree-5 polynomial approximation of the standard
    // normal tail used by standard_normal_cdf.
    private static final double B0 = 0.2316419;
    private static final double B1 = 0.319381530;
    private static final double B2 = -0.356563782;
    private static final double B3 = 1.781477937;
    private static final double B4 = -1.821255978;
    private static final double B5 = 1.330274429;

    /** The number of pairs (X,Y). */
    int Npairs;

    /** The number of pairs (X,Y) such that X&gt;Y. */
    double Nplus;

    /**
     * Creates a sign test over the given counts.
     * @param np the number of pairs (X,Y)
     * @param npp the number of pairs (X,Y) such that X&gt;Y
     */
    SignTest(int np, double npp) {
        Npairs = np;
        Nplus = npp;
    }

    /**
     * The cdf function of the Standard Normal distribution, computed with a
     * polynomial approximation of the upper tail evaluated at |xx| and then
     * mirrored for non-positive inputs.
     * @param xx input value
     * @return cdf of standard normal distribution at xx
     */
    double standard_normal_cdf(double xx) {
        double x = Math.abs(xx);
        double t = 1 / (1 + B0 * x);
        // standard normal distribution's pdf at x
        double pdf = 1 / Math.sqrt(2 * Math.PI) * Math.exp(-0.5 * x * x);
        // Horner evaluation of B1*t + B2*t^2 + B3*t^3 + B4*t^4 + B5*t^5
        double polynomial = t * (B1 + t * (B2 + t * (B3 + t * (B4 + t * B5))));
        double upperTail = pdf * polynomial; // approximates P(Z > |xx|)
        if (xx > 0) {
            return 1 - upperTail;
        }
        // for xx <= 0 the cdf equals the upper tail at |xx| by symmetry
        return upperTail;
    }

    /**
     * Runs the sign test using the normal approximation of the binomial
     * distribution of {@code Nplus}.
     * @return true if it passes the test, i.e. the number of pairs is at least
     *         {@link #N} and the tail probability of the standardized deviation
     *         of {@code Nplus} from {@code Npairs/2} is below {@link #alpha};
     *         false otherwise
     */
    boolean sign_test() {
        // the number of pairs must be at least N=25 to perform the test
        if (Npairs < N) {
            return false;
        }
        // Under the null hypothesis Nplus ~ Binomial(Npairs, 0.5): the mean is
        // Npairs/2 and the standard deviation is sqrt(Npairs * 0.5 * 0.5) =
        // 0.5 * sqrt(Npairs). Dividing by sqrt(Npairs) alone would halve z.
        double standardDeviation = 0.5 * Math.sqrt(Npairs);
        double z = Math.abs(Nplus - 0.5 * Npairs) / standardDeviation;
        // NOTE(review): a single tail of |z| is compared against alpha, so as a
        // two-sided test the effective level is 2*alpha - confirm this is intended.
        return 1 - standard_normal_cdf(z) < alpha;
    }
}