/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. S�nchez (luciano@uniovi.es)
J. Alcal�-Fdez (jalcala@decsai.ugr.es)
S. Garc�a (sglopez@ujaen.es)
A. Fern�ndez (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Cristobal Romero (Universidad de C�rdoba) 10/10/2007
* @version 0.1
* @since JDK 1.5
*</p>
*/
package keel.Algorithms.Decision_Trees.M5;
/**
* Class implementing some distributions, tests, etc.
*/
public class Distributions {
/** Some constants */
private static double logSqrtPi = Math.log(Math.sqrt(Math.PI));
private static double rezSqrtPi = 1 / Math.sqrt(Math.PI);
private static double bigx = 20.0;
/**
* Computes standard error for observed values of a binomial
* random variable.
*
* @param p the probability of success
* @param n the size of the sample
* @return the standard error
*/
public static double binomialStandardError(double p, int n) {
if (n == 0) {
return 0;
}
return Math.sqrt((p * (1 - p)) / (double) n);
}
/**
* Returns chi-squared probability for given value and degrees
* of freedom. (The probability that the chi-squared variate
* will be greater than x for the given degrees of freedom.)
*
* @param x the value
* @param df the number of degrees of freedom
*/
public static double chiSquaredProbability(double x, int df) {
double a, y = 0, s, e, c, z, val;
boolean even;
if (x <= 0 || df < 1) {
return (1);
}
a = 0.5 * x;
even = (((int) (2 * (df / 2))) == df);
if (df > 1) {
y = Math.exp( -a); //((-a < -bigx) ? 0.0 : Math.exp (-a));
}
s = (even ? y : (2.0 * normalProbability( -Math.sqrt(x))));
if (df > 2) {
x = 0.5 * (df - 1.0);
z = (even ? 1.0 : 0.5);
if (a > bigx) {
e = (even ? 0.0 : logSqrtPi);
c = Math.log(a);
while (z <= x) {
e = Math.log(z) + e;
val = c * z - a - e;
s += Math.exp(val); //((val < -bigx) ? 0.0 : Math.exp (val));
z += 1.0;
}
return (s);
} else {
e = (even ? 1.0 : (rezSqrtPi / Math.sqrt(a)));
c = 0.0;
while (z <= x) {
e = e * (a / z);
c = c + e;
z += 1.0;
}
return (c * y + s);
}
} else {
return (s);
}
}
/**
* Critical value for given probability of F-distribution.
* @param p the probability
* @param df1 the first number of degrees of freedom
* @param df2 the second number of degrees of freedom
* @return the critical value for the given probability
*/
public static double FCriticalValue(double p, int df1, int df2) {
double fval;
double maxf = 99999.0; /* maximum possible F ratio */
double minf = .000001; /* minimum possible F ratio */
if (p <= 0.0 || p >= 1.0) {
return (0.0);
}
fval = 1.0 / p; /* the smaller the p, the larger the F */
while (Math.abs(maxf - minf) > .000001) {
if (FProbability(fval, df1, df2) < p) /* F too large */
{
maxf = fval;
} else
/* F too small */
{
minf = fval;
}
fval = (maxf + minf) * 0.5;
}
return (fval);
}
/**
* Computes probability of F-ratio.
*
* @param F the F-ratio
* @param df1 the first number of degrees of freedom
* @param df2 the second number of degrees of freedom
* @return the probability of the F-ratio.
*/
public static double FProbability(double F, int df1, int df2) {
int i, j;
int a, b;
double w, y, z, d, p;
if ((Math.abs(F) < 10e-10) || df1 <= 0 || df2 <= 0) {
return (1.0);
}
a = (df1 % 2 == 1) ? 1 : 2;
b = (df2 % 2 == 1) ? 1 : 2;
w = (F * df1) / df2;
z = 1.0 / (1.0 + w);
if (a == 1) {
if (b == 1) {
p = Math.sqrt(w);
y = 1 / Math.PI; /* 1 / 3.14159 */
d = y * z / p;
p = 2.0 * y * Math.atan(p);
} else {
p = Math.sqrt(w * z);
d = 0.5 * p * z / w;
}
} else if (b == 1) {
p = Math.sqrt(z);
d = 0.5 * z * p;
p = 1.0 - p;
} else {
d = z * z;
p = w * z;
}
y = 2.0 * w / z;
for (j = b + 2; j <= df2; j += 2) {
d *= (1.0 + a / (j - 2.0)) * z;
p = (a == 1 ? p + d * y / (j - 1.0) : (p + w) * z);
}
y = w * z;
z = 2.0 / z;
b = df2 - 2;
for (i = a + 2; i <= df1; i += 2) {
j = i + b;
d *= y * j / (i - 2.0);
p -= z * d / j;
}
// correction for approximation errors suggested in certification
if (p < 0.0) {
p = 0.0;
} else if (p > 1.0) {
p = 1.0;
}
return (1.0 - p);
}
/**
* Returns probability that the standardized normal variate Z (mean = 0, standard
* deviation = 1) is less than z.
*
* @param z the z-value
* @return the probability of the z value according to the normal pdf
*/
public static double normalProbability(double z) {
double y, x, w;
if (z == 0.0) {
x = 0.0;
} else {
y = 0.5 * Math.abs(z);
if (y >= 3.0) {
x = 1.0;
} else if (y < 1.0) {
w = y * y;
x = ((((((((0.000124818987 * w
- 0.001075204047) * w + 0.005198775019) * w
- 0.019198292004) * w + 0.059054035642) * w
- 0.151968751364) * w + 0.319152932694) * w
- 0.531923007300) * w + 0.797884560593) * y * 2.0;
} else {
y -= 2.0;
x = ((((((((((((( -0.000045255659 * y
+ 0.000152529290) * y - 0.000019538132) * y
- 0.000676904986) * y + 0.001390604284) * y
- 0.000794620820) * y - 0.002034254874) * y
+ 0.006549791214) * y - 0.010557625006) * y
+ 0.011630447319) * y - 0.009279453341) * y
+ 0.005353579108) * y - 0.002141268741) * y
+ 0.000535310849) * y + 0.999936657524;
}
}
return (z > 0.0 ? ((x + 1.0) / 2.0) : ((1.0 - x) / 2.0));
}
/**
* Computes absolute size of half of a student-t confidence interval
* for given degrees of freedom, probability, and observed value.
*
* @param df the number of degrees of freedom
* @param p the probability
* @param se the observed value
* @return absolute size of half of a student-t confidence interval
*/
public static double studentTConfidenceInterval(int df, double p,
double se) {
return Math.sqrt(FCriticalValue(p, 1, df)) * se;
}
/**
* Main method for testing this class.
*/
public static void main(String[] ops) {
System.out.println("Binomial standard error (0.5, 100): " +
Distributions.binomialStandardError(0.5, 100));
System.out.println("Chi-squared probability (2.558, 10): " +
Distributions.chiSquaredProbability(2.558, 10));
System.out.println("Normal probability (0.2): " +
Distributions.normalProbability(0.2));
System.out.println("F critical value (0.05, 4, 5): " +
Distributions.FCriticalValue(0.05, 4, 5));
System.out.println("F probability (5.1922, 4, 5): " +
Distributions.FProbability(5.1922, 4, 5));
System.out.println("Student-t confidence interval (9, 0.01, 2): " +
Distributions.studentTConfidenceInterval(9, 0.01, 2));
}
}