/*
GeoGebra - Dynamic Mathematics for Everyone
http://www.geogebra.org
This file is part of GeoGebra.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation.
*/
package org.geogebra.common.kernel.statistics;
import org.apache.commons.math3.distribution.ChiSquaredDistribution;
import org.geogebra.common.kernel.Construction;
import org.geogebra.common.kernel.algos.AlgoElement;
import org.geogebra.common.kernel.arithmetic.NumberValue;
import org.geogebra.common.kernel.commands.Commands;
import org.geogebra.common.kernel.geos.GeoElement;
import org.geogebra.common.kernel.geos.GeoList;
/**
 * Performs a chi square Goodness of Fit test or Test of Independence.
 *
 * <p>
 * Three input shapes are handled:
 * <ol>
 * <li>list of observed counts + list of expected counts (goodness of fit),
 * </li>
 * <li>matrix of observed counts + matrix of expected counts,</li>
 * <li>matrix of observed counts only; the expected counts are then derived
 * from the hypothesis of independence
 * (row sum * column sum / grand total).</li>
 * </ol>
 * The output list holds {p-value, test statistic}. It is set undefined when
 * the input cannot be used or the distribution cannot be evaluated.
 *
 * @author G. Sturr
 */
public class AlgoChiSquaredTest extends AlgoElement {

	private GeoList geoList1, geoList2; // input
	private GeoList result; // output
	private double p, testStat;
	// cached distribution, rebuilt only when the degrees of freedom change
	private ChiSquaredDistribution chisquared = null;

	/**
	 * @param cons
	 *            construction
	 * @param geoList
	 *            first list or matrix (observed values)
	 * @param geoList2
	 *            second list or matrix (expected values); may be null when
	 *            the first argument is a matrix
	 */
	public AlgoChiSquaredTest(Construction cons, GeoList geoList,
			GeoList geoList2) {
		super(cons);
		this.geoList1 = geoList;
		this.geoList2 = geoList2;
		result = new GeoList(cons);

		setInputOutput(); // for AlgoElement
		compute();
	}

	@Override
	public Commands getClassName() {
		return Commands.ChiSquaredTest;
	}

	@Override
	protected void setInputOutput() {
		// the second list is optional, so the input array has 1 or 2 entries
		if (geoList2 == null) {
			input = new GeoElement[1];
			input[0] = geoList1;
		} else {
			input = new GeoElement[2];
			input[0] = geoList1;
			input[1] = geoList2;
		}

		setOnlyOutput(result);
		setDependencies(); // done by AlgoElement
	}

	/**
	 * @return result list {p-value, test statistic}
	 */
	public GeoList getResult() {
		return result;
	}

	/**
	 * @param df
	 *            degree of freedom
	 * @return implementation of ChiSquaredDistribution for given degree of
	 *         freedom
	 */
	ChiSquaredDistribution getChiSquaredDistribution(double df) {
		if (chisquared == null || chisquared.getDegreesOfFreedom() != df) {
			chisquared = new ChiSquaredDistribution(df);
		}
		return chisquared;
	}

	/**
	 * Recomputes the p-value and the test statistic from the input lists and
	 * stores them in the output list. The output is set undefined (and
	 * nothing else is written) if the input is unusable, the degrees of
	 * freedom are not positive, or evaluating the distribution fails.
	 *
	 * <p>
	 * A zero expected count makes the corresponding term a division by zero,
	 * so the statistic becomes non-finite; this is not filtered here.
	 */
	@Override
	public final void compute() {
		int rows = geoList1.size();

		if (!geoList1.isDefined() || rows < 2) {
			result.setUndefined();
			return;
		}

		if (geoList2 != null
				&& (!geoList2.isDefined() || geoList2.size() != rows)) {
			result.setUndefined();
			return;
		}

		int df;
		int columns;
		double[][] observed;
		double[][] expected;

		if (!geoList1.isMatrix()) {
			// case 1: <List of Observed, List of Expected> (the GOF test)
			if (geoList2 == null) {
				result.setUndefined();
				return;
			}

			columns = 1;
			df = rows - 1;
			observed = new double[rows][columns];
			expected = new double[rows][columns];

			for (int i = 0; i < rows; i++) {
				GeoElement geo = geoList1.get(i);
				GeoElement geo2 = geoList2.get(i);
				if (!(geo instanceof NumberValue)
						|| !(geo2 instanceof NumberValue)) {
					result.setUndefined();
					return;
				}
				observed[i][0] = ((NumberValue) geo).getDouble();
				expected[i][0] = ((NumberValue) geo2).getDouble();
			}
		} else {
			// cases 2 and 3: list1 is a matrix
			columns = ((GeoList) geoList1.get(0)).size();

			// The expected values, if given, must form a matrix of the same
			// shape; otherwise the row casts / column indexing below would
			// throw instead of yielding an undefined result.
			if (geoList2 != null && (!geoList2.isMatrix()
					|| ((GeoList) geoList2.get(0)).size() != columns)) {
				result.setUndefined();
				return;
			}

			df = (columns - 1) * (rows - 1);
			observed = new double[rows][columns];
			expected = new double[rows][columns];

			for (int i = 0; i < rows; i++) {
				GeoList observedRow = (GeoList) geoList1.get(i);
				GeoList expectedRow = geoList2 == null ? null
						: (GeoList) geoList2.get(i);

				for (int j = 0; j < columns; j++) {
					GeoElement geo = observedRow.get(j);
					if (!(geo instanceof NumberValue)) {
						result.setUndefined();
						return;
					}
					observed[i][j] = ((NumberValue) geo).getDouble();

					if (expectedRow != null) {
						GeoElement geo2 = expectedRow.get(j);
						if (!(geo2 instanceof NumberValue)) {
							result.setUndefined();
							return;
						}
						expected[i][j] = ((NumberValue) geo2).getDouble();
					}
				}
			}

			// case 3: no expected matrix given, derive it from the observed
			// counts
			if (geoList2 == null) {
				fillExpectedUnderIndependence(observed, expected);
			}
		}

		// A chi-squared distribution needs positive degrees of freedom
		// (e.g. a matrix with a single column gives df == 0).
		if (df < 1) {
			result.setUndefined();
			return;
		}

		// test statistic = sum of the chi-square contributions
		// (observed - expected)^2 / expected
		double statistic = 0;
		for (int i = 0; i < rows; i++) {
			for (int j = 0; j < columns; j++) {
				double deviation = observed[i][j] - expected[i][j];
				statistic += deviation * deviation / expected[i][j];
			}
		}

		double pValue;
		try {
			double leftArea = getChiSquaredDistribution(df)
					.cumulativeProbability(statistic);
			pValue = 1 - leftArea;
		} catch (RuntimeException e) {
			// any unchecked failure while building or evaluating the
			// distribution (e.g. ArithmeticException, IllegalStateException)
			e.printStackTrace();
			result.setUndefined();
			// do not fall through: that would publish a stale p-value
			return;
		}

		p = pValue;
		testStat = statistic;

		// put results into the output list
		// NOTE(review): re-marks the list as defined in case an earlier run
		// set it undefined; harmless if addNumber already does so - confirm.
		result.setDefined(true);
		result.clear();
		result.addNumber(p, null);
		result.addNumber(testStat, null);
	}

	/**
	 * Fills {@code expected} with row sum * column sum / grand total of
	 * {@code observed} (expected counts under independence).
	 *
	 * @param observed
	 *            rectangular matrix of observed counts
	 * @param expected
	 *            matrix of the same shape that receives the expected counts
	 */
	private static void fillExpectedUnderIndependence(double[][] observed,
			double[][] expected) {
		int rows = observed.length;
		int columns = rows == 0 ? 0 : observed[0].length;

		// new arrays are zero-initialized, no explicit reset needed
		double[] rowSum = new double[rows];
		double[] columnSum = new double[columns];
		double total = 0;

		for (int i = 0; i < rows; i++) {
			for (int j = 0; j < columns; j++) {
				rowSum[i] += observed[i][j];
				columnSum[j] += observed[i][j];
				total += observed[i][j];
			}
		}

		for (int i = 0; i < rows; i++) {
			for (int j = 0; j < columns; j++) {
				expected[i][j] = rowSum[i] * columnSum[j] / total;
			}
		}
	}
}