/* * Concept profile generation tool suite * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center, * Rotterdam, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package org.erasmusmc.math; public class AssociationMeasures { public static double cramersV(int[][] contingencyTableValues, int numberOfFieldsVar1, int numberOfFieldsVar2) { int[] sumFieldsVar1 = new int[numberOfFieldsVar1]; int[] sumFieldsVar2 = new int[numberOfFieldsVar2]; int numberOfNonZeroFieldsVar1 = numberOfFieldsVar1; int numberOfNonZeroFieldsVar2 = numberOfFieldsVar2; int totalSum = 0; for (int i = 0; i < numberOfFieldsVar1; i++) { sumFieldsVar1[i] = 0; for (int j = 0; j < numberOfFieldsVar2; j++) { sumFieldsVar1[i] += contingencyTableValues[i][j]; totalSum += contingencyTableValues[i][j]; } if (sumFieldsVar1[i] == 0) { numberOfNonZeroFieldsVar1--; } } for (int j = 0; j < numberOfFieldsVar2; j++) { sumFieldsVar2[j] = 0; for (int i = 0; i < numberOfFieldsVar1; i++) { sumFieldsVar2[j] += contingencyTableValues[i][j]; } if (sumFieldsVar2[j] == 0) { numberOfNonZeroFieldsVar2--; } } // int degreesOfFreedom = numberOfNonZeroFieldsVar1 * // numberOfNonZeroFieldsVar2 - numberOfNonZeroFieldsVar1 - // numberOfNonZeroFieldsVar2 +1; double chiSquareCoefficient = 0d; for (int i = 0; i < numberOfFieldsVar1; i++) { for (int j = 0; j < numberOfFieldsVar2; j++) { double expected = (double) sumFieldsVar2[j] * (double) sumFieldsVar1[i] / (double) totalSum; double temp = (double) contingencyTableValues[i][j] - expected; chiSquareCoefficient += temp * temp / (expected + 1.0e-30); } } // double probability = // ChiSquaredProbabilityFunction.chiSquaredProbabilityFunction(chiSquareCoefficient,degreesOfFreedom); int minimumOfVar1OrVar2Fields = numberOfNonZeroFieldsVar1; if (numberOfNonZeroFieldsVar2 < numberOfNonZeroFieldsVar1) { minimumOfVar1OrVar2Fields = numberOfNonZeroFieldsVar2; } minimumOfVar1OrVar2Fields--; double result = Math.sqrt(chiSquareCoefficient / ((double) totalSum * (double) minimumOfVar1OrVar2Fields)); if (result>1d&&result<1.000001){ result = 1d; } return result; } public static double logLikelihoodRatio(int A, int B, int C, int D) { // entries A, B , C and D represent the cells of a contingency table A = // cell 0,0 B = cell 0,1 etc // rows first and rows represent free variable, e.g. the occurrence of a // term, column the expermental condition. double result = 0; if (D != 0) { double chanceInSelection = (double) A / (double) (C + A); double chanceNotInSelection = (double) B / (double) (D + B); double chanceOverall = (double) (A + B) / (double) (A + B + C + D); result = A * Math.log(chanceOverall) + C * Math.log(1 - chanceOverall); result += B * Math.log(chanceOverall) + D * Math.log(1 - chanceOverall); if (C > 0) { result -= A * Math.log(chanceInSelection) + C * Math.log(1 - chanceInSelection); } if (B > 0) { result -= B * Math.log(chanceNotInSelection) + D * Math.log(1 - chanceNotInSelection); } } return result; } public static double symmetricUncertaintyCoefficient(int A, int B, int C, int D) { double tiny = 1e-30; int rowTotal1 = A + B; int rowTotal2 = C + D; int columnTotal1 = A + C; int columnTotal2 = B + D; int total = A + B + C + D; double entropyVar1 = -nlogn((double) rowTotal1 / (double) total) - nlogn((double) rowTotal2 / (double) total); double entropyVar2 = -nlogn((double) columnTotal1 / (double) total) - nlogn((double) columnTotal2 / (double) total); double totalEntropy = -nlogn((double) A / (double) total) - nlogn((double) B / (double) total) - nlogn((double) C / (double) total) - nlogn((double) D / (double) total); // double entropyVar1GivenVar2 = totalEntropy-entropyVar2; // double entropyVar2GivenVar1 = totalEntropy-entropyVar1; // double uncertaintyCoefficientVar1GivenVar2 = // (entropyVar1-entropyVar1GivenVar2)/(entropyVar1+tiny); // double uncertaintyCoefficientVar2GivenVar1 = // (entropyVar2-entropyVar2GivenVar1)/(entropyVar2+tiny); double uncertaintyCoefficientVar1Var2 = 2d * (entropyVar1 + entropyVar2 - totalEntropy) / (entropyVar1 + entropyVar2 + tiny); return uncertaintyCoefficientVar1Var2; } public static double asymmetricUncertaintyCoefficient(int A, int B, int C, int D) { double tiny = 1e-30; int rowTotal1 = A + B; int rowTotal2 = C + D; int columnTotal1 = A + C; int columnTotal2 = B + D; int total = A + B + C + D; double entropyVar1 = -nlogn((double) rowTotal1 / (double) total) - nlogn((double) rowTotal2 / (double) total); double entropyVar2 = -nlogn((double) columnTotal1 / (double) total) - nlogn((double) columnTotal2 / (double) total); double totalEntropy = -nlogn((double) A / (double) total) - nlogn((double) B / (double) total) - nlogn((double) C / (double) total) - nlogn((double) D / (double) total); double entropyVar1GivenVar2 = totalEntropy - entropyVar2; double uncertaintyCoefficientVar1GivenVar2 = (entropyVar1 - entropyVar1GivenVar2) / (entropyVar1 + tiny); return uncertaintyCoefficientVar1GivenVar2; } public static double nlogn(double n) { if (n != 0) { return n * Math.log(n); } else { return 0; } } /** * Based on http://math.hws.edu/javamath/ryan/ChiSquare.html * @param a * @param b * @param c * @param d * @return */ public static double chiSquare(int a, int b, int c, int d){ double da = a; double db = b; double dc = c; double dd = d; double denominator = (da+db)*(dc+dd)*(db+dd)*(da+dc); double x = (da*dd-db*dc); double numerator = x*x*(da+db+dc+dd); return numerator/denominator; } public static double chiSquareToP(double chiSquare){ return ChiSquaredProbabilityFunction.chiSquaredProbabilityFunction(chiSquare, 1); } }