/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * File: Wilcoxon.java. * * Wilcoxon signed ranks test * * @author Written by Joaquin Derrac (University of Granada) 1/12/2010 * @version 1.0 * @since JDK1.5 */ package keel.GraphInterKeel.statistical.tests; import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.util.*; import keel.GraphInterKeel.statistical.Configuration; import org.core.*; public class Wilcoxon { private static DecimalFormat nf; private static double wilcoxonRanks[][]; private static double exactPValues[][]; private static double asymptoticPValues[][]; private static String confidenceIntervals95[][]; private static String confidenceIntervals90[][]; private static double exactConfidence90[][]; private static double exactConfidence95[][]; private static int wins90[]; private static int wins95[]; private static int draw90[]; private static int draw95[]; private static double data[][]; private static int columns; private static int rows; private static String algorithms[]; private static String outputFileName; private static String outputSummaryFileName; /** * Builder */ public Wilcoxon() { }//end-method /** * In this method, all possible pairwise Wilcoxon comparisons are performed * * @param newData Array with the results of the method * @param newAlgorithms A vector of String with the names of the algorithms */ public static void doWilcoxon(double newData[][], String newAlgorithms[]) { outputFileName = Configuration.getPath(); String outputString = ""; outputString = header(); data = new double[newData[0].length][newData.length]; algorithms = new String[newAlgorithms.length]; columns = data[0].length; rows = data.length; //reverse data matrix for (int i = 0; i < data.length; i++) { for (int j = 0; j < data[0].length; j++) { data[i][j] = newData[j][i]; } } System.arraycopy(newAlgorithms, 0, algorithms, 0, newAlgorithms.length); wilcoxonRanks = new double[columns][columns]; exactPValues = new double[columns][columns]; asymptoticPValues = new double[columns][columns]; confidenceIntervals95 = new String[columns][columns]; confidenceIntervals90 = new String[columns][columns]; exactConfidence90 = new double[columns][columns]; exactConfidence95 = new double[columns][columns]; wins90 = new int[columns]; wins95 = new int[columns]; draw90 = new int[columns]; draw95 = new int[columns]; Arrays.fill(wins90,0); Arrays.fill(wins95,0); Arrays.fill(draw90,0); Arrays.fill(draw95,0); nf = (DecimalFormat) DecimalFormat.getInstance(); nf.setMaximumFractionDigits(6); nf.setMinimumFractionDigits(0); DecimalFormatSymbols dfs = nf.getDecimalFormatSymbols(); dfs.setDecimalSeparator('.'); nf.setDecimalFormatSymbols(dfs); Files.writeFile(outputFileName, outputString); computeBody(); outputString = footer(); Files.addToFile(outputFileName, outputString); //write summary file outputSummaryFileName = outputFileName.substring(0, outputFileName.length() - 4) + "_Summary.tex"; outputString = headerSummary(); Files.addToFile(outputSummaryFileName, outputString); computeSummary(); outputString = footer(); Files.addToFile(outputSummaryFileName, outputString); }//end-method /** * Computes body of the summary file * */ public static void computeSummary() { String text = "\n"; //print the rank matrix text += "\\begin{sidewaystable}[!htp]\n\\centering\\scriptsize\n" + "\\resizebox{\\textwidth}{!}{\\begin{tabular}{\n"; text += "|c"; for (int i = 0; i < columns; i++) { text += "|r"; } text += "|}\n\\hline\n"; for (int i = 0; i < columns; i++) { text += "&(" + (i + 1) + ") "; } text += "\\\\\n\\hline\n"; for (int i = 0; i < columns; i++) { text += algorithms[i] + " (" + (i + 1) + ")"; for (int j = 0; j < columns; j++) { if (i != j) { text += "& " + wilcoxonRanks[i][j]; } else { text += "& -"; } } text += "\\\\\n\\hline\n"; } text += "\n" + "\\end{tabular}}\n" + "\\caption{Ranks computed by the Wilcoxon test}\n"; text += "\n\\end{sidewaystable}\n"; text += "\n \\clearpage \n\n"; Files.addToFile(outputSummaryFileName, text); //print the p-value matrix text = "\n"; text += "\\begin{sidewaystable}[!htp]\n\\centering\\scriptsize\n" + "\\resizebox{\\textwidth}{!}{\\begin{tabular}{\n"; text += "|c"; for (int i = 0; i < columns; i++) { text += "|c"; } text += "|}\n\\hline\n"; for (int i = 0; i < columns; i++) { text += "&(" + (i + 1) + ") "; } text += "\\\\\n\\hline\n"; if (rows <= 50) { for (int i = 0; i < columns; i++) { text += algorithms[i] + " (" + (i + 1) + ")"; for (int j = 0; j < columns; j++) { if (i < j) {//0.1 text += "& " + getSymbol(i,j,exactPValues[i][j], exactPValues[j][i], 0.1) + " "; } if (i == j) { text += "& -"; } if (i > j) {//0.05 text += "& " + getSymbol(i,j,exactPValues[i][j], exactPValues[j][i], 0.05) + " "; } } text += "\\\\\n\\hline\n"; } } else { for (int i = 0; i < columns; i++) { text += algorithms[i] + " (" + (i + 1) + ")"; for (int j = 0; j < columns; j++) { if (i < j) {//0.1 text += "& " + getSymbol(i,j,asymptoticPValues[i][j], asymptoticPValues[j][i], 0.1) + " "; } if (i == j) { text += "& -"; } if (i > j) {//0.05 text += "& " + getSymbol(i,j,asymptoticPValues[i][j], asymptoticPValues[j][i], 0.05) + " "; } } text += "\\\\\n\\hline\n"; } } text += "\n" + "\\end{tabular}}\n" + "\\caption{Summary of the Wilcoxon test. \\textbullet = " + "the method in the row improves the method of the column. \\textopenbullet = " + "the method in the column improves the method of the row. Upper diagonal of level significance $\\alpha=0.9$," + "Lower diagonal level of significance $\\alpha=0.95$}\n"; text += "\n\\end{sidewaystable}\n"; text += "\n \\clearpage \n\n"; Files.addToFile(outputSummaryFileName, text); text = "\n"; //print the summary table text += "\\begin{table}[!htp]\n\\centering\\scriptsize\n" + "\\begin{tabular}{\n"; text += "|c|c|c|c|c|}\n\\hline\n"; text += "&\\multicolumn{2}{c|}{$\\alpha=0.9$} & \\multicolumn{2}{c|}{$\\alpha=0.95$}\\\\\\hline\n"; text += "Method & + & $\\pm$ & + & $\\pm$ "; text += "\\\\\n\\hline\n"; for (int i = 0; i < columns; i++) { text += algorithms[i]+" & "+wins90[i]+" & "+draw90[i]+" & "+wins95[i]+" & "+draw95[i]; text += "\\\\\n\\hline\n"; } text += "\n" + "\\end{tabular}\n" + "\\caption{Wilcoxon test summary results}\n"; text += "\n\\end{table}\n"; text += "\n \\clearpage \n\n"; Files.addToFile(outputSummaryFileName, text); }//end-method /** * Computes a symbol for the symbol table * * @param indexA Index of first element * @param indexB Index of second element * @param pA First p-value * @param pB Second p-value * @param threshold minimun p-value * @return */ private static String getSymbol(int indexA,int indexB,double pA, double pB, double threshold){ if(threshold==0.1){ if((pA<pB)&&(pA<threshold)){ wins90[indexA]++; draw90[indexA]++; return "\\textbullet"; } if((pA>pB)&&(pB<threshold)){ wins90[indexB]++; draw90[indexB]++; return "\\textopenbullet"; } draw90[indexA]++; draw90[indexB]++; }else{ if((pA<pB)&&(pA<threshold)){ wins95[indexA]++; draw95[indexA]++; return "\\textbullet"; } if((pA>pB)&&(pB<threshold)){ wins95[indexB]++; draw95[indexB]++; return "\\textopenbullet"; } draw95[indexA]++; draw95[indexB]++; } return ""; }//end-method /** * Computes body of the report file (i.e. the test itself) * */ public static void computeBody() { double value; String text; for (int first = 0; first < columns; first++) { for (int second = 0; second < columns; second++) { if (first != second) { computeRanks(first, second); } } } text = "\n"; //print individual comparisons for (int first = 0; first < columns; first++) { text += "\n\\section{Detailed results for " + algorithms[first] + "}\n\n"; text += "\n\\subsection{Results}\n\n"; text += "\\begin{table}[!htp]\n\\centering\\small\n" + "\\begin{tabular}{\n"; text += "|c|c|c|c|c|"; text += "}\n\\hline\n"; text += " VS & $R^{+}$ & $R^{-}$ & Exact P-value & Asymptotic P-value \\\\ \\hline \n"; for (int second = 0; second < columns; second++) { if (first != second) { text += algorithms[second] + " & " + wilcoxonRanks[first][second] + " & " + wilcoxonRanks[second][first] + " & "; if (rows < 51) { value = exactPValues[first][second]; if (value != 1.0) { text += value + " & "; } else { text += "$\\geq$ 0.2 & "; } } else { text += "- & "; } value = asymptoticPValues[first][second]; text += nf.format(value); text += "\\\\ \\hline \n"; } } text += "\n" + "\\end{tabular}\n" + "\\caption{Results obtained by the Wilcoxon test for algorithm " + algorithms[first] + "}\n\\end{table}\n"; text += "\n\\subsection{Confidence intervals for Median of diferences}\n\n"; text += "\\begin{table}[!htp]\n\\centering\\small\n" + "\\begin{tabular}{\n"; text += "|c|c|c|"; text += "}\n\\hline\n"; text += " $\\alpha$=0.90 & Confidence interval & Exact confidence \\\\ \\hline \n"; for (int second = 0; second < columns; second++) { if (first != second) { text += algorithms[second] + " & " + confidenceIntervals90[first][second] + " & " + nf.format(exactConfidence90[first][second]) + "\\\\ \\hline \n"; } } text += "\n" + "\\end{tabular}\n" + "\\caption{Confidence intervals for algorithm " + algorithms[first] + " ($\\alpha$=0.90)}\n\\end{table}\n"; text += "\\begin{table}[!htp]\n\\centering\\small\n" + "\\begin{tabular}{\n"; text += "|c|c|c|"; text += "}\n\\hline\n"; text += " $\\alpha$=0.95 & Confidence interval & Exact confidence \\\\ \\hline \n"; for (int second = 0; second < columns; second++) { if (first != second) { text += algorithms[second] + " & " + confidenceIntervals95[first][second] + " & " + nf.format(exactConfidence95[first][second]) + "\\\\ \\hline \n"; } } text += "\n" + "\\end{tabular}\n" + "\\caption{Confidence intervals for algorithm " + algorithms[first] + " ($\\alpha$=0.95)}\n\\end{table}\n"; text += "\n \\clearpage \n\n"; Files.addToFile(outputFileName, text); text = ""; } }//end-method /** * Compute ranks and associated p-values for a giver pair of samples * * @param first First sample * @param second Second sample */ public static void computeRanks(int first, int second) { double AOld[], A[]; double BOld[], B[]; double diffOld[], diff[]; int ties, N, pointer; boolean sign[]; double ranks[]; double RA, RB; ArrayList<Double> walsh; int criticalN; String interval; AOld = new double[rows]; BOld = new double[rows]; diffOld = new double[rows]; ties = 0; for (int i = 0; i < rows; i++) { if (Configuration.getObjective() == 1) { AOld[i] = data[i][first]; BOld[i] = data[i][second]; } else { AOld[i] = data[i][second]; BOld[i] = data[i][first]; } diffOld[i] = Math.abs(AOld[i] - BOld[i]); if (diffOld[i] == 0.0) { ties++; } } N = rows - ties; A = new double[N]; B = new double[N]; diff = new double[N]; sign = new boolean[N]; ranks = new double[N]; pointer = 0; for (int i = 0; i < rows; i++) { if (diffOld[i] != 0.0) { A[pointer] = AOld[i]; B[pointer] = BOld[i]; diff[pointer] = Math.abs(A[pointer] - B[pointer]); if ((A[pointer] - B[pointer]) > 0.0) { sign[pointer] = true; } else { sign[pointer] = false; } pointer++; } } //compute ranks double min; double points; int tied; String tiedString = ""; Arrays.fill(ranks, -1.0); for (int rank = 1; rank <= N;) { min = Double.MAX_VALUE; tied = 1; for (int i = 0; i < N; i++) { if ((ranks[i] == -1.0) && diff[i] == min) { tied++; } if ((ranks[i] == -1.0) && diff[i] < min) { min = diff[i]; tied = 1; } } //min has the lower unassigned value if (tied == 1) { points = rank; } else { tiedString += (tied + "-"); points = 0.0; for (int k = 0; k < tied; k++) { points += (rank + k); } points /= tied; } for (int i = 0; i < N; i++) { if (diff[i] == min) { ranks[i] = points; } } rank += tied; } //compute sumOfRanks RA = 0.0; RB = 0.0; for (int i = 0; i < ranks.length; i++) { if (sign[i]) { RA += ranks[i]; } else { RB += ranks[i]; } } //Treatment of 0's double increment; double sum0; if (ties > 1) { //discard a tie if there's an odd number of them if (ties % 2 == 1) { increment = ties - 1.0; } else { increment = ties; } //Adition of 0 ranked differences sum0 = (((double) increment + 1.0) * (double) increment) / 2.0; sum0 /= 2.0; RA += sum0; RB += sum0; //Reescaling of the rest of ranks for (int i = 0; i < ranks.length; i++) { if (sign[i]) { RA += increment; } else { RB += increment; } } //Updating N so it correctly contain the ties N+=increment; } //save the ranks wilcoxonRanks[first][second] = RA; wilcoxonRanks[second][first] = RB; //compute exact pValue exactPValues[first][second] = WilcoxonDistribution.computeExactProbability(N, RB); exactPValues[second][first] = WilcoxonDistribution.computeExactProbability(N, RA); //compute asymptotic P Value int tiesDistribution[]; tiesDistribution = decode(tiedString); asymptoticPValues[first][second] = WilcoxonDistribution.computeAsymptoticProbability(N, RB, tiesDistribution); asymptoticPValues[second][first] = WilcoxonDistribution.computeAsymptoticProbability(N, RA, tiesDistribution); //compute confidence intervals walsh = new ArrayList<Double>(); double aux, aux2; for (int i = 0; i < diffOld.length - 1; i++) { if (Configuration.getObjective() == 1) { aux = AOld[i] - BOld[i]; } else { aux = BOld[i] - AOld[i]; } walsh.add(aux); for (int j = i + 1; j < diffOld.length; j++) { if (Configuration.getObjective() == 1) { aux2 = AOld[j] - BOld[j]; } else { aux2 = BOld[j] - AOld[j]; } walsh.add((aux + aux2) / 2.0); } } Collections.sort(walsh); //Find critical levels criticalN = findCriticalValue(diffOld.length, 0.05, tiesDistribution); criticalN = Math.max(criticalN, 0); //Build interval interval = "["; interval += nf.format(walsh.get(criticalN)); interval += " , "; interval += nf.format(walsh.get(walsh.size() - (criticalN + 1))); interval += "]"; confidenceIntervals95[first][second] = interval; exactConfidence95[first][second] = 1.0 - (WilcoxonDistribution.computeExactProbability(diffOld.length, criticalN)); criticalN = findCriticalValue(diffOld.length, 0.1, tiesDistribution); criticalN = Math.max(criticalN, 0); //Build interval interval = "["; interval += nf.format(walsh.get(criticalN)); interval += " , "; interval += nf.format(walsh.get(walsh.size() - (criticalN + 1))); interval += "]"; confidenceIntervals90[first][second] = interval; exactConfidence90[first][second] = 1.0 - (WilcoxonDistribution.computeExactProbability(diffOld.length, criticalN)); }//end-method /** * Find the first critical value lower than alpha * * @param N N parameter * @param alpha Limit p-value * * @return Critical value */ private static int findCriticalValue(int N, double alpha, int[] tiesDistribution) { double limit = alpha; int critical = -1; if (N < 51) { do { critical++; } while (WilcoxonDistribution.computeExactProbability(N, critical) < limit); } else { do { critical++; } while (WilcoxonDistribution.computeAsymptoticProbability(N, critical, tiesDistribution) < limit); } critical--; return critical; }//end-method /** * Decodes an string of ties * @param cad String * * @return Integer array representation */ private static int[] decode(String cad) { int result[]; String array[]; if (cad.equals("")) { result = new int[0]; } else { array = cad.split("-"); result = new int[array.length]; for (int i = 0; i < array.length; i++) { result[i] = Integer.parseInt(array[i]); } } return result; }//end-method /** * Footer of the report * * @return Contents of the footer */ private static String footer() { String output = ""; output += "\n\\end{document}"; return output; }//end-method /** * * This method composes the header of the LaTeX file where the results are saved * * @return A string with the header of the LaTeX file */ private static String header() { String output = ""; output += "\\documentclass[a4paper,10pt]{article}\n"; output += "\\title{Wilcoxon Signed Ranks test.}\n"; output += "\\date{\\today}\n\\author{KEEL non-parametric statistical module}\n\\begin{document}\n\n\\pagestyle{empty}\n\\maketitle\n\\thispagestyle{empty}\n\n"; return output; }//end-method /** * * This method composes the header of the summary LaTeX file * * @return A string with the header of the summary LaTeX file */ private static String headerSummary() { String output = ""; output += "\\documentclass[a4paper,10pt]{article}\n"; output += "\\title{Wilcoxon Signed Ranks test.}\n"; output += "\\usepackage{rotating}\n"; output += "\\usepackage{textcomp}\n"; output += "\\date{\\today}\n\\author{KEEL non-parametric statistical module}\n\\begin{document}\n\n\\pagestyle{empty}\n\\maketitle\n\\thispagestyle{empty}\n\n"; return output; }//end-method }//end-class