/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Luciano Sanchez (University of Oviedo) 24/02/2005
* @author Modified by Jose Otero (University of Oviedo) 01/12/2008
* @version 1.0
* @since JDK1.5
* </p>
*/
package keel.Algorithms.Statistical_Tests.Shared;
import keel.Algorithms.Shared.Parsing.*;
import org.core.*;
import java.io.*;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.regex.*;
public class ParseFileList {

    /**
     * Tokens made only of decimal digits. The expression is kept exactly as it
     * was written inline (it also accepts the empty string) and is compiled
     * once instead of once per token.
     */
    private static final Pattern INTEGER_LABEL = Pattern.compile("(\\d)*");

    /**
     * <p>
     * Parses the result files listed in {@code pc.parInputData} and runs the
     * statistical test identified by {@code selector} over them.
     * </p>
     * <p>
     * Every non-empty file name is read after its header has been consumed by
     * {@code pc.skipHeader}. Each remaining line is split on blanks and every
     * token is converted to a number (see {@link #decodeValue}). Files are then
     * grouped by method name, which is taken from the third '/'-separated
     * component of the file name; consecutive files with the same method name
     * form one group. Within a group the first half of the files is treated as
     * test results and the second half as training results.
     * </p>
     * <p>
     * A file that cannot be opened or parsed is reported on {@code System.err}
     * and keeps whatever rows were read before the failure, so that the parsed
     * data always stays aligned with the list of file names. Processing then
     * continues with the next file.
     * </p>
     *
     * @param selector identifier of the test to run; it is compared against the
     *                 constants declared in {@code StatTest}
     * @param tty      not used by this method
     * @param pc       configuration holding the input file list, the
     *                 significance level and the result file name
     */
    public void statisticalTest(int selector, boolean tty, ProcessConfig pc) {
        double sigLevel = pc.parSignificanceLevel;

        // Kept as a raw Vector because it is handed over to StatTest unchanged.
        Vector nameResults = new Vector();
        // data[file][pattern][output]
        Vector<Vector<Vector<Double>>> data = new Vector<Vector<Vector<Double>>>();
        String listOfNames = "";
        String lastRel = "";
        String[] labels = null;

        // A plain indexed loop guarantees progress: empty names are skipped and a
        // failing file can no longer be retried forever.
        for (int f = 0; f < pc.parInputData.size(); f++) {
            String fileName = (String) pc.parInputData.get(f);
            if (fileName.length() == 0) {
                continue;
            }
            System.out.println("Procesing data file [" + fileName + "]");

            // Register the name and its data slot together so both lists share
            // the same indexes even when reading fails half way.
            nameResults.add(fileName);
            Vector<Vector<Double>> fileData = new Vector<Vector<Double>>();
            data.add(fileData);

            BufferedReader in = null;
            try {
                in = new BufferedReader(new FileReader(fileName));
                labels = pc.skipHeader(in);

                // Collect each relation name once per run of consecutive files.
                String relation = pc.getRelation();
                if (!lastRel.equals(relation)) {
                    lastRel = relation;
                    listOfNames += relation + " ";
                }

                String line;
                while ((line = in.readLine()) != null) {
                    Vector<Double> row = new Vector<Double>();
                    fileData.add(row);
                    StringTokenizer tk = new StringTokenizer(line, " ");
                    while (tk.hasMoreTokens()) {
                        row.add(Double.valueOf(decodeValue(tk.nextToken(), labels)));
                    }
                }
            } catch (FileNotFoundException e) {
                System.err.println(e + " Configuration file not found");
            } catch (IOException e) {
                System.err.println(e + " Input error");
            } catch (Exception e) {
                System.err.println(e + " Invalid data");
            } finally {
                if (in != null) {
                    try {
                        in.close();
                    } catch (IOException ignored) {
                        // Nothing useful can be done if closing fails; any read
                        // problem has already been reported above.
                    }
                }
            }
        }

        // Group the file indexes by method name (third component of the path).
        Vector<Vector<Integer>> indexes = new Vector<Vector<Integer>>();
        String lastMethod = "";
        for (int i = 0; i < nameResults.size(); i++) {
            String name = (String) nameResults.get(i);
            String[] fields = name.split("/");
            if (fields.length < 3) {
                System.out.println("Error: cannot extract the method name from [" + name + "]");
                return;
            }
            String method = fields[2];
            if (indexes.isEmpty() || !lastMethod.equals(method)) {
                indexes.add(new Vector<Integer>());
                lastMethod = method;
            }
            indexes.lastElement().add(Integer.valueOf(i));
        }

        System.out.println("Results:");
        System.out.println("Detected " + indexes.size() + " methods");
        if (indexes.isEmpty()) {
            System.out.println("Error: no result files to process");
            return;
        }

        System.out.print("Folds=");
        int nFolds = indexes.get(0).size() / 2; // Half of the files are test files
        for (int i = 0; i < indexes.size(); i++) {
            System.out.print(indexes.get(i).size() / 2 + " ");
        }
        System.out.println();

        // The tests listed here accept a different number of results per method
        // and receive one slice per method; every other test receives a single
        // slice and requires the same number of folds for all methods.
        boolean perMethodLayout =
                (selector == StatTest.globalWilcoxonC) || (selector == StatTest.globalWilcoxonR)
                || (selector == StatTest.FriedmanC) || (selector == StatTest.FriedmanR)
                || (selector == StatTest.FriedmanAlignedC) || (selector == StatTest.FriedmanAlignedR)
                || (selector == StatTest.QuadeC) || (selector == StatTest.QuadeR)
                || (selector == StatTest.ContrastC) || (selector == StatTest.ContrastR)
                || (selector == StatTest.MultipleC) || (selector == StatTest.MultipleR)
                || (selector == StatTest.globalWilcoxonI) || (selector == StatTest.FriedmanI);

        double[][][][] d;
        double[][][][] dtrain;

        if (!perMethodLayout) {
            for (int i = 0; i < indexes.size(); i++) {
                if (nFolds != indexes.get(i).size() / 2) {
                    System.out.println("Error: different number of folds");
                    return;
                }
            }
            if (nFolds == 0) {
                // Would otherwise divide by zero in the modulo below.
                System.out.println("Error: each method needs at least one test and one training file");
                return;
            }

            d = new double[1][data.size() / 2][][];
            dtrain = new double[1][data.size() / 2][][];
            int tst = 0;
            int tr = 0;
            for (int i1 = 0; i1 < data.size(); i1++) {
                double[][] values = toMatrix(data.get(i1));
                // Inside every block of 2*nFolds files the first nFolds are test
                // files and the remaining ones are training files.
                if (i1 % (2 * nFolds) < nFolds) {
                    d[0][tst++] = values;
                } else {
                    dtrain[0][tr++] = values;
                }
            }
        } else {
            d = new double[indexes.size()][][][];      // d[#Alg][#nfolds][#res][#sal]
            dtrain = new double[indexes.size()][][][]; // dtrain[#Alg][#nfolds][#res][#sal]
            for (int h = 0; h < indexes.size(); h++) {
                Vector<Integer> files = indexes.get(h);
                int folds = files.size() / 2;
                d[h] = new double[folds][][];
                dtrain[h] = new double[folds][][];
                // The recorded file indexes are used instead of a running offset,
                // so a method with an odd number of files cannot shift the files
                // read for the methods that follow it.
                for (int k = 0; k < folds; k++) {
                    d[h][k] = toMatrix(data.get(files.get(k).intValue()));
                    dtrain[h][k] = toMatrix(data.get(files.get(folds + k).intValue()));
                }
            }
        }

        String resultName = pc.parResultTrainName;
        // Statistic test: the StatTest constructor is invoked for its effects,
        // the created object itself is not used here.
        new StatTest(selector, d, dtrain, sigLevel, resultName, listOfNames,
                nameResults, labels);
    }

    /**
     * Converts one token of a result line into its numeric value.
     * <p>
     * Without labels the token is parsed as a double. With labels, the index of
     * the label matching the token (ignoring case) is returned. A token that
     * matches no label is parsed as a number when it consists only of digits,
     * becomes -1 when it is "unclassified" or "?", and otherwise yields
     * {@code labels.length} after printing a warning.
     * </p>
     *
     * @param token  text of the value as read from the file
     * @param labels class labels, or {@code null} when values are numeric
     * @return the numeric value assigned to the token
     * @throws NumberFormatException if a numeric token cannot be parsed
     */
    private static double decodeValue(String token, String[] labels) {
        if (labels == null) {
            return Double.parseDouble(token);
        }
        // Let's search the label
        for (int i = 0; i < labels.length; i++) {
            if (token.equalsIgnoreCase(labels[i])) {
                return i;
            }
        }
        // pass integer values
        if (INTEGER_LABEL.matcher(token).matches()) {
            System.out.println("WARNING: label [" +
                    token +
                    "] found, assuming integer output");
            return Double.parseDouble(token);
        }
        if (token.equalsIgnoreCase("unclassified") || token.equalsIgnoreCase("?")) {
            return -1;
        }
        System.out.println("WARNING: label [" + token + "] not found");
        return labels.length;
    }

    /**
     * Copies the rows parsed from one file into a (possibly ragged) array of
     * primitive doubles, one inner array per row.
     *
     * @param rows parsed rows of a single file
     * @return the same values as {@code double[row][column]}
     */
    private static double[][] toMatrix(Vector<Vector<Double>> rows) {
        double[][] matrix = new double[rows.size()][];
        for (int j = 0; j < rows.size(); j++) {
            Vector<Double> row = rows.get(j);
            matrix[j] = new double[row.size()];
            for (int k = 0; k < row.size(); k++) {
                matrix[j][k] = row.get(k).doubleValue();
            }
        }
        return matrix;
    }
}