/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
*
* File: CreateInform.java
*
* This class creates a report in the experiment directory.
* A file "report.txt" is created in the same result directory.
*
* @author Written by Juan Carlos Fernández Caballero 13-03-2007
* @version 1.0
* @since JDK1.5
*/
package keel.GraphInterKeel.experiments;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
/**
 * Builds a plain-text report from a list of result files.
 *
 * <p>The result files are expected to alternate training / test
 * (even positions are training files, odd positions are test files), one
 * pair per partition. For classification problems the report contains the
 * success rate and a confusion matrix per set; for regression problems it
 * contains the mean squared error per partition plus best / mean / standard
 * deviation.
 *
 * <p>An instance is meant to be used once: {@link #execute()} closes the
 * report file when it finishes.
 */
public class CreateInform {

    /** Problem type code for classification experiments. */
    static final int CLASSIFICATION = 0;
    /** Problem type code for regression experiments. */
    static final int REGRESSION = 1;

    /**
     * Above this magnitude a double has no fractional part left to round, and
     * the scaled value would not fit safely in a long.
     */
    private static final double MAX_SCALED_MAGNITUDE = 9.0e15;

    private final int experimentType;
    private final String[] readingFiles;

    /** Class labels read from the header of the first result file. */
    private String[] classes = new String[0];
    /** Report writer; null when the report could not be created or is already closed. */
    private BufferedWriter bw = null;
    /** Name of the set being processed: "training" or "test". */
    private String set = "";
    /** Last relation name seen in an "@relation" header line. */
    private String relation = "";
    /** Confusion matrix of the set being processed (rows = real, columns = obtained). */
    private int[][] confussionMatrix = new int[0][0];
    /** Number of partitions processed so far for the current set. */
    private int nPartitions = -1;
    /** Rounded mean squared error of each partition of the current set. */
    private List<Double> ECMPartitionList = null;

    /**
     * Builder
     *
     * @param path report file path
     * @param files result files path (training and test files alternated);
     *        null is treated as an empty list
     * @param problemType type of problem (classification, regression)
     */
    public CreateInform(String path, String[] files, int problemType) {
        experimentType = problemType;
        readingFiles = (files != null) ? files : new String[0];
        //Creation report.txt
        try {
            bw = new BufferedWriter(new FileWriter(path));
        } catch (IOException e) {
            // bw stays null; execute() detects it and does nothing.
            e.printStackTrace();
        }
    }

    /**
     * Creates the report. Reads every result file, first all the training
     * files and then all the test files, and writes one section per set.
     * The report file is always closed before returning, even when a result
     * file is malformed.
     */
    public void execute() {
        if (bw == null) {
            // The report file could not be opened (already reported) or was closed.
            return;
        }
        try {
            if (experimentType == CLASSIFICATION) {
                calcularClases();
            }
            //for training and test
            for (int p = 0; p < 2; p++) {
                set = (p == 0) ? "training" : "test";
                nPartitions = 0;
                if (experimentType == CLASSIFICATION) {
                    reportClassification(p);
                } else if (experimentType == REGRESSION) {
                    reportRegression(p);
                }
            }
        } finally {
            try {
                bw.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            bw = null;
        }
    }//execute

    /**
     * Processes the classification result files of one set and writes its
     * report section.
     *
     * @param firstIndex index of the first file of the set (0 training, 1 test)
     */
    private void reportClassification(int firstIndex) {
        // Start from a clean matrix so the test section does not inherit
        // the counts accumulated while processing the training files.
        confussionMatrix = new int[classes.length][classes.length];
        int totalHits = 0;
        int totalInstances = 0;
        StringBuilder perPartition = new StringBuilder();

        for (int i = firstIndex; i < readingFiles.length; i += 2) {
            nPartitions++;
            int[] counts = scanClassificationFile(readingFiles[i]);
            double accuracy = round(ratio(counts[0], counts[1]), 3);
            perPartition.append(nPartitions).append("\t")
                    .append(Double.toString(accuracy)).append("\n");
            totalHits += counts[0];
            totalInstances += counts[1];
        }
        double totalAccuracy = round(ratio(totalHits, totalInstances), 3);

        try {
            writeSectionHeader();
            bw.write("Total percentage of successes:");
            bw.newLine();
            bw.write(Double.toString(totalAccuracy));
            bw.newLine();
            bw.write("Percentage of successes in each partition:");
            bw.newLine();
            // Every partition line already ends with "\n".
            bw.write(perPartition.toString());
            bw.write("Confusion matrix (rows=real class;columns=obtained class):");
            for (int i = 0; i < confussionMatrix.length; i++) {
                bw.newLine();
                StringBuilder row = new StringBuilder();
                for (int j = 0; j < confussionMatrix[i].length; j++) {
                    row.append(confussionMatrix[i][j]).append("\t");
                }
                bw.write(row.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Processes the regression result files of one set and writes its
     * report section.
     *
     * @param firstIndex index of the first file of the set (0 training, 1 test)
     */
    private void reportRegression(int firstIndex) {
        ECMPartitionList = new ArrayList<Double>();
        StringBuilder perPartition = new StringBuilder();

        for (int i = firstIndex; i < readingFiles.length; i += 2) {
            nPartitions++;
            double mse = round(scanRegressionFile(readingFiles[i]), 3);
            ECMPartitionList.add(Double.valueOf(mse));
            perPartition.append(nPartitions).append("\t")
                    .append(Double.toString(mse)).append("\n");
        }

        double ecmBest = 0.0;
        double ecmMean = 0.0;
        double ecmDev = 0.0;
        int partitions = ECMPartitionList.size();
        if (partitions > 0) {
            ecmBest = ECMPartitionList.get(0).doubleValue();
            for (int i = 0; i < partitions; i++) {
                double value = ECMPartitionList.get(i).doubleValue();
                if (value < ecmBest) {
                    ecmBest = value;
                }
                ecmMean += value;
            }
            ecmMean /= partitions;
            for (int i = 0; i < partitions; i++) {
                double diff = ECMPartitionList.get(i).doubleValue() - ecmMean;
                ecmDev += diff * diff;
            }
            // Population standard deviation, as stated in the report legend.
            ecmDev = Math.sqrt(ecmDev / partitions);
        }
        ecmBest = round(ecmBest, 3);
        ecmMean = round(ecmMean, 3);
        ecmDev = round(ecmDev, 3);

        try {
            writeSectionHeader();
            bw.write("Partial Mean Squared Error in each partition:");
            bw.newLine();
            // Every partition line already ends with "\n".
            bw.write(perPartition.toString());
            bw.newLine();
            bw.write("Best\tMean\tStandar Deviation:");
            bw.newLine();
            bw.write(Double.toString(ecmBest) + "\t" + Double.toString(ecmMean) + "\t" +
                    Double.toString(ecmDev));
            bw.newLine();
            if (set.equals("test")) {
                bw.newLine();
                bw.write("------ Experiments Expresions ------\n");
                bw.newLine();
                bw.write("Partial MSE = 1/N*(Sum[(Di-Yi)^2]), where\n" +
                        "\"Di\" is desired result in pattern \"i\",\n" +
                        "\"Yi\" is obtained result in pattern \"i\",\n" +
                        "and \"N\" is number of patterns\n");
                bw.newLine();
                bw.write("Global MSE = sum(MSEi)/n), where\n" +
                        "\"MSEi\" is partial MSE for partition \"i\",\n" +
                        "and \"n\" is number of partitions\n");
                bw.newLine();
                bw.write("Standar Deviation = SQRT(1/n*(Sum[(GMSE-PMSEi)^2])), where\n" +
                        "\"GMSE\" is Global MSE,\n" +
                        "\"PMSEi\" is Partial MSE in partition \"i\",\n" +
                        "and \"n\" is number of partitions\n");
                bw.newLine();
                bw.write("------ Experiments Expresions ------");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the lines common to every section: the relation name (only in
     * the training section) and the name of the set.
     *
     * @throws IOException if the report cannot be written
     */
    private void writeSectionHeader() throws IOException {
        if (set.equals("training")) {
            bw.write("Relation: " + relation);
        }
        // In the test section this line break terminates the previous section.
        bw.newLine();
        bw.newLine();
        bw.write("Set:" + set);
        bw.newLine();
    }

    /**
     * Reads one classification result file. Each data line holds the
     * expected class followed by the obtained class; lines with fewer than
     * two tokens are ignored. The confusion matrix is updated as a side
     * effect. A file that cannot be read is reported on stderr and counts
     * for whatever was read before the failure.
     *
     * @param path result file path
     * @return array {number of hits, number of instances}
     */
    private int[] scanClassificationFile(String path) {
        int hits = 0;
        int instances = 0;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(path));
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("@")) {
                    updateRelation(line);
                    continue;
                }
                StringTokenizer st = new StringTokenizer(line);
                if (st.countTokens() < 2) {
                    continue; // blank or incomplete line
                }
                String expected = st.nextToken();
                String obtained = st.nextToken();
                if (obtained.equals(expected)) {
                    hits++;
                }
                instances++;
                calcularConfusion(expected, obtained);
            }
        } catch (IOException e) {
            // Covers a missing file too; stop reading instead of retrying forever.
            e.printStackTrace();
        } finally {
            closeReader(reader);
        }
        return new int[] {hits, instances};
    }

    /**
     * Reads one regression result file and computes its mean squared error.
     * Each data line holds two numeric values (obtained and expected); lines
     * with fewer than two tokens are ignored.
     *
     * @param path result file path
     * @return mean squared error of the file, 0.0 when it has no instances
     * @throws NumberFormatException if a data line holds a non-numeric value
     */
    private double scanRegressionFile(String path) {
        double squaredErrorSum = 0.0;
        int instances = 0;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(path));
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("@")) {
                    updateRelation(line);
                    continue;
                }
                StringTokenizer st = new StringTokenizer(line);
                if (st.countTokens() < 2) {
                    continue; // blank or incomplete line
                }
                //column 1 -> obtained, column 2 -> expected or real
                double obtained = Double.parseDouble(st.nextToken());
                double expected = Double.parseDouble(st.nextToken());
                double diff = expected - obtained;
                squaredErrorSum += diff * diff;
                instances++;
            }
        } catch (IOException e) {
            // Covers a missing file too; stop reading instead of retrying forever.
            e.printStackTrace();
        } finally {
            closeReader(reader);
        }
        return (instances == 0) ? 0.0 : squaredErrorSum / instances;
    }

    /**
     * Stores the relation name when the given header line is an
     * "@relation" line that actually carries a name.
     *
     * @param headerLine a line starting with "@"
     */
    private void updateRelation(String headerLine) {
        if (!headerLine.startsWith("@relation")) {
            return;
        }
        StringTokenizer st = new StringTokenizer(headerLine);
        if (st.countTokens() >= 2) {
            st.nextToken(); //@relation
            relation = st.nextToken();
        }
    }

    /**
     * This method computes the classes for classification problem.
     * These classes are used to create the confusion matrix. They are taken
     * from the first header line (starting with "@") of the first result
     * file that contains a "{...}" list. When no such line exists the class
     * list is left empty.
     */
    private void calcularClases() {
        classes = new String[0];
        if (readingFiles.length > 0) {
            BufferedReader reader = null;
            try {
                reader = new BufferedReader(new FileReader(readingFiles[0]));
                String line;
                while ((line = reader.readLine()) != null) {
                    int openBrace = line.indexOf('{');
                    // NOTE(review): assumes the first "{...}" header line is the
                    // class attribute - confirm against the result file format.
                    if (line.startsWith("@") && openBrace != -1) {
                        classes = parseClassLabels(line, openBrace);
                        break;
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                closeReader(reader);
            }
        }
        confussionMatrix = new int[classes.length][classes.length];
    }

    /**
     * Extracts the comma-separated labels enclosed in braces.
     *
     * @param headerLine header line containing the label list
     * @param openBrace index of the opening brace in the line
     * @return labels without surrounding spaces or tabs; empty entries are dropped
     */
    private static String[] parseClassLabels(String headerLine, int openBrace) {
        int closeBrace = headerLine.lastIndexOf('}');
        int end = (closeBrace > openBrace) ? closeBrace : headerLine.length();
        StringTokenizer labels =
                new StringTokenizer(headerLine.substring(openBrace + 1, end), ",");
        List<String> found = new ArrayList<String>();
        while (labels.hasMoreTokens()) {
            //delete spaces, tab
            StringTokenizer words = new StringTokenizer(labels.nextToken());
            if (words.hasMoreTokens()) {
                found.add(words.nextToken());
            }
        }
        return found.toArray(new String[found.size()]);
    }

    /**
     * This method completes the confusion matrix. A pair in which either
     * label is not one of the known classes is left out of the matrix, so it
     * is never attributed to the first class by mistake.
     *
     * @param esperada_ expected (real) class label
     * @param obtenida_ obtained (predicted) class label
     */
    private void calcularConfusion(String esperada_, String obtenida_) {
        int row = -1;
        int column = -1;
        for (int i = 0; i < classes.length; i++) {
            if (classes[i].equals(esperada_)) {
                row = i;
            }
            if (classes[i].equals(obtenida_)) {
                column = i;
            }
        }
        if (row >= 0 && column >= 0) {
            confussionMatrix[row][column]++;
        }
    }

    /**
     * Divides two counters as doubles.
     *
     * @param numerator dividend
     * @param denominator divisor
     * @return the quotient, or 0.0 when the divisor is zero
     */
    private static double ratio(int numerator, int denominator) {
        return (denominator == 0) ? 0.0 : (double) numerator / (double) denominator;
    }

    /**
     * Closes a reader, reporting (not propagating) any failure.
     *
     * @param reader reader to close; may be null
     */
    private static void closeReader(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Rounds a double to a decimal precision (half-way values round up).
     *
     * <p>NaN yields 0.0. Infinite values, and values so large that they have
     * no decimals left to round, are returned unchanged.
     *
     * @param num double value
     * @param ndecimal number of digits of precision
     * @return the value rounded to {@code ndecimal} decimals
     */
    public static double round(double num, int ndecimal) {
        double scale = Math.pow(10, ndecimal);
        double scaled = num * scale;
        if (Double.isNaN(scaled)) {
            return 0.0;
        }
        if (Double.isInfinite(scaled) || Math.abs(scaled) >= MAX_SCALED_MAGNITUDE) {
            return num;
        }
        // Math.round works on long, so large values no longer saturate at Integer.MAX_VALUE.
        return Math.round(scaled) / scale;
    }
}//CreateInform