/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see http://www.gnu.org/licenses/

**********************************************************************/

/**
 * <p>
 * @author Written by Albert Orriols (La Salle, Ramón Llull University - Barcelona) 28/03/2004
 * @author Modified by Xavi Solé (La Salle, Ramón Llull University - Barcelona) 03/12/2008
 * @version 1.1
 * @since JDK1.2
 * </p>
 */

package keel.Algorithms.Genetic_Rule_Learning.XCS;

import keel.Algorithms.Genetic_Rule_Learning.XCS.KeelParser.Config;
//import keel.Algorithms.Genetic_Rule_Learning.XCS.TimeControl.TimeControl;
import java.util.*;
import java.lang.*;
import java.io.*;

public class Statistic {
/**
 * <p>
 * This class is used to show and configure all the possible statistics.
 * In normal mode it generates four statistics output files: the INC file,
 * which contains the incremental results of the execution; the TRN and TST
 * files, which contain the final statistics of a train or a test execution;
 * and the PLT (population) file, which contains all the classifiers that
 * result from the execution. The statistics written to those files are:
 * iteration, number of correct classifications, number of wrong
 * classifications, number of not-covered examples, total number of
 * examples, percentage of correct classifications over the covered
 * examples, percentage of correct classifications over all the examples,
 * number of macro classifiers in the population, percentage of the optimal
 * population reached so far, percentage of generalization, and number of
 * micro classifiers in the population.
 * </p>
 */

///////////////////////////////////////
// attributes

/**
 * <p>
 * Represents the output file where the incremental statistics are written.
 * </p>
 */
private PrintWriter fInc = null;

/**
 * <p>
 * Represents the output file where the train statistics are written.
 * </p>
 */
private PrintWriter fTrain = null;

/**
 * <p>
 * Represents the output file where the test statistics are written.
 * </p>
 */
private PrintWriter fTest = null;

/**
 * <p>
 * Represents the output file where the test statistics of a ten-fold cross
 * validation, made with the same folds that have been used to train the
 * classifier, would be written.
 * </p>
 */
//private PrintWriter fTrainTest = null;
/**
 * <p>
 * Represents the output file where the population will be written
 * at the end of the execution.
 * </p>
 */
private PrintWriter fPop = null;

/**
 * <p>
 * Denormalized population.
 * </p>
 */
private PrintWriter fPopNNorm = null;

/**
 * <p>
 * Represents the output file where the population will be drawn
 * at the end of the execution.
 * </p>
 */
private PrintWriter fPDraw = null;

/**
 * <p>
 * Represents the output file where the time cost of every type
 * (reductions, train and test) will be written.
 * </p>
 */
private PrintWriter fTime = null;

/**
 * <p>
 * Output file where the attribute values and the class of every example
 * in a test execution are set down.
 * </p>
 */
//private PrintWriter fClass = null;

/**
 * <p>
 * Output file where the attribute values of the examples that are not
 * covered in a test execution are set down.
 * </p>
 */
//private PrintWriter fNoClass = null;

/**
 * Output file where expected vs. real output statistics are written in the train run.
 */
private PrintWriter fOTrain = null;

/**
 * Output file where expected vs. real output statistics are written in the test run.
 */
private PrintWriter fOTest = null;

///////////////////////////////////////
// operations

/**
 * <p>
 * Creates a Statistic object. It takes the names of the output files
 * from the configuration file and opens them. If these parameters do not
 * exist in the configuration file, it is not possible to perform statistics.
 * </p>
 */
public Statistic(String execKind, String baseName) {
    try{
        if (execKind.toLowerCase().equals("train") || execKind.toLowerCase().equals("cv")){
            System.out.println ("All files are opened.");
            System.out.println ("Opening INC output file: "+(Config.fOTrainFileName+".inc"));
            fInc = new PrintWriter(new BufferedWriter(new FileWriter(Config.fOTrainFileName+".inc")));
            //fTrain = new PrintWriter(new BufferedWriter(new FileWriter(Config.fTrainFileName)));

            System.out.println ("Opening POP output file: "+(Config.fPopFileName));
            fPop = new PrintWriter(new BufferedWriter(new FileWriter(Config.fPopFileName)));
            //fPopNNorm = new PrintWriter(new BufferedWriter(new FileWriter(Config.fPopNormFileName)));
            //fPDraw = new PrintWriter(new BufferedWriter(new FileWriter(Config.fDrawFileName)));

            //If it's a ten-fold cross validation, the fTrainTest file has to be opened
            //fTrainTest = new PrintWriter(new BufferedWriter(new FileWriter(baseName+".TrainTest.tst")));

            System.out.println ("Opening OTrain output file: "+Config.fOTrainFileName);
            fOTrain = new PrintWriter(new BufferedWriter(new FileWriter(Config.fOTrainFileName)));
            //The test file is opened if a test will also be made within the training.
            if (Config.doTest || execKind.toLowerCase().equals("cv")){
                //fTest = new PrintWriter(new BufferedWriter(new FileWriter(Config.fTestFileName)));
                System.out.println ("Opening OTest output file: "+Config.fOTestFileName);
                fOTest = new PrintWriter(new BufferedWriter(new FileWriter(Config.fOTestFileName)));
            }
        }
        else if (execKind.toLowerCase().equals("test")){
            System.out.println ("All test files are opened");
            fTest = new PrintWriter(new BufferedWriter(new FileWriter(Config.fTestFileName)));
            fOTest = new PrintWriter(new BufferedWriter(new FileWriter(Config.fOTestFileName)));
            //fClass = new PrintWriter(new BufferedWriter(new FileWriter(baseName+".class")));
            //fNoClass = new PrintWriter(new BufferedWriter(new FileWriter(baseName+".NO.class")));
        }
        else if (execKind.toLowerCase().equals("cvtest")){
            //fTrainTest = new PrintWriter(new BufferedWriter(new FileWriter(baseName+".TrainTest.tst")));
            fTest = new PrintWriter(new BufferedWriter(new FileWriter(Config.fTestFileName)));
            //fClass = new PrintWriter(new BufferedWriter(new FileWriter(baseName+".class")));
            //fNoClass = new PrintWriter(new BufferedWriter(new FileWriter(baseName+".NO.class")));
        }

        //In all cases a time statistics file will be opened.
        fTime = new PrintWriter(new BufferedWriter(new FileWriter(Config.fTimeFileName)));

    }catch (Exception e){
        System.err.println ("ERROR IN THE STATISTICS. One or more files of the output statistics cannot be opened.");
        e.printStackTrace();
        System.exit(0);
    }

    if (fInc != null)   fInc.println ("Iter - #correct - #wrong - #notCovered - #total - correct/covered - correct/total - #macroCl - % of [O] reached - %generalization - # of microcl - System Error");
    if (fTrain != null) fTrain.println ("Iter - #correct - #wrong - #notCovered - #total - correct/covered - correct/total - #macroCl - % of [O] reached - %generalization - # of microcl");
    if (fTest != null)  fTest.println ("Iter - #correct - #wrong - #notCovered - #total - correct/covered - correct/total - #macroCl - % of [O] reached - %generalization - # of microcl");
    //if (fTrainTest != null) fTrainTest.println ("Iter - #correct - #wrong - #notCovered - #total - correct/covered - correct/total - #macroCl - % of [O] reached - %generalization - # of microcl");

    if (fOTrain != null) initOutputFile(fOTrain);
    if (fOTest != null)  initOutputFile(fOTest);
} // end Statistic
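
/*
 * Illustrative usage (not part of the original KEEL sources): a minimal sketch
 * of how an XCS run is assumed to drive this class. The population, optimal
 * population, result vectors, system error, time control object and the helper
 * runTrainWindow() are hypothetical names obtained elsewhere in the algorithm;
 * only the Statistic methods shown are defined in this file.
 *
 *   Statistic stats = new Statistic("train", baseName);
 *   for (int w = 0; w < numWindows; w++){
 *       int [] results = runTrainWindow(pop, w);   // hypothetical helper
 *       stats.makeIncStatistics(pop, optimalPop, w, results, sysError);
 *   }
 *   stats.makeTestStatistics(pop, optimalPop, lastIteration, testResults);
 *   stats.printPopulation(pop);
 *   stats.makeTimeStatistics(tControl);
 *   stats.closeFiles();
 */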
/**
 * <p>
 * It makes the incremental statistics. It writes, incrementally, the
 * statistics mentioned above.
 * </p>
 * @param pop is the current population.
 * @param optimalPop is the optimal population (if known).
 * @param iteration is the current iteration of the problem.
 * @param executionResults is a vector that contains the results of
 * the window execution. In the first position it contains the number
 * of examples that have been correctly classified, in the second the
 * number of failed classifications, and in the third the number of
 * not-covered examples.
 * @param sysError is the system error obtained in the window.
 */
public void makeIncStatistics(Population pop, Population optimalPop, int iteration, int []executionResults, double sysError) {
    if (Config.doStatistics && fInc != null){
        fInc.print (iteration);
        fInc.print ("\t "+executionResults[0]);
        fInc.print ("\t "+executionResults[1]);
        fInc.print ("\t "+executionResults[2]);
        int total = executionResults[0] + executionResults[1] + executionResults[2];
        fInc.print ("\t "+total);
        fInc.print ("\t "+ (float)((double)executionResults[0] / (double) (executionResults[0]+executionResults[1])));
        fInc.print ("\t "+ (float)((double)executionResults[0] / (double) total));
        fInc.print ("\t "+ pop.getMacroClSum());
        fInc.print ("\t "+ pop.optimalPopulationPercentage(optimalPop));
        fInc.print ("\t "+ pop.getGeneralityAverage());
        fInc.print ("\t "+ pop.getMicroClSum());
        fInc.println ("\t "+sysError);
    }
} // end makeIncStatistics

/**
 * <p>
 * It makes the train statistics. It writes the statistics mentioned above
 * to the train statistics file.
 * </p>
 * @param pop is the current population.
 * @param optimalPop is the optimal population (if known).
 * @param iteration is the current iteration of the problem.
 * @param executionResults is a vector that contains the results of
 * the train execution. In the first position it contains the number
 * of examples that have been correctly classified, in the second the
 * number of failed classifications, and in the third the number of
 * not-covered examples.
 */
public void makeTrainStatistics(Population pop, Population optimalPop, int iteration, int []executionResults) {
    if (Config.doStatistics && fTrain != null){
        fTrain.print (iteration);
        fTrain.print ("\t "+executionResults[0]);
        fTrain.print ("\t "+executionResults[1]);
        fTrain.print ("\t "+executionResults[2]);
        int total = executionResults[0] + executionResults[1] + executionResults[2];
        fTrain.print ("\t "+total);
        fTrain.print ("\t "+ (float)((double)executionResults[0] / (double) (executionResults[0]+executionResults[1])));
        fTrain.print ("\t "+ (float)((double)executionResults[0] / (double) total));
        fTrain.print ("\t "+ pop.getMacroClSum());
        fTrain.print ("\t "+ pop.optimalPopulationPercentage(optimalPop));
        fTrain.print ("\t "+ pop.getGeneralityAverage());
        fTrain.println ("\t "+ pop.getMicroClSum());
    }
} // end makeTrainStatistics

/**
 * <p>
 * It makes the test statistics.
 * </p>
 * @param pop is the current population.
 * @param optimalPop is the optimal population (if known).
 * @param iteration is the current iteration of the problem.
 * @param executionResults is a vector that contains the results of
 * the test execution. In the first position it contains the number
 * of examples that have been correctly classified, in the second the
 * number of failed classifications, and in the third the number of
 * not-covered examples.
 */
public void makeTestStatistics(Population pop, Population optimalPop, int iteration, int []executionResults) {
    if (Config.doStatistics && fTest != null){
        makeTrainOrTestStatistics(fTest,pop,optimalPop,iteration,executionResults);
    }
} // end makeTestStatistics
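
/*
 * Worked example (assumed values, for illustration only): if a window
 * classifies 90 examples correctly, fails on 8 and leaves 2 uncovered, then
 * executionResults = {90, 8, 2}, total = 100,
 * correct/covered = 90 / (90 + 8) ≈ 0.918 and correct/total = 90 / 100 = 0.9.
 */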
/**
 * <p>
 * It writes the train or test statistics to the given file.
 * </p>
 * @param f is a PrintWriter of the file where the results have to be
 * written.
 * @param pop is the current population.
 * @param optimalPop is the optimal population (if known).
 * @param iteration is the current iteration of the problem.
 * @param executionResults is a vector that contains the results of
 * the execution. In the first position it contains the number
 * of examples that have been correctly classified, in the second the
 * number of failed classifications, and in the third the number of
 * not-covered examples.
 */
public void makeTrainOrTestStatistics(PrintWriter f, Population pop, Population optimalPop, int iteration, int []executionResults) {
    f.print (iteration);
    f.print ("\t "+executionResults[0]);
    f.print ("\t "+executionResults[1]);
    f.print ("\t "+executionResults[2]);
    int total = executionResults[0] + executionResults[1] + executionResults[2];
    f.print ("\t "+total);
    f.print ("\t "+ (float)((double)executionResults[0] / (double) (executionResults[0]+executionResults[1])));
    f.print ("\t "+ (float)((double)executionResults[0] / (double) total));
    f.print ("\t "+ pop.getMacroClSum());
    f.print ("\t "+ pop.optimalPopulationPercentage(optimalPop));
    f.print ("\t "+ pop.getGeneralityAverage());
    f.println ("\t "+ pop.getMicroClSum());
} // end makeTrainOrTestStatistics

/**
 * <p>
 * It makes the time statistics. It writes to a file the time spent in
 * training and testing the system, and the time of the reduction algorithms.
 * </p>
 * @param tControl is a TimeControl object that contains the times
 * spent in each category.
 */
public void makeTimeStatistics(TimeControl tControl) {
    if (Config.doStatistics && fTime != null){
        tControl.printTimes(fTime);
    }
} // end makeTimeStatistics

/**
 * It writes the population to a file.
 * @param pop is the current population.
 */
public void printPopulation(Population pop) {
    if (Config.doStatistics && fPop != null){
        pop.printPopulationToFile(fPop);
        //pop.printNotNormPopToFile(fPopNNorm);
        //drawPopulation(pop);
    }
} //printPopulation

/**
 * <p>
 * It draws the population to a file. A character allele is drawn as 1 or 0.
 * A real allele is drawn with ten symbols that represent the interval [0..1]
 * divided into ten fragments. Each fragment is represented by one of three
 * symbols:
 *   . --> The fragment is not covered by the classifier.
 *   o --> The fragment is partially covered by the classifier.
 *   O --> The fragment is totally covered by the classifier.
 *
 * This notation has been taken from Wilson's XCSR (Wilson, 2000).
 * </p>
 * @param pop is the current population.
 */
public void drawPopulation(Population pop) {
    if (Config.doStatistics && fPDraw != null){
        pop.drawPopulationToFile(fPDraw);
    }
} //end drawPopulation
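
/*
 * Illustration of the drawing notation above (hypothetical classifier): a real
 * allele covering the interval [0.25, 0.65] would be drawn as
 *     . . o O O O o . . .
 * since the third and seventh fragments of [0..1] are only partially covered
 * and the fourth to sixth fragments are totally covered.
 */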
/**
 * <p>
 * Prints the statistics for a cross validation experiment.
 * </p>
 * @param pop is the final population of the cross validation.
 * @param optimalPop is the optimal population (if known).
 * @param trainTestResults are the results of the test made with the
 * training set.
 * @param trainNumber is the number of train examples that have been processed.
 * @param testResults are the results of the test made with the test set.
 * @param testNumber is the number of test examples that have been tried.
 * @param tControl is a time control object that contains the time
 * spent in each part of the execution.
 */
public void CVPrintStatistics(Population pop, Population optimalPop, int[] trainTestResults, int trainNumber, int[] testResults, int testNumber, TimeControl tControl){
    if (Config.doStatistics){
        System.out.println ("PRINTING STATISTICS");
        if (fTest != null)  makeTrainOrTestStatistics(fTest,pop,optimalPop,testNumber,testResults);
        else                System.out.println ("Test statistics cannot be printed");

        //if (fTrainTest != null) makeTrainOrTestStatistics(fTrainTest,pop,optimalPop,trainNumber,trainTestResults);
        //else System.out.println ("Train statistics cannot be printed");

        printPopulation(pop);
        drawPopulation(pop);

        if (fTime != null)  tControl.printTimes(fTime);
        else                System.out.println ("Time statistics cannot be printed");
    }
} //end CVPrintStatistics

/**
 * <p>
 * It prints the statistics for a cross validation experiment, but
 * only for the testing execution.
 * </p>
 * @param pop is the final population of the cross validation.
 * @param optimalPop is the optimal population (if known).
 * @param trainTestResults are the results of the test made with the
 * training set.
 * @param trainNumber is the number of train examples that have been processed.
 * @param testResults are the results of the test made with the test set.
 * @param testNumber is the number of test examples that have been tried.
 * @param tControl is a time control object that contains the time
 * spent in each part of the execution.
 */
public void CVPrintTestStatistics(Population pop, Population optimalPop, int[] trainTestResults, int trainNumber, int[] testResults, int testNumber, TimeControl tControl){
    if (Config.doStatistics){
        System.out.println ("Printing test statistics");
        if (fTest != null)  makeTrainOrTestStatistics(fTest,pop,optimalPop,testNumber,testResults);
        else                System.out.println ("Test statistics cannot be printed");

        //if (fTrainTest != null) makeTrainOrTestStatistics(fTrainTest,pop,optimalPop,trainNumber,trainTestResults);
        //else System.out.println ("Train statistics cannot be printed");

        if (fTime != null)  tControl.printTimes(fTime);
        else                System.out.println ("Time statistics cannot be printed");
    }
} //end CVPrintTestStatistics

/**
 * <p>
 * It prints the environmental state and the correct associated action.
 * It is only used in test experiments.
 * </p>
 * @param envState is the environmental state.
 * @param action is the correct action associated to that state.
 */
public void printStateAndClass(double []envState, int action){
    // No-op: the fClass output file is currently disabled (see the commented-out attribute above).
} //end printStateAndClass

/**
 * <p>
 * It prints the environmental state for those examples with no associated
 * action. It is only used in test experiments.
 * </p>
 * @param envState is the environmental state.
 * @param action is the correct action associated to that state.
 */
public void printStateAndClassNoCov(double []envState, int action){
    // No-op: the fNoClass output file is currently disabled (see the commented-out attribute above).
} //end printStateAndClassNoCov

/**
 * It initializes the output file, writing the KEEL header (relation name and attributes).
 */
private void initOutputFile (PrintWriter f){
    //Printing the relation name
    f.println ("@relation "+Config.relationName);

    //Printing the input attributes
    keel.Dataset.Attribute []attrs = keel.Dataset.Attributes.getInputAttributes();
    for (int i=0; i<attrs.length; i++){
        f.println (attrs[i].toString());
    }

    //Printing the output attribute
    attrs = keel.Dataset.Attributes.getOutputAttributes();
    f.println (attrs[0].toString());
}//end initOutputFile
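
/*
 * Illustrative layout of the files prepared by initOutputFile() and then filled
 * by writeExpectedTrainOut()/writeExpectedTestOut() below. The relation and
 * attribute lines come from the KEEL dataset header; the relation, attribute
 * names and values shown here are hypothetical.
 *
 *   @relation iris
 *   @attribute sepalLength real [4.3, 7.9]
 *   ...
 *   @attribute class {setosa, versicolor, virginica}
 *   setosa setosa
 *   versicolor virginica
 *   setosa unclassified
 */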
/**
 * <p>
 * It writes to the train statistics file the expected output compared with
 * the output predicted by the system.
 * </p>
 * @param expected the expected output
 * @param current the current output (the one predicted by the system)
 */
public void writeExpectedTrainOut(int expected, int current){
    if (current == -1){
        fOTrain.print((String)Config.classConv.elementAt(expected)+" ");
        fOTrain.println("unclassified");
    }
    else if (Config.classConv != null){
        fOTrain.print((String)Config.classConv.elementAt(expected)+" ");
        fOTrain.println((String)Config.classConv.elementAt(current)+" ");
    }
    else{
        fOTrain.print(expected+" ");
        fOTrain.println(current+" ");
    }
}//end writeExpectedTrainOut

/**
 * <p>
 * It writes to the test statistics file the expected output compared with
 * the output predicted by the system.
 * </p>
 * @param expected the expected output
 * @param current the current output (the one predicted by the system)
 */
public void writeExpectedTestOut(int expected, int current){
    if (current == -1){
        fOTest.print((String)Config.classConv.elementAt(expected)+" ");
        fOTest.println("unclassified");
    }
    else if (Config.classConv != null){
        fOTest.print((String)Config.classConv.elementAt(expected)+" ");
        fOTest.println((String)Config.classConv.elementAt(current)+" ");
    }
    else{
        fOTest.print(expected+" ");
        fOTest.println(current+" ");
    }
}//end writeExpectedTestOut

/**
 * <p>
 * Closes all the opened files.
 * </p>
 */
public void closeFiles(){
    System.out.println ("Closing all opened files");
    if (Config.doStatistics){
        if (fInc != null)       fInc.close();
        if (fTrain != null)     fTrain.close();
        if (fTest != null)      fTest.close();
        if (fPop != null)       fPop.close();
        if (fTime != null)      fTime.close();
        if (fPopNNorm != null)  fPopNNorm.close();
        if (fOTrain != null)    fOTrain.close();
        if (fOTest != null)     fOTest.close();
    }
} // end closeFiles

} // end Statistic