/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.Fuzzy_Rule_Learning.Genetic.Thrift; /** * <p>Title: Algorithm</p> * * <p>Description: It contains the implementation of the algorithm</p> * * * <p>Company: KEEL </p> * * @author Alberto Fern�ndez * @version 1.0 */ import java.io.IOException; import org.core.*; public class Thrift { myDataset train, val, test; String outputTr, outputTst, outputEvo, outputTh; long seed; int populationSize, nEvaluations, nLabels; double crossProb, mutProb; int n_genes; BaseD baseDatos; BaseR baseReglas; //int nClasses; //We may declare here the algorithm's parameters private boolean somethingWrong = false; //to check if everything is correct. /** * Default constructor */ public Thrift() { } /** * It reads the data from the input files (training, validation and test) and parse all the parameters * from the parameters array. * @param parameters parseParameters It contains the input files, output files and parameters */ public Thrift(parseParameters parameters) { train = new myDataset(); val = new myDataset(); test = new myDataset(); try { System.out.println("\nReading the training set: " + parameters.getTrainingInputFile()); train.readRegressionSet(parameters.getTrainingInputFile(), true); System.out.println("\nReading the validation set: " + parameters.getValidationInputFile()); val.readRegressionSet(parameters.getValidationInputFile(), false); System.out.println("\nReading the test set: " + parameters.getTestInputFile()); test.readRegressionSet(parameters.getTestInputFile(), false); } catch (IOException e) { System.err.println( "There was a problem while reading the input data-sets: " + e); somethingWrong = true; } //We may check if there are some numerical attributes, because our algorithm may not handle them: //somethingWrong = somethingWrong || train.hasNumericalAttributes(); somethingWrong = somethingWrong || train.hasMissingAttributes(); outputTr = parameters.getTrainingOutputFile(); outputTst = parameters.getTestOutputFile(); outputEvo = parameters.getOutputFile(0); outputTh = parameters.getOutputFile(1); //Now we parse the parameters, for example: seed = Long.parseLong(parameters.getParameter(0)); nLabels = Integer.parseInt(parameters.getParameter(1)); //nGenerations = Integer.parseInt(parameters.getParameter(2)); populationSize = Integer.parseInt(parameters.getParameter(2)); nEvaluations = Integer.parseInt(parameters.getParameter(3)); this.crossProb = Double.parseDouble(parameters.getParameter(4)); this.mutProb = Double.parseDouble(parameters.getParameter(5)); } /** * It launches the algorithm */ public void execute() { if (somethingWrong) { //We do not execute the program System.err.println("An error was found, either the data-set has missing values."); System.err.println("Please remove the examples with missing data or apply a MV preprocessing."); System.err.println("Aborting the program"); //We should not use the statement: System.exit(-1); } else { //We do here the algorithm's operations Randomize.setSeed(seed); n_genes = 1; for (int i = 0; i < train.getnInputs(); i++) { n_genes *= nLabels; } this.crossProb = (crossProb * this.populationSize) - 0.5; this.mutProb = (mutProb / n_genes); baseDatos = new BaseD(nLabels, train.getnVars(), train.devuelveRangos()); baseDatos.Semantica(); baseReglas = new BaseR(n_genes, baseDatos, train); GA genetico = new GA(train, test, baseDatos, baseReglas, populationSize, nEvaluations, n_genes, crossProb, mutProb, outputEvo); Individuo solucion = genetico.lanzar(); this.generaSalida(solucion); //Finally we should fill the training and test output files doOutput(this.val, this.outputTr); doOutput(this.test, this.outputTst); System.out.println("Algorithm Finished"); } } /** * It generates the output file from a given dataset and stores it in a file * @param dataset myDataset input dataset * @param filename String the name of the file */ private void doOutput(myDataset dataset, String filename) { String output = new String(""); output = dataset.copyHeader(); //we insert the header in the output file //We write the output for each example for (int i = 0; i < dataset.getnData(); i++) { //for classification: output += dataset.getOutputAsReal(i) + " " + this.classificationOutput(dataset.getExample(i)) + "\n"; } Fichero.escribeFichero(filename, output); } /** * It returns the algorithm classification output given an input example * @param example double[] The input example * @return double the output generated by the algorithm */ private double classificationOutput(double[] example) { /** Here we should include the algorithm directives to generate the classification output from the input example */ return baseReglas.FLC(example, baseReglas.n_reglas); } private void generaSalida(Individuo solucion) { int n_reg = baseReglas.decodifica(solucion.getGene()); /* Apertura del fichero de resultados que almacena el conjunto de reglas finalmente aprendido */ String salida = new String(""); salida += "Numero de reglas: " + n_reg + "\n\n"; salida += baseReglas.BRtoString(); /* Calculo del Error de la Base de Conocimiento aprendida */ double ec_tra = GA.Error(val, n_reg); double ec_tst = GA.Error(test, n_reg); int Trials_mejor = GA.dameTrials(); salida += "\nECMtra: " + ec_tra + ", ECMtst: " + ec_tst + ", EMS: " + Trials_mejor + "\n"; //salida += "\n----------------- Parametros de Entrada aceptados ------------------\n\n"; //fprintf(fp, FORMATO_SAL, VAR_SAL); for (int i = 0; i < train.getnInputs(); i++) { salida += "Variable de entrada = " + (i + 1) + "\n"; salida += "Numero de etiquetas = " + baseDatos.getnLabels(i) + "\n"; salida += "Universo de discurso = [" + baseDatos.getExtremoInf(i) + "," + baseDatos.getExtremoSup(i) + "]\n\n"; } salida += "Variable de salida = 1\n"; salida += "Numero de etiquetas = " + baseDatos.getnLabels(baseDatos.n_var_estado - 1) + "\n"; salida += "Universo de discurso = [" + baseDatos.getExtremoInf(baseDatos.n_var_estado - 1) + "," + baseDatos.getExtremoSup(baseDatos.n_var_estado - 1) + "]\n\n"; /* Se almacena la Base de Datos en el fichero de informe */ salida += "\n\nBase de Datos inicial: \n"; salida += baseDatos.printString(); salida += "\n--------------------------------------------------------------------\n\n"; Fichero.escribeFichero(outputTh, salida); System.out.println("ECMtra: " + ec_tra + ", ECMtst: " + ec_tst + ", #R: " + n_reg + ", EMS: " + Trials_mejor); } }