/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.Fuzzy_Rule_Learning.Genetic.ClassifierIshibuchi99;

import java.io.IOException;
import java.util.*;
import org.core.*;

/**
 * <p>
 * It contains the implementation of the algorithm: a genetic fuzzy
 * rule-based classifier in which each individual of the population encodes
 * one fuzzy rule (antecedent labels only; the consequent class and
 * certainty degree are derived heuristically from the training data).
 * </p>
 *
 * <p>
 * @author Written by Francisco José Berlanga (University of Jaén) 01/01/2007
 * @author Modified by Alberto Fernández (University of Jaén) 22/09/2010
 * @version 1.1
 * @since JDK 1.6
 * </p>
 */
public class Algorithm {

    // Training, validation and test data sets.
    myDataset train, val, test;
    // Output file names: training/test classification results, data base
    // (fuzzy partitions) and rule base.
    String outputTr, outputTst, outputBD, outputBR;
    double classProb[];
    double attrProb[][][]; // attribute value, attribute position, class
    // nClasses: number of output classes; entradas: number of input variables.
    int nClasses, entradas;
    // Poblacion: current population (one fuzzy rule per Individual).
    // Poblacion2: snapshot of the population used to restart each generation.
    // Hijos: offspring generated each generation.
    // MejorPoblacion: best population (rule base) found so far.
    ArrayList<Individual> Poblacion;
    ArrayList<Individual> Poblacion2;
    ArrayList<Individual> Hijos;
    ArrayList<Individual> MejorPoblacion;
    // Flattened triangular fuzzy partitions: 3 points (left, center, right)
    // per label, numEtiquetas labels per input variable.
    double[] Particiones_difusas;
    // Random seed read from the parameter file.
    long semilla;
    // tamPoblacion: population size; nRep: number of rules replaced per
    // generation; num_evaluaciones: evaluation budget; numEtiquetas: number
    // of fuzzy labels per variable; n_eval: evaluations consumed so far.
    int tamPoblacion, nRep, num_evaluaciones, numEtiquetas, n_eval;
    // son: index of the next offspring slot; mum/dad: selected parent
    // indices; total_reglas: number of rules currently in the rule base;
    // Num_var/Num_cond: statistics of the final rule base.
    int son, mum, dad, total_reglas, Num_var, Num_cond;
    // Genetic operator probabilities and accuracy bookkeeping.
    double probCross, probMut, probDontCare, fitness, fitness_mejor_pob,
            porcen1, porcen2;

    //We may declare here the algorithm's parameters

    private boolean somethingWrong = false; //to check if everything is correct.

    /**
     * <p>
     * Default constructor
     * </p>
     */
    public Algorithm() {
    }

    /**
     * <p>
     * It reads the data from the input files (training, validation and test) and parse all the parameters
     * from the parameters array.
     * </p>
     * @param parameters parseParameters It contains the input files, output files and parameters
     */
    public Algorithm(parseParameters parameters) {

        train = new myDataset();
        val = new myDataset();
        test = new myDataset();
        try {
            System.out.println("\nReading the training set: " +
                               parameters.getTrainingInputFile());
            train.readClassificationSet(parameters.getTrainingInputFile(), true);
            System.out.println("\nReading the validation set: " +
                               parameters.getValidationInputFile());
            val.readClassificationSet(parameters.getValidationInputFile(), false);
            System.out.println("\nReading the test set: " +
                               parameters.getTestInputFile());
            test.readClassificationSet(parameters.getTestInputFile(), false);
        } catch (IOException e) {
            System.err.println(
                    "There was a problem while reading the input data-sets: " +
                    e);
            somethingWrong = true;
        }

        //We may check if there are some numerical attributes, because our algorithm may not handle them:
        //somethingWrong = somethingWrong || train.hasNumericalAttributes();
        //somethingWrong = somethingWrong || train.hasMissingAttributes();

        outputTr = parameters.getTrainingOutputFile();
        outputTst = parameters.getTestOutputFile();
        outputBD = parameters.getOutputFile(0);
        outputBR = parameters.getOutputFile(1);

        //Now we parse the parameters, for example:
        semilla = Long.parseLong(parameters.getParameter(0));
        //...
        numEtiquetas = Integer.parseInt(parameters.getParameter(1));
        tamPoblacion = Integer.parseInt(parameters.getParameter(2));
        num_evaluaciones = Integer.parseInt(parameters.getParameter(3));
        nRep = Integer.parseInt(parameters.getParameter(4));
        probCross = Double.parseDouble(parameters.getParameter(5));
        probMut = Double.parseDouble(parameters.getParameter(6));
        probDontCare = Double.parseDouble(parameters.getParameter(7));

        entradas = train.getnInputs();
        nClasses = train.getnClasses();

        // 3 partition points per label, numEtiquetas labels per input variable.
        Particiones_difusas = new double[3 * numEtiquetas * entradas];

        Poblacion = new ArrayList<Individual>(tamPoblacion);
        Poblacion.clear();
        for (int i = 0; i < tamPoblacion; i++) {
            Individual indi = new Individual(entradas);
            Poblacion.add(indi);
        }
        Poblacion2 = new ArrayList<Individual>(tamPoblacion);
        Poblacion2.clear();
        MejorPoblacion = new ArrayList<Individual>(tamPoblacion);
        // MejorPoblacion.clear();
        // for (int i = 0; i < tamPoblacion; i++) {
        //     Individual indi = new Individual(entradas);
        //     MejorPoblacion.add(indi);
        // }
        Hijos = new ArrayList<Individual>(nRep + 1);
        // Hijos.clear();
        // for (int i = 0; i < (nRep + 1); i++) {
        //     Individual indi = new Individual(entradas);
        //     Hijos.add(indi);
        // }
    }

    /**
     * <p>
     * It launches the algorithm: builds the fuzzy partitions, creates and
     * evolves the population until the evaluation budget is exhausted,
     * keeps the best rule base seen (by training accuracy), prunes useless
     * rules and writes the data base, rule base and classification outputs.
     * </p>
     */
    public void execute() {
        if (somethingWrong) { //We do not execute the program
            System.err.println("An error was found, either the data-set have numerical values or missing values.");
            System.err.println("Aborting the program");
            //We should not use the statement: System.exit(-1);
        } else {
            total_reglas = tamPoblacion;
            //We do here the algorithm's operations
            Randomize.setSeed(semilla);

            Initial_fuzzy_partition();

            System.out.println("Creating the initial population.");
            initializePopulation();
            evaluatePopulation();
            fitness_mejor_pob = Training_accuracy();

            // Keep a deep copy of the initial population as both the best
            // population so far and the base population for the next
            // generation.
            MejorPoblacion.clear();
            Poblacion2.clear();
            for (int i = 0; i < tamPoblacion; i++) {
                Individual indi = new Individual(Poblacion.get(i));
                MejorPoblacion.add(i, indi);
                Individual indi2 = new Individual(Poblacion.get(i));
                Poblacion2.add(i, indi2);
            }

            n_eval = 0;
            System.out.println("Starting the evolutionary process.");
            while (n_eval < num_evaluaciones) {
                // Restore the working population from last generation's copy.
                Poblacion.clear();
                for (int i = 0; i < tamPoblacion; i++) {
                    Individual indi = new Individual(Poblacion2.get(i));
                    Poblacion.add(i, indi);
                }
                Hijos.clear();
                son = 0;
                // Generate nRep offspring, two per crossover.
                while (son < nRep) {
                    Selection();
                    Crossover(Poblacion.get(mum), Poblacion.get(dad));
                    Mutation();
                    Class_and_certainty_degree(Hijos.get(son));
                    son++;
                    Mutation();
                    Class_and_certainty_degree(Hijos.get(son));
                    son++;
                }
                Replace_rules();
                evaluatePopulation();
                fitness = Training_accuracy();
                // Elitism at population level: remember the best rule base.
                if (fitness > fitness_mejor_pob) {
                    fitness_mejor_pob = fitness;
                    MejorPoblacion.clear();
                    for (int i = 0; i < tamPoblacion; i++) {
                        Individual indi = new Individual(Poblacion.get(i));
                        MejorPoblacion.add(i, indi);
                    }
                }
                // Snapshot the (first tamPoblacion rules of the) population
                // for the next generation.
                Poblacion2.clear();
                for (int i = 0; i < tamPoblacion; i++) {
                    Individual indi = new Individual(Poblacion.get(i));
                    Poblacion2.add(i, indi);
                }
            }

            // Final rule base = best population found during the search.
            Poblacion.clear();
            for (int i = 0; i < tamPoblacion; i++) {
                Individual indi = new Individual(MejorPoblacion.get(i));
                Poblacion.add(i, indi);
            }

            Delete_rules_fitness_zero();
            Delete_rules_certainty_degree_zero();

            Num_var = Count_variables();
            Num_cond = Count_conditions();
            porcen1 = Training_accuracy();
            porcen2 = Test_accuracy();

            // Write the data base (fuzzy partitions).
            String salida0 = new String("");
            salida0 += Print_Partitions();
            Files.writeFile(outputBD, salida0);
            // Write the rule base plus accuracy summary.
            String salida = new String("");
            salida += Print_Population();
            salida += "Training Accuracy:\t" + porcen1 + "%\n";
            salida += "Test Accuracy:\t\t" + porcen2 + "%\n\n";
            Files.writeFile(outputBR, salida);

            doOutput(this.val, this.outputTr);
            doOutput(this.test, this.outputTst);

            System.out.println("Algorithm Finished.");
        }
    }

    /**
     * <p>
     * It creates the initial fuzzy partition (using triangular fuzzy sets):
     * for each input variable, numEtiquetas uniformly-spaced triangular
     * labels over [min, max], each stored as 3 consecutive points
     * (left, center, right) in Particiones_difusas.
     * </p>
     */
    void Initial_fuzzy_partition() {
        int i, j, k;
        double dist;

        k = 0;
        for (i = 0; i < entradas; i++) {
            dist = train.getMax(i) - train.getMin(i);
            dist /= (numEtiquetas - 1);
            // First label: centered on the variable's minimum.
            Particiones_difusas[k] = train.getMin(i) - dist;
            Particiones_difusas[k + 1] = train.getMin(i);
            Particiones_difusas[k + 2] = Particiones_difusas[k + 1] + dist;
            k += 3;
            // Remaining labels: each shifted one step right of the previous.
            for (j = 0; j < (numEtiquetas - 1); j++) {
                Particiones_difusas[k] = Particiones_difusas[k - 2];
                Particiones_difusas[k + 1] = Particiones_difusas[k - 1];
                Particiones_difusas[k + 2] = Particiones_difusas[k - 1] + dist;
                k += 3;
            }
        }
    }

    /**
     * <p>
     * It initializes each individual in the population: every antecedent
     * gene gets a random label (or Don't Care), and the consequent class
     * and certainty degree are then computed from the training data.
     * </p>
     */
    void initializePopulation() {
        int i, j;

        for (j = 0; j < tamPoblacion; j++) {
            for (i = 0; i < entradas; i++) {
                Poblacion.get(j).Arbol[i] = Random_Label();
            }
            Class_and_certainty_degree(Poblacion.get(j));
        }
    }

    /**
     * <p>
     * It randomly obtains a label number for a given variable or the Don't Care label
     * </p>
     * @return The label number or -1 if the Don't Care label is selected
     */
    int Random_Label() {
        int result;
        double u;

        u = Randomize.Rand();
        if (u < probDontCare) {
            result = -1;
        } else {
            result = Randomize.RandintClosed(0, numEtiquetas-1);
        }
        return (result);
    }

    /**
     * <p>
     * It heuristically calculate the best class for the individual, and also calculate its certainty degree.
     * The class is the one accumulating the highest sum of matching degrees
     * over the training examples; the certainty degree is the normalized
     * difference between that class's support and the mean support of the
     * other classes. The class is set to -1 (empty) on ties or zero support.
     * </p>
     * @param indiv Individual The rule whose consequent is being computed
     */
    void Class_and_certainty_degree(Individual indiv) {
        int i, j, clase;
        double max, max2, grado, grado3, grado4;
        double[] grado2;

        // grado2[c] accumulates the matching degree of all examples of class c.
        grado2 = new double[nClasses];
        for (j = 0; j < nClasses; j++) {
            grado2[j] = 0.0;
        }
        for (i = 0; i < train.getnData(); i++) {
            grado = Product_Matching_degree(indiv, train.getExample(i));
            clase = train.getOutputAsInteger(i);
            grado2[clase] += grado;
        }
        // Find the class with the highest support (max) and track the
        // runner-up (max2) to detect ties.
        max = 0.0;
        max2 = 0.0;
        for (j = 0; j < nClasses; j++) {
            if (grado2[j] > max) {
                max2 = max;
                max = grado2[j];
                indiv.clase = j;
            }
        }
        // NOTE(review): max2 only records the previous maximum, so a later
        // class with support exactly equal to max is not detected as a tie
        // here — presumably intentional legacy behavior; confirm before
        // changing.
        if ((max == max2) || (max == 0.0)) {
            indiv.clase = -1;
        }
        if (indiv.clase != -1) {
            // grado: mean support of the other classes; grado3: total
            // support; grado4: support of the winning class.
            grado = grado3 = grado4 = 0.0;
            for (j = 0; j < nClasses; j++) {
                if (j != indiv.clase) {
                    grado += grado2[j];
                } else {
                    grado4 = grado2[j];
                }
                grado3 += grado2[j];
            }
            grado /= (nClasses - 1);
            if (grado3 > 0.0) {
                indiv.grado_certeza = ((grado4 - grado) / grado3);
            } else {
                indiv.grado_certeza = 0.0;
            }
        } else {
            indiv.grado_certeza = 0.0;
        }
    }

    /**
     * <p>
     * It calculate the matching degree between the antecedent of the rule and a given example (using the product t-norm)
     * </p>
     * @param indiv Individual The individual representing a fuzzy rule
     * @param ejemplo double [] A given example
     * @return double The matching degree between the example and the antecedent of the rule
     */
    double Product_Matching_degree(Individual indiv, double[] ejemplo) {
        int variable, etiqueta, pos;
        double result, valor_ejemplo, grado, x0, x1, x2;

        result = 1.0;
        for (variable = 0; variable < entradas; variable++) {
            grado = 0.0;
            etiqueta = indiv.Arbol[variable];
            valor_ejemplo = ejemplo[variable];
            if (etiqueta != -1) {
                // Locate the triangular membership function (x0, x1, x2)
                // of this variable's label in the flattened partition array.
                pos = (variable * (numEtiquetas * 3));
                pos += (etiqueta * 3);
                x0 = Particiones_difusas[pos];
                x1 = Particiones_difusas[pos + 1];
                x2 = Particiones_difusas[pos + 2];
                if ((valor_ejemplo > x0) && (valor_ejemplo < x2)) {
                    if (valor_ejemplo < x1) {
                        grado = ((valor_ejemplo - x0) / (x1 - x0));
                    } else {
                        if (valor_ejemplo > x1) {
                            grado = (1 - ((valor_ejemplo - x1) / (x2 - x1)));
                        } else {
                            grado = 1.0;
                        }
                    }
                } else {
                    grado = 0.0;
                }
            } else {
                // Don't Care label: full membership.
                grado = 1.0;
            }
            result *= grado;
            // Early exit: the product cannot recover once it reaches zero.
            if (result == 0.0) {
                variable = entradas;
            }
        }
        return (result);
    }

    /**
     * <p>
     * It evaluates each individual in the population: for every training
     * example the single-winner-rule scheme is applied, and the winning
     * rule's fitness is incremented when it classifies the example
     * correctly. Consumes tamPoblacion evaluations of the budget.
     * </p>
     */
    void evaluatePopulation() {
        int i, j, mejor, clase;
        double max, grado;

        for (j = 0; j < tamPoblacion; j++) {
            Poblacion.get(j).fitness = 0;
        }
        for (i = 0; i < train.getnData(); i++) {
            max = 0.0;
            mejor = -1;
            clase = -1;
            // Winner rule: highest matching degree weighted by certainty.
            for (j = 0; j < tamPoblacion; j++) {
                grado = Product_Matching_degree(Poblacion.get(j),
                                                train.getExample(i));
                grado *= Poblacion.get(j).grado_certeza;
                if (grado > max) {
                    mejor = j;
                    clase = Poblacion.get(j).clase;
                    max = grado;
                }
            }
            if (clase != -1) {
                if (clase == train.getOutputAsInteger(i)) {
                    Poblacion.get(mejor).fitness++;
                }
            }
        }
        n_eval += tamPoblacion;
    }

    /**
     * <p>
     * It calculate the correct percentage accuracy in training examples.
     * An example counts as correct only when all tied winner rules agree
     * on the class and that class matches the example's class.
     * </p>
     * @return double The training classification accuracy (percentage)
     */
    double Training_accuracy() {
        int i, j, k, n_pos, num_max, clases_distintas, clase;
        double porcen, max, grado;
        int[] pos_max;

        pos_max = new int[tamPoblacion];
        n_pos = 0;
        for (i = 0; i < train.getnData(); i++) {
            max = -1.0;
            num_max = 0;
            // Collect every rule tied at the maximum weighted matching degree.
            for (j = 0; j < total_reglas; j++) {
                grado = Product_Matching_degree(Poblacion.get(j),
                                                train.getExample(i));
                grado *= Poblacion.get(j).grado_certeza;
                if (grado >= max) {
                    if (grado > max) {
                        num_max = 0;
                    }
                    pos_max[num_max] = j;
                    num_max++;
                    max = grado;
                }
            }
            if(max > 0.0){
                // The example is classified only if all tied rules predict
                // the same class.
                clases_distintas = 0;
                clase = Poblacion.get(pos_max[0]).clase;
                for (j = 1; j < num_max; j++) {
                    if (clase != Poblacion.get(pos_max[j]).clase) {
                        clases_distintas = 1;
                        j = num_max;
                    }
                }
                if (clases_distintas == 0) {
                    if (clase == train.getOutputAsInteger(i)) {
                        n_pos++;
                    }
                }
            }
        }
        porcen = ((double) n_pos / train.getnData()) * 100.0;
        return (porcen);
    }

    /**
     * <p>
     * It calculate the correct percentage accuracy in test examples.
     * Same single-winner scheme as Training_accuracy, over the test set.
     * </p>
     * @return double The test classification accuracy (percentage)
     */
    double Test_accuracy() {
        int i, j, k, n_pos, num_max, clases_distintas, clase;
        double porcen, max, grado;
        int[] pos_max;

        pos_max = new int[tamPoblacion];
        n_pos = 0;
        for (i = 0; i < test.getnData(); i++) {
            max = -1.0;
            num_max = 0;
            for (j = 0; j < total_reglas; j++) {
                grado = Product_Matching_degree(Poblacion.get(j),
                                                test.getExample(i));
                grado *= Poblacion.get(j).grado_certeza;
                if (grado >= max) {
                    if (grado > max) {
                        num_max = 0;
                    }
                    pos_max[num_max] = j;
                    num_max++;
                    max = grado;
                }
            }
            if(max > 0.0){
                clases_distintas = 0;
                clase = Poblacion.get(pos_max[0]).clase;
                for (j = 1; j < num_max; j++) {
                    if (clase != Poblacion.get(pos_max[j]).clase) {
                        clases_distintas = 1;
                        j = num_max;
                    }
                }
                if (clases_distintas == 0) {
                    if (clase == test.getOutputAsInteger(i)) {
                        n_pos++;
                    }
                }
            }
        }
        porcen = ((double) n_pos / test.getnData()) * 100.0;
        return (porcen);
    }

    /**
     * <p>
     * It selects two parents to participate in the evolutionary process (by rank based roulette wheel selection).
     * The population is first sorted by fitness; spins pick the indices
     * stored into the mum and dad fields. If all individuals share the
     * worst fitness, two distinct parents are picked uniformly at random.
     * </p>
     */
    void Selection() {
        int i, j, k;
        double sum_fitness, peor_fitness, u;
        double[] Ruleta;

        Ruleta = new double[tamPoblacion];
        Collections.sort(Poblacion);
        peor_fitness = Poblacion.get(tamPoblacion - 1).fitness;
        // Fitness is shifted by the worst value so the wheel is non-negative.
        sum_fitness = 0.0;
        for (i = 0; i < tamPoblacion; i++) {
            sum_fitness += (Poblacion.get(i).fitness - peor_fitness);
        }
        if (sum_fitness > 0.0) {
            // Build the cumulative roulette wheel.
            for (i = 0; i < tamPoblacion; i++) {
                if (i == 0) {
                    Ruleta[i] = ((Poblacion.get(i).fitness - peor_fitness) /
                                 sum_fitness);
                } else {
                    Ruleta[i] = Ruleta[i - 1] +
                            ((Poblacion.get(i).fitness - peor_fitness) /
                             sum_fitness);
                }
            }
            for (k = 0; k < 2; k++) {
                u = Randomize.RanddoubleClosed(0.0, Ruleta[tamPoblacion - 1]);
                i = 0;
                // NOTE(review): the outer loop over j repeats the same
                // wheel scan tamPoblacion times with an unchanged u — the
                // result is identical after the first pass; looks like
                // redundant legacy code. Behavior is preserved as-is.
                for (j = 0; j < tamPoblacion; j++) {
                    for (; Ruleta[i] < u; i++) {
                        ;
                    }
                    if (k == 0) {
                        mum = i;
                    } else {
                        dad = i;
                    }
                }
            }
        } else {
            // Degenerate wheel: choose two distinct parents uniformly.
            mum = Randomize.RandintClosed(0, tamPoblacion-1);
            do {
                dad = Randomize.RandintClosed(0, tamPoblacion-1);
            } while (mum == dad);
        }
    }

    /**
     * <p>
     * It applies a crossover genetic operator between two parent individuals
     * (uniform crossover with probability probCross; otherwise the parents
     * are copied unchanged). The new generated children (2 descendants)
     * are added in a population of descendants at positions son and son+1.
     * </p>
     * @param Madre Individual Parent number 1
     * @param Padre Individual Parent number 2
     */
    void Crossover(Individual Madre, Individual Padre) {
        int i;
        Individual Hijo1 = new Individual(entradas);
        Individual Hijo2 = new Individual(entradas);

        if (Randomize.Rand() < probCross) {
            // Uniform crossover: each gene goes to one child or the other.
            for (i = 0; i < entradas; i++) {
                if (Randomize.Rand() < 0.5) {
                    Hijo1.Arbol[i] = Madre.Arbol[i];
                    Hijo2.Arbol[i] = Padre.Arbol[i];
                } else {
                    Hijo2.Arbol[i] = Madre.Arbol[i];
                    Hijo1.Arbol[i] = Padre.Arbol[i];
                }
            }
        } else {
            // No crossover: children are exact copies of the parents.
            for (i = 0; i < entradas; i++) {
                Hijo1.Arbol[i] = Madre.Arbol[i];
                Hijo2.Arbol[i] = Padre.Arbol[i];
            }
        }
        Hijos.add(son, Hijo1);
        Hijos.add(son + 1, Hijo2);
    }

    /**
     * <p>
     * It applies mutation genetic operator: builds a mutated copy of the
     * offspring at index son (each gene changed with probability probMut
     * to a different random label) and inserts it at index son.
     * </p>
     */
    void Mutation() {
        int i, valor_old, valor_new;
        Individual Hijo = new Individual(entradas);

        for (i = 0; i < entradas; i++) {
            if (Randomize.Rand() < probMut) {
                valor_old = Hijos.get(son).Arbol[i];
                do {
                    valor_new = Random_Label();
                } while (valor_old == valor_new);
                Hijo.Arbol[i] = valor_new;
            } else {
                Hijo.Arbol[i] = Hijos.get(son).Arbol[i];
            }
        }
        // NOTE(review): add(son, Hijo) INSERTS rather than replaces, so the
        // unmutated child is shifted right instead of overwritten. With the
        // calling pattern in execute() this means the second Mutation() in
        // each pair re-reads the first (shifted) child, not the second one
        // from Crossover — looks unintended but is long-standing behavior;
        // confirm against the original KEEL reference before changing to
        // Hijos.set(son, Hijo).
        Hijos.add(son, Hijo);
    }

    /**
     * <p>
     * It replaces the nRep (a prefixed number of rules) worst individual in the population by the new generated descendants.
     * Assumes the population is sorted best-first (Selection sorts it).
     * </p>
     */
    void Replace_rules() {
        int i, j, tam, pos;

        if (nRep < tamPoblacion) {
            tam = nRep;
            pos = tamPoblacion - nRep;
        } else {
            tam = tamPoblacion;
            pos = 0;
        }
        // NOTE(review): add(i, ...) INSERTS the offspring, shifting the
        // worst individuals past index tamPoblacion-1 (the list grows by
        // tam). Those shifted individuals are dropped later when execute()
        // copies only the first tamPoblacion elements back into Poblacion2,
        // so the net effect is replacement — but the list is temporarily
        // oversized. Preserved as-is; a change to set(i, ...) would alter
        // which individuals evaluatePopulation() sees.
        for (i = pos, j = 0; i < (pos + tam); i++, j++) {
            Poblacion.add(i, Hijos.get(j));
        }
    }

    /**
     * <p>
     * It deletes those rules in the population which their fitness equal to zero:
     * after sorting (best-first), total_reglas is truncated at the first
     * position whose fitness is zero. The list itself is not modified;
     * only the logical rule count changes.
     * </p>
     */
    void Delete_rules_fitness_zero() {
        int i, pos;

        Collections.sort(Poblacion);
        pos = total_reglas;
        if (Poblacion.get(0).fitness != 0) {
            // Scan from the end for the last rule with non-zero fitness.
            for (i = tamPoblacion - 1; i >= 0; i--) {
                if (Poblacion.get(i).fitness != 0) {
                    pos = i + 1;
                    i = -1;
                }
            }
        } else {
            // Even the best rule has zero fitness: keep nothing.
            pos = 0;
        }
        total_reglas = pos;
    }

    /**
     * <p>
     * It deletes those rules in the population which their certainty degree equal to zero,
     * compacting the survivors back into Poblacion and updating total_reglas.
     * </p>
     */
    void Delete_rules_certainty_degree_zero() {
        int i;

        Poblacion2.clear();
        // NOTE(review): Poblacion2.add(i, indi) uses the loop index as the
        // insertion position; this only stays in bounds while no earlier
        // rule has been skipped. In practice rules surviving
        // Delete_rules_fitness_zero appear to have non-zero certainty
        // (a zero-certainty rule cannot win an example), so no gap forms —
        // verify that invariant before reusing this method elsewhere.
        for (i = 0; i < total_reglas; i++) {
            if(Poblacion.get(i).grado_certeza != 0.0){
                Individual indi = new Individual(Poblacion.get(i));
                Poblacion2.add(i, indi);
            }
        }
        Poblacion.clear();
        for (i = 0; i < Poblacion2.size(); i++) {
            Individual indi = new Individual(Poblacion2.get(i));
            Poblacion.add(i, indi);
        }
        total_reglas = Poblacion.size();
    }

    /**
     * <p>
     * It counts the number of variables in all the rule set
     * (antecedent positions whose label is not Don't Care).
     * </p>
     * @return int The number of variables used in all the rules of the learned rule set
     */
    int Count_variables() {
        int i, j, num;

        num = 0;
        for (i = 0; i < total_reglas; i++) {
            for (j = 0; j < entradas; j++) {
                if (Poblacion.get(i).Arbol[j] != -1) {
                    num++;
                }
            }
        }
        return (num);
    }

    /**
     * <p>
     * It counts the number of conditions (labels) in all the rule set.
     * NOTE(review): this body is identical to Count_variables(); with this
     * encoding each used variable carries exactly one label, so the two
     * counts coincide — presumably why the duplication was harmless.
     * </p>
     * @return int The number of conditions (labels) used in all the rules of the learned rule set
     */
    int Count_conditions() {
        int i, j, num;

        num = 0;
        for (i = 0; i < total_reglas; i++) {
            for (j = 0; j < entradas; j++) {
                if (Poblacion.get(i).Arbol[j] != -1) {
                    num++;
                }
            }
        }
        return (num);
    }

    /**
     * <p>
     * It prints the current population as a String
     * </p>
     * @return String The current population as a String
     */
    String Print_Population() {
        int i, j, antes;
        String output = new String("");

        Collections.sort(Poblacion);
        output += "\nRule base with " + total_reglas + " rules, " + Num_var +
                " variables and " + Num_cond + " labels.\n";
        output += "-----------------------------------------------------------------------------------\n\n";
        for (i = 0; i < total_reglas; i++) {
            // antes flags whether at least one condition was already
            // printed, so "and " is only inserted between conditions.
            antes = 0;
            output += "Rule " + (i + 1) + ":\n\n";
            output += "If ";
            for (j = 0; j < entradas; j++) {
                if (Poblacion.get(i).Arbol[j] != -1) {
                    if (antes == 1) {
                        output += "and ";
                    }
                    antes = 1;
                    output += "\"X" + (j + 1) + " is ";
                    output += "Label " + (Poblacion.get(i).Arbol[j] + 1) + "\" ";
                }
            }
            if (Poblacion.get(i).clase != -1) {
                output += "then Class is " +
                        train.getOutputValue(Poblacion.get(i).clase) + " ";
            } else {
                output += "then Class is \"Empty\" ";
            }
            output += "with certainty degree " + Poblacion.get(i).grado_certeza +
                    "\n\n";
            output += "Fitness: " + Poblacion.get(i).fitness + "\n";
            output += "\n\n";
        }
        output += "\n";
        return (output);
    }

    /**
     * <p>
     * It prints the fuzzy partition as a String
     * </p>
     * @return String The fuzzy partition as a String
     */
    String Print_Partitions() {
        int i, j, k;
        String output = new String("");

        output += "Fuzzy Partition:\n";
        output += "----------------\n";
        k = 0;
        for (i = 0; i < entradas; i++) {
            output += "\nVariable X" + (i + 1) + ":\n\n";
            for (j = 0; j < numEtiquetas; j++) {
                output += "\tLabel" + (j + 1) + ":\t (" +
                        Particiones_difusas[k] + ", " +
                        Particiones_difusas[k + 1] + ", " +
                        Particiones_difusas[k + 2] + ")\n";
                k += 3;
            }
        }
        output += "\n----------------------------------------------\n\n\n\n";
        return (output);
    }

    /**
     * <p>
     * It generates the output file from a given dataset and stores it in a file.
     * For each example, the expected class and the class predicted by the
     * learned rule base are written (or "?" when no rule fires or the tied
     * winner rules disagree). The per-dataset accuracy ratio is also
     * printed to standard output.
     * </p>
     * @param dataset myDataset input dataset
     * @param filename String the name of the file
     */
    private void doOutput(myDataset dataset, String filename) {
        int i, j, k, n_pos, num_max, clases_distintas, clase;
        double porcen, max, grado;
        int[] pos_max;
        String output = new String("");
        int aciertos = 0;

        pos_max = new int[tamPoblacion];
        output = dataset.copyHeader(); //we insert the header in the output file
        //We write the output for each example
        for (i = 0; i < dataset.getnData(); i++) {
            max = -1.0;
            num_max = 0;
            // Same tied-winner scheme used by Training_accuracy.
            for (j = 0; j < total_reglas; j++) {
                grado = Product_Matching_degree(Poblacion.get(j),
                                                dataset.getExample(i));
                grado *= Poblacion.get(j).grado_certeza;
                if (grado >= max) {
                    if (grado > max) {
                        num_max = 0;
                    }
                    pos_max[num_max] = j;
                    num_max++;
                    max = grado;
                }
            }
            if(max > 0.0){
                clases_distintas = 0;
                clase = Poblacion.get(pos_max[0]).clase;
                for (j = 1; j < num_max; j++) {
                    if (clase != Poblacion.get(pos_max[j]).clase) {
                        clases_distintas = 1;
                        j = num_max;
                    }
                }
                if (clases_distintas == 0) {
                    output += dataset.getOutputAsString(i) + " " +
                            dataset.getOutputValue(clase) + "\n";
                    if (clase == dataset.getOutputAsInteger(i)) {
                        aciertos++;
                    }
                }
                else{
                    // Tied winner rules disagree: unclassified.
                    output += dataset.getOutputAsString(i) + " ?\n";
                }
            }
            else{
                // No rule fires on this example: unclassified.
                output += dataset.getOutputAsString(i) + " ?\n";
            }
        }
        System.out.println("" + 1.0 * aciertos / dataset.getnData());
        Files.writeFile(filename, output);
    }
}