/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * <p> * @author Written by Juli�n Luengo Mart�n 02/03/2007 * @version 0.2 * @since JDK 1.5 * </p> */ package keel.Algorithms.Genetic_Rule_Learning.ILGA; import org.core.Files; import org.core.Randomize; import keel.Algorithms.Preprocess.Basic.KNN; import keel.Dataset.*; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.*; /** * <p> * This class contains the main body of the ILGA algorithm, presented by: * </p> * <p> * Guan, S.-U., Zhu, F. An incremental approach to genetic-algorithms-based classification. IEEE Transactions on Systems, Man, and Cybernetics, Part B: Cybernetics 35 (2), pp. 227-239 * </p> */ public class Ilga { static int IS1 = 1; static int IS2 = 2; static int IS3 = 3; static int IS4 = 4; int long_poblacion; int n_genes; int nAtt; double prob_mutacion = 0.01; double crossoverRate = 1.0; int numberRules; int Mu_next; int stagnationLimit = 30; int generationLimit = 60; double survivorsPercent = 0.5; double mutationRedRate = 0.6; double crossoverRedRate = 0.6; double bestCR = -1; RuleSet poblacion[]; RuleSet previousPob[]; RuleSet intermediatePob[]; int incrementalStrategy; InstanceSet IS; InstanceSet ISval; InstanceSet IStest; String input_train_name = new String(); String input_validation_name; String input_test_name = new String(); String output_train_name = new String(); String output_test_name = new String(); String method_output; String attributeOrdering; long seed; static int attributeOrder[] = new int[Attributes.getInputNumAttributes()]; /** * <p> * Default constructor * </p> */ public Ilga(){ poblacion = null; } /** * Constructor for the KEEL parameter file * @param paramfile the file with the parameters of this method */ public Ilga(String paramfile){ config_read(paramfile); Randomize.setSeed(seed); poblacion = new RuleSet[long_poblacion]; nAtt = 0; n_genes = numberRules*(3*nAtt+1); try { IS = new InstanceSet(); IStest = new InstanceSet(); ISval = new InstanceSet(); // Load in memory a dataset that contains a classification problem IS.readSet(input_train_name, true); ISval.readSet(input_validation_name, false); IStest.readSet(input_test_name,false); attributeOrder = new int[Attributes.getInputNumAttributes()]; } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } for(int i=0;i<long_poblacion;i++){ poblacion[i] = new RuleSet(numberRules,0); } } /** * Read the pattern file, and parse data into strings * @param fileParam the file with the parameters */ private void config_read(String fileParam) { File inputFile = new File(fileParam); if (inputFile == null || !inputFile.exists()) { System.out.println("parameter " + fileParam + " file doesn't exists!"); System.exit(-1); } // begin the configuration read from file try { FileReader file_reader = new FileReader(inputFile); BufferedReader buf_reader = new BufferedReader(file_reader); // FileWriter file_write = new FileWriter(outputFile); String line; do { line = buf_reader.readLine(); } while (line.length() == 0); // avoid empty lines for processing // -> // produce exec failure String out[] = line.split("algorithm = "); // alg_name = new String(out[1]); //catch the algorithm name // input & output filenames do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("inputData = "); out = out[1].split("\\s\""); input_train_name = new String(out[0].substring(1,out[0].length() - 1)); input_validation_name = new String(out[1].substring(0,out[2].length() - 1)); input_test_name = new String(out[2].substring(0,out[2].length() - 1)); if (input_validation_name.charAt(input_validation_name.length() - 1) == '"') input_validation_name = input_validation_name.substring(0, input_validation_name .length() - 1); if (input_test_name.charAt(input_test_name.length() - 1) == '"') input_test_name = input_test_name.substring(0, input_test_name .length() - 1); do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("outputData = "); out = out[1].split("\\s\""); output_train_name = new String(out[0].substring(1, out[0].length() - 1)); output_test_name = new String(out[1].substring(0, out[1].length() - 1)); method_output = new String(out[2].substring(0,out[2].length() - 1)); method_output = method_output.trim(); if (method_output.charAt(method_output.length() - 1) == '"') method_output = method_output.substring(0, method_output.length() - 1); if (output_test_name.charAt(output_test_name.length() - 1) == '"') output_test_name = output_test_name.substring(0, output_test_name.length() - 1); // parameters do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("seed = "); seed = (new Long(out[1])).longValue(); do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("ProbMutation = "); prob_mutacion = (new Double(out[1])).doubleValue(); // parse the string into do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("CrossoverRate = "); crossoverRate = (new Double(out[1])).doubleValue(); // parse the string into do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("popSize = "); long_poblacion = (new Integer(out[1])).intValue(); // parse the string into do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("ruleNumber = "); numberRules = (new Integer(out[1])).intValue(); // parse the string into do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("stagnationLimit = "); stagnationLimit = (new Integer(out[1])).intValue(); // parse the string into do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("generationLimit = "); generationLimit = (new Integer(out[1])).intValue(); // parse the string into do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("SurvivorsPercent = "); survivorsPercent = (new Double(out[1])).doubleValue(); do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("mutationRedRate = "); mutationRedRate = (new Double(out[1])).doubleValue(); do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("crossoverRedRate = "); crossoverRedRate = (new Double(out[1])).doubleValue(); do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("AttributeOrder = "); attributeOrdering = (new String(out[1])); do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("incrementalStrategy = "); incrementalStrategy = (new Integer(out[1])).intValue(); // parse the string into file_reader.close(); } catch (IOException e) { System.out.println("IO exception = " + e); e.printStackTrace(); System.exit(-1); } } /** * Writes the output in KEEL format * @param fileName output file * @param instancesIN output from instances of the input data set * @param instancesOUT class of classified instances * @param inputs the input attributes * @param output the output attribute * @param nInputs number of input attributes * @param relation data set name */ public static void writeOutput(String fileName, String instancesIN[], String instancesOUT[], Attribute inputs[], Attribute output, int nInputs, String relation) { String cadena = ""; int i, j, k; int aux; /* Printing input attributes */ cadena += "@relation " + relation + "\n"; for (i = 0; i < nInputs; i++) { cadena += "@attribute " + inputs[i].getName() + " "; if (inputs[i].getType() == Attribute.NOMINAL) { cadena += "{"; for (j = 0; j < inputs[i].getNominalValuesList().size(); j++) { cadena += (String) inputs[i].getNominalValuesList() .elementAt(j); if (j < inputs[i].getNominalValuesList().size() - 1) { cadena += ", "; } } cadena += "}\n"; } else { if (inputs[i].getType() == Attribute.INTEGER) { cadena += "integer"; cadena += " [" + String.valueOf((int) inputs[i] .getMinAttribute()) + ", " + String.valueOf((int) inputs[i] .getMaxAttribute()) + "]\n"; } else { cadena += "real"; cadena += " [" + String.valueOf(inputs[i].getMinAttribute()) + ", " + String.valueOf(inputs[i].getMaxAttribute()) + "]\n"; } } } /* Printing output attribute */ cadena += "@attribute " + output.getName() + " "; if (output.getType() == Attribute.NOMINAL) { cadena += "{"; for (j = 0; j < output.getNominalValuesList().size(); j++) { cadena += (String) output.getNominalValuesList().elementAt(j); if (j < output.getNominalValuesList().size() - 1) { cadena += ", "; } } cadena += "}\n"; } else { cadena += "integer [" + String.valueOf((int) output.getMinAttribute()) + ", " + String.valueOf((int) output.getMaxAttribute()) + "]\n"; } /* Printing the data */ cadena += "@data\n"; Files.writeFile(fileName, cadena); cadena = ""; for (i = 0; i < instancesIN.length; i++) { cadena += instancesIN[i] + " " + instancesOUT[i]; cadena += "\n"; } Files.addToFile(fileName, cadena); } /** * One-point crossover * @param cr1 index of parent 1 in poblation * @param cr2 index of parent 2 in poblation */ public void onePointCrossover(int cr1,int cr2){ RuleSet rule1 = poblacion[cr1]; RuleSet rule2 = poblacion[cr2]; int cutpoint; int cutpoint_rule; int cutpoint_variable; //there are 3*number of attribute elements, plus class value in each cromosome do{ cutpoint = Randomize.Randint(0, n_genes); cutpoint_rule = cutpoint/(3*nAtt+1); cutpoint_variable = cutpoint%(3*nAtt+1); }while(cutpoint_variable != (nAtt-1) && Randomize.Rand()>crossoverRedRate); //rule1 is replaced from cutpoint (inclusive) to the end of his rule set rule1.copyFromPointtoEnd(rule2, cutpoint_rule, cutpoint_variable); //rule2 is replaced from the begining of his rule set to cutpoint (not inclusive) rule2.copyFromBegintoPoint(rule1, cutpoint_rule, cutpoint_variable); //childs must be evaluated rule1.setEvaluated(false); rule2.setEvaluated(false); } /** * It performs a one point crossover in the new poblation, using adjacent chromosomes as parents */ public void crossOver(){ for(int i=0;i<long_poblacion;i=i+2){ if(Randomize.Rand() < crossoverRate && i+1 < long_poblacion) onePointCrossover(i,i+1); } } /** * Copy the survivorsPercent proportion of the old poblation into the bottom half of * the new one */ public void elitism(){ int parentspreserved = (int)(long_poblacion*survivorsPercent); // we keep the best parents and sons // Arrays.sort(poblacion,Collections.reverseOrder()); for(int i=parentspreserved,j=0;i<long_poblacion;i++,j++){ poblacion[i] = previousPob[j]; } } /** * Applies mutation in the new poblation */ public void mutate(){ int posiciones, i, j; double m; posiciones=n_genes*long_poblacion; if (prob_mutacion>0) while (Mu_next<posiciones){ /* Se determina el cromosoma y el gen que corresponden a la posicion que se va a mutar */ i=Mu_next/n_genes; j=Mu_next%n_genes; /* Se efectua la mutacion sobre ese gen */ if((j%(3*nAtt+1))/3 != (nAtt-1) && Randomize.Rand()<mutationRedRate) poblacion[i].mutate(j); /* Se marca el cromosoma mutado para su posterior evaluacion */ poblacion[i].setEvaluated(false); /* Se calcula la siguiente posicion a mutar */ if (prob_mutacion<1) { m = Randomize.Rand(); Mu_next += Math.ceil (Math.log(m) / Math.log(1.0 - prob_mutacion)); } else Mu_next += 1; } Mu_next -= posiciones; } /** * Applies a roulette wheel selection */ public void selection(){ RuleSet temp[]; double probability[] = new double [long_poblacion]; double total; double prob; int sel; temp = new RuleSet[long_poblacion]; //sort the poblation in order of fitness Arrays.sort(poblacion, Collections.reverseOrder()); probability[0] = poblacion[0].getFitness(); for(int i=1;i<long_poblacion;i++){ probability[i] = probability[i-1]+poblacion[i].getFitness(); } total = probability[long_poblacion-1]; for(int i=0;i<long_poblacion;i++){ probability[i] /= total; } for(int i=0;i<long_poblacion;i++){ prob = Randomize.Rand(); sel = -1; for(int j=0;j<long_poblacion && sel==-1;j++){ if(probability[j]>prob) sel = j; } temp[i] = new RuleSet(poblacion[sel]); } previousPob = poblacion; poblacion = temp; } /** * Applies a tournament selection, with tournament size of 2 */ public void tournament_selection(){ int i, j, k, mejor_torneo; int tam_torneo = 2; int Torneo[] = new int[tam_torneo]; boolean repetido; RuleSet sample[] = new RuleSet[long_poblacion]; for (i=0;i<long_poblacion;i++){ Torneo[0] = Randomize.Randint(0,long_poblacion); mejor_torneo=Torneo[0]; for (j=1;j<tam_torneo;j++) { do { Torneo[j] = Randomize.Randint(0,long_poblacion); repetido=false; k=0; while ((k<j) && (!repetido)){ if (Torneo[j]==Torneo[k]) repetido=true; else k++; } } while (repetido); if (poblacion[Torneo[j]].fitness > poblacion[mejor_torneo].fitness) mejor_torneo=Torneo[j]; } sample[i] = new RuleSet(poblacion[mejor_torneo]); } previousPob = poblacion; poblacion = sample; } /** * Its evaluate the NEW poblation, with the train data */ public void evaluate(){ double fitness_train,fitness_test; for(int j=0;j<long_poblacion;j++){ fitness_train = poblacion[j].classify(IS); // poblacion[j].setEvaluated(false); // fitness_test = poblacion[j].classify(IStest); // poblacion[j].fitness = (fitness_train+fitness_test)/2.0; } Arrays.sort(poblacion,Collections.reverseOrder()); } /** * @param sem the SEM model evolved for the actual attribute */ public void IGA(SEM sem, int whichSEM){ int sel; RuleSet previousBestPoblation = new RuleSet(poblacion[0]); for(int i=0;i<long_poblacion;i++){ //SEM size is long_poblacion/2 sel = Randomize.Randint(0, long_poblacion/2); if(this.incrementalStrategy == Ilga.IS1){ poblacion[i] = new RuleSet(previousBestPoblation); poblacion[i].IS3(whichSEM); } else if(this.incrementalStrategy == Ilga.IS2){ poblacion[i] = new RuleSet(previousBestPoblation); poblacion[i].IS4(sem.getChromosome(sel)); } else if(this.incrementalStrategy == Ilga.IS3) poblacion[i].IS3(whichSEM); else if(this.incrementalStrategy == Ilga.IS4) poblacion[i].IS4(sem.getChromosome(sel)); poblacion[i].setEvaluated(false); } } /** * Runs the ILGA algorithm, with first creates and evolve a single SEM for * each attribute. The next step comprises the selection of a attribute order * for integration in an incremental way. * For each attribte, we use the SEM evolved for it, and integrate its 1 attribute rules * in the OIGA ones, using the selected IGA approach. * Each integration implies an GA execution for adapting the rules. * */ public void run(){ boolean endCondition = false; AttributeCR attCR[] = new AttributeCR[Attributes.getInputNumAttributes()]; SEM selector[] = new SEM[Attributes.getInputNumAttributes()]; int gen = 0; int stagnation = 0; RuleSet swp; String instanciasIN[]; String instanciasOUT[]; int pos; //set the initial order of the attributes for(int i=0;i<Attributes.getInputNumAttributes();i++){ //set the first attribute to the current, in order to be used by //the SEM attributeOrder[0] = i; attCR[i] = new AttributeCR(i,0); selector[i] = new SEM(long_poblacion/2,numberRules/2,i,IS); selector[i].setGenerationLimit(generationLimit/2); selector[i].IStest = this.IStest; selector[i].prob_mutacion = this.prob_mutacion; selector[i].run(); attCR[i].CR = selector[i].getCR(); System.out.println("SEM ["+i+"] CR: "+selector[i].getCR()); } //now set the attributeOrder vector, with the attribute order for the IGA if(attributeOrdering.compareTo("descendent")==0) Arrays.sort(attCR,Collections.reverseOrder()); if(attributeOrdering.compareTo("ascendent")==0) Arrays.sort(attCR); for(int i=0;i<Attributes.getInputNumAttributes();i++){ if(attributeOrdering.compareTo("original")==0) attributeOrder[i] = i; if(attributeOrdering.compareTo("random")==0){ boolean found = false; pos = Randomize.Randint (0, Attributes.getInputNumAttributes()); for(int j=i-1;j>=0 && !found;j--){ if(attributeOrder[j] == pos) found = true; } while(found){ pos = (pos+1)%Attributes.getInputNumAttributes(); found = false; for(int j=i-1;j>=0 && !found;j--){ if(attributeOrder[j] == pos) found = true; } } attributeOrder[i] = pos; } if(attributeOrdering.compareTo("descendent")==0) attributeOrder[i] = attCR[i].attribute; if(attributeOrdering.compareTo("ascendent")==0) attributeOrder[i] = attCR[i].attribute; } for(int i=0;i<Attributes.getInputNumAttributes();i++){ //integrate the rules of the SEM IGA(selector[attributeOrder[i]],attributeOrder[i]); nAtt++; n_genes = numberRules*(3*nAtt+1); //Evolve the entire rule set with new attributes bestCR = -1; stagnation = 0; endCondition = false; gen = 0; evaluate(); while(!endCondition){ tournament_selection(); crossOver(); mutate(); elitism(); evaluate(); Arrays.sort(poblacion,Collections.reverseOrder()); gen++; if(bestCR!=poblacion[0].getFitness()) stagnation = 0; else stagnation++; if(gen>generationLimit || stagnation > stagnationLimit || poblacion[0].getFitness()==1.0) endCondition = true; bestCR = poblacion[0].getFitness(); } System.out.print("\nattribute ["+attributeOrder[i]+"] added. "+(gen-1)+"/"+generationLimit+" iterations used."); System.out.print(" CR = "+bestCR); } System.out.println("\nCR train: "+bestCR); for(int i=0;i<1;i++){ poblacion[i].setEvaluated(false); System.out.println("["+i+"] CR test: "+poblacion[i].classify(IStest)); } //Write Train file results instanciasIN = new String[ISval.getNumInstances()]; instanciasOUT = new String[ISval.getNumInstances()]; for(int i=0;i<ISval.getNumInstances();i++){ Attribute a = Attributes.getOutputAttribute(0); int tipo = a.getType(); int claseObt = poblacion[0].classify(ISval.getInstance(i)); if(tipo!=Attribute.NOMINAL){ instanciasIN[i] = new String(String.valueOf(ISval.getInstance(i).getOutputNominalValues(0))); if(claseObt!=-1) instanciasOUT[i] = new String(String.valueOf(claseObt)); else instanciasOUT[i] = new String("?"); } else{ instanciasIN[i] = new String(ISval.getInstance(i).getOutputNominalValues(0)); if(claseObt!=-1) instanciasOUT[i] = new String(a.getNominalValue(claseObt)); else instanciasOUT[i] = new String("?"); } } writeOutput(output_train_name, instanciasIN, instanciasOUT, Attributes.getInputAttributes(), Attributes.getOutputAttributes()[0], Attributes.getInputNumAttributes(), Attributes.getRelationName()); //write test file results instanciasIN = new String[IStest.getNumInstances()]; instanciasOUT = new String[IStest.getNumInstances()]; for(int i=0;i<IStest.getNumInstances();i++){ Attribute a = Attributes.getOutputAttribute(0); int tipo = a.getType(); int claseObt = poblacion[0].classify(IStest.getInstance(i)); if(tipo!=Attribute.NOMINAL){ instanciasIN[i] = new String(String.valueOf(IStest.getInstance(i).getOutputNominalValues(0))); if(claseObt!=-1) instanciasOUT[i] = new String(String.valueOf(claseObt)); else instanciasOUT[i] = new String("?"); } else{ instanciasIN[i] = new String(IStest.getInstance(i).getOutputNominalValues(0)); if(claseObt!=-1) instanciasOUT[i] = new String(a.getNominalValue(claseObt)); else instanciasOUT[i] = new String("?"); } } writeOutput(output_test_name, instanciasIN, instanciasOUT, Attributes.getInputAttributes(), Attributes.getOutputAttributes()[0], Attributes.getInputNumAttributes(), Attributes.getRelationName()); // write the obtained rules to disk printRules(); } /** * <p> * Print the rules to the file passed as parameters in the configuration file * </p> */ protected void printRules(){ Rule r; String cad = new String(); Attribute a; double lims[] = null; for(int i=0;i<poblacion[0].numberRules;i++){ r = poblacion[0].reglas[i]; cad += "IF "; for(int j=0;j<r.numAttributes;j++){ a = Attributes.getInputAttribute(j); for(int n=0;n<r.numAttributes;n++){ if(attributeOrder[n] == j) lims = r.getLimits(n); } if(a.getType()!=Attribute.NOMINAL) cad += String.valueOf(lims[0]) + " < " + a.getName() + " < " + String.valueOf(lims[1]); else{ cad += a.getName() + " in ["; for(int k=(int)lims[0];k<=(int)lims[1];k++){ if(k != (int)lims[0]) cad += ","; cad += a.getNominalValue(k); } cad += "]"; } if(j<r.numAttributes-1) cad += " AND "; } if(Attributes.getOutputAttribute(0).getType() == Attribute.NOMINAL) cad += " THEN " + Attributes.getOutputAttribute(0).getNominalValue(r.getClas()); else cad += " THEN " + r.getClas(); cad +="\n"; } Files.writeFile(method_output, cad); } }