/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.RE_SL_Methods.mogulHC;

import java.io.*;
import org.core.*;
import java.util.*;
import java.lang.Math;

/**
 * Lear_m3: rule-learning stage of Mogul-HC. It learns a fuzzy rule base
 * iteratively, running a genetic algorithm refined by an evolution strategy
 * to obtain one rule per iteration.
 */
class Lear_m3 {
    public int MaxReglas;
    public double semilla, epsilon;
    public long Gen, n_generaciones;

    public String fich_datos_chequeo, fich_datos_tst, fich_datos_val;
    public String fichero_conf, fichero_inf, ruta_salida;
    public String fichero_br, fichero_reglas, fich_tra_obli, fich_tst_obli;
    public String informe = "";
    public String cadenaReglas = "";

    public MiDataset tabla, tabla_tst, tabla_val;
    public BaseR base_reglas;
    public BaseD base_datos;
    public Adap fun_adap;
    public AG alg_gen;
    public Est_evol ee;

    public Lear_m3(String f_e) {
        fichero_conf = f_e;
    }

    /** Returns the first whitespace-delimited token of the given string */
    private String Quita_blancos(String cadena) {
        StringTokenizer sT = new StringTokenizer(cadena, "\t ", false);
        return (sT.nextToken());
    }
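    /*
     * A sketch of the configuration file parsed by leer_conf() below, assuming
     * the usual KEEL "key = value" layout. The parser tokenizes on '=' and
     * line breaks and relies only on the order of the values, so the key names
     * shown on the left are illustrative:
     *
     *   algorithm = Mogul-HC
     *   inputData = "train.dat" "validation.dat" "test.dat"
     *   outputData = "result.tra" "result.tst" "rules.txt" "report.txt"
     *   seed = 12345678
     *   Iterations = ...                (n_generaciones)
     *   ES Iterations = ...             (n_gen_ee)
     *   ES population rate = ...        (porc_pob_ee)
     *   Population size = ...           (long_poblacion)
     *   Parameter a = ...
     *   Parameter b = ...
     *   Omega = ...                     (matching degree of positive instances)
     *   K = ...                         (percentage of allowed negative instances)
     *   Epsilon = ...                   (minimum matching degree required of the KB)
     *   Type of niches = ...
     *   Type of fitness = ...
     *   Crossover probability = ...
     *   Mutation probability = ...
     *   Number of labels = ...
     */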
    /** Reads the data of the configuration file */
    public void leer_conf() {
        int i, j;
        String cadenaEntrada, valor;
        double porc_pob_ee, cruce, mutacion, K, omega, valor_a, valor_b, sigma;
        int tipo_fitness, tipo_nichos, long_poblacion, n_gen_ee, n_etiquetas;

        // we read the whole file into a String
        informe = "";
        cadenaEntrada = Fichero.leeFichero(fichero_conf);
        StringTokenizer sT = new StringTokenizer(cadenaEntrada, "\n\r=", false);

        // we read the algorithm's name
        sT.nextToken();
        sT.nextToken();

        // we read the names of the training, validation and test files
        sT.nextToken();
        valor = sT.nextToken();

        StringTokenizer ficheros = new StringTokenizer(valor, "\t ", false);
        fich_datos_chequeo = ((ficheros.nextToken()).replace('\"', ' ')).trim();
        fich_datos_val = ((ficheros.nextToken()).replace('\"', ' ')).trim();
        fich_datos_tst = ((ficheros.nextToken()).replace('\"', ' ')).trim();

        // we read the names of the output files
        sT.nextToken();
        valor = sT.nextToken();

        ficheros = new StringTokenizer(valor, "\t ", false);
        fich_tra_obli = ((ficheros.nextToken()).replace('\"', ' ')).trim();
        fich_tst_obli = ((ficheros.nextToken()).replace('\"', ' ')).trim();
        fichero_reglas = ((ficheros.nextToken()).replace('\"', ' ')).trim();
        fichero_inf = ((ficheros.nextToken()).replace('\"', ' ')).trim();
        ruta_salida = fichero_reglas.substring(0, fichero_reglas.lastIndexOf('/') + 1);

        // we read the seed of the random generator
        sT.nextToken();
        valor = sT.nextToken();
        semilla = Double.parseDouble(valor.trim());
        Randomize.setSeed((long) semilla);

        // we read the number of iterations
        sT.nextToken();
        valor = sT.nextToken();
        n_generaciones = Long.parseLong(valor.trim());

        // we read the number of iterations of the evolution strategy
        sT.nextToken();
        valor = sT.nextToken();
        n_gen_ee = Integer.parseInt(valor.trim());

        // we read the rate of the population to which the ES is applied
        sT.nextToken();
        valor = sT.nextToken();
        porc_pob_ee = Double.parseDouble(valor.trim());

        // we read the population size
        sT.nextToken();
        valor = sT.nextToken();
        long_poblacion = Integer.parseInt(valor.trim());

        // we read the parameter a
        sT.nextToken();
        valor = sT.nextToken();
        valor_a = Double.parseDouble(valor.trim());

        // we read the parameter b
        sT.nextToken();
        valor = sT.nextToken();
        valor_b = Double.parseDouble(valor.trim());

        // we read the omega parameter for the matching degree of the positive instances
        sT.nextToken();
        valor = sT.nextToken();
        omega = Double.parseDouble(valor.trim());

        // we read the K parameter for the percentage of allowed negative instances
        sT.nextToken();
        valor = sT.nextToken();
        K = Double.parseDouble(valor.trim());

        // we read the epsilon parameter for the minimum matching degree required of the KB
        sT.nextToken();
        valor = sT.nextToken();
        this.epsilon = Double.parseDouble(valor.trim());

        // we read the type of niches
        sT.nextToken();
        valor = sT.nextToken();
        tipo_nichos = Integer.parseInt(valor.trim());

        // we read the type of fitness function
        sT.nextToken();
        valor = sT.nextToken();
        tipo_fitness = Integer.parseInt(valor.trim());

        // we read the crossover probability
        sT.nextToken();
        valor = sT.nextToken();
        cruce = Double.parseDouble(valor.trim());

        // we read the mutation probability
        sT.nextToken();
        valor = sT.nextToken();
        mutacion = Double.parseDouble(valor.trim());

        // we read the number of labels
        sT.nextToken();
        valor = sT.nextToken();
        n_etiquetas = Integer.parseInt(valor.trim());

        // we create all the objects
        tabla = new MiDataset(fich_datos_chequeo, true);
        tabla_val = new MiDataset(fich_datos_val, false);
        tabla_tst = new MiDataset(fich_datos_tst, false);

        base_datos = new BaseD(n_etiquetas, tabla.n_variables);
        for (i = 0; i < tabla.n_variables; i++) {
            base_datos.n_etiquetas[i] = n_etiquetas;
            base_datos.extremos[i].min = tabla.extremos[i].min;
            base_datos.extremos[i].max = tabla.extremos[i].max;
        }

        //MaxReglas = (new Double(Math.pow(n_etiquetas, tabla.n_var_estado))).intValue();
        MaxReglas = 10000;
        base_reglas = new BaseR(MaxReglas, base_datos, tabla);
        fun_adap = new Adap(tabla, base_reglas, omega, K, tipo_fitness, tipo_nichos);
        alg_gen = new AG(long_poblacion, tabla.n_variables, cruce, mutacion,
                         valor_a, valor_b, fun_adap);
        ee = new Est_evol(base_datos, fun_adap, alg_gen, porc_pob_ee, n_gen_ee);
    }
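    /*
     * Iterative Rule Learning overview, as implemented in run() below: each
     * pass of the outer loop runs a complete genetic algorithm (refined by an
     * evolution strategy) to learn a single fuzzy rule, inserts the best rule
     * into the rule base, and accumulates the covering degree of the examples
     * it matches. Examples whose accumulated covering degree reaches epsilon
     * are marked as covered; the process stops when every example is covered
     * or the rule base reaches MaxReglas (see Parada()).
     */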
    public void run() {
        int i, j;
        double RCE, min_CR, min_CVR, ec, el, ec_tst, el_tst, PN, fitness;

        /* We read the configuration file and initialize the structures and variables */
        leer_conf();

        if (tabla.salir == false) {
            /* we generate the semantics of the linguistic variables */
            base_datos.Semantica();

            /* we store the DB in the report file */
            informe += "Initial Data Base: \n\n";
            informe += base_datos.BDtoString() + "\n";
            Fichero.escribeFichero(fichero_inf, informe);

            /* initialization of the rule counter */
            base_reglas.n_reglas = 0;

            do {
                /* generation of the initial population */
                alg_gen.Initialize(base_reglas, base_datos, tabla);
                Gen = 0;

                /* evaluation of the initial population */
                alg_gen.Evaluate();
                Gen++;

                /* main loop of the genetic algorithm */
                do {
                    /* interchange of the new and old populations */
                    alg_gen.Intercambio();

                    /* selection by means of Baker's method */
                    alg_gen.Select();

                    /* crossover */
                    alg_gen.Cruce_MMA_Simple(tabla.n_variables);

                    /* mutation */
                    alg_gen.Mutacion_Thrift_No_Uniforme(Gen, n_generaciones, base_datos);

                    /* elitist selection */
                    alg_gen.Elitist();

                    /* evaluation of the current population */
                    alg_gen.Evaluate();

                    /* evolution strategy */
                    ee.Estrategia_Evolucion();

                    /* we increment the counter */
                    Gen++;
                } while (Gen <= n_generaciones);

                fun_adap.CriteriosReglas(alg_gen.solucion());
                fitness = fun_adap.F * fun_adap.G * fun_adap.g;
                if (fun_adap.tipo_fitness == 1) {
                    PN = fun_adap.LNIR(alg_gen.solucion());
                    fitness *= PN;
                }
                else {
                    fitness *= fun_adap.PC;
                }

                /* the best rule found is stored in the RB */
                base_reglas.inserta_regla(alg_gen.solucion());

                /* we calculate the matching degree of the rule with each example;
                   the covered examples are marked */
                for (i = 0; i < tabla.long_tabla; i++) {
                    RCE = fun_adap.ReglaCubreEjemplo(alg_gen.solucion(),
                                                     tabla.datos[i].ejemplo);
                    tabla.datos[i].nivel_cubrimiento += RCE;
                    tabla.datos[i].maximo_cubrimiento =
                        Adap.Maximo(tabla.datos[i].maximo_cubrimiento, RCE);
                    if ((tabla.datos[i].nivel_cubrimiento >= epsilon) &&
                        (tabla.datos[i].cubierto == 0)) {
                        tabla.datos[i].cubierto = 1;
                        tabla.no_cubiertos--;
                    }
                }

                /* the learning process finishes when the stopping condition holds */
            } while (Parada() == 0);

            /* we calculate the minimum and maximum matching degrees */
            min_CR = 1.0;
            min_CVR = 10E37;
            for (i = 0; i < tabla.long_tabla; i++) {
                min_CR = Adap.Minimo(min_CR, tabla.datos[i].maximo_cubrimiento);
                min_CVR = Adap.Minimo(min_CVR, tabla.datos[i].nivel_cubrimiento);
            }

            /* we calculate the MSEs */
            fun_adap.Error_tra();
            ec = fun_adap.EC;
            el = fun_adap.EL;
            fun_adap.Error_tst(tabla_tst);
            ec_tst = fun_adap.EC;
            el_tst = fun_adap.EL;

            /* we write the RB */
            cadenaReglas = base_reglas.BRtoString();
            cadenaReglas += "\nMSEtra: " + ec + " MSEtst: " + ec_tst +
                "\nMLEtra: " + el + " MLEtst: " + el_tst +
                "\nMinimum of C_R: " + min_CR + " Minimum of CV_R: " + min_CVR + "\n";
            Fichero.escribeFichero(fichero_reglas, cadenaReglas);

            /* we write the obligatory output files (the training output is
               computed on the validation partition) */
            String salida_tra = tabla.getCabecera();
            salida_tra += fun_adap.getSalidaObli(tabla_val);
            Fichero.escribeFichero(fich_tra_obli, salida_tra);

            String salida_tst = tabla_tst.getCabecera();
            salida_tst += fun_adap.getSalidaObli(tabla_tst);
            Fichero.escribeFichero(fich_tst_obli, salida_tst);

            /* we write the MSEs in specific files */
            Fichero.AnadirtoFichero(ruta_salida + "MogulHCcomunR.txt",
                                    "" + base_reglas.n_reglas + "\n");
            Fichero.AnadirtoFichero(ruta_salida + "MogulHCcomunTRA.txt",
                                    "" + ec + "\n");
            Fichero.AnadirtoFichero(ruta_salida + "MogulHCcomunTST.txt",
                                    "" + ec_tst + "\n");
        }
    }

    /** Stopping criterion: all examples covered or the rule base is full */
    public int Parada() {
        if ((tabla.no_cubiertos == 0) || (base_reglas.n_reglas == MaxReglas)) {
            //System.out.println("Cubiertos -> " + tabla.no_cubiertos);
            //System.out.println("Reglas -> " + base_reglas.n_reglas);
            return (1);
        }
        else {
            return (0);
        }
    }
}
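/*
 * Usage sketch (hypothetical driver; in the KEEL suite this class is normally
 * created and run by the package's own Main wrapper, not shown here):
 *
 *   Lear_m3 learner = new Lear_m3("mogulHC.cfg");  // path to the configuration file
 *   learner.run();
 */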