/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * <p> * NMEEFSD * Non-dominated Multi-objective Evolutionary algorithm for Extracting Fuzzy rules in Subgroup Discovery * </p> * <p> * Algorithm for the discovery of rules describing subgroups * @author Crist�bal J. Carmona * @version 1.0 * @since JDK1.5 * </p> */ package keel.Algorithms.Subgroup_Discovery.NMEEFSD.NMEEFSD; import keel.Algorithms.Subgroup_Discovery.NMEEFSD.Calculate.*; import keel.Dataset.*; import org.core.*; import java.util.Vector; import java.io.IOException; import java.io.FileNotFoundException; import java.text.DecimalFormat; import java.util.StringTokenizer; public class NMEEFSD { private static int seed; // Seed for the random generator private static String nombre_alg; // Algorithm Name private static boolean claseSelec; // Indicates if there is a selected class to run the algorithm or not private static String input_file_ref; // Input mandatory file training private static String input_file_tra; // Input mandatory file training private static String input_file_tst; // Input mandatory file test private static String output_file_tra; // Output file training private static String output_file_tst; // Output file test private static String rule_file; // Auxiliary output file for rules private static String measure_file; // Auxiliary output file for quality measures of the rules private static String seg_file; // Auxiliary output file for tracking private static String qmeasure_file; // Output quality measure file // Structures static InstanceSet Data; static TableVar Variables; // Set of variables of the dataset and their characteristics static TableDat Ejemplos; // Set of instances of the dataset static Genetic AG; // Genetic Algorithm /** * <p> * Auxiliar Gets the name for the output files, eliminating "" and skiping "=" * </p> * @param s String of the output files */ private static void GetOutputFiles(StringTokenizer s) { String val = s.nextToken(); output_file_tra = s.nextToken().replace('"',' ').trim(); output_file_tst = s.nextToken().replace('"',' ').trim(); rule_file = s.nextToken().replace('"',' ').trim(); measure_file = s.nextToken().replace('"',' ').trim(); seg_file = s.nextToken().replace('"',' ').trim(); qmeasure_file= s.nextToken().replace('"',' ').trim(); } /** * <p> * Auxiliar Gets the name for the input files, eliminating "" and skiping "=" * </p> * @param s String of the input files */ private static void GetInputFiles(StringTokenizer s) { String val = s.nextToken(); // skip "=" input_file_ref = s.nextToken().replace('"',' ').trim(); input_file_tra = s.nextToken().replace('"',' ').trim(); input_file_tst = s.nextToken().replace('"',' ').trim(); } /** * <p> * Reads the parameters from the file specified and stores the values * </p> * @param nFile Fichero of parameters */ public static void ReadParameters (String nFile) { claseSelec = false; String contents; try { int nl; String fichero, linea, tok; StringTokenizer lineasFichero, tokens; fichero = Files.readFile (nFile); fichero = fichero.toLowerCase() + "\n "; lineasFichero = new StringTokenizer(fichero,"\n\r"); for (nl=0, linea=lineasFichero.nextToken(); lineasFichero.hasMoreTokens(); linea=lineasFichero.nextToken()) { nl++; tokens = new StringTokenizer(linea," ,\t"); if (tokens.hasMoreTokens()) { tok = tokens.nextToken(); if (tok.equalsIgnoreCase("algorithm")) nombre_alg = Utils.getParamString(tokens); else if (tok.equalsIgnoreCase("inputdata")) GetInputFiles(tokens); else if (tok.equalsIgnoreCase("outputdata")) GetOutputFiles(tokens); else if (tok.equalsIgnoreCase("RulesRep")) AG.setRulesRep(Utils.getParamString(tokens).toUpperCase()); else if (tok.equalsIgnoreCase("StrictDominance")) AG.setStrictDominance(Utils.getParamString(tokens).toUpperCase()); else if (tok.equalsIgnoreCase("seed")) seed = Utils.getParamInt(tokens); else if (tok.equalsIgnoreCase("targetClass")) { Variables.setNameClassObj(Utils.getParamString(tokens)); claseSelec=true; } else if (tok.equalsIgnoreCase("nLabels")) Variables.setNLabel(Utils.getParamInt(tokens)); else if (tok.equalsIgnoreCase("nEval")) AG.setNEval(Utils.getParamInt(tokens)); else if (tok.equalsIgnoreCase("popLength")) AG.setLengthPopulation(Utils.getParamInt(tokens)); else if (tok.equalsIgnoreCase("crossProb")) AG.setProbCross(Utils.getParamFloat(tokens)); else if (tok.equalsIgnoreCase("mutProb")) AG.setProbMutation(Utils.getParamFloat(tokens)); else if (tok.equalsIgnoreCase("diversity")) AG.setDiversity(Utils.getParamString(tokens).toUpperCase()); else if (tok.equalsIgnoreCase("ReInitCob")) AG.setReInitCob(Utils.getParamString(tokens)); else if (tok.equalsIgnoreCase("porcCob")) AG.setPorcCob(Utils.getParamFloat(tokens)); else if (tok.equalsIgnoreCase("minCnf")) AG.setMinCnf(Utils.getParamFloat(tokens)); else if (tok.equalsIgnoreCase("Obj1")){ AG.setNumObjectives(3); AG.iniNObjectives(); AG.setNObjectives(0, Utils.getParamString(tokens).toUpperCase()); } else if (tok.equalsIgnoreCase("Obj2")) AG.setNObjectives(1, Utils.getParamString(tokens).toUpperCase()); else if (tok.equalsIgnoreCase("Obj3")){ String nil = Utils.getParamString(tokens); if(nil.toUpperCase().compareTo("NULL")!=0) AG.setNObjectives(2, nil.toUpperCase()); else AG.setNumObjectives(2); } else throw new IOException("Syntax error on line "+nl+": ["+tok+"]\n"); } } } catch(FileNotFoundException e) { System.err.println(e+" Parameter file"); } catch(IOException e) { System.err.println(e+"Aborting program"); System.exit(-1); } Files.writeFile(seg_file,""); contents = "--------------------------------------------\n"; contents+= "| Parameters Echo |\n"; contents+= "--------------------------------------------\n"; contents+= "Algorithm name: " + nombre_alg + "\n"; contents+= "Input file name training: " + input_file_tra + "\n"; contents+= "Rules file name: " + rule_file + "\n"; contents+= "Tracking file name: " + seg_file + "\n"; contents+= "Representation of the Rules: " + AG.getRulesRep() + "\n"; contents+= "Strict dominance: " + AG.getStrictDominance() + "\n"; contents+= "Random generator seed: " + seed + "\n"; contents+= "Selected class of the target variable: "; if (claseSelec) contents+= Variables.getNameClassObj() + "\n"; else contents+= "not established\n"; contents+= "Number of labels for the continuous variables: " + Variables.getNLabel() + "\n"; contents+= "Number of evaluations: " + AG.getNEval() + "\n"; contents+= "Number of individuals in the Population: " + AG.getLengthPopulation() + "\n"; contents+= "Cross probability: " + AG.getProbCross() + "\n"; contents+= "Mutation probability: " + AG.getProbMutation() + "\n"; contents+= "Diversity: " + AG.getDiversity() + "\n"; contents+= "Perform ReInitCob: " + AG.getReInitCob() + "\n"; contents+= "Percentage of the ReInitCob: " + AG.getPorcCob() + "\n"; contents+= "Minimum confidence threshold: " + AG.getMinCnf() + "\n"; contents+= "Number of objetives: " + AG.getNumObjectives() + "\n"; for(int i=1; i<=AG.getNumObjectives(); i++){ contents+= "\tObjetive "+i+": " + AG.getNObjectives(i-1) + "\n"; } Files.addToFile(seg_file,contents); } /** * <p> * Read the dataset and stores the values * </p> */ public static void CaptureDataset () throws IOException { try { // Declaration of the dataset and load in memory Data = new InstanceSet(); Data.readSet(input_file_ref,true); // Check that there is only one output variable if (Attributes.getOutputNumAttributes()>1) { System.out.println("This algorithm can not process MIMO datasets"); System.out.println("All outputs but the first one will be removed"); } boolean noOutputs=false; if (Attributes.getOutputNumAttributes()<1) { System.out.println("This algorithm can not process datasets without outputs"); System.out.println("Zero-valued output generated"); noOutputs=true; } // Chek that the output variable is nominal if (Attributes.getOutputAttribute(0).getType()!=Attribute.NOMINAL) { // If the output variables is not enumeratad, the algorithm can not be run try { throw new IllegalAccessException("Finish"); } catch( IllegalAccessException term) { System.err.println("Target variable is not a discrete one."); System.err.println("Algorithm can not be run."); System.out.println("Program aborted."); System.exit(-1); } } // Set the number of classes of the output attribute - this attribute must be nominal Variables.setNClass(Attributes.getOutputAttribute(0).getNumNominalValues()); // Screen output of the output variable and selected class System.out.println ( "Output variable: " + Attributes.getOutputAttribute(0).getName()); // Creates the space for the variables and load the values. Variables.Load (Attributes.getInputNumAttributes()); // Setting and file writing of fuzzy sets characteristics for continuous variables String nombreF = seg_file; Variables.InitSemantics (nombreF); // Creates the space for the examples and load the values Ejemplos.Load(Data,Variables); } catch (Exception e) { System.out.println("DBG: Exception in readSet"); e.printStackTrace(); } } /** * <p> * Dataset file writting to output file * </p> * @param filename Output file */ public static void WriteOutDataset (String filename) { String contents; contents = Data.getHeader(); contents+= Attributes.getInputHeader() + "\n"; contents+= Attributes.getOutputHeader() + "\n\n"; contents+= "@data \n"; Files.writeFile(filename, contents); } /** * <p> * Dataset file writting to tracking file * </p> * @param filename Tracking file */ public static void WriteSegDataset (String filename) { String contents="\n"; contents+= "--------------------------------------------\n"; contents+= "| Dataset Echo |\n"; contents+= "--------------------------------------------\n"; contents+= "Number of examples: " + Ejemplos.getNEx() + "\n"; contents+= "Number of variables: " + Variables.getNVars()+ "\n"; contents+= Data.getHeader() + "\n"; if (filename!="") Files.addToFile(filename, contents); } /** * <p> * Writes the rule and the quality measures * </p> * @param pob Actual population * @param nobj Number of objectives * @param fileRule Files of rules * @param fileQuality File of quality measures * @param nclase Number of the class * @param cab_measure_file File of header measure * @param vmarca Vector which marks if the rule is repeated */ static void WriteRule (Population pob, int nobj, String fileRule, String fileQuality, int nclase, String cab_measure_file, Vector vmarca) { String contents; int NumRules; int marca = 0; if(nclase==0){ Files.writeFile(fileRule, ""); Files.writeFile(fileQuality, cab_measure_file); } NumRules = -1; for(int aux=0; aux<pob.getNumIndiv(); aux++){ // Write the quality measures of the rule in "measure_file" QualityMeasures Result = new QualityMeasures(nobj); Result = pob.getIndiv(aux).getMeasures(); marca = (Integer) vmarca.get(aux); if((Result.getCnf()>AG.getMinCnf())&&(marca!=1)){ NumRules++; // Rule File contents = "GENERATED RULE " + NumRules + "\n"; contents+= "\tAntecedent\n"; //Canonical rules if(AG.getRulesRep().compareTo("CAN")==0){ CromCAN regla = pob.getIndivCromCAN(aux); for (int auxi=0; auxi<Variables.getNVars(); auxi++) { if (!Variables.getContinuous(auxi)) { // Discrete variable if (regla.getCromElem(auxi)<Variables.getNLabelVar(auxi)) { contents+= "\t\tVariable " + Attributes.getInputAttribute(auxi).getName() + " = " ; contents+= Attributes.getInputAttribute(auxi).getNominalValue(regla.getCromElem(auxi)) + "\n"; } } else { // Continuous variable if (regla.getCromElem(auxi)<Variables.getNLabelVar(auxi)) { contents+= "\t\tVariable " + Attributes.getInputAttribute(auxi).getName() + " = "; contents+= "Label " + regla.getCromElem(auxi); contents+= " \t (" + Variables.getX0(auxi,(int) regla.getCromElem(auxi)); contents+= " " + Variables.getX1(auxi,(int) regla.getCromElem(auxi)); contents+= " " + Variables.getX3(auxi,(int) regla.getCromElem(auxi)) +")\n"; } } } } else { //DNF rules CromDNF regla = pob.getIndivCromDNF(aux); for (int i=0; i<Variables.getNVars(); i++) { if (regla.getCromGeneElem(i,Variables.getNLabelVar(i))==true){ if (!Variables.getContinuous(i)) { // Discrete variable contents+= "\tVariable " + Attributes.getInputAttribute(i).getName() + " = " ; for (int j=0; j<Variables.getNLabelVar(i); j++) { if (regla.getCromGeneElem(i, j)==true) contents+= Attributes.getInputAttribute(i).getNominalValue(j) + " "; } contents+= "\n"; } else { // Continuous variable contents+= "\tVariable " + Attributes.getInputAttribute(i).getName() + " = "; for (int j=0; j<Variables.getNLabelVar(i); j++) { if (regla.getCromGeneElem(i, j)==true) { contents+= "Label " + j; contents+= " (" + Variables.getX0(i,j); contents+= " " + Variables.getX1(i,j); contents+= " " + Variables.getX3(i,j) +")\t"; } } contents+= "\n"; } } } } contents+= "\tConsecuent: " + Variables.getNameClassObj()+"\n\n"; Files.addToFile(fileRule, contents); DecimalFormat sixDecimals = new DecimalFormat("0.000000"); contents = "" + Variables.getNumClassObj(); for(int auxi=0; auxi<AG.getNumObjectives(); auxi++){ contents+= "\t"+sixDecimals.format(Result.getObjectiveValue(auxi)); } contents+= "\t"+sixDecimals.format(Result.getCnf())+ "\n"; Files.addToFile(fileQuality, contents); } } } /** * <p> * Main method of the algorithm * </p> **/ public static void main(String[] args) throws Exception { String contents; // String for the file contents String NameRule, NameMeasure; // String containing de original names for the rules and measures files boolean terminar = false; // Indicates no more repetition for the rule generation of diferent classes if (args.length != 1) { System.err.println("Syntax error. Usage: java AGI <parameterfile.txt>" ); return; } // Initial echo System.out.println("\nNMEEF-SD implementation"); Variables = new TableVar(); Ejemplos = new TableDat(); AG = new Genetic(); // Read parameter file and initialize parameters ReadParameters (args[0]); NameRule = rule_file; NameMeasure = measure_file; // Read the dataset, store values and echo to output and seg files CaptureDataset (); //WriteOutDataset(output_file); // Creates and writes the file WriteSegDataset (seg_file); // Create and initilize gain information array Variables.GainInit(Ejemplos, seg_file); // Screen output of same parameters System.out.println ("\nSeed: " + seed); // Random Seed System.out.println ("\nOutput variable: " + Attributes.getOutputAttribute(0).getName() ); // Output variable // Generation of rules for one class or all the classes if (claseSelec) { // If there is one class indicated as a parameter, only generate rules of thas class terminar = true; // Set the number and the name of the selected class of the output variable from its value Variables.setNumClassObj(-1); // To assure an invalid value if the class name in the param file is invalid for (int z=0; z<Variables.getNClass(); z++) { if (Attributes.getOutputAttribute(0).getNominalValue(z).equalsIgnoreCase(Variables.getNameClassObj())) { Variables.setNameClassObj(Attributes.getOutputAttribute(0).getNominalValue(z)); Variables.setNumClassObj(z); } } // If the value is invalid, generate rules for all the classes if (Variables.getNumClassObj()==-1) { System.out.println ( "Class name invalid (" + Variables.getNameClassObj() + "). Generate rules for all the classes"); claseSelec=false; terminar = false; Variables.setNumClassObj(0); } else System.out.println ( "Generate rules for class " + Variables.getNameClassObj() + " only"); } else{ // No class indicated, so generate rules of all the classes Variables.setNumClassObj(0); System.out.println ( "Generate rules for all the classes"); } // Initialize measure file String cab_measure_file; cab_measure_file = "--------------------------------------------\n"; cab_measure_file+= "| Measures file |\n"; cab_measure_file+= "--------------------------------------------\n\n"; cab_measure_file+= "MEASURES USED AS OBJECTIVES:\n\t"; for(int i=0; i<AG.getNumObjectives(); i++){ cab_measure_file+= AG.getNObjectives(i)+"\t"; } // Include in this header the measures to be written in the quality measures file cab_measure_file+= "\n\nCLASS"; for(int i=0; i<AG.getNumObjectives(); i++){ cab_measure_file+= "\t"+AG.getNObjectives(i).toUpperCase(); } cab_measure_file += "\tFCNF\n"; int nclase=0; do { // Initialization of random generator seed. Done after load param values if (seed!=0) Randomize.setSeed (seed); // If no class especified, define the class for each iteration if (!claseSelec) // Set the nominal value of the class Variables.setNameClassObj(Attributes.getOutputAttribute(0).getNominalValue(Variables.getNumClassObj())); // Tracking to file and "seg" file System.out.println ("\nTarget class number: " + Variables.getNumClassObj() + " (value " + Variables.getNameClassObj() + ")"); contents = "\n"; contents+= "--------------------------------------------\n"; contents+= "| Class "+Variables.getNumClassObj()+" |\n"; contents+= "--------------------------------------------\n\n"; Files.addToFile(seg_file, contents); // Set all the examples as not covered for (int ej=0; ej<Ejemplos.getNEx(); ej++) Ejemplos.setCovered (ej,false); // Set example to not covered // Load the number of examples of the target class Ejemplos.setExamplesClassObj(Variables.getNumClassObj()); // Variables Initialization //Ejemplos.setExamplesCovered(0); System.out.println("Processing"); Population result = AG.GeneticAlgorithm(Variables,Ejemplos,seg_file); Vector marcar; if(AG.getRulesRep().compareTo("CAN")==0){ marcar = AG.RemoveRepeatedCAN(result); } else { marcar = AG.RemoveRepeatedDNF(result,Variables); } WriteRule(result, AG.getNumObjectives(), NameRule, NameMeasure, nclase, cab_measure_file, marcar); // Termination Echo System.out.println("Target class terminated."); if (!claseSelec) { // No class indicated to generate de rules (generate rules for all the classes) // Set num_clase as the next class Variables.setNumClassObj(Variables.getNumClassObj()+1); // If there are no more classes, set terminar as true if (Variables.getNumClassObj()>=Variables.getNClass()) terminar = true; } nclase++; } while (terminar==false); System.out.println("Algorithm terminated\n"); System.out.println("--------------------\n"); System.out.println("Calculating values of the quality measures\n"); //Calculate the quality measures Files.writeFile(output_file_tra,Data.getHeader()); Files.writeFile(output_file_tst,Data.getHeader()); Calculate.Calculate(output_file_tra, output_file_tst, input_file_tra, input_file_tst, rule_file, qmeasure_file, Variables.getNLabel()); } }