/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.Subgroup_Discovery.SDIGA.SDIGA;

import keel.Algorithms.Subgroup_Discovery.SDIGA.Calculate.*;
import keel.Dataset.*;
import org.core.*;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.text.DecimalFormat;
import java.util.StringTokenizer;

/**
 * <p>
 * SDIGA
 * </p>
 * <p>
 * Algorithm for the discovery of rules describing subgroups.
 * </p>
 * <p>
 * The class is a static driver: {@link #main(String[])} reads a parameter
 * file, loads the dataset, runs the genetic algorithm iteratively for one or
 * for every class of the output variable, writes the accepted rules and their
 * quality measures, and finally delegates to {@code Calculate.Calculate}.
 * </p>
 *
 * @author Written by Pedro González (University of Jaen) 15/02/2004
 * @author Modified by Pedro González (University of Jaen) 4/08/2007
 * @author Modified by Cristóbal J. Carmona (University of Jaen) 20/04/2010
 * @version 2.0
 * @since JDK1.5
 */
public class SDIGA {

    /** Value of the third objective that marks it as "not used". */
    private static final String NO_OBJECTIVE = "NULL";

    /** Value of the rules representation parameter that selects canonical rules. */
    private static final String CANONICAL = "CAN";

    private static int seed;                 // Seed for the random generator
    private static String nombre_alg;        // Algorithm name
    private static boolean claseSelec;       // True when a target class was given in the parameter file

    private static String input_file_ref;    // Input mandatory file: reference dataset (read by capturaDataset)
    private static String input_file_tra;    // Input mandatory file: training
    private static String input_file_tst;    // Input mandatory file: test
    private static String output_file_tra;   // Output mandatory file: training
    private static String output_file_tst;   // Output mandatory file: test
    private static String rule_file;         // Auxiliary output file for rules
    private static String measure_file;      // Auxiliary output file for quality measures of the rules
    private static String seg_file;          // Auxiliary output file for tracking
    private static String qmeasure_file;     // Output quality measure file
    private static boolean echo = true;      // Write or not the parameter/dataset echo to the seg file; default=true

    // Structures
    static InstanceSet Data;
    static TableVar Variables;   // Set of variables of the dataset and their characteristics
    static TableDat Examples;    // Set of instances of the dataset
    static Genetic AG;           // Genetic Algorithm


    /**
     * Reads the next token and returns it with every double quote replaced by
     * a blank and the result trimmed.
     *
     * @param s tokenizer positioned on a (possibly quoted) file name
     * @return the cleaned token
     */
    private static String nextFileName(StringTokenizer s) {
        return s.nextToken().replace('"', ' ').trim();
    }

    /**
     * <p>
     * Auxiliary method: gets the names of the output files, removing the
     * double quotes and skipping the "=" token.
     * </p>
     * @param s Tokenizer over the "outputData" line, positioned after the keyword
     */
    private static void GetOutputFiles(StringTokenizer s) {
        s.nextToken(); // skip "="
        output_file_tra = nextFileName(s);
        output_file_tst = nextFileName(s);
        rule_file       = nextFileName(s);
        measure_file    = nextFileName(s);
        seg_file        = nextFileName(s);
        qmeasure_file   = nextFileName(s);
    }

    /**
     * <p>
     * Auxiliary method: gets the names of the input files, removing the
     * double quotes and skipping the "=" token.
     * </p>
     * @param s Tokenizer over the "inputData" line, positioned after the keyword
     */
    private static void GetInputFiles(StringTokenizer s) {
        s.nextToken(); // skip "="
        input_file_ref = nextFileName(s);
        input_file_tra = nextFileName(s);
        input_file_tst = nextFileName(s);
    }

    /**
     * Tells whether a third objective has been configured in the genetic
     * algorithm, i.e. its name is different from "NULL".
     *
     * @return true if objective 3 is in use
     */
    private static boolean hasThirdObjective() {
        return !AG.getObj3().equals(NO_OBJECTIVE);
    }

    /**
     * Tells whether the configured rules representation is the canonical one
     * ("CAN"); any other value is handled as DNF by the callers.
     *
     * @return true if the rules representation equals "CAN"
     */
    private static boolean usesCanonicalRules() {
        return AG.getRulesRep().equals(CANONICAL);
    }

    /**
     * Applies one "keyword value" line of the parameter file to the static
     * configuration, to {@link #Variables} or to {@link #AG}.
     *
     * @param tok    keyword (first token of the line)
     * @param tokens remaining tokens of the line
     * @param nl     line counter, used only for the error message
     * @throws IOException if the keyword is not recognised
     */
    private static void applyParameter(String tok, StringTokenizer tokens, int nl) throws IOException {
        if (tok.equalsIgnoreCase("algorithm"))
            nombre_alg = Utils.GetParamString(tokens);
        else if (tok.equalsIgnoreCase("inputdata"))
            GetInputFiles(tokens);
        else if (tok.equalsIgnoreCase("outputdata"))
            GetOutputFiles(tokens);
        else if (tok.equalsIgnoreCase("seed"))
            seed = Utils.GetParamInt(tokens);
        else if (tok.equalsIgnoreCase("targetClass")) {
            Variables.setNameClassObj(Utils.GetParamString(tokens));
            claseSelec = true;
        }
        else if (tok.equalsIgnoreCase("nLabels"))
            Variables.setNLabel(Utils.GetParamInt(tokens));
        else if (tok.equalsIgnoreCase("nEval"))
            AG.setNEval(Utils.GetParamInt(tokens));
        else if (tok.equalsIgnoreCase("popLength"))
            AG.setLenghtPop(Utils.GetParamInt(tokens));
        else if (tok.equalsIgnoreCase("crossProb"))
            AG.setProbCross(Utils.GetParamFloat(tokens));
        else if (tok.equalsIgnoreCase("mutProb"))
            AG.setProbMut(Utils.GetParamFloat(tokens));
        else if (tok.equalsIgnoreCase("minConf"))
            AG.setMinConf(Utils.GetParamFloat(tokens));
        else if (tok.equalsIgnoreCase("RulesRep"))
            AG.setRulesRep(Utils.GetParamString(tokens).toUpperCase());
        else if (tok.equalsIgnoreCase("Obj1"))
            AG.setObj1(Utils.GetParamString(tokens).toUpperCase());
        else if (tok.equalsIgnoreCase("Obj2"))
            AG.setObj2(Utils.GetParamString(tokens).toUpperCase());
        else if (tok.equalsIgnoreCase("Obj3"))
            AG.setObj3(Utils.GetParamString(tokens).toUpperCase());
        else if (tok.equalsIgnoreCase("W1"))
            AG.setW1(Utils.GetParamFloat(tokens));
        else if (tok.equalsIgnoreCase("W2"))
            AG.setW2(Utils.GetParamFloat(tokens));
        else if (tok.equalsIgnoreCase("W3"))
            AG.setW3(Utils.GetParamFloat(tokens));
        else if (tok.equalsIgnoreCase("lSearch"))
            AG.setLSearch(Utils.GetParamString(tokens).equals("yes"));   // l_search
        else if (tok.equalsIgnoreCase("echo"))
            echo = Utils.GetParamString(tokens).equals("yes");           // seg echo
        else
            throw new IOException("Syntax error on line " + nl + ": [" + tok + "]\n");
    }

    /**
     * <p>
     * Reads the parameter file, stores every recognised parameter and writes
     * an echo of the resulting configuration to the tracking file.
     * </p>
     * <p>
     * The tracking file is always created (emptied); the echo itself is only
     * appended when the "echo" parameter is enabled. An unrecognised keyword
     * aborts the program with exit code -1.
     * </p>
     * @param nFile Name of the parameter file
     */
    public static void ReadParameters (String nFile) {
        claseSelec = false; // By default there is no selected target class

        try {
            // NOTE(review): the whole file is lower-cased, which also lower-cases
            // the file names and the target class value - confirm this is
            // acceptable on case-sensitive file systems.
            String file = Files.readFile(nFile).toLowerCase() + "\n ";
            StringTokenizer lineasFichero = new StringTokenizer(file, "\n\r");

            // Local search is enabled unless the file says otherwise
            AG.setLSearch(true);

            // The trailing " " token appended above acts as a sentinel: the
            // loop handles every token except the last one.
            int nl = 0;
            String linea = lineasFichero.nextToken();
            while (lineasFichero.hasMoreTokens()) {
                nl++;
                StringTokenizer tokens = new StringTokenizer(linea, " ,\t");
                if (tokens.hasMoreTokens()) {
                    applyParameter(tokens.nextToken(), tokens, nl);
                }
                linea = lineasFichero.nextToken();
            }
        }
        catch (FileNotFoundException e) {
            System.err.println(e + " Parameter file");
        }
        catch (IOException e) {
            System.err.println(e + "Aborting program");
            System.exit(-1);
        }

        // Creates (or empties) the tracking file
        Files.writeFile(seg_file, "");

        // Echo of the parameters
        StringBuilder contents = new StringBuilder();
        contents.append("--------------------------------------------\n");
        contents.append("| Parameters Echo |\n");
        contents.append("--------------------------------------------\n");
        contents.append("Algorithm name: ").append(nombre_alg).append("\n");
        contents.append("Input file name training: ").append(input_file_tra).append("\n");
        contents.append("Input file name test: ").append(input_file_tst).append("\n");
        contents.append("Rules file name: ").append(rule_file).append("\n");
        contents.append("Tracking file name: ").append(seg_file).append("\n");
        contents.append("Random generator seed: ").append(seed).append("\n");
        contents.append("Selected class of the target variable: ");
        if (claseSelec)
            contents.append(Variables.getNameClassObj()).append("\n");
        else
            contents.append("not established\n");
        contents.append("Number of labels for the continuous variables: ").append(Variables.getNLabel()).append("\n");
        contents.append("Number of evaluations: ").append(AG.getNEval()).append("\n");
        contents.append("Number of individuals in the Population: ").append(AG.getLenghtPop()).append("\n");
        contents.append("Cross probability: ").append(AG.getProbCross()).append("\n");
        contents.append("Mutation probability: ").append(AG.getProbMut()).append("\n");
        contents.append("Minimum confidence: ").append(AG.getMinConf()).append("\n");
        contents.append("Rules representation: ").append(AG.getRulesRep()).append("\n");
        contents.append("Objective 1: ").append(AG.getObj1()).append("(Weight: ").append(AG.getW1()).append(")\n");
        contents.append("Objective 2: ").append(AG.getObj2()).append("(Weight: ").append(AG.getW2()).append(")\n");
        if (hasThirdObjective()) {
            contents.append("Objective 3: ").append(AG.getObj3()).append("(Weight: ").append(AG.getW3()).append(")\n");
        }
        contents.append("Perform Local Search: ").append(AG.getLSearch()).append("\n");

        if (echo)
            Files.addToFile(seg_file, contents.toString());
    }


    /**
     * <p>
     * Reads the reference dataset and loads the variables and the examples.
     * </p>
     * <p>
     * If the first output attribute is not nominal the program is aborted
     * with exit code -1. Any exception raised while loading is reported on
     * the standard output together with its stack trace and is not
     * propagated.
     * </p>
     * @throws IOException declared for compatibility with existing callers
     */
    public static void capturaDataset () throws IOException {

        try {
            // Declaration of the dataset and load in memory
            Data = new InstanceSet();
            Data.readSet(input_file_ref, true);

            // Check that there is only one output variable
            if (Attributes.getOutputNumAttributes() > 1) {
                System.out.println("This algorithm can not process MIMO datasets");
                System.out.println("All outputs but the first one will be removed");
            }
            if (Attributes.getOutputNumAttributes() < 1) {
                System.out.println("This algorithm can not process datasets without outputs");
                System.out.println("Zero-valued output generated");
            }

            // The output variable must be nominal; otherwise the algorithm can not be run
            if (Attributes.getOutputAttribute(0).getType() != Attribute.NOMINAL) {
                System.err.println("Target variable is not a discrete one.");
                System.err.println("Algorithm can not be run.");
                System.out.println("Program aborted.");
                System.exit(-1);
            }

            // Set the number of classes of the output attribute
            Variables.setNClass(Attributes.getOutputAttribute(0).getNumNominalValues());

            // Screen output of the output variable
            System.out.println ( "Output variable: " + Attributes.getOutputAttribute(0).getName());

            // Creates the space for the variables and loads the values
            Variables.Load (Attributes.getInputNumAttributes());

            // Initialises the semantics of the variables; the tracking file
            // name is only passed when the echo is enabled
            String nombreF = echo ? seg_file : "";
            Variables.InitSemantics (nombreF);

            // Creates the space for the examples and loads the values
            Examples.Load(Data, Variables);

        } catch (Exception e) {
            System.out.println("DBG: Exception in readSet");
            e.printStackTrace();
        }
    }


    /**
     * <p>
     * Writes the dataset header (general header, input and output attribute
     * headers and the "@data" mark) to an output file.
     * </p>
     * @param filename Output file
     */
    public static void WriteOutDataset (String filename) {
        StringBuilder contents = new StringBuilder();
        contents.append(Data.getHeader());
        contents.append(Attributes.getInputHeader()).append("\n");
        contents.append(Attributes.getOutputHeader()).append("\n\n");
        contents.append("@data \n");
        Files.writeFile(filename, contents.toString());
    }


    /**
     * <p>
     * Appends an echo of the dataset (number of examples, number of variables
     * and header) to the tracking file.
     * </p>
     * @param filename Tracking file; nothing is written when it is null or empty
     */
    public static void WriteSegDataset (String filename) {
        // Compare by content: "!=" on strings only compares references
        if (filename == null || filename.length() == 0)
            return;

        StringBuilder contents = new StringBuilder("\n");
        contents.append("--------------------------------------------\n");
        contents.append("| Dataset Echo |\n");
        contents.append("--------------------------------------------\n");
        contents.append("Number of examples: ").append(Examples.getNEx()).append("\n");
        contents.append("Number of variables: ").append(Variables.getNVars()).append("\n");
        contents.append(Data.getHeader()).append("\n");
        Files.addToFile(filename, contents.toString());
    }


    /**
     * Appends one line with the quality measures of a rule: class number,
     * support, confidence, the third objective value (only if a third
     * objective is configured) and fitness, tab separated with six decimals.
     * The header of that file is written in {@link #main(String[])}.
     *
     * @param Result      Quality measures of the individual
     * @param fileQuality File to write the quality measures
     */
    private static void writeQualityMeasures(QualityMeasures Result, String fileQuality) {
        DecimalFormat sixDecimals = new DecimalFormat("0.000000");
        StringBuilder line = new StringBuilder();
        line.append(Variables.getNumClassObj());
        line.append("\t").append(sixDecimals.format(Result.getSup()));
        line.append("\t").append(sixDecimals.format(Result.getCnf()));
        if (hasThirdObjective())
            line.append("\t").append(sixDecimals.format(Result.getVal3()));
        line.append("\t").append(sixDecimals.format(Result.getFitness())).append("\n");
        Files.addToFile(fileQuality, line.toString());
    }


    /**
     * <p>
     * Writes the canonical rule and the quality measures
     * </p>
     * <p>
     * A variable takes part in the rule when its chromosome value is lower
     * than the number of labels of that variable.
     * </p>
     * @param NumRule       Number of the rule, written in the rule heading
     * @param rule          Chromosome to write
     * @param Result        Quality measures of the individual
     * @param fileRule      File to write the rule
     * @param fileQuality   File to write the quality measures
     */
    static void WriteRuleCAN (int NumRule, CromCAN rule, QualityMeasures Result, String fileRule, String fileQuality) {
        StringBuilder contents = new StringBuilder();

        // Rule file
        contents.append("GENERATED RULE ").append(NumRule).append("\n");
        contents.append(" Antecedent\n");
        for (int i = 0; i < Variables.getNVars(); i++) {
            if (!(rule.getCromElem(i) < Variables.getNLabelVar(i)))
                continue;   // Variable does not take part in the rule

            String varName = Attributes.getInputAttribute(i).getName();
            if (!Variables.getContinuous(i)) {
                // Discrete variable: write its nominal value
                System.out.println ("\tVariable (D) " + varName + " = ..." );
                contents.append("\tVariable ").append(varName).append(" = ");
                contents.append(Attributes.getInputAttribute(i).getNominalValue(rule.getCromElem(i))).append("\n");
            }
            else {
                // Continuous variable: write the label and its x0, x1, x3 values
                System.out.println ("\tVariable (C) " + varName + " = ..." );
                contents.append("\tVariable ").append(varName).append(" = ");
                contents.append("Label ").append(rule.getCromElem(i));
                contents.append(" \t (").append(Variables.getX0(i, (int) rule.getCromElem(i)));
                contents.append(" ").append(Variables.getX1(i, (int) rule.getCromElem(i)));
                contents.append(" ").append(Variables.getX3(i, (int) rule.getCromElem(i))).append(")\n");
            }
        }
        contents.append(" Consecuent: ").append(Variables.getNameClassObj());
        contents.append("\n\n");
        Files.addToFile(fileRule, contents.toString());

        // Write the quality measures of the rule
        writeQualityMeasures(Result, fileQuality);
    }


    /**
     * <p>
     * Writes the DNF rule and the quality measures
     * </p>
     * <p>
     * A variable takes part in the rule when the extra gene at position
     * "number of labels" is different from 0; each label whose gene equals 1
     * is written.
     * </p>
     * @param NumRule       Number of the rule, written in the rule heading
     * @param rule          Chromosome to write
     * @param Result        Quality measures of the individual
     * @param fileRule      File to write the rule
     * @param fileQuality   File to write the quality measures
     */
    static void WriteRuleDNF (int NumRule, CromDNF rule, QualityMeasures Result, String fileRule, String fileQuality) {
        StringBuilder contents = new StringBuilder();

        // Rule file
        contents.append("GENERATED RULE ").append(NumRule).append("\n");
        contents.append(" Antecedent\n");
        for (int i = 0; i < Variables.getNVars(); i++) {
            int nLabels = Variables.getNLabelVar(i);
            if (rule.getCromElemGene(i, nLabels) == 0)
                continue;   // Variable does not take part in the rule

            String varName = Attributes.getInputAttribute(i).getName();
            if (!Variables.getContinuous(i)) {
                // Discrete variable: list the selected nominal values
                System.out.println ("\tVariable (D) " + varName + " = ..." );
                contents.append("\tVariable ").append(varName).append(" = ");
                for (int j = 0; j < nLabels; j++) {
                    if (rule.getCromElemGene(i, j) == 1)
                        contents.append(Attributes.getInputAttribute(i).getNominalValue(j)).append(" ");
                }
                contents.append("\n");
            }
            else {
                // Continuous variable: list the selected labels with their x0, x1, x3 values
                System.out.println ("\tVariable (C) " + varName + " = ..." );
                contents.append("\tVariable ").append(varName).append(" = ");
                for (int j = 0; j < nLabels; j++) {
                    if (rule.getCromElemGene(i, j) == 1) {
                        // Index by the label j being written; the gene value is
                        // always 1 here and is not a label index
                        contents.append("Label ").append(j);
                        contents.append(" (").append(Variables.getX0(i, j));
                        contents.append(" ").append(Variables.getX1(i, j));
                        contents.append(" ").append(Variables.getX3(i, j)).append(")\t");
                    }
                }
                contents.append("\n");
            }
        }
        contents.append(" Consecuent: ").append(Variables.getNameClassObj());
        contents.append("\n");
        Files.addToFile(fileRule, contents.toString());

        // Write the quality measures of the rule
        writeQualityMeasures(Result, fileQuality);
    }


    /**
     * Resolves the target class given in the parameter file against the
     * nominal values of the first output attribute (ignoring case) and sets
     * the class number and name in {@link #Variables}. If no value matches,
     * the selection is dropped and rules are generated for all the classes.
     */
    private static void resolveSelectedClass() {
        // -1 marks "not found" in case the class name in the parameter file is invalid
        Variables.setNumClassObj(-1);
        for (int z = 0; z < Variables.getNClass(); z++) {
            String nominal = Attributes.getOutputAttribute(0).getNominalValue(z);
            if (nominal.equalsIgnoreCase(Variables.getNameClassObj())) {
                Variables.setNameClassObj(nominal);
                Variables.setNumClassObj(z);
            }
        }

        if (Variables.getNumClassObj() == -1) {
            System.out.println ( "Class name invalid (" + Variables.getNameClassObj() + "). Generate rules for all the classes");
            claseSelec = false;
            Variables.setNumClassObj(0);
        }
        else {
            System.out.println ( "Generate rules for class " + Variables.getNameClassObj() + " only");
        }
    }


    /**
     * Builds the heading of the quality measures file: the list of objectives
     * followed by the column names of the lines written for each rule.
     *
     * @return the heading text
     */
    private static String buildMeasureHeader() {
        StringBuilder header = new StringBuilder("Measures used as objectives: ");
        header.append(AG.getObj1()).append(", ");
        header.append(AG.getObj2()).append(", ");
        if (hasThirdObjective())
            header.append(AG.getObj3()).append(";");
        // Column names of the measures written in the quality measures file
        header.append("\nClass \t");
        header.append(AG.getObj1()).append("\t");
        header.append(AG.getObj2());
        if (hasThirdObjective())
            header.append("\t").append(AG.getObj3());
        header.append("\tFITNESS");
        header.append("\n");
        return header.toString();
    }


    /**
     * <p>
     * Main method of the algorithm
     * </p>
     * <p>
     * Expects exactly one argument, the parameter file. For the selected
     * class, or for each class when none is selected, the genetic algorithm
     * is run repeatedly; the first rule of each class is always stored and
     * the following ones only while they reach the minimum confidence and
     * {@code getLSup()} is greater than 0.
     * </p>
     * @param args command line arguments: the name of the parameter file
     * @throws Exception propagated from the called components
     **/
    public static void main(String[] args) throws Exception {

        if (args.length != 1) {
            System.err.println("Syntax error. Usage: java AGI <parameterfile.txt>" );
            return;
        }

        // Initial echo
        System.out.println();
        System.out.println("Iterative Genetic Algorithm");

        Variables = new TableVar();
        Examples = new TableDat();
        AG = new Genetic();

        // Read parameter file and initialize parameters
        ReadParameters (args[0]);

        // Backup of the mutation probability, restored for every class
        float savePrMut = AG.getProbMut();

        // Read the dataset, store values and echo to the seg file
        capturaDataset ();
        if (echo)
            WriteSegDataset (seg_file);

        // Create and initialise gain information array
        Variables.GainInit(Examples, echo ? seg_file : "");

        // Screen output of some parameters
        System.out.println ("\nSeed: " + seed);
        System.out.println ("Local Search?: " + AG.getLSearch());
        System.out.println ("\nOutput variable: " + Attributes.getOutputAttribute(0).getName() );

        // Generation of rules for one class or for all the classes
        if (claseSelec) {
            resolveSelectedClass();   // may clear claseSelec if the name is invalid
        }
        else {
            Variables.setNumClassObj(0);
            System.out.println ( "Generate rules for all the classes");
        }
        // With a valid selected class a single pass of the class loop is made
        boolean terminar = claseSelec;

        // Initialise rule file
        Files.writeFile(rule_file, "");

        // Initialise measure file
        Files.writeFile(measure_file, buildMeasureHeader());

        // Execution header for the tracking file
        String contents = "\n";
        contents += "--------------------------------------------\n";
        contents += "| Execution results |\n";
        contents += "--------------------------------------------\n";
        Files.addToFile(seg_file, contents);

        boolean mejora_local = false;   // Whether the local search (if applied) has improved the rule

        do {
            // Initialisation of random generator seed. Done after load param values
            if (seed != 0)
                Randomize.setSeed (seed);

            // If no class specified, define the class for each iteration
            if (!claseSelec)
                Variables.setNameClassObj(Attributes.getOutputAttribute(0).getNominalValue(Variables.getNumClassObj()));

            // Tracking to screen and "seg" file
            System.out.println ("\nTarget class number: " + Variables.getNumClassObj() + " (value " + Variables.getNameClassObj() + ")");
            Files.addToFile(seg_file, "\nClass " + Variables.getNumClassObj() + ":\n");

            // Restores the original mutation probability and scales it by the number of variables
            AG.setProbMut(savePrMut);
            AG.setProbMut(AG.getProbMut() / Variables.getNVars());

            // Gets the next gene to be mutated
            if (AG.getProbMut() < 1)
                AG.setMuNext ((int) Math.ceil (Math.log(Randomize.Rand()) / Math.log(1.0 - AG.getProbMut())));
            else
                AG.setMuNext(1);

            // Set all the examples as not covered
            for (int ej = 0; ej < Examples.getNEx(); ej++)
                Examples.setCovered (ej, false);

            // Load the number of examples of the target class
            Examples.setExamplesClassObj(Variables.getNumClassObj());

            int NumReglasGeneradas = 0;        // Number of generated rules
            Examples.setExamplesCovered(0);    // Number of covered examples

            // Iterative GA run
            boolean ruleAccepted;
            do {
                System.out.print ("# " + NumReglasGeneradas + ": ");

                // GA execution
                AG.GeneticAlgorithm(Variables, Examples, seg_file);

                // Store the support value before the local search
                QualityMeasures Resultados = AG.getQualityMeasures(AG.getBestGuy(), seg_file);
                float oldsup = Resultados.getSup();

                // Local search to improve the rule
                if (AG.getLSearch()) {
                    if (usesCanonicalRules())
                        mejora_local = AG.LocalImprovementCAN(Variables, Examples);
                    else
                        mejora_local = AG.LocalImprovementDNF(Variables, Examples);
                }

                // Evaluates the best rule and obtains its results
                AG.evalIndiv(AG.getBestGuy(), AG, Variables, Examples, true);
                Resultados = AG.getQualityMeasures(AG.getBestGuy(), seg_file);

                // The rule is accepted when it reaches the minimum confidence and
                // getLSup() is positive; the first rule of a class is always stored
                ruleAccepted = Resultados.getCnf() >= AG.getMinConf() && Resultados.getLSup() > 0;
                boolean storeRule = NumReglasGeneradas == 0 || ruleAccepted;

                if (storeRule) {
                    if (usesCanonicalRules())
                        WriteRuleCAN(NumReglasGeneradas, AG.getIndivCromCAN(AG.getBestGuy()), Resultados, rule_file, measure_file);
                    else
                        WriteRuleDNF(NumReglasGeneradas, AG.getIndivCromDNF(AG.getBestGuy()), Resultados, rule_file, measure_file);
                }

                // Tracking information for the "seg" file
                if (!storeRule) {
                    contents = "\tRule not stored (low confidence or support)\n";
                    System.out.print (contents);
                }
                else if (!AG.getLSearch()) {
                    contents = "\tLocal search not applied. Support = " + oldsup + "\n";
                }
                else if (mejora_local) {
                    contents = "\tRule support improved by local search from " + oldsup + " to " + Resultados.getSup() + "\n";
                }
                else {
                    contents = "\tRule not improved by Local search. Support = " + oldsup + "\n";
                }
                Files.addToFile(seg_file, contents);

                // Increments the number of generated rules
                NumReglasGeneradas++;

            } while (ruleAccepted);

            if (!claseSelec) {
                // Rules are generated for all the classes: move to the next one
                Variables.setNumClassObj(Variables.getNumClassObj() + 1);
                // If there are no more classes, finish
                if (Variables.getNumClassObj() >= Variables.getNClass())
                    terminar = true;
            }

        } while (!terminar);

        System.out.println("Algorithm terminated\n");
        System.out.println("--------------------\n");
        System.out.println("Calculating values of the quality measures\n");

        // Calculate the quality measures
        Files.writeFile(output_file_tra, Data.getHeader());
        Files.writeFile(output_file_tst, Data.getHeader());
        Calculate.Calculate(output_file_tra, output_file_tst, input_file_tra, input_file_tst, rule_file, qmeasure_file, Variables.getNLabel());
    }
}