/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
    L. Sánchez (luciano@uniovi.es)
    J. Alcalá-Fdez (jalcala@decsai.ugr.es)
    S. García (sglopez@ujaen.es)
    A. Fernández (alberto.fernandez@ujaen.es)
    J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.Genetic_Rule_Learning.M5Rules;

import java.util.Vector;
import org.core.Fichero;

/**
 * Class to implement the M5Rules algorithm.
 *
 * <p>The algorithm repeatedly builds an M5 model tree on the examples that are
 * still uncovered, turns that tree into rules, keeps the best rule according
 * to the configured heuristic and removes the examples covered by it.
 *
 * @author Antonio Alejandro Tortosa Urdiales (UGR)
 * @author Modified by Victoria Lopez (University of Granada) 03/05/2011
 * @version 1.0 (05-04-08)
 */
class M5Rules {

    /** Heuristic identifier: the negated result of {@code Rule.apply} is minimised. */
    public static final int COVERAGE = 0;
    /** Heuristic identifier: rule deviation divided by the class standard deviation is minimised. */
    public static final int RMS = 1;
    /** Heuristic identifier: the rule's mean absolute error is minimised. */
    public static final int MAE = 2;
    /** Heuristic identifier: the value of {@code ruleCorrelation} is minimised. */
    public static final int CC = 3;

    //Inputs & Outputs
    MyDataset train, val, test; //the datasets for training, validation and test
    String outputTr, outputTst, outputRules; //the names for the output files

    //General
    private Vector classification_rules; //generated rules
    private String default_class_name; //name of the default class (currently never set to a non-null value)

    //Options
    private double pruningFactor; //factor for pruning
    private int verbosity; //verbosity level
    private boolean unsmoothed = true; //whether the unsmoothed leaf functions are used
    private int heuristic = COVERAGE; //heuristic for rule's selection

    /********************************PRIVATE METHODS************************************/

    /**
     * Extracts recursively the rules out of a tree.
     *
     * @param node the current node in consideration (a null node yields no rules)
     * @param base_rule the rule that leads to the father of the node
     * @param link_to_father the simple rule that connects the node with its father,
     *        or null when the node has no father
     * @return a vector with all the rules extracted from the leaves of the subtree
     *         for which the node is root; rules of the right subtree come first
     */
    private Vector convert(M5TreeNode node, Rule base_rule, SimpleRule link_to_father) {
        //1. Rule linked to this node: node rule <- father rule + path rule
        Rule node_rule = base_rule.getCopy();
        if (link_to_father != null) {
            node_rule.grow(link_to_father);
        }

        Vector output = new Vector();
        if (node == null) {
            return output;
        }

        if (node.isLeaf()) {
            //2.A A leaf contributes its own rule together with the leaf function
            if (unsmoothed) {
                node_rule.setFunction(node.getUnsmoothedFunction());
            } else {
                node_rule.setFunction(node.getSmoothedFunction());
            }
            output.add(node_rule);
            return output;
        }

        //2.B An inner node contributes the rules of both subtrees
        int cut_attribute = node.getSplitingAttribute();

        SimpleRule left_link = new SimpleRule();
        SimpleRule right_link = new SimpleRule();
        left_link.setAttribute(cut_attribute);
        left_link.setValue(node.getSplitingValue());
        right_link.setAttribute(cut_attribute);
        right_link.setValue(node.getSplitingValue());
        left_link.setOperator(SimpleRule.LOWER);
        right_link.setOperator(SimpleRule.GREATER);

        Vector right_rules = convert(node.getRightChild(), node_rule, right_link);
        Vector left_rules = convert(node.getLeftChild(), node_rule, left_link);
        output.addAll(right_rules);
        output.addAll(left_rules);
        return output;
    }

    /**
     * Generates the output for a given dataset and stores it in a file. After the
     * dataset header, one line per example is written holding the example's class
     * value and the predicted value separated by a blank.
     *
     * @param dataset the input dataset
     * @param filename the name of the file
     * @param classification the predicted value for every example of the dataset
     */
    private void doOutput(MyDataset dataset, String filename, double[] classification) {
        //A builder avoids re-copying the whole text for every example
        StringBuilder output = new StringBuilder();
        output.append(dataset.copyHeader()); //we insert the header in the output file

        //We write the output for each example
        for (int i = 0; i < dataset.size(); i++) {
            double class_name = dataset.itemset(i).getClassValue();
            output.append(class_name).append(" ").append(classification[i]).append("\n");
        }

        Fichero.escribeFichero(filename, output.toString());
    }

    /**
     * Generates the output rules file from a given ruleset and stores it in a file.
     *
     * @param filename the name of the file
     * @param rules the rules
     */
    private void doRulesOutput(String filename, Vector rules) {
        StringBuilder output = new StringBuilder();
        output.append("@Number of rules: ").append(rules.size()).append("\n\n");

        for (int i = 0; i < rules.size(); i++) {
            output.append("Rule ").append(i + 1).append(": ")
                  .append(rules.elementAt(i)).append("\n");
        }

        Fichero.escribeFichero(filename, output.toString());
    }

    /**
     * Computes the score of a rule under the configured heuristic; lower is better.
     *
     * @param rule the rule to evaluate
     * @param data the examples on which the rule is evaluated
     * @return the score, or Double.MAX_VALUE for an unknown heuristic
     */
    private double score(Rule rule, MyDataset data) {
        if (heuristic == COVERAGE) {
            return -rule.apply(data);
        }
        if (heuristic == RMS) {
            //NaN or infinite when classSTD() is 0; chooseBestRule copes with that
            return data.ruleDeviation(rule) / data.classSTD();
        }
        if (heuristic == MAE) {
            return data.ruleMeanAbsoluteError(rule);
        }
        if (heuristic == CC) {
            // NOTE(review): the correlation is minimised like the error measures;
            // if ruleCorrelation returns a plain correlation coefficient it
            // should probably be maximised instead - confirm against MyDataset.
            return data.ruleCorrelation(rule);
        }
        return Double.MAX_VALUE;
    }

    /**
     * Selects the rule with the lowest score. The first rule is always a candidate,
     * so a valid index is returned even when no score is a finite number, and a
     * NaN score is treated as worse than any other score.
     *
     * @param rules the candidate rules (must not be empty)
     * @param data the examples on which the rules are evaluated
     * @return the index of the selected rule
     */
    private int chooseBestRule(Vector rules, MyDataset data) {
        int best_rule = -1;
        double best_value = Double.MAX_VALUE;
        for (int i = 0; i < rules.size(); i++) {
            double curr_value = score((Rule) rules.elementAt(i), data);
            boolean improves = best_rule < 0
                    || curr_value < best_value
                    || (Double.isNaN(best_value) && !Double.isNaN(curr_value));
            if (improves) {
                best_rule = i;
                best_value = curr_value;
            }
        }
        return best_rule;
    }

    /************************************************************************************/

    /**
     * Constructor. Reads the file names and the options from the parameters and
     * loads the training, validation and test datasets.
     *
     * <p>Parameter 0 is the pruning factor (reset to 2 when outside [0,10]),
     * parameter 1 the verbosity level (reset to 0 when outside {0,1,2}) and
     * parameter 2 the name of the rule selection heuristic (Coverage, RMS, MAE
     * or CC, case insensitive; reset to Coverage for any other name).
     *
     * @param paramFile the algorithm's parameters
     * @throws Exception if the class attribute of the training set is not numeric
     */
    public M5Rules(parseParameters paramFile) throws Exception {
        //Input File Names
        String trainFileName = paramFile.getTrainingInputFile();
        String valFileName = paramFile.getValidationInputFile();
        String testFileName = paramFile.getTestInputFile();

        //Output File Names
        outputTr = paramFile.getTrainingOutputFile();
        outputTst = paramFile.getTestOutputFile();
        outputRules = paramFile.getOutputFile(0);

        //Options
        pruningFactor = Double.parseDouble(paramFile.getParameter(0)); //pruning factor (a in (n+a)/(n-k))
        unsmoothed = true; //the unsmoothed leaf functions are always used
        verbosity = Integer.parseInt(paramFile.getParameter(1)); //verbosity level
        String heuristic_name = paramFile.getParameter(2); //name of the rule selection heuristic

        if (pruningFactor < 0 || pruningFactor > 10) {
            pruningFactor = 2;
            System.err.println("Error: Pruning Factor must be in the interval [0,10]");
            System.err.println("Using default value: 2");
        }
        if (verbosity < 0 || verbosity > 2) {
            verbosity = 0;
            System.err.println("Error: Verbosity must be 0, 1 or 2");
            System.err.println("Using default value: 0");
        }

        if (heuristic_name.equalsIgnoreCase("Coverage")) {
            heuristic = COVERAGE;
        } else if (heuristic_name.equalsIgnoreCase("RMS")) {
            heuristic = RMS;
        } else if (heuristic_name.equalsIgnoreCase("MAE")) {
            heuristic = MAE;
        } else if (heuristic_name.equalsIgnoreCase("CC")) {
            heuristic = CC;
        } else {
            heuristic = COVERAGE;
            System.err.println("Error: heuristic must be Coverage, RMS, MAE or CC");
            System.err.println("Using default value: Coverage");
        }

        /* Initializes the dataset. */
        train = new MyDataset(trainFileName, true);
        val = new MyDataset(valFileName, false);
        test = new MyDataset(testFileName, false);

        if (train.getClassAttribute().isDiscret()) {
            throw new Exception("Class has to be numeric.");
        }

        classification_rules = null;
        default_class_name = null;
    }

    /**
     * Converts a given M5 tree into a vector of rules.
     *
     * @param tree the root of the M5 tree
     * @return a vector of rules; a tree that is a single leaf yields one rule
     *         without conditions that carries the leaf function
     */
    public Vector treeToRules(M5TreeNode tree) {
        if (!tree.isLeaf()) {
            return convert(tree, new Rule(), null);
        }

        Rule r = new Rule();
        if (unsmoothed) {
            r.setFunction(tree.getUnsmoothedFunction());
        } else {
            r.setFunction(tree.getSmoothedFunction());
        }
        Vector output = new Vector();
        output.add(r);
        return output;
    }

    /**
     * Removes the duplicated rules from a vector of rules. Rules of size 0 are
     * removed as well. The vector is modified in place.
     *
     * @param rules vector of rules
     */
    public void removeDuplicates(Vector rules) {
        for (int i = 0; i < rules.size(); i++) {
            Rule current = (Rule) rules.elementAt(i);
            if (current.size() == 0) {
                //step back so that the element shifted into position i is visited
                rules.remove(i);
                i--;
                continue;
            }
            for (int j = i + 1; j < rules.size(); j++) {
                if (current.isEqual((Rule) rules.elementAt(j))) {
                    rules.remove(j);
                    j--;
                }
            }
        }
    }

    /**
     * Runs the algorithm: while examples remain, an M5 tree is built on them, the
     * tree is converted into rules, the best rule under the configured heuristic
     * is appended to the generated ruleset and the examples it covers are removed.
     * No default rule is appended at the end.
     *
     * @param remained_data the dataset
     * @throws Exception if there are problems with the algorithm
     */
    public void executeAlgorithm(MyDataset remained_data) throws Exception {
        classification_rules = new Vector();

        while (remained_data.numItemsets() > 0) {
            //Get the subtree
            M5 subtree = new M5(remained_data, pruningFactor, unsmoothed, verbosity);
            System.out.println("The partial tree\n" + subtree);

            //Get the rules
            Vector rules = treeToRules(subtree.getTree());
            System.out.println("The rules");
            for (int i = 0; i < rules.size(); i++) {
                System.out.println(rules.elementAt(i));
            }
            if (rules.isEmpty()) {
                throw new IllegalStateException("No rule could be extracted from the partial tree");
            }

            //Get the best rule
            Rule chosen = (Rule) rules.elementAt(chooseBestRule(rules, remained_data));

            //Remove the examples covered by the rule
            MyDataset[] division = remained_data.split(chosen);
            MyDataset uncovered = division[1]; //Uncovered examples

            //Without progress the same data would be processed again and again
            if (uncovered.numItemsets() >= remained_data.numItemsets()) {
                System.err.println("Warning: the selected rule covers no remaining example; "
                        + "stopping with " + remained_data.numItemsets() + " uncovered examples");
                break;
            }

            //Add the best rule
            classification_rules.add(chosen);
            remained_data = uncovered;
        }
    }

    /**
     * Launches the algorithm: learns the rules from the training set, predicts
     * the validation and test sets and writes the output files. The training
     * output file is filled from the validation dataset.
     *
     * @throws Exception if there are problems with the algorithm
     */
    public void execute() throws Exception {
        //We do here the algorithm's operations
        this.executeAlgorithm(train);

        //Predicts the datasets' entries according to the generated ruleset.
        //The result for the training set is not written anywhere; the call is
        //kept in case classify has effects beyond its return value.
        train.classify(classification_rules);
        double[] classification_val = val.classify(classification_rules);
        double[] classification_test = test.classify(classification_rules);

        //Finally we should fill the training and test output files
        doOutput(this.val, this.outputTr, classification_val);
        doOutput(this.test, this.outputTst, classification_test);
        doRulesOutput(this.outputRules, classification_rules);

        System.out.println("Algorithm Finished");
    }
}