/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * <p> * @author Written by Antonio Alejandro Tortosa (University of Granada) 01/07/2008 * @author Modified by Xavi Sol� (La Salle, Ram�n Llull University - Barcelona) 16/12/2008 * @version 1.1 * @since JDK1.2 * </p> */ package keel.Algorithms.Rule_Learning.C45RulesSA; import java.util.Vector; import org.core.*; class C45RulesSA { /** * <p> * Class to implement the C4.5Rules (Simulated Anneling version) algorithm * </p> */ //Inputs & Outputs //the datasets for training, validation and test MyDataset train, val, test; //the names for the output files String outputTr, outputTst, outputRules; //General //C4.5 tree private Tree root; // private MyDataset data; //generated rules private Ruleset[] classification_rules; //name of the default class private String default_class_name; //Class filters //filter of each class Mask[] class_filter; //inverse filter of each class Mask[] inverse_class_filter; //Options private static int GREEDY=0; private static int SA=1; //Algorithm for searching: greedy or simulated anneling private int SearchAlgorithm; //Maximum size of a ruleset for wich the Exhaustive Search is used private int treshold; //Confidence level for prunning private double CF; //Simulated Anneling Options //Number of coldings private int Nmax=10; //Number of neighbours per temperature level private int max_trials=5; //used in the establishing of the initial temperature private double mu=0.5; //used in the establishing of the initial temperature private double phi=0.5; //colding speed private double alpha=0.5; Randomize rand; /********************************PRIVATE METHODS************************************/ /** * Returns a vector of random generated integers in a given (closed) interval * @param n int number of numbers to generate * @param low int lowest number of the interval (include) * @param high int highest number of the interval (include) * @return a vector of n random generated integers between low and high (both include) */ private Vector getRandomNumbers(int n,int low,int high){ Vector random=new Vector(); int[] numbers=new int[high-low+1]; for (int i=low;i<=high;i++) numbers[i-low]=i; int remained=high-low+1; for (int i=0;i<n;i++){ int new_number=Randomize.Randint(1,remained); random.add(new Integer(numbers[new_number])); numbers[new_number]=numbers[remained-1]; remained--; } return random; } /** * Extract recursively the rules out of a tree * @param node Tree the current node in cosideration * @param base_rule Rule the rule that generates the father of the node * @param link_to_father SimpleRule the simple rule that connects the node with its father * @param type String the class label for this node (only if the node is a leaf) * @return an array with all the rules extracted from the leafs of the subtree for wich the node is root */ private Vector convert(Tree node,Rule base_rule,SimpleRule link_to_father,String type){ //1.Producing of the rule linked to this node: node rule <- father rule + path rule Rule node_rule=base_rule.getCopy(); if (link_to_father!=null) node_rule.grow(link_to_father); //2.Checking wether this node is a leaf Vector output=new Vector(); if (node.isLeaf){ //2.A If so, adding the node rule to the ruleset node_rule.setType(type); output.add(node_rule); } else{ //2.B Else, compacting the ruleset linked to the subtrees int cut_attribute=node.nodeModel.attributeIndex(); int class_index=train.getClassIndex(); if (cut_attribute>class_index){ cut_attribute++; } for (int i=0;i<node.getNChildren();i++){ SimpleRule link_child=new SimpleRule(); link_child.setAttribute(cut_attribute); if (train.getAttribute(cut_attribute).isDiscret()){ link_child.setValue(i); link_child.setOperator(SimpleRule.EQUAL); } else{ link_child.setValue(node.nodeModel.getCutPoint()); if (i==0) link_child.setOperator(SimpleRule.LOWER); else link_child.setOperator(SimpleRule.GREATER); } String child_type=""; if (node.getChild(i).isLeaf) child_type=node.nodeModel.label(i,train); Vector child_rules=convert(node.getChild(i),node_rule,link_child,child_type); output.addAll(child_rules); } } return output; } /** * Returns the masks generated by each rule from a ruleset * @param rules Ruleset the ruleset * @return the masks generated by each rule from a ruleset */ private Mask[] getAllMasks(Ruleset rules){ Mask[] output=new Mask[rules.size()]; for (int i=0;i<rules.size();i++){ output[i]=new Mask(train.size()); train.filter(output[i],(Rule) rules.getRule(i)); } return output; } /** * Makes recursively an exhaustive search of all posible subsets of a given ruleset * and returns the numbers of the rules that makes the one with the lowest MDL. * Each call to the method takes into account a combination (card) of rules form by the combination * of rules of the method that call it plus a new rule that it is not yet in the combination. * So, new card=base card + next rule * @param pool Ruleset All the available rules * @param all_masks Mask[] The mask of each one of the pool's rules * @param next_rule int the number of the next rule to considerate in this method * @param base_card int[] the number of the rules that has been considerated in the previous method * @param base_card_length int the length of the base card * @param base_card_theory_cost double The theory cost (for the DL) of the rules from the base card. * @param base_mask Mask Mask generated by all the rules in the base card * @param class_value int the target class * @return the numbers of the rules that makes the ruleset with the lowest MDL. */ private Report allCombinations(Ruleset pool,Mask[] all_masks,int next_rule, int[] base_card,int base_card_length,double base_card_theory_cost, Mask base_mask, int class_value){ Mask class_mask=class_filter[class_value], inverse_class_mask=inverse_class_filter[class_value]; //it generates the "card" for this node with the base card information and the next rule int[] new_card=new int[base_card.length]; int new_card_length=base_card_length+1; for (int i=0;i<base_card_length;i++) new_card[i]=base_card[i]; new_card[base_card_length]=next_rule; //it filters the data with the new rule Mask new_mask=base_mask.or(all_masks[next_rule]); //now it generates the new stats int tp=new_mask.and(class_mask).getnActive(); //true positives int fp=new_mask.and(inverse_class_mask).getnActive(); //false positives int fn=class_mask.getnActive()-tp; //false negatives int tn=inverse_class_mask.getnActive()-fp; //true negatives double new_card_theory_cost=base_card_theory_cost+pool.getRule(next_rule).theoryDL(train); double new_card_value=new_card_theory_cost+Rule.getExceptionCost(train,tp,tn,fp,fn); //It initialize the best_report, that will store the best report so far Report best_report=new Report(new_card,new_card_length,new_card_value); for (int i=next_rule+1;i<pool.size();i++){ Report current=allCombinations(pool,all_masks,i,new_card,new_card_length,new_card_theory_cost,new_mask,class_value); if(current.getValue()<best_report.getValue()) best_report=current; } return best_report; } /** * It generates the output file from a given dataset and stores it in a file. * @param dataset myDataset input dataset * @param filename String the name of the file * @param classification String[] gererated classification of the dataset */ private void doOutput(MyDataset dataset, String filename,String[] classification) { String output = new String(""); output = dataset.copyHeader(); //we insert the header in the output file //We write the output for each example for (int i = 0; i < dataset.size(); i++) { String class_name=dataset.getClassAttribute().value((int)dataset.itemset(i).getClassValue()); output += class_name + " " +classification[i] + "\n"; } Fichero.escribeFichero(filename, output); } /** * It generates the output rules file from a given ruleset and stores it in a file * @param filename String the name of the file * @param rulesets Rulesets[] the rulesets (one for each class) */ private void doRulesOutput(String filename,Ruleset[] rulesets) { String output = new String(""); for (int i=0;i<rulesets.length-1;i++){ output+="if("; for(int j=0;j<rulesets[i].size();j++){ Rule current=rulesets[i].getRule(j); output+="("; for (int k=0;k<current.size();k++){ output+=current.getSimpleRule(k); if (k!=current.size()-1) output+=" && "; } output+=")"; if (j!=rulesets[i].size()-1) output+=" || "; } output+=")\n\t"; output+="output="+rulesets[i].getType()+"\nelse "; } output+="\n\toutput="+rulesets[rulesets.length-1].getType(); Fichero.escribeFichero(filename, output); } /** * It generates the output rules file from a given ruleset and stores it in a file * @param filename String the name of the file * @param rulesets Rulesets[] the rulesets (one for each class) */ private void doRulesOutput2(String filename,Ruleset[] rulesets) { String output = new String(""); for (int i=0;i<rulesets.length-1;i++){ for(int j=0;j<rulesets[i].size();j++){ output+="if("; Rule current=rulesets[i].getRule(j); for (int k=0;k<current.size();k++){ output+=current.getSimpleRule(k); if (k!=current.size()-1) output+=" && "; } int class_id=train.getClassAttribute().valueIndex(rulesets[i].getType()); int covered=current.apply(train); int accuracy=current.apply(train,class_filter[class_id]); output+=") ("+accuracy+"/"+covered+")\n\t"; output+="output="+rulesets[i].getType()+"\nelse "; } } output+="\n\toutput="+rulesets[rulesets.length-1].getType(); Fichero.escribeFichero(filename, output); } /************************************************************************************/ /** * Constructor for Simulated Annealig Option * @param root Tree the C45 tree from wich the algorithm will extract the rules * @param parameters parseParameters the algorithm's parameters */ public C45RulesSA(Tree root, parseParameters parameters){ this.root=root; //Files String trainFileName=parameters.getTrainingInputFile(); String valFileName=parameters.getValidationInputFile(); String testFileName=parameters.getTestInputFile(); train = new MyDataset(trainFileName,false); val = new MyDataset(valFileName,false); test = new MyDataset(testFileName,false); outputTr = parameters.getTrainingOutputFile(); outputTst = parameters.getTestOutputFile(); outputRules = parameters.getOutputFile(0); long seed = Long.parseLong(parameters.getParameter(0)); //rand=new Randomize(); Randomize.setSeed(seed); //Options treshold=Integer.parseInt(parameters.getParameter(3)); //Maximum size of a ruleset for wich the Exhaustive Search is used CF=Double.parseDouble(parameters.getParameter(1)); //confidence level for the uniform distribution if (CF < 0 || CF > 1) { CF = 0.25F; System.err.println("Error: confidence must be in the interval [0,1]"); System.err.println("Using default value: 0.25"); } if (treshold <= 0) { treshold = 10; System.err.println("Error: treshold must be greater than 0"); System.err.println("Using default value: 10"); } SearchAlgorithm=SA; /*********Simulated Annealing Parameters**********/ Nmax=Integer.parseInt(parameters.getParameter(4)); // max_trials=Integer.parseInt(parameters.getParameter(5)); // mu=Double.parseDouble(parameters.getParameter(6)); phi=Double.parseDouble(parameters.getParameter(7)); alpha=Double.parseDouble(parameters.getParameter(8)); if (Nmax<=0){ Nmax=10; System.err.println("Error: nColdings must be greater than 0"); System.err.println("Using default value: 10"); } if (max_trials<=0){ max_trials=10; System.err.println("Error: maxTrialsPerTemperature must be greater than 0"); System.err.println("Using default value: 10"); } if (mu<0 || mu>1){ mu=0.5; System.err.println("Error: mu must be in the interval [0,1]"); System.err.println("Using default value: 0.5"); } if (phi<0 || phi>1){ phi=0.5; System.err.println("Error: phi must be in the interval [0,1]"); System.err.println("Using default value: 0.5"); } if (alpha<0 || alpha>1){ alpha=0.5; System.err.println("Error: alpha must be in the interval [0,1]"); System.err.println("Using default value: 0.5"); } classification_rules=null; default_class_name=null; } /** * It coverts a given C4.5 tree into an array of rules. * @return an array of rules */ public Vector treeToRules(){ Vector output=null; if (!root.isLeaf){ output = convert(root, new Rule(),null,""); } return output; } public void pruneRule(Rule rule){ int class_value=train.getClassAttribute().valueIndex(rule.getType()); Mask class_mask=class_filter[class_value]; Mask inverse_class_mask=inverse_class_filter[class_value]; int tp=rule.apply(train,class_mask); int fp=rule.apply(train,inverse_class_mask); double current_U,next_U=(fp+Extra.AddErrs(tp+fp,fp,CF))/(tp+fp); int to_prune=-1; boolean seguir_podando; do{ current_U=next_U; seguir_podando=false; for (int i=0;i<rule.size();i++){ int tp_i = rule.apply(train, class_mask, i); int fp_i = rule.apply(train, inverse_class_mask, i); double U_i = (fp_i + Extra.AddErrs(tp_i+fp_i, fp_i, CF)) / (tp_i + fp_i); if (U_i<=next_U){ to_prune=i; next_U=U_i; seguir_podando=true; } } if (seguir_podando){ rule.prune(to_prune); } }while(seguir_podando && rule.size()>0); } /** * Removes the duplicated rules from a vector of rules * @param rules Vector vector of rules */ public void removeDuplicates(Vector rules){ for (int i=0;i<rules.size();i++){ Rule current = (Rule) rules.elementAt(i); if (current.size()!=0){ for (int j = i + 1; j < rules.size(); j++) { if (current.isEqual( (Rule) rules.elementAt(j))) { rules.remove(j); j--; } } } else{ rules.remove(i); i--; } } } /** * Takes an array of rules and makes sets of rules according to the right side * @param rules an array of rules * @return an array of data.numClasses rulesets, the rules of each one have the same right side */ public Ruleset[] classifyRules(Vector rules){ Ruleset[] groups=new Ruleset[train.numClasses()]; //it assigns a class to each ruleset for (int i=0;i<groups.length;i++){ groups[i]=new Ruleset(); groups[i].setType(train.getAttribute(train.getClassIndex()).value(i)); } //it assign a ruleset for each rule, according to its right side while (rules.size()>0){ String class_name=((Rule) rules.elementAt(0)).getType(); int class_index=train.getAttribute(train.getClassIndex()).valueIndex(class_name); groups[class_index].addRule((Rule) rules.elementAt(0)); rules.remove(0); } return groups; } /** * Makes an exhaustive search of all posible subsets of a given ruleset * and returns the one with the lowest MDL. * @param rules Ruleset the ruleset * @return returns the subset of rules with the lowest MDL. */ public Ruleset exhaustiveSearch(Ruleset rules){ Mask[] all_masks=getAllMasks(rules); //it contains the masks of each rule int class_value=train.getClassAttribute().valueIndex(rules.getType()); Mask blank=new Mask(train.size(),false); //a mask with all the exemples off //Initial call to the recursive method Report best_report=allCombinations(rules,all_masks,0,new int[rules.size()],0,0.0,blank,class_value); for (int i=1;i<rules.size();i++){ Report current=allCombinations(rules,all_masks,i,new int[rules.size()],0,0.0,blank,class_value); if(current.getValue()<best_report.getValue()) best_report=current; } //Construction of the final ruleset with the selected rules Ruleset selected_rules=new Ruleset(); selected_rules.setType(rules.getType()); for (int i=0;i<best_report.length();i++){ selected_rules.addRule(rules.getRule(best_report.get(i))); } return selected_rules; } /** * Makes an greedy search to find the best subsets of a given ruleset * @param rules Ruleset the ruleset * @return the best found subset */ public Ruleset greedySearch(Ruleset rules){ Mask[] all_masks=getAllMasks(rules); //it contains the masks of each rule int class_value=train.getClassAttribute().valueIndex(rules.getType()); Mask class_mask=class_filter[class_value]; Mask inverse_class_mask=inverse_class_filter[class_value]; Report best_report=null; for (double pct=0.1;pct<=1.0;pct+=0.1){ //************1.Construction of the base combination************// int base_length=(int)Math.ceil(pct*rules.size()); int[] base_card=new int[base_length]; IncrementalMask base_mask=new IncrementalMask(train.size()); int[] exclude_rules=new int[rules.size()]; for (int i=0;i<rules.size();i++) exclude_rules[i]=i; int remained=rules.size(); //generation of the base card itself double theory_cost=0.0; for (int i=0;i<base_length;i++){ int new_number=Randomize.Randint(0,remained); base_card[i]=exclude_rules[new_number]; base_mask=base_mask.plus(all_masks[base_card[i]]); exclude_rules[new_number]=exclude_rules[remained-1]; remained--; theory_cost+=rules.getRule(base_card[i]).theoryDL(train); } //now it generates the stats for the base combination int tp=base_mask.and(class_mask).getnActive(); //true positives int fp=base_mask.and(inverse_class_mask).getnActive(); //false positives int fn=class_mask.getnActive()-tp; //false negatives int tn=inverse_class_mask.getnActive()-fp; //true negatives double base_card_value=theory_cost+Rule.getExceptionCost(train,tp,tn,fp,fn); //************2.Evaluation of the neighbourhood: first, deleting rules************// Report base_report=new Report(base_card,base_length,base_card_value); if (best_report==null || base_report.getValue()<best_report.getValue()) best_report=base_report; for (int i=0;i<base_report.length();i++){ int rule_index=base_report.get(i); IncrementalMask without_rulei=base_mask.minus(all_masks[rule_index]); //now it generates the stats for the combination without the rule i tp=without_rulei.and(class_mask).getnActive(); //false positives fp=without_rulei.and(inverse_class_mask).getnActive(); //true positives fn=class_mask.getnActive()-tp; //false negatives tn=inverse_class_mask.getnActive()-fp; //true negatives double theory_cost_without_i=theory_cost-rules.getRule(rule_index).theoryDL(train); double without_rulei_value=theory_cost_without_i+Rule.getExceptionCost(train,tp,tn,fp,fn); if (without_rulei_value<best_report.getValue()){ int[] new_card=new int[base_report.length()-1]; for (int j=0;j<i;j++) new_card[j]=base_report.get(j); for (int j=i+1;j<base_report.length();j++) new_card[j-1]=base_report.get(j); best_report=new Report(new_card,base_report.length()-1,without_rulei_value); } } //************3.Evaluation of the neighbourhood: now, adding rules************// for (int i=0;i<remained;i++){ int rule_index=exclude_rules[i]; IncrementalMask with_rulei=base_mask.plus(all_masks[rule_index]); //now it generates the stats for the combination without the rule i tp=with_rulei.and(class_mask).getnActive(); //true positives fp=with_rulei.and(inverse_class_mask).getnActive(); //false positives fn=class_mask.getnActive()-tp; //false negatives tn=inverse_class_mask.getnActive()-fp; //true negatives double theory_cost_with_i=theory_cost+rules.getRule(rule_index).theoryDL(train); double with_rulei_value=theory_cost_with_i+Rule.getExceptionCost(train,tp,tn,fp,fn); if (with_rulei_value<best_report.getValue()){ int[] new_card=new int[base_report.length()+1]; for (int j=0;j<base_length;j++) new_card[j]=base_report.get(j); new_card[base_report.length()]=rule_index; best_report=new Report(new_card,base_report.length()+1,with_rulei_value); } } } //Construction of the final ruleset with the selected rules Ruleset selected_rules=new Ruleset(); selected_rules.setType(rules.getType()); for (int i=0;i<best_report.length();i++){ selected_rules.addRule(rules.getRule(best_report.get(i))); } return selected_rules; } /** * Makes a Simulated Annealing search to find the best subsets of a given ruleset. * @param rules Ruleset the ruleset * @param Nmax int number of coolings * @param max_trials int max number of neighbours * @param mu double a param between [0,1] for establishing the initial temperature * @param phi double a param between [0,1] for establishing the initial temperature * @param alpha double a param between [0,1] that determine the speed of cooling * @return the best found subset */ public Ruleset simulatedAnnealing(Ruleset rules, int Nmax,int max_trials,double mu,double phi,double alpha){ Mask[] all_masks=getAllMasks(rules); //it contains the masks of each rule int class_value=train.getClassAttribute().valueIndex(rules.getType()); Mask class_mask=class_filter[class_value]; Mask inverse_class_mask=inverse_class_filter[class_value]; //**************1.Generation of the initial combination****************************// int initial_length=Randomize.Randint(0,rules.size()); int[] initial_card=new int[initial_length]; IncrementalMask initial_mask=new IncrementalMask(train.size()); // all_rules -> {exclude_rules|include rules} // |<-remained-->| int[] all_rules=new int[rules.size()]; for (int i=0;i<rules.size();i++) all_rules[i]=i; int remained=rules.size(); //generation of the initial card itself double theory_cost=0.0; for (int i=0;i<initial_length;i++){ int new_number=Randomize.Randint(0,remained); initial_card[i]=all_rules[new_number]; initial_mask=initial_mask.plus(all_masks[initial_card[i]]); //The selected number goes to the "include rules section" int aux=all_rules[new_number]; all_rules[new_number]=all_rules[remained-1]; all_rules[remained-1]=aux; remained--; theory_cost+=rules.getRule(initial_card[i]).theoryDL(train); } //now it generates the stats for the initial combination int tp=initial_mask.and(class_mask).getnActive(); //true positives int fp=initial_mask.and(inverse_class_mask).getnActive(); //false positives int fn=class_mask.getnActive()-tp; //false negatives int tn=inverse_class_mask.getnActive()-fp; //true negatives double initial_card_value=theory_cost+Rule.getExceptionCost(train,tp,tn,fp,fn); //**************2.Main Loop****************************************************// Report best_report=new Report(initial_card,initial_length,initial_card_value); IncrementalMask current_mask=initial_mask; double current_value=initial_card_value; double t=(mu-Math.log(phi))*initial_card_value; //Initial temperature boolean success=true; int max_succeses=(int) 0.1*max_trials; for (int iter=0;iter<Nmax && success;iter++){ int nsuccesses=0; for (int trial=0;trial<max_trials && nsuccesses<max_succeses;trial++){ //*****Candidate generation********// int next=Randomize.Randint(0,rules.size()); int rule_index=all_rules[next]; IncrementalMask next_mask=null; double new_theory_cost=theory_cost; if(next<remained){ //next belongs to the "exclude rules section" so we include it next_mask=current_mask.plus(all_masks[rule_index]); new_theory_cost+=rules.getRule(rule_index).theoryDL(train); } else{ //next belongs to the "include rules section" so we exclude it next_mask=current_mask.minus(all_masks[rule_index]); new_theory_cost-=rules.getRule(rule_index).theoryDL(train); } //now it generates the stats for the candidate tp=next_mask.and(class_mask).getnActive(); //false positives fp=next_mask.and(inverse_class_mask).getnActive(); //true positives fn=class_mask.getnActive()-tp; //false negatives tn=inverse_class_mask.getnActive()-fp; //true negatives double next_value=new_theory_cost+Rule.getExceptionCost(train,tp,tn,fp,fn); //********Admission**************// double delta=next_value-current_value; double rand=Randomize.Rand(); if(next_value<current_value || rand<Math.exp(-delta/t)){ //current<-next if (next<remained){ //adding rule int aux = all_rules[next]; all_rules[next] = all_rules[remained - 1]; all_rules[remained - 1] = aux; remained--; current_mask=current_mask.plus(all_masks[rule_index]); } else{ //removing rule int aux = all_rules[next]; all_rules[next] = all_rules[remained]; all_rules[remained] = aux; remained++; current_mask=current_mask.minus(all_masks[rule_index]); } if (next_value<current_value){ nsuccesses++; success=true; } current_value=next_value; theory_cost=new_theory_cost; //current<best?=>best<-current if (current_value<best_report.getValue()){ int [] new_best_card=new int[rules.size()-remained]; for(int i=0;i<rules.size()-remained;i++){ new_best_card[i]=all_rules[remained+i]; } best_report=new Report(new_best_card,rules.size()-remained,current_value); } } } t=alpha*t; } //Construction of the final ruleset with the selected rules Ruleset selected_rules=new Ruleset(); selected_rules.setType(rules.getType()); for (int i=0;i<best_report.length();i++){ selected_rules.addRule(rules.getRule(best_report.get(i))); } return selected_rules; } /** * Sorts the rulesets according to the false positive value of each one, * and selects the default class. * @param rulesets Ruleset[] the rulesets to sort * @param all_ruleset_masks Masks[] the masks with the covered exemple of each ruleset * @return the default class */ public String sortingRulesets(Ruleset[] rulesets,Mask[] all_ruleset_masks){ //Sorting Mask filter=new Mask(train.size()); for (int i=0;i<train.numClasses()-1;i++){ int best_candidate=-1;int best_fp=train.size()+1; for (int j = i; j < train.numClasses(); j++) { int class_value = train.getClassAttribute().valueIndex(rulesets[j].getType()); Mask candidate_mask=all_ruleset_masks[class_value].and(filter); int candidate_fp=candidate_mask.and(inverse_class_filter[class_value]).getnActive(); if (candidate_fp<best_fp){ best_candidate=j; best_fp=candidate_fp; } } //Swap the best with the i-th position Ruleset aux=rulesets[i]; rulesets[i]=rulesets[best_candidate]; rulesets[best_candidate]=aux; int class_value =train.getClassAttribute().valueIndex(rulesets[i].getType()); filter=filter.and(all_ruleset_masks[class_value]).complement(); } //Substracting the last ruleset int class_value = train.getClassAttribute().valueIndex(rulesets[train.numClasses()-1].getType()); filter=filter.and(all_ruleset_masks[class_value]).complement(); //Selecting the default class int[] remained_class_frequency=train.getClassFequency(filter); int[] class_frequency=train.getClassFequency(); int higher_rel_freq=-1;int higher_freq=-1;int default_class=-1; for (int i=0;i<train.numClasses();i++){ if(remained_class_frequency[i]>higher_rel_freq){ higher_rel_freq=remained_class_frequency[i]; higher_freq=class_frequency[i]; default_class=i; } else if (remained_class_frequency[i]==higher_freq && class_frequency[i]>higher_freq){ higher_rel_freq=remained_class_frequency[i]; higher_freq=class_frequency[i]; default_class=i; } } return train.getClassAttribute().value(default_class); } /** * Runs the algorithm */ public void executeAlgorithm(){ //Phase Zero: Constructing the class filters class_filter=new Mask[train.numClasses()]; inverse_class_filter=new Mask[train.numClasses()]; for (int i=0;i<train.numClasses();i++){ class_filter[i]=new Mask(train.size()); String class_name=train.getClassAttribute().value(i); train.filterByClass(class_filter[i],class_name); inverse_class_filter[i]=class_filter[i].complement(); } System.out.println("1.Original Rules:"); //Phase One: Tree to Rules Vector rules=treeToRules(); for (int i=0;i<rules.size();i++) System.out.println((Rule) rules.elementAt(i)); System.out.println("2.Pruned Rules:"); //Phase Two: Prune rules for (int i=0;i<rules.size();i++){ pruneRule( (Rule) rules.elementAt(i)); } for (int i=0;i<rules.size();i++) System.out.println((Rule) rules.elementAt(i)); //Phase Three: Removing duplicates removeDuplicates(rules); System.out.println("3.Rules without duplicates:"); for (int i=0;i<rules.size();i++) System.out.println((Rule) rules.elementAt(i)); //Phase Four: Each rule in its right ruleset Ruleset[] rulesets=classifyRules(rules); System.out.println("4.Classified Rules:"); for (int i=0;i<train.numClasses();i++){ System.out.println("Ruleset "+rulesets[i].getType()+":"); for (int j=0;j<rulesets[i].size();j++) System.out.println(rulesets[i].getRule(j)+"->t:"+rulesets[i].getRule(j).theoryDL(train)); } //Phase Five: Getting the final subsets of rules Ruleset[] final_rulesets=new Ruleset[train.numClasses()+1]; for (int i=0;i<train.numClasses();i++){ if (rulesets[i].size()>0){ if (rulesets[i].size() < treshold) //Exhaustive Search final_rulesets[i] = exhaustiveSearch(rulesets[i]); else if (SearchAlgorithm == GREEDY) //Greedy Search final_rulesets[i] = greedySearch(rulesets[i]); else //Simulated Anneling Search final_rulesets[i] = simulatedAnnealing(rulesets[i], Nmax, max_trials,mu, phi, alpha); } else final_rulesets[i]=rulesets[i]; } System.out.println("5.Remaining Rules:"); for (int i=0;i<train.numClasses();i++){ if (final_rulesets[i]!=null){ System.out.println("Ruleset " + final_rulesets[i].getType() + ":"); for (int j = 0; j < final_rulesets[i].size(); j++) System.out.println(final_rulesets[i].getRule(j)); } } //Phase Six: Sorting the rulesets //Mask[][] all_masks=new Mask[rulesets.length][]; Mask[] all_ruleset_masks=new Mask[train.numClasses()]; for (int i=0;i<train.numClasses();i++){ int class_value = train.getClassAttribute().valueIndex(final_rulesets[i].getType()); Mask[] ruleset_mask=getAllMasks(final_rulesets[i]); all_ruleset_masks[class_value]=new Mask(train.size(),false); all_ruleset_masks[class_value]=all_ruleset_masks[class_value].or(ruleset_mask); } this.default_class_name=sortingRulesets(final_rulesets,all_ruleset_masks); System.out.println("6.Sorted Rules:"); for (int i=0;i<train.numClasses();i++){ System.out.println(i+"- Ruleset: "+final_rulesets[i].getType()); for (int j=0;j<final_rulesets[i].size();j++) System.out.println(final_rulesets[i].getRule(j)); } System.out.println("Clase por defecto: "+default_class_name); //Phase Seven: Polishing for (int i=0;i<train.numClasses();i++){ int class_value=train.getClassAttribute().valueIndex(final_rulesets[i].getType()); final_rulesets[i].pulish(train,class_filter[class_value],inverse_class_filter[class_value]); } System.out.println("7.Polish:"); for (int i=0;i<train.numClasses();i++){ System.out.println(i+"- Ruleset: "+final_rulesets[i].getType()); for (int j=0;j<final_rulesets[i].size();j++) System.out.println(final_rulesets[i].getRule(j)); } System.out.println("Clase por defecto: "+default_class_name); classification_rules=final_rulesets; Ruleset dflt=new Ruleset(); dflt.setType(default_class_name); classification_rules[train.numClasses()]=dflt; } /** * It launches the algorithm. */ public void execute() { //We do here the algorithm's operations this.executeAlgorithm(); //Classificates the datasets' entries, according the generated rulesets String[] classification_train=train.classify(classification_rules,classification_rules.length); String[] classification_val=val.classify(classification_rules,classification_rules.length); String[] classification_test=test.classify(classification_rules,classification_rules.length); //Finally we should fill the training and test output files doOutput(this.val, this.outputTr, classification_val); doOutput(this.test, this.outputTst, classification_test); doRulesOutput2(this.outputRules,classification_rules); System.out.println("Algorithm Finished"); } }