/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. S�nchez (luciano@uniovi.es)
J. Alcal�-Fdez (jalcala@decsai.ugr.es)
S. Garc�a (sglopez@ujaen.es)
A. Fern�ndez (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Antonio Alejandro Tortosa (University of Granada) 15/10/2008
* @author Modified by Xavi Sol� (La Salle, Ram�n Llull University - Barcelona) 03/12/2008
* @version 1.4
* @since JDK1.2
* </p>
*/
package keel.Algorithms.Rule_Learning.PART;
import java.util.Vector;
import org.core.Fichero;
class PART {
/**
* <p>
* Class to implement the PART algorithm
* </p>
*/
//Inputs & Outputs
//the datasets for training, validation and test
MyDataset train, val, test;
//the names for the output files
String outputTr, outputTst, outputRules;
//General
//generated rules
private Vector classification_rules;
//name of the default class
private String default_class_name;
//Options
//Confidence level for prunning
private double CF;
//Minimum number of itemsets per leaf
private int minItemsets;
/********************************PRIVATE METHODS************************************/
/**
* Extract recursively the rules out of a tree
* @param node Tree the current node in cosideration
* @param base_rule Rule the rule that generates the father of the node
* @param link_to_father SimpleRule the simple rule that connects the node with its father
* @param type String the class label for this node (only if the node is a leaf)
* @return an array with all the rules extracted from the leafs of the subtree for wich the node is root
*/
private Vector convert(Tree node,Rule base_rule,SimpleRule link_to_father,String type){
//1.Producing of the rule linked to this node: node rule <- father rule + path rule
Rule node_rule=base_rule.getCopy();
if (link_to_father!=null)
node_rule.grow(link_to_father);
//2.Checking wether this node is a leaf
Vector output=new Vector();
if (node!=null && !node.isUnexplored){
if (node.isLeaf) {
//2.A If so, adding the node rule to the ruleset
node_rule.setType(type);
output.add(node_rule);
}
else {
//2.B Else, compacting the ruleset linked to the subtrees
int cut_attribute = node.nodeModel.attributeIndex();
for (int i = 0; i < node.getNChildren(); i++) {
SimpleRule link_child = new SimpleRule();
link_child.setAttribute(cut_attribute);
if (train.getAttribute(cut_attribute).isDiscret()) {
link_child.setValue(i);
link_child.setOperator(SimpleRule.EQUAL);
}
else {
link_child.setValue(node.nodeModel.getCutPoint());
if (i == 0)
link_child.setOperator(SimpleRule.LOWER);
else
link_child.setOperator(SimpleRule.GREATER);
}
String child_type = "";
if (node.getChild(i)!=null && !node.isUnexplored){
if ( node.getChild(i).isLeaf)
child_type = node.nodeModel.label(i, train);
Vector child_rules = convert(node.getChild(i), node_rule,
link_child,
child_type);
output.addAll(child_rules);
}
}
}
}
return output;
}
/**
* It generates the output file from a given dataset and stores it in a file.
* @param dataset myDataset input dataset
* @param filename String the name of the file
* @param classification String[] gererated classification of the dataset
*/
private void doOutput(MyDataset dataset, String filename,String[] classification) {
String output = new String("");
output = dataset.copyHeader(); //we insert the header in the output file
//We write the output for each example
for (int i = 0; i < dataset.size(); i++) {
String class_name=dataset.getAttribute(dataset.classIndex).value((int)dataset.itemset(i).getClassValue());
output += class_name + " " +classification[i] + "\n";
}
Fichero.escribeFichero(filename, output);
}
/**
* It generates the output rules file from a given ruleset and stores it in a file
* @param filename String the name of the file
* @param rulesets Rulesets[] the rulesets (one for each class)
*/
private void doRulesOutput(String filename,Ruleset[] rulesets) {
String output = new String("");
for (int i=0;i<rulesets.length-1;i++){
output+="if(";
for(int j=0;j<rulesets[i].size();j++){
Rule current=rulesets[i].getRule(j);
output+="(";
for (int k=0;k<current.size();k++){
output+=current.getSimpleRule(k);
if (k!=current.size()-1) output+=" && ";
}
output+=")";
if (j!=rulesets[i].size()-1) output+=" || ";
}
output+=")\n\t";
output+="output="+rulesets[i].getType()+"\nelse ";
}
output+="\n\toutput="+rulesets[rulesets.length-1].getType();
Fichero.escribeFichero(filename, output);
}
/**
* It generates the output rules file from a given ruleset and stores it in a file
* @param filename String the name of the file
* @param rules Vector the rules
*/
private void doRulesOutput(String filename,Vector rules) {
String output = new String("");
output += "@Number of Rules: "+rules.size()+"\n"; //NUEVO
for (int i=0;i<rules.size()-1;i++){
output+="if(";
Rule current=(Rule) rules.elementAt(i);
for (int k=0;k<current.size();k++){
output+=current.getSimpleRule(k);
if (k!=current.size()-1) output+=" && ";
}
Mask class_filter=new Mask(train.size());
train.filterByClass(class_filter,((Rule) rules.elementAt(i)).getType());
int covered=current.apply(train);
int accuracy=current.apply(train,class_filter);
output+=") ("+accuracy+"/"+covered+")\n\t";
output+="output="+((Rule) rules.elementAt(i)).getType()+"\nelse ";
}
output+="\n\toutput="+((Rule)rules.lastElement()).getType();
Fichero.escribeFichero(filename, output);
}
/************************************************************************************/
/**
* Constructor
* @param root Tree the C45 tree from wich the algorithm will extract the rules
* @param train MyDataset the training dataset from wich the tree has been extracted
* @param seed long seed for the random numbers generator
* @param CF double Confidence level for prunning
* @param treshold int maximum size of a ruleset for wich the Exhaustive Search is used
*/
/* public C45Rules(Tree root,MyDataset train,long seed,double CF,int treshold){
this.root=root;
this.data=train;
this.treshold=treshold;
Randomize.setSeed(seed);
classification_rules=null;
default_class_name=null;
SearchAlgorithm=GREEDY;
this.CF=CF;
}*/
/**
* Constructor for Simulated Annealig Option
* @param root Tree the C45 tree from wich the algorithm will extract the rules
* @param train MyDataset the training dataset from wich the tree has been extracted
* @param seed long seed for the random numbers generator
* @param treshold int maximum size of a ruleset for wich the Exhaustive Search is used
* @param CF double Confidence level for prunning
* @param Nmax int number of coolings
* @param max_trials int max number of neighbours
* @param mu double a param between [0,1] for establishing the initial temperature
* @param phi double a param between [0,1] for establishing the initial temperature
* @param alpha double a param between [0,1] that determine the speed of cooling
*/
/* public C45Rules(Tree root,MyDataset train,long seed,double CF,int treshold, int Nmax,int max_trials,double mu,double phi,double alpha){
this.root=root;
this.data=train;
this.treshold=treshold;
Randomize.setSeed(seed);
classification_rules=null;
default_class_name=null;
SearchAlgorithm=SA;
this.CF=CF;
}*/
/**
* Constructor
* @param parameters parseParameters the algorithm's parameters
*/
public PART(parseParameters parameters){
//Files
String trainFileName=parameters.getTrainingInputFile();
String valFileName=parameters.getValidationInputFile();
String testFileName=parameters.getTestInputFile();
System.out.println(trainFileName);
System.out.println(valFileName);
System.out.println(testFileName);
train = new MyDataset(trainFileName,true);
val = new MyDataset(valFileName,false);
test = new MyDataset(testFileName,false);
outputTr = parameters.getTrainingOutputFile();
outputTst = parameters.getTestOutputFile();
outputRules = parameters.getOutputFile(0);
//Options
CF=Double.parseDouble(parameters.getParameter(0)); //confidence level for the uniform distribution
minItemsets = Integer.parseInt(parameters.getParameter(1)); //itemset per Leaf
if (CF < 0 || CF > 1) {
CF = 0.25F;
System.err.println("Error: confidence must be in the interval [0,1]");
System.err.println("Using default value: 0.25");
}
if (minItemsets <= 0) {
minItemsets = 2;
System.err.println("Error: itemsetPerLeaf must be greater than 0");
System.err.println("Using default value: 2");
}
classification_rules=null;
default_class_name=null;
}
/**
* It coverts a given C4.5 tree into an array of rules.
* @param tree the C45 tree
* @return an array of rules
*/
public Vector treeToRules(Tree tree){
Vector output=null;
if (!tree.isLeaf){
output = convert(tree, new Rule(),null,"");
}
else
output = new Vector();
return output;
}
/**
* Removes the duplicated rules from a vector of rules
* @param rules Vector vector of rules
*/
public void removeDuplicates(Vector rules){
for (int i=0;i<rules.size();i++){
Rule current = (Rule) rules.elementAt(i);
if (current.size()!=0){
for (int j = i + 1; j < rules.size(); j++) {
if (current.isEqual( (Rule) rules.elementAt(j))) {
rules.remove(j);
j--;
}
}
}
else{
rules.remove(i);
i--;
}
}
}
/**
* Runs the algorithm
* @param remained_data the dataset
* @throws Exception if there are problems with the algorithm
*/
public void executeAlgorithm(MyDataset remained_data) throws Exception{
classification_rules=new Vector();
boolean end=false;
while (remained_data.numItemsets()>2*minItemsets+1 && !end){
C45 subtree=null;
//Get the subtree
subtree = new C45(remained_data, true, (float) CF, minItemsets);
System.out.println("The partial tree\n"+subtree);
//Get the rules
Vector rules=treeToRules(subtree.getTree());
System.out.println("The rules");
for(int i=0;i<rules.size();i++)
System.out.println((Rule)rules.elementAt(i));
//Get the best rule (coverage heuristic)
int best_rule=-1;
int best_value=-1;
for(int i=0;i<rules.size();i++){
int curr_value=((Rule) rules.elementAt(i)).apply(remained_data);
if (curr_value>best_value){
best_rule=i;
best_value=curr_value;
}
}
//Add the best rule
//******************************************************
//Julian - instead of testing if best_rule is not -1,
//we test if it is greater than 0, since a 0 value will implies
//no example is covered by the best rule, and then the C4.5 tree
//is either capable of build a tree which covers any example.
//The original condition if (best_rule!=-1){ will produce infinite loop
//if this scenary occurs, since no remaining example is covered, we allways
//have the same set of remaining examples, for which C4.5 produces the same tree
//which is not capable of cover any example :(
//******************************************************
if (best_rule>0){
classification_rules.add( (Rule) rules.elementAt(best_rule));
//Remove the exemples covered by the rule
MyDataset[] division = remained_data.split( (Rule) rules.elementAt(
best_rule));
remained_data = division[1]; //Uncovered exemples
}
else
end=true;
System.out.println("The Final rules");
for(int i=0;i<classification_rules.size();i++)
System.out.println((Rule)classification_rules.elementAt(i));
}
//Choosing a default rule
default_class_name=remained_data.getMostFrequentClass();
Rule default_rule=new Rule();
default_rule.setType(default_class_name);
classification_rules.add(default_rule);
}
/**
* It launches the algorithm.
* @throws Exception if there are problems with the algorithm
*/
public void execute() throws Exception{
//We do here the algorithm's operations
this.executeAlgorithm(train);
//Classificates the datasets' entries, according the generated rulesets
String[] classification_train=train.classify(classification_rules);
String[] classification_val=val.classify(classification_rules);
String[] classification_test=test.classify(classification_rules);
//Finally we should fill the training and test output files
doOutput(this.val, this.outputTr, classification_val);
doOutput(this.test, this.outputTst, classification_test);
doRulesOutput(this.outputRules,classification_rules);
System.out.println("Algorithm Finished");
}
}