/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Antonio Alejandro Tortosa (University of Granada) 15/10/2008
* @author Modified by Xavi Solé (La Salle, Ramón Llull University - Barcelona) 03/12/2008
* @version 1.1
* @since JDK1.2
* </p>
*/
package keel.Algorithms.Rule_Learning.PART;
import java.util.Vector;
/**
 * <p>
 * Representation of a disjunction of rules with a common consequent.
 * It may be represented as: <b>if (rule1 || rule2) then output=consequent</b>
 * </p>
 */
public class Ruleset {

    // Set of rules; every element is a Rule.
    private Vector rules;
    // Class label (consequent) shared by all the rules; stays null until setType is called.
    private String type;

    /**
     * Constructs an empty ruleset. The consequent is left unset.
     */
    public Ruleset() {
        rules = new Vector();
    }

    /**
     * Adds a new rule at the end of the ruleset.
     * @param r Rule the new rule
     */
    public void addRule(Rule r) {
        rules.add(r);
    }

    /**
     * Returns the number of true positives, true negatives, false positives and false negatives
     * of the whole ruleset in a given dataset.
     * This method takes into account the right part (consequent) of the rules: the positive
     * instances are those selected by <code>data.filterByClass</code> for this ruleset's type,
     * and the negative instances are the complement of that mask.
     * @param data MyDataset the dataset
     * @return a Stats object whose fields tp, tn, fp and fn hold the four counts
     */
    public Stats apply(MyDataset data) {
        // Split the instances into positives and negatives according to the consequent.
        Mask positives = new Mask(data.size());
        data.filterByClass(positives, type);
        Mask negatives = positives.complement();
        // Both masks are local, so they can be consumed directly.
        return countOutcomes(data, positives, negatives);
    }

    /**
     * Returns the number of true positives, true negatives, false positives and false negatives
     * of the whole ruleset in a given dataset.
     * (This method doesn't take into account the right part (consequent) of the rules.)
     * The given masks are not modified: the counting is done on copies.
     * @param data MyDataset the dataset
     * @param positives active positive instances of data
     * @param negatives active negative instances of data
     * @return a Stats object whose fields tp, tn, fp and fn hold the four counts
     */
    public Stats apply(MyDataset data, Mask positives, Mask negatives) {
        return countOutcomes(data, positives.copy(), negatives.copy());
    }

    /**
     * Counts the outcomes of the ruleset over the given masks.
     * The instances covered by each rule are subtracted from both masks, so the masks
     * passed in are modified; callers must hand over masks they own.
     * @param data MyDataset the dataset
     * @param positives Mask active positive instances (modified)
     * @param negatives Mask active negative instances (modified)
     * @return a Stats object whose fields tp, tn, fp and fn hold the four counts
     */
    private Stats countOutcomes(MyDataset data, Mask positives, Mask negatives) {
        Stats stats = new Stats();
        int npositives = positives.getnActive();
        int nnegatives = negatives.getnActive();
        for (int i = 0; i < rules.size(); i++) {
            // Remove from both masks the instances covered by the i-th rule.
            Rule rule = (Rule) rules.elementAt(i);
            data.substract(positives, rule);
            data.substract(negatives, rule);
        }
        stats.fn = positives.getnActive();  // positives left uncovered: false negatives
        stats.tp = npositives - stats.fn;   // positives that were covered: true positives
        stats.tn = negatives.getnActive();  // negatives left uncovered: true negatives
        stats.fp = nnegatives - stats.tn;   // negatives that were covered: false positives
        return stats;
    }

    /**
     * Returns the Minimum Data Length of a dataset given a theory (this ruleset). See [Quinlan95]
     * <p>
     * With C = tp+fp covered cases, U = tn+fn uncovered cases, D = C+U and e = fp+fn errors:
     * <pre>
     * if C &gt;= U:  log(D+1) + fp*(-log(e/(2C))) + tp*(-log(1-e/(2C)))
     *                      + fn*(-log(fn/U))   + tn*(-log(1-fn/U))
     * otherwise:  log(D+1) + fn*(-log(e/(2U))) + tn*(-log(1-e/(2U)))
     *                      + fp*(-log(fp/C))   + tp*(-log(1-fp/C))
     * </pre>
     * A term whose probability is exactly zero contributes its plain count instead of
     * evaluating log(0).
     * </p>
     * @param data MyDataset the datasets
     * @param positives Mask active positive entries of data
     * @param negatives Mask active negative entries of data
     * @return the MDL of data given this ruleset, or Double.MAX_VALUE when the ruleset
     * covers no instance or leaves no instance uncovered
     */
    public double getMDL(MyDataset data, Mask positives, Mask negatives) {
        Stats quartet = apply(data, positives, negatives);
        double tp = quartet.tp, tn = quartet.tn, fp = quartet.fp, fn = quartet.fn;
        double uncovered = tn + fn;
        double covered = tp + fp;
        double total = uncovered + covered;
        double errors = fn + fp;

        if (covered == 0 || uncovered == 0) {
            return Double.MAX_VALUE;
        }

        // Error probability used for the covered (tp/fp) and the uncovered (tn/fn) cases.
        double coveredErrorProb;
        double uncoveredErrorProb;
        if (covered >= uncovered) {
            coveredErrorProb = errors / (2 * covered);
            uncoveredErrorProb = fn / uncovered;
        } else {
            coveredErrorProb = fp / covered;
            uncoveredErrorProb = errors / (2 * uncovered);
        }

        double tpCost = codingCost(tp, 1 - coveredErrorProb);
        double fpCost = codingCost(fp, coveredErrorProb);
        double tnCost = codingCost(tn, 1 - uncoveredErrorProb);
        // The false-negative term must be weighted by fn (it was previously weighted by fp).
        double fnCost = codingCost(fn, uncoveredErrorProb);

        return Math.log(total + 1) + tpCost + tnCost + fpCost + fnCost;
    }

    /**
     * Cost of coding <code>count</code> cases that each occur with probability <code>prob</code>.
     * @param count number of cases
     * @param prob probability of each case
     * @return count * (-log(prob)), or count itself when prob is exactly zero
     */
    private static double codingCost(double count, double prob) {
        return (prob == 0) ? count : count * (-Math.log(prob));
    }

    /**
     * Returns the Minimum Data Length of a dataset given a theory (this ruleset). See [Quinlan95]
     * The positive entries are those selected by <code>data.filterByClass</code> for this
     * ruleset's type; the negative entries are the complement of that mask.
     * @param data MyDataset the datasets
     * @return the MDL of data given this ruleset.
     */
    public double getMDL(MyDataset data) {
        Mask positives = new Mask(data.size());
        data.filterByClass(positives, this.type);
        Mask negatives = positives.complement();
        return getMDL(data, positives, negatives);
    }

    /**
     * Returns the rule in the i-th position of the ruleset.
     * @param pos int position of the rule in the ruleset
     * @return the rule in the pos-th position of the ruleset.
     */
    public Rule getRule(int pos) {
        return (Rule) rules.elementAt(pos);
    }

    /**
     * Returns the common output (consequent) of the rules in the ruleset.
     * @return the common output (consequent) of the rules in the ruleset.
     */
    public String getType() {
        return type;
    }

    /**
     * Inserts a new rule in a given position of the ruleset.
     * @param r Rule the new rule
     * @param pos int the position where r must be inserted
     */
    public void insertRule(Rule r, int pos) {
        rules.insertElementAt(r, pos);
    }

    /**
     * Deletes a given rule of the ruleset.
     * @param pos int position of the rule in the ruleset.
     */
    public void removeRule(int pos) {
        rules.remove(pos);
    }

    /**
     * Sets the common output (consequent) of the rules in the ruleset.
     * @param type String the common output (consequent) of the rules in the ruleset.
     */
    public void setType(String type) {
        this.type = type;
    }

    /**
     * Returns the size (number of rules) of the ruleset.
     * @return the size (number of rules) of the ruleset.
     */
    public int size() {
        return rules.size();
    }

    /**
     * Returns a string representation of this Ruleset: one line per rule, made of the
     * rule's own string representation followed by " -> " and the consequent.
     * @return a string representation of this Ruleset, containing the String representation of each Rule.
     */
    public String toString() {
        String output = "";
        for (int i = 0; i < rules.size(); i++) {
            Rule rule = (Rule) rules.elementAt(i);
            output += rule.toString() + " -> " + type + "\n";
        }
        return output;
    }
}