/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
*
* File: RISE.java
*
* The RISE Algorithm.
* It induces a list of classification rules unifying two approaches:
* instance-based learning and rule induction.
*
* @author Written by Joaquin Derrac (University of Granada) 8/7/2009
* @author Modified by Joaquin Derrac (University of Granada) 17/10/2009
* @version 1.2
* @since JDK1.5
*
*/
package keel.Algorithms.Hyperrectangles.RISE;
import java.util.StringTokenizer;
import org.core.*;
import keel.Dataset.Attribute;
import keel.Dataset.Attributes;
import keel.Algorithms.Hyperrectangles.Basic.HyperrectanglesAlgorithm;
/**
 * RISE rule learner.
 *
 * The rule set starts with one rule per training instance. Each rule is then
 * repeatedly generalized towards its nearest same-class training example that
 * it does not cover yet; a generalization is kept when the classification of
 * the examples it covers does not get worse on balance.
 */
public class RISE extends HyperrectanglesAlgorithm{

    private int Q; //SVDM parameter (read from the script, forwarded to Rule.setQ)
    private int S; //distance measure. Set S=2 for Euclidean distance (forwarded to Rule.setS)

    /** Scratch per-class vote counters, used by evaluate() to break ties. */
    int[] classVotes;

    /** Current rule set; it can shrink while getRules() removes duplicates. */
    Rule[] ruleset;

    /**
     * Builds the learner from a configuration script.
     *
     * Loads the data through readDataFiles(script), pushes the shared
     * settings into the static configuration of Rule, allocates the working
     * arrays and finally starts the time counter.
     *
     * @param script Name of the configuration script
     *
     */
    public RISE (String script) {

        // Q and S are used right below, so they are presumably filled in by
        // readDataFiles() through readParameters() -- TODO confirm in the superclass.
        readDataFiles(script);

        //Naming the algorithm
        name="RISE";

        Rule.setQ(Q);
        Rule.setS(S);
        Rule.setSize(inputAtt);
        Rule.setAttributes(inputs);
        Rule.setNClasses(nClasses);

        // Nominal attributes register their number of nominal values;
        // every other attribute registers a single value.
        for(int i=0;i<inputAtt;i++){
            if(inputs[i].getType()==Attribute.NOMINAL){
                Rule.setNumValue(Attributes.getInputAttribute(i).getNumNominalValues(),i);
            }
            else{
                Rule.setNumValue(1,i);
            }
        }

        Rule.loadSVDMmatrix(trainData,trainOutput);

        classVotes=new int[nClasses];
        ruleset=new Rule[trainData.length];

        //Initialization stuff ends here. So, we can start time-counting
        setInitialTime();

    } //end-method

    /**
     * Reads the configuration script to extract the parameter values.
     *
     * The first three non-empty lines (in/out files definition) are skipped;
     * the next two lines must have the form "name = value" and provide Q and
     * S, in that order.
     *
     * @param script Name of the configuration script
     *
     * @throws java.util.NoSuchElementException if the script has fewer lines
     *         than expected or a parameter line has no "=value" part
     * @throws NumberFormatException if a parameter value is not an integer
     */
    protected void readParameters (String script) {

        String file = Fichero.leeFichero (script);
        StringTokenizer fileLines = new StringTokenizer (file,"\n\r");

        //Discard in/out files definition
        fileLines.nextToken();
        fileLines.nextToken();
        fileLines.nextToken();

        //Getting the Q SVDM parameter
        Q = parseIntParameterLine(fileLines.nextToken());

        //Getting the S parameter
        S = parseIntParameterLine(fileLines.nextToken());

    }//end-method

    /**
     * Parses the integer value of a "name = value" configuration line.
     *
     * The value is trimmed, so any amount of whitespace around it (including
     * none) is accepted.
     *
     * @param line Configuration line
     * @return Integer found after the '=' sign
     */
    private static int parseIntParameterLine(String line){

        StringTokenizer tokens = new StringTokenizer (line, "=");

        tokens.nextToken(); //parameter name, not needed

        return Integer.parseInt(tokens.nextToken().trim());

    }//end-method

    /**
     * Extract the rules from the training set. This is the main part of the
     * RISE algorithm.
     *
     * Sweeps over the rule set until a full sweep accepts no generalization.
     */
    public void getRules(){

        boolean improvement;

        //Most specific start: one rule per training instance
        for(int i=0;i<trainData.length;i++){
            ruleset[i]=new Rule(trainData[i],trainOutput[i]);
            computeLaplaceAcc(ruleset[i]);
        }

        improvement=true;

        while(improvement){

            improvement=false;

            //For each rule. ruleset may shrink inside the loop, so its length
            //is re-read on every iteration. When duplicates located before
            //position i are removed the remaining rules shift left and some
            //of them are skipped in this sweep; they are visited again in the
            //next sweep, which is guaranteed because improvement is true.
            for(int i=0;i<ruleset.length;i++){

                //find the nearest example of its class not already covered
                int index=nearestUncoveredExample(ruleset[i]);

                if(index<0){
                    continue;
                }

                //candidate: current rule stretched just enough to cover it
                Rule aux=ruleset[i].clone();
                aux.mostSpecificGeneralization(trainData[index]);
                computeLaplaceAcc(aux);

                //apply changes only if classification does not get worse
                if(accuracyChange(i,aux)>=0){
                    improvement=true;
                    //change rule[i] for aux
                    ruleset[i]=aux.clone();
                    discardDuplicates(i,aux);
                }
            }//end-for
        }//end-while

    }//end-method

    /**
     * Finds the training example closest to a rule among those that share the
     * rule's class and are not inside the rule.
     *
     * @param rule Rule to be generalized
     * @return Index of the example in trainData, or -1 if there is none
     */
    private int nearestUncoveredExample(Rule rule){

        int index=-1;
        double minDist=Double.MAX_VALUE;

        for(int j=0;j<trainData.length;j++){
            if(trainOutput[j]==rule.getOutput() && !rule.inside(trainData[j])){
                double auxDist=rule.distance(trainData[j]);
                if(auxDist<minDist){
                    minDist=auxDist;
                    index=j;
                }
            }
        }

        return index;

    }//end-method

    /**
     * Sums the classification changes caused by replacing a rule with a
     * candidate, over the training examples at distance 0 from the candidate.
     *
     * @param ruleIndex Position in ruleset of the rule to be replaced
     * @param candidate Generalized rule
     * @return Improved classifications minus worsened classifications
     */
    private int accuracyChange(int ruleIndex,Rule candidate){

        int points=0;

        for(int j=0;j<trainData.length;j++){
            if(candidate.distance(trainData[j])==0){
                points+=tryClassification(j,ruleIndex,candidate);
            }
        }

        return points;

    }//end-method

    /**
     * Removes from ruleset every rule equal to a reference rule, except the
     * one stored at a given position. The rule set is only rebuilt when at
     * least one duplicate exists.
     *
     * @param keep Position that must survive even though it equals the reference
     * @param reference Rule the others are compared with
     */
    private void discardDuplicates(int keep,Rule reference){

        boolean[] duplicates=new boolean[ruleset.length];
        int utilRule=0;

        for(int j=0;j<ruleset.length;j++){
            duplicates[j]=ruleset[j].equals(reference)&&(j!=keep);
            if(!duplicates[j]){
                utilRule++;
            }
        }

        if(utilRule==ruleset.length){
            return;
        }

        Rule[] newset=new Rule[utilRule];
        int pointer=0;

        for(int j=0;j<ruleset.length;j++){
            if(!duplicates[j]){
                newset[pointer]=ruleset[j].clone();
                pointer++;
            }
        }

        //newset already holds private copies, no second copy is needed
        ruleset=newset;

    }//end-method

    /**
     * Tests the classification status of a given instance when an old rule is
     * replaced by a new rule. The rule set is left holding a copy of the old
     * rule when the method returns.
     *
     * @param instance Index of the training instance to be tested
     * @param oldRule Position in ruleset of the rule to be replaced
     * @param aux New rule
     * @return 1 if classification is improved with the new rule, -1 if the
     * classification gets worse, 0 if it remains the same.
     */
    private int tryClassification(int instance,int oldRule,Rule aux){

        int oldOutput,newOutput;
        Rule save;

        //get old output
        oldOutput=evaluate(trainData[instance]);

        //temporarily swap the candidate in
        save=ruleset[oldRule].clone();
        ruleset[oldRule]=aux.clone();

        //get new output
        newOutput=evaluate(trainData[instance]);

        //put the old rule back
        ruleset[oldRule]=save.clone();

        boolean wasCorrect=(oldOutput==trainOutput[instance]);
        boolean isCorrect=(newOutput==trainOutput[instance]);

        if(wasCorrect==isCorrect){
            return 0;
        }

        return isCorrect?1:-1;

    }//end-method

    /**
     * Computes the accuracy score of a rule and stores it in the rule through
     * setLaplaceAcc().
     *
     * The score is (covered + 1) / (training instances + classes), where
     * "covered" counts every training instance inside the rule, whatever its
     * class.
     * NOTE(review): the usual Laplace accuracy is
     * (correctly covered + 1) / (covered + classes); confirm that the formula
     * used here is intended before changing it.
     *
     * @param aux Rule to be analyzed
     */
    private void computeLaplaceAcc(Rule aux){

        int pos=0;

        for(int i=0;i<trainData.length;i++){
            if(aux.inside(trainData[i])){
                pos++;
            }
        }

        double acc=(pos+1.0)/(double)(trainData.length+nClasses);

        aux.setLaplaceAcc(acc);

    }//end-method

    /**
     * Classifies an instance using the ruleset.
     *
     * The nearest rule wins. Rules at the same minimum distance are ranked by
     * their Laplace accuracy. If several rules share both the minimum
     * distance and the best accuracy, each of them votes for its class and
     * the most voted class is returned (lowest class index on equal votes).
     *
     * @param instance Instance to classify
     * @return Class assigned to the instance
     * @throws IllegalStateException if no rule can be selected (for example,
     *         the rule set is empty)
     */
    protected int evaluate(double [] instance){

        int selected=-1;
        boolean draw=false;
        double minDist=Double.MAX_VALUE;
        double maxAcc=Double.NEGATIVE_INFINITY;

        for(int i=0;i<ruleset.length;i++){

            double dist=ruleset[i].distance(instance);
            double acc=ruleset[i].getLaplaceAcc();

            if(dist<minDist){
                //strictly nearer rule: it replaces whatever was selected
                minDist=dist;
                maxAcc=acc;
                selected=i;
                draw=false;
            }
            else if(dist==minDist){
                if(acc>maxAcc){
                    //same distance, strictly better rule: no draw anymore
                    maxAcc=acc;
                    selected=i;
                    draw=false;
                }
                else if(acc==maxAcc){
                    draw=true;
                }
            }
        }

        if(selected<0){
            throw new IllegalStateException(
                "No rule could be selected to classify the instance ("
                +ruleset.length+" rules available)");
        }

        if(!draw){
            return ruleset[selected].getOutput();
        }

        //draw: the tied rules vote for their classes
        for(int i=0;i<nClasses;i++){
            classVotes[i]=0;
        }

        for(int i=0;i<ruleset.length;i++){
            if(ruleset[i].distance(instance)==minDist
                    && ruleset[i].getLaplaceAcc()==maxAcc){
                classVotes[ruleset[i].getOutput()]++;
            }
        }

        int max=-1;
        int maxVotes=Integer.MIN_VALUE;

        for(int i=0;i<nClasses;i++){
            if(maxVotes<classVotes[i]){
                maxVotes=classVotes[i];
                max=i;
            }
        }

        return max;

    }//end-method

    /**
     * Writes the final ruleset obtained, in the ruleSetText variable.
     *
     * The text starts with an empty line and then holds one rule per line,
     * using the string form of each rule.
     *
     * @return The number of rules of the final rule set
     */
    protected int writeRules(){

        StringBuilder text=new StringBuilder("\n");

        for(int i=0;i<ruleset.length;i++){
            text.append("\n").append(ruleset[i]);
        }

        ruleSetText=text.toString();

        return ruleset.length;
    }

} //end-class