/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
*
* File: INNER.java
*
* The INNER Algorithm.
* It proceeds by selecting a random set of initial examples and inflating them to rules.
* The rules obtained are postprocessed to obtain a suitable set, ready to classify
* new instances based on both rule and distance approaches.
*
* @author Written by Joaquin Derrac (University of Granada) 8/7/2009
* @version 1.1
* @since JDK1.5
*
*/
package keel.Algorithms.Hyperrectangles.INNER;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.StringTokenizer;
import org.core.*;
import keel.Dataset.Attribute;
import keel.Dataset.Attributes;
import keel.Algorithms.Hyperrectangles.Basic.HyperrectanglesAlgorithm;
public class INNER extends HyperrectanglesAlgorithm{
//Number of initial instances selected (per class) on each inflation cycle
private int initialInstances;
//Maximum number of inflation cycles allowed
private int maxCycles;
//Inflation cycles performed so far
private int cycles;
//Size of the smallest non-empty class (set in the constructor)
private int minExamples;
//Number of instance presentations performed in each generalization phase
private int minPresentations;
//Presentation counter used during generalization
private int presentations;
//Regularization period: every 'regularize' presentations the rules are checked
private int regularize;
//Coverage rate threshold to stop the inflation process
private double minCoverage;
//Impurity threshold over which a rule becomes a candidate for removal
private double selectThreshold;
//Per-instance flag: true if the instance is covered by the current rule set
private boolean covered [];
//Per-instance flag: true if the instance was selected as a rule seed
private boolean instancesSelected [];
//Rules obtained in the current inflation cycle
private Rule newRules[];
//Current rule set
private Rule ruleset[];
/**
 * Builds a new INNER classifier from a configuration script: reads the
 * data files, denormalizes nominal attributes and initializes the static
 * structures of the Rule class.
 *
 * @param script Name of the configuration script
 *
 */
public INNER (String script) {
readDataFiles(script);
//Naming the algorithm
name="INNER";
denormalizeData();
//Static set-up of the Rule class: attribute count, training data,
//class count and per-class instance counts
Rule.setSize(inputAtt);
Rule.copyData(trainData,trainOutput);
Rule.setNClasses(nClasses);
Rule.setNInstances(nInstances);
//Number of nominal values per attribute (0 for continuous attributes)
for(int i=0;i<inputAtt;i++){
if(inputs[i].getType()==Attribute.NOMINAL){
Rule.setNumValue(Attributes.getInputAttribute(i).getNumNominalValues(),i);
}
else{
Rule.setNumValue(0,i);
}
}
Rule.computeDefaultImpurityLevels();
covered= new boolean [trainData.length];
ruleset= new Rule[0];
instancesSelected= new boolean [trainData.length];
cycles=0;
//Size of the smallest non-empty class; used later to decide how many
//seed examples are selected per cycle
minExamples=Integer.MAX_VALUE;
for(int i=0;i<nInstances.length;i++){
if((nInstances[i]<minExamples)&&(nInstances[i]!=0)){
minExamples=nInstances[i];
}
}
Randomize.setSeed(seed);
//Initialization stuff ends here. So, we can start time-counting
setInitialTime();
} //end-method
/**
 * Reads the configuration script to extract the parameter values.
 *
 * Each parameter line has the form "name = value"; the repeated
 * tokenizing logic is factored out into {@link #nextParameterValue}.
 *
 * @param script Name of the configuration script
 *
 */
protected void readParameters (String script) {

    String file;
    StringTokenizer fileLines;

    file = Files.readFile(script);
    fileLines = new StringTokenizer (file,"\n\r");

    //Discard in/out files definition
    fileLines.nextToken();
    fileLines.nextToken();
    fileLines.nextToken();

    //Getting the seed
    seed = Long.parseLong(nextParameterValue(fileLines));

    //Getting the Initial instances parameter
    initialInstances = Integer.parseInt(nextParameterValue(fileLines));

    //Getting the Max Cycles parameter
    maxCycles = Integer.parseInt(nextParameterValue(fileLines));

    //Getting the Min Coverage parameter
    minCoverage = Double.parseDouble(nextParameterValue(fileLines));

    //Getting the Min Presentations parameter
    minPresentations = Integer.parseInt(nextParameterValue(fileLines));

    //Getting the regularize parameter
    regularize = Integer.parseInt(nextParameterValue(fileLines));

    //Getting the select threshold parameter
    selectThreshold = Double.parseDouble(nextParameterValue(fileLines));

}//end-method

/**
 * Extracts the raw value of the next "name = value" line of the script.
 *
 * @param fileLines Tokenizer positioned on the remaining script lines
 *
 * @return The value token, with its single leading space removed
 */
private static String nextParameterValue(StringTokenizer fileLines){

    StringTokenizer tokens = new StringTokenizer (fileLines.nextToken(), "=");

    tokens.nextToken();

    //substring(1) drops the space that follows the '=' sign
    return tokens.nextToken().substring(1);

}//end-method
/**
 * Core of the INNER algorithm: repeatedly builds new rules and merges
 * them into the rule set until the stop criteria are met.
 */
public void getRules(){

    Arrays.fill(covered, false);

    while(continueInflating()){

        //obtain a new batch of rules and add it to the current set
        newRules = findBestRules();
        ruleset = merge(ruleset, newRules);

        //update the coverage flags before testing the stop criteria again
        computeCovered();

        ++cycles;
    }

}//end-method
/**
 * Prints the current rule set to the standard output.
 */
private void printRuleset(){

    System.out.println("********************************************");

    for(Rule rule : ruleset){
        System.out.println(rule);
    }

}//end-method
/**
 * Marks which training instances are already covered by the rule set.
 * An instance counts as covered when a rule of its own class contains it.
 */
private void computeCovered(){

    Arrays.fill(covered, false);

    for(int instance = 0; instance < trainData.length; instance++){

        int rule = 0;

        //stop scanning as soon as the instance is known to be covered
        while(rule < ruleset.length && !covered[instance]){

            if(trainOutput[instance] == ruleset[rule].getOutput()){
                covered[instance] = ruleset[rule].inside(trainData[instance]);
            }

            rule++;
        }
    }

}//end-method
/**
 * Tests whether the inflating process must continue: it stops once the
 * cycle budget is spent or the coverage rate exceeds the threshold.
 *
 * @return True to continue, false if not
 */
private boolean continueInflating(){

    //cycle check first, so coverage() is only computed when still inside
    //the cycle budget (same short-circuit order as before)
    return (cycles < maxCycles) && (coverage() <= minCoverage);

}//end-method
/**
 * Computes the coverage rate of the current rule set.
 *
 * @return Fraction of training instances covered, in [0,1]
 */
private double coverage(){

    int hits = 0;

    for(boolean isCovered : covered){
        if(isCovered){
            hits++;
        }
    }

    return hits / (double) covered.length;

}//end-method
/**
 * Merges two rule sets into a new array, cloning every rule.
 *
 * @param a First rule set
 * @param b Second rule set
 *
 * @return Final rule set
 */
private Rule [] merge(Rule a[],Rule b[]){

    Rule merged [] = new Rule [a.length + b.length];
    int cursor = 0;

    for(Rule rule : a){
        merged[cursor++] = rule.clone();
    }

    for(Rule rule : b){
        merged[cursor++] = rule.clone();
    }

    return merged;

}//end-method
/**
 * Builds a new batch of rules: selects a random subset of uncovered
 * training instances (the same quota per class), turns each one into a
 * point rule, then inflates and prunes them.
 *
 * @return New rule set obtained in this cycle
 */
private Rule [] findBestRules(){
int number;
Rule bestRules[];
Rule finalRules[];
int indexes [];
int needed[]= new int [nClasses];
int output;
int pointer;
int nSelected=0;
//select initial instances: visit instances in random order, taking
//uncovered ones until the per-class quota ('number') is exhausted
number=selectNumberOfExamples();
Arrays.fill(needed, number);
indexes=generateIndex(instancesSelected.length);
for(int i=0;i<instancesSelected.length;i++){
pointer=indexes[i];
output=trainOutput[pointer];
if((needed[output]>0)&&(covered[pointer]==false)){
instancesSelected[pointer]=true;
needed[output]--;
nSelected++;
}
else{
instancesSelected[pointer]=false;
}
}
//build one point rule per selected instance
bestRules= new Rule[nSelected];
pointer=0;
for(int i=0;i<instancesSelected.length;i++){
if(instancesSelected[i]){
bestRules[pointer]=new Rule(trainData[i],trainOutput[i]);
pointer++;
}
}
//inflate the point rules and prune the result
bestRules=generalizeInstances(bestRules);
finalRules=pruneConditions(bestRules);
return finalRules;
}//end-method
/**
 * Generalizes the initial rules, by inflating them towards the training
 * instances presented, trying to cover all the instances in the training set.
 *
 * Two adaptation strengths are computed from the distance between the
 * instance and its nearest rule: one for numeric attributes and one for
 * nominal attributes. (The original code computed both but overwrote the
 * numeric one with the nominal one before use, so continuous attributes
 * were generalized with the nominal percentage — fixed here.)
 *
 * @param bestRules Initial rule set
 *
 * @return Final rule set
 */
private Rule [] generalizeInstances(Rule [] bestRules){

    int indexes [];
    int instance;
    int nearestRule;
    double learningRate, sigmoid;
    double numericPercentage, nominalPercentage;
    double distance;
    Rule otherRules [];

    presentations=0;
    indexes=new int [trainData.length];

    //work on a copy; 'bestRules' keeps the best snapshot seen so far
    otherRules=new Rule [bestRules.length];
    for(int i=0;i<bestRules.length;i++){
        otherRules[i]=bestRules[i].clone();
    }

    while(presentations<minPresentations){

        indexes=generateIndex(indexes.length);

        for(int i=0;i< indexes.length;i++){

            instance=indexes[i];
            nearestRule=findNearestRule(trainData[instance],otherRules);

            //generalize rule
            distance=otherRules[nearestRule].distance(trainData[instance]);

            //compute general terms (numerical)
            learningRate=0.75*(1.0+((double)presentations/(double)minPresentations));
            sigmoid=1.0/(1.0+(Math.pow(Math.E, 20.0*distance-5.0)));
            numericPercentage=learningRate*sigmoid;

            //compute general terms (nominal)
            learningRate=0.675*(1.0-((double)presentations/(double)minPresentations));
            sigmoid=1.0/(1.0+(Math.pow(Math.E, 3.0*distance-5.0)));
            nominalPercentage=learningRate*sigmoid;

            for(int j=0;j<trainData[instance].length;j++){
                //generalize nominal attribute
                if(inputs[j].getType()==Attribute.NOMINAL){
                    otherRules[nearestRule].generalizeNominal(j,trainData[instance][j],nominalPercentage,trainOutput[instance]);
                }
                //generalize continuous attribute
                else{
                    otherRules[nearestRule].generalizeContinuous(j,trainData[instance][j],numericPercentage,trainOutput[instance]);
                }
            }

            if(presentations<minPresentations){
                presentations++;
            }

            //regularize: keep the version of each rule with lower impurity
            //NOTE(review): throws ArithmeticException if 'regularize' is 0 — verify the script value
            if(presentations%regularize==0){

                for(int k=0;k<otherRules.length;k++){
                    otherRules[k].computeImpurityLevel();
                    if(bestRules[k].getImpurityLevel()>otherRules[k].getImpurityLevel()){
                        bestRules[k]=otherRules[k].clone();
                    }
                }

                //get the new set of rules
                for(int k=0;k<bestRules.length;k++){
                    otherRules[k]=bestRules[k].clone();
                }
            }

        }//end-for

    }//end-while

    //final regularization pass
    for(int k=0;k<otherRules.length;k++){
        otherRules[k].computeImpurityLevel();
        if(bestRules[k].getImpurityLevel()>otherRules[k].getImpurityLevel()){
            bestRules[k]=otherRules[k].clone();
        }
    }

    return bestRules;

}//end-method
/**
 * Finds the nearest rule of an example. Distance ties are broken in
 * favor of the rule with the lower impurity level.
 *
 * @param example An instance
 * @param rules Current rule set
 *
 * @return Identifier of the nearest rule
 */
private int findNearestRule(double example[],Rule rules []){

    int best = -1;
    double bestDist = Double.MAX_VALUE;
    double bestImpurity = Double.MAX_VALUE;

    for(int i = 0; i < rules.length; i++){

        double candidate = rules[i].distance(example);

        if(candidate < bestDist){
            //strictly nearer rule found
            bestDist = candidate;
            bestImpurity = rules[i].getImpurityLevel();
            best = i;
        }
        else if(candidate == bestDist && rules[i].getImpurityLevel() < bestImpurity){
            //same distance: prefer the purer rule
            bestImpurity = rules[i].getImpurityLevel();
            best = i;
        }
    }

    return best;

}//end-method
/**
 * Finds the nearest rule of an example, without considering a given rule.
 * Distance ties are broken in favor of the rule with the lower impurity.
 *
 * @param example An instance
 * @param rules Current rule set
 * @param rule Rule to be avoided
 *
 * @return Identifier of the nearest rule
 */
private int findNearestRuleWithout(double example[],Rule rules [],int rule){

    int best = -1;
    double bestDist = Double.MAX_VALUE;
    double bestImpurity = Double.MAX_VALUE;

    for(int i = 0; i < rules.length; i++){

        double candidate = rules[i].distance(example);

        //mask the excluded rule with an 'infinite' distance
        if(i == rule){
            candidate = Double.MAX_VALUE;
        }

        if(candidate < bestDist){
            bestDist = candidate;
            bestImpurity = rules[i].getImpurityLevel();
            best = i;
        }
        else if(candidate == bestDist && rules[i].getImpurityLevel() < bestImpurity){
            bestImpurity = rules[i].getImpurityLevel();
            best = i;
        }
    }

    return best;

}//end-method
/**
* Prune conditions of the current rule set, to improve its generalizatio capabilities
*
* @param bestRules Current rule set
*
* @return Final rule set
*/
private Rule [] pruneConditions(Rule [] bestRules){
boolean copy[];
int size,pointer=0;
Rule prunedRules[];
for(int i=0;i<bestRules.length;i++){
bestRules[i].pruneConditions();
}
size=bestRules.length;
copy=new boolean[size];
Arrays.fill(copy, true);
//remove inner rules
for(int i=0;i<bestRules.length;i++){
for(int j=0;j<bestRules.length&©[j];j++){
if((i!=j)&&(bestRules[i].contains(bestRules[j]))){
copy[j]=false;
size--;
}
}
}
prunedRules=new Rule[size];
for(int i=0;i<bestRules.length;i++){
if(copy[i]){
prunedRules[pointer]=bestRules[i].clone();
pointer++;
}
}
return prunedRules;
}//end-method
/**
 * Generates a randomized list of indexes, from 0 to a specified limit,
 * by applying 'top' random transpositions to the identity permutation.
 *
 * @param top Limit (not included)
 *
 * @return The list of indexes
 */
private int [] generateIndex(int top){

    int permutation [] = new int [top];

    //start from the identity permutation
    for(int i = 0; i < top; i++){
        permutation[i] = i;
    }

    //apply 'top' random swaps (same RNG call sequence as before)
    for(int i = 0; i < top; i++){
        int first = Randomize.Randint(0, top);
        int second = Randomize.Randint(0, top);
        int tmp = permutation[first];
        permutation[first] = permutation[second];
        permutation[second] = tmp;
    }

    return permutation;

}//end-method
/**
 * Computes the number of examples to be selected per class, growing
 * exponentially with the size of the smallest class but capped by the
 * initialInstances parameter.
 *
 * @return Number of examples
 */
private int selectNumberOfExamples(){

    int estimate = (int)Math.floor(Math.pow(Math.E,(double)minExamples/(50.0/(Math.log(10.0)))));

    //never select fewer than one example
    if(estimate < 1){
        estimate = 1;
    }

    return Math.min(initialInstances, estimate);

}//end-method
/**
 * Performs the postprocessing phase of INNER: two generalization passes
 * with a rule-selection step in between, plus a final inflation if some
 * training instances remain uncovered.
 *
 * Fix: coverage() returns a rate in [0,1], so the original comparison
 * against 100.0 was always true and finalCoverage() ran unconditionally;
 * the intended threshold for "full coverage" is 1.0.
 */
public void postProcess(){

    firstGeneralize();
    selection();
    secondGeneralize();

    computeCovered();

    //run the final inflation only if coverage is not complete
    if(coverage() < 1.0){
        finalCoverage();
    }

}//end-method
/**
 * First generalization process. It tries to extend the rules of each class,
 * without allowing intersections with rules of other classes.
 */
private void firstGeneralize(){
ArrayList<Pair> list;
Pair actual;
boolean intersect;
Rule aux[];
Rule extended;
int index;
//process each class independently
for(int k=0;k<nClasses;k++){
list=computeExtensibleList(k);
for(int i=0;i<list.size();i++){
actual=list.get(i);
//try to extend rule A over rule B; null means no extension possible
extended=ruleset[actual.A()].testExtension(ruleset[actual.B()]);
if(extended!=null){
//reject the extension if it intersects any rule of another class
intersect=false;
for(int l=0;l<ruleset.length&&!intersect;l++){
if(extended.getOutput()!=ruleset[l].getOutput()){
intersect=extended.intersect(ruleset[l]);
}
}
if(!intersect){
//extend rule
ruleset[actual.A()]=extended.clone();
//remove the inner (contained) rule from the rule set
if(extended.contains(ruleset[actual.B()])){
index=actual.B();
aux= new Rule [ruleset.length];
System.arraycopy(ruleset, 0, aux, 0, ruleset.length);
ruleset= new Rule [aux.length-1];
for(int pointer=0;pointer<index;pointer++){
ruleset[pointer]=aux[pointer].clone();
}
for(int pointer=index;pointer<ruleset.length;pointer++){
ruleset[pointer]=aux[pointer+1].clone();
}
//the rule set changed: rebuild the pair list and restart
//NOTE(review): i=0 becomes 1 after the loop increment, so the
//first pair of the rebuilt list is skipped — confirm if intended
list=computeExtensibleList(k);
i=0;
}
}
}
}
}//end-for class
}//end-method
/**
 * Computes the list of extensible pairs of rules of a given class,
 * sorted by rule distance.
 *
 * @param clas Class of the rules
 *
 * @return List of extensible pairs
 */
@SuppressWarnings("unchecked")
private ArrayList<Pair> computeExtensibleList(int clas){

    ArrayList<Pair> pairs = new ArrayList<Pair>();

    for(int i = 0; i < ruleset.length; i++){

        //only rules of the requested class are considered
        if(ruleset[i].getOutput() != clas){
            continue;
        }

        for(int j = i + 1; j < ruleset.length; j++){
            if(ruleset[j].getOutput() == clas && extensible(ruleset[i], ruleset[j])){
                pairs.add(new Pair(i, j, ruleset[i].ruleDistance(ruleset[j])));
            }
        }

    }//end-for rules (A)

    //nearest pairs first
    Collections.sort(pairs);

    return pairs;

}//end-method
/**
 * Tests if a given pair of rules is extensible: they may overlap in at
 * most one attribute.
 *
 * @param a First rule
 * @param b Second rule
 *
 * @return True if the pair is extensible. False, if not.
 */
private boolean extensible(Rule a, Rule b){

    int overlaps = 0;

    //stop counting as soon as a second overlapping attribute is found
    for(int att = 0; att < inputAtt && overlaps < 2; att++){
        if(a.inclusionDegree(b, att) > 0.0){
            overlaps++;
        }
    }

    return overlaps < 2;

}//end-method
/**
 * Removes from the current rule set those rules with a higher impurity
 * level, as long as the training classification rate does not decrease.
 *
 * Fix: guards against an empty rule set, where both "ruleset[0]" and
 * hitsTraining() (which indexes the nearest rule) would throw.
 */
@SuppressWarnings("unchecked")
private void selection(){

    int hitsWith;
    int hitsWithout;
    Rule aux[];

    //nothing to select from; also avoids ruleset[0] below
    if(ruleset.length == 0){
        return;
    }

    //a Rule instance doubles as the Comparator for sorting the set
    Arrays.sort(ruleset,ruleset[0]);

    hitsWith=hitsTraining();

    for(int i=0;i<ruleset.length;i++){

        //only impure rules are candidates for removal
        if(ruleset[i].getImpurityLevel()>selectThreshold){

            hitsWithout=hitsTrainingWithout(i);

            //drop the rule if accuracy does not get worse without it
            if(hitsWith<=hitsWithout){

                aux= new Rule [ruleset.length];
                System.arraycopy(ruleset, 0, aux, 0, ruleset.length);
                ruleset= new Rule [aux.length-1];

                for(int pointer=0;pointer<i;pointer++){
                    ruleset[pointer]=aux[pointer].clone();
                }
                for(int pointer=i;pointer<ruleset.length;pointer++){
                    ruleset[pointer]=aux[pointer+1].clone();
                }

                hitsWith=hitsWithout;

                //stay at the same index: a new rule now occupies slot i
                i--;
            }
        }
    }

}//end-method
/**
 * Computes the number of training hits with the current rule set.
 *
 * @return Number of hits.
 */
private int hitsTraining(){

    int correct = 0;

    for(int i = 0; i < trainData.length; i++){

        int nearest = findNearestRule(trainData[i], ruleset);

        if(ruleset[nearest].getOutput() == trainOutput[i]){
            correct++;
        }
    }

    return correct;

}//end-method
/**
 * Computes the number of training hits with the current rule set,
 * excluding a given rule.
 *
 * @param rule Rule to be avoided
 *
 * @return Number of hits.
 */
private int hitsTrainingWithout(int rule){

    int correct = 0;

    for(int i = 0; i < trainData.length; i++){

        int nearest = findNearestRuleWithout(trainData[i], ruleset, rule);

        if(ruleset[nearest].getOutput() == trainOutput[i]){
            correct++;
        }
    }

    return correct;

}//end-method
/**
 * Second generalization process. It tries to extend the rules, this time
 * allowing intersections between rules of different classes.
 */
private void secondGeneralize(){
ArrayList<Pair> list;
Pair actual;
Rule aux[];
Rule extended;
int index;
//process each class independently
for(int k=0;k<nClasses;k++){
list=computeExtensibleList(k);
for(int i=0;i<list.size();i++){
actual=list.get(i);
//try to extend rule A over rule B; null means no extension possible
extended=ruleset[actual.A()].testExtension(ruleset[actual.B()]);
if(extended!=null){
//extend rule (no intersection check in this second pass)
ruleset[actual.A()]=extended.clone();
//remove the inner (contained) rule from the rule set
if(extended.contains(ruleset[actual.B()])){
index=actual.B();
aux= new Rule [ruleset.length];
System.arraycopy(ruleset, 0, aux, 0, ruleset.length);
ruleset= new Rule [aux.length-1];
for(int pointer=0;pointer<index;pointer++){
ruleset[pointer]=aux[pointer].clone();
}
for(int pointer=index;pointer<ruleset.length;pointer++){
ruleset[pointer]=aux[pointer+1].clone();
}
//the rule set changed: rebuild the pair list and restart
//NOTE(review): i=0 becomes 1 after the loop increment, so the
//first pair of the rebuilt list is skipped — confirm if intended
list=computeExtensibleList(k);
i=0;
}
}
}
}//end-for class
}//end-method
/**
 * Performs a final inflating process over the selected rule set. This time,
 * the impurity level of the rules is computed employing only the instances
 * already covered by each rule (getSpecialImpurityLevel).
 *
 * Fixes: (1) the nominal learning rate used INTEGER division
 * "presentations/minPresentations" (always 0 while presentations is below
 * minPresentations), unlike the double division used everywhere else;
 * (2) the numeric percentage was a dead store, overwritten by the nominal
 * one before use — both are now kept and applied to their attribute kind.
 */
private void finalCoverage(){

    int indexes [];
    int instance;
    int nearestRule;
    double learningRate, sigmoid;
    double numericPercentage, nominalPercentage;
    double distance;
    Rule otherRules [];
    Rule bestRules [];

    //start from a copy of the current rule set
    bestRules=new Rule [ruleset.length];
    for(int i=0;i<ruleset.length;i++){
        bestRules[i]=ruleset[i].clone();
    }

    presentations=0;
    indexes=new int [trainData.length];

    otherRules=new Rule [bestRules.length];
    for(int i=0;i<bestRules.length;i++){
        otherRules[i]=bestRules[i].clone();
    }

    while(presentations<minPresentations){

        indexes=generateIndex(indexes.length);

        for(int i=0;i< indexes.length;i++){

            instance=indexes[i];
            nearestRule=findNearestRule(trainData[instance],otherRules);

            //generalize rule
            distance=otherRules[nearestRule].distance(trainData[instance]);

            //compute general terms (numerical)
            learningRate=0.75*(1.0+((double)presentations/(double)minPresentations));
            sigmoid=1.0/(1.0+(Math.pow(Math.E, 20.0*distance-5.0)));
            numericPercentage=learningRate*sigmoid;

            //compute general terms (nominal)
            learningRate=0.675*(1.0-((double)presentations/(double)minPresentations));
            sigmoid=1.0/(1.0+(Math.pow(Math.E, 10.0*distance-5.0)));
            nominalPercentage=learningRate*sigmoid;

            for(int j=0;j<trainData[instance].length;j++){
                //generalize nominal attribute
                if(inputs[j].getType()==Attribute.NOMINAL){
                    otherRules[nearestRule].generalizeNominal(j,trainData[instance][j],nominalPercentage,trainOutput[instance]);
                }
                //generalize continuous attribute
                else{
                    otherRules[nearestRule].generalizeContinuous(j,trainData[instance][j],numericPercentage,trainOutput[instance]);
                }
            }

            if(presentations<minPresentations){
                presentations++;
            }

            //regularize (special): keep each rule's best version according
            //to the impurity computed over the instances it covers
            if(presentations%regularize==0){

                for(int k=0;k<otherRules.length;k++){
                    if(bestRules[k].getSpecialImpurityLevel(bestRules)>=otherRules[k].getSpecialImpurityLevel(bestRules)){
                        bestRules[k]=otherRules[k].clone();
                    }
                }

                //get the new set of rules
                for(int k=0;k<bestRules.length;k++){
                    otherRules[k]=bestRules[k].clone();
                }
            }

        }//end-for

    }//end-while

    //final regularization pass
    for(int k=0;k<otherRules.length;k++){
        otherRules[k].computeImpurityLevel();
        if(bestRules[k].getSpecialImpurityLevel(bestRules)>=otherRules[k].getSpecialImpurityLevel(bestRules)){
            bestRules[k]=otherRules[k].clone();
        }
    }

    //publish the resulting rules as the final rule set
    ruleset=new Rule [bestRules.length];
    for(int i=0;i<bestRules.length;i++){
        ruleset[i]=bestRules[i].clone();
    }

}//end-method
/**
 * Classifies an instance using the rule set: the class of the nearest
 * rule is assigned.
 *
 * @param instance Instance to classify
 * @return Class assigned to the instance
 */
protected int evaluate(double instance[]){

    return ruleset[findNearestRule(instance, ruleset)].getOutput();

}//end-method
/**
 * Denormalizes nominal data in both the training and test matrices,
 * rescaling each nominal attribute from [0,1] to its value-index range
 * [0, nValues-1]. Factors out the duplicated train/test loops and hoists
 * the per-attribute lookups out of the instance loop.
 */
private void denormalizeData(){

    denormalizeNominals(trainData, train.getNumInstances());
    denormalizeNominals(testData, test.getNumInstances());

}//end-method

/**
 * Rescales the nominal attributes of a data matrix in place.
 *
 * @param data Data matrix to modify
 * @param rows Number of instances to process
 */
private void denormalizeNominals(double data[][], int rows){

    for (int j = 0; j < inputAtt; j++) {

        Attribute att = Attributes.getInputAttribute(j);

        if (att.getType() == Attribute.NOMINAL) {

            int nValues = att.getNominalValuesList().size();

            //single-valued attributes stay at 0
            if (nValues > 1) {
                for (int i = 0; i < rows; i++) {
                    data[i][j] *= nValues - 1;
                }
            }
        }
    }

}//end-method
/**
 * Writes the final rule set obtained into the ruleSetText variable.
 * Uses a StringBuilder instead of repeated String concatenation in the
 * loop (which is quadratic in the number of rules).
 *
 * @return The number of rules of the final rule set
 */
protected int writeRules(){

    StringBuilder text = new StringBuilder("\n");

    for(int i=0;i<ruleset.length;i++){
        text.append("\n");
        text.append(ruleset[i]);
    }

    ruleSetText = text.toString();

    return ruleset.length;

}//end-method
} //end-class