/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Julián Luengo Martín 08/02/2007
* @version 0.2
* @since JDK 1.5
* </p>
*/
package keel.Algorithms.Genetic_Rule_Learning.ILGA;
import org.core.Randomize;
import keel.Dataset.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
/**
 * <p>
 * This class implements the Single-attribute Evolution Module (SEM) of the
 * OIGA method, which evolves mono-attribute rules.
 * </p>
 * <p>
 * The module is a generational genetic algorithm over a population of
 * {@link RuleSet} chromosomes. Each generation applies, in this order:
 * tournament selection, one-point crossover between adjacent chromosomes,
 * mutation, elitism (re-insertion of parents) and evaluation; see
 * {@link #run()}.
 * </p>
 */
public class SEM {
    /** Number of chromosomes (rule sets) in the population. */
    int long_poblacion = 100;
    /** Number of genes of one chromosome: {@code numberRules * (3 * nAtt + 1)}. */
    int n_genes;
    /** Number of attributes coded in every rule (1 when built with the parametrized constructor). */
    int nAtt;
    /** Per-gene mutation probability. */
    double prob_mutacion = 0.01;
    /** Probability of crossing each pair of adjacent chromosomes. */
    double crossoverRate = 1.0;
    /** Number of rules of each chromosome. */
    int numberRules;
    /** Position of the next gene to mutate, counted across the genes of the whole population. */
    int Mu_next;
    /** Number of generations without change of the best fitness tolerated before stopping. */
    int stagnationLimit = 15;
    /** Maximum number of generations executed by {@link #run()}. */
    int generationLimit = 30;
    /** Fraction of the population that is filled with parents of the previous generation. */
    double survivorsPercent = 0.5;
    /** Current population. */
    RuleSet poblacion[];
    /** Population as it was before the last selection step (the parents). */
    RuleSet previousPob[];
    /** Not used within this class. */
    RuleSet intermediatePob[];
    /** Attribute evolved by this SEM; stored by the constructor, not read within this class. */
    int attributeSelected = 0;
    /** Training data set. */
    InstanceSet IS;
    /** Test data set; must be assigned before {@link #evaluate()} is called. */
    InstanceSet IStest;
    /** Fitness of the best chromosome found in the last generation executed by {@link #run()}. */
    double bestCR = -1;
    /** Sized with the number of input attributes when the class is initialized; not used within this class. */
    static int attributeOrder[] = new int[Attributes.getInputNumAttributes()];

    /**
     * <p>
     * Default constructor. No memory allocated
     * </p>
     */
    public SEM() {
        poblacion = null;
    }

    /**
     * Parametrized constructor. Builds and randomizes the initial population.
     * @param pobSize number of chromosomes (rule sets)
     * @param numberRules number of rules of each chromosome
     * @param attSel attribute used to evolve in this SEM
     * @param IS the data set used for train the SEM
     */
    public SEM(int pobSize, int numberRules, int attSel, InstanceSet IS) {
        poblacion = new RuleSet[pobSize];
        long_poblacion = pobSize;
        this.numberRules = numberRules;
        nAtt = 1;
        // every rule holds 3 genes per attribute plus the class value
        n_genes = numberRules * (3 * nAtt + 1);
        attributeSelected = attSel;
        this.IS = IS;
        for (int i = 0; i < pobSize; i++) {
            poblacion[i] = new RuleSet(numberRules, nAtt);
            poblacion[i].createRules(numberRules, nAtt);
            poblacion[i].randomizeRules(IS);
        }
    }

    /**
     * Sets the reference data set for this SEM
     * @param dataset new data set for training
     */
    public void setIS(InstanceSet dataset) {
        IS = dataset;
    }

    /**
     * Sets the test data set used, together with the training set, by {@link #evaluate()}
     * @param dataset new data set for test
     */
    public void setIStest(InstanceSet dataset) {
        IStest = dataset;
    }

    /**
     * Set the generations limit
     * @param iters maximum number of iterations of the SEM
     */
    public void setGenerationLimit(int iters) {
        generationLimit = iters;
    }

    /**
     * Set the parameters for this SEM
     * @param mutationProb the mutation probability
     * @param crossoverRate the crossover probability between 2 parents
     * @param survivorsPercent the percent of parents that will be maintained from a generation to next one
     */
    public void setGAparams(double mutationProb, double crossoverRate, double survivorsPercent) {
        this.prob_mutacion = mutationProb;
        this.crossoverRate = crossoverRate;
        this.survivorsPercent = survivorsPercent;
    }

    /**
     * Gets the Classification Rate of this SEM
     * @return the fitness of the best chromosome of the last generation run, or -1 if
     * {@link #run()} has not been executed yet
     */
    public double getCR() {
        return bestCR;
    }

    /**
     * Gets the i-th chromosome
     * @param i the rule set we want to retrieve
     * @return the selected rule set
     */
    public RuleSet getChromosome(int i) {
        return poblacion[i];
    }

    /**
     * One-point crossover. Both parents are replaced in place by their children:
     * the first child keeps the head of parent 1 and takes the tail of parent 2,
     * the second child keeps the head of parent 2 and takes the tail of parent 1.
     * @param cr1 index of parent 1 in the population
     * @param cr2 index of parent 2 in the population
     */
    public void onePointCrossover(int cr1, int cr2) {
        RuleSet rule1 = poblacion[cr1];
        RuleSet rule2 = poblacion[cr2];
        // there are 3*number of attribute elements, plus class value in each rule
        int genesPerRule = 3 * nAtt + 1;
        // NOTE(review): assumes the upper bound of Randint is exclusive, so that
        // cutpoint_rule is always a valid rule index - confirm against Randomize.
        int cutpoint = Randomize.Randint(0, n_genes);
        int cutpoint_rule = cutpoint / genesPerRule;
        int cutpoint_variable = cutpoint % genesPerRule;
        // Keep the genes of parent 1 before they are overwritten. Without this
        // snapshot the second child would be built from the already crossed
        // parent 1 and both children would end up identical.
        // NOTE(review): relies on the RuleSet copy constructor producing an
        // independent copy, as the selection methods of this class also do.
        RuleSet rule1Original = new RuleSet(rule1);
        // child 1: rule1 is replaced from cutpoint (inclusive) to the end with rule2's genes
        rule1.copyFromPointtoEnd(rule2, cutpoint_rule, cutpoint_variable);
        // child 2: rule2 is replaced from cutpoint (inclusive) to the end with the
        // ORIGINAL genes of rule1
        rule2.copyFromPointtoEnd(rule1Original, cutpoint_rule, cutpoint_variable);
        // children must be evaluated
        rule1.setEvaluated(false);
        rule2.setEvaluated(false);
    }

    /**
     * It performs a one point crossover in the new population, using adjacent chromosomes as parents.
     * Each pair (i, i+1) is crossed with probability {@code crossoverRate}.
     */
    public void crossOver() {
        for (int i = 0; i < long_poblacion; i += 2) {
            // The random number is drawn before checking that the pair is complete,
            // so an unpaired last chromosome still consumes one draw.
            if (Randomize.Rand() < this.crossoverRate && i + 1 < long_poblacion) {
                onePointCrossover(i, i + 1);
            }
        }
    }

    /**
     * Copies the first {@code survivorsPercent} proportion of the old population into the
     * last positions of the new one. {@link #run()} keeps the populations sorted best-first,
     * so there the copied chromosomes are the best parents. The parents are inserted by
     * reference, not copied. Does nothing if no previous population exists yet.
     */
    public void elitism() {
        if (previousPob == null) {
            return;
        }
        int parentspreserved = (int) (long_poblacion * survivorsPercent);
        // never preserve a negative amount nor more parents than available slots/parents
        int available = Math.min(long_poblacion, previousPob.length);
        parentspreserved = Math.max(0, Math.min(parentspreserved, available));
        int firstSlot = long_poblacion - parentspreserved;
        for (int j = 0; j < parentspreserved; j++) {
            poblacion[firstSlot + j] = previousPob[j];
        }
    }

    /**
     * Applies mutation in the new population. Instead of testing every gene, the distance
     * to the next gene to mutate is sampled and accumulated in {@code Mu_next}, which is
     * carried over from one call to the next. Nothing is done when the mutation
     * probability is not positive.
     */
    public void mutate() {
        if (prob_mutacion <= 0) {
            return;
        }
        int posiciones = n_genes * long_poblacion;
        while (Mu_next < posiciones) {
            // chromosome and gene that correspond to the position to be mutated
            int i = Mu_next / n_genes;
            int j = Mu_next % n_genes;
            // the mutation is applied on that gene
            poblacion[i].mutate(j);
            // the mutated chromosome is marked for its later evaluation
            poblacion[i].setEvaluated(false);
            // the next position to mutate is computed
            if (prob_mutacion < 1) {
                // a draw of exactly 0 would give an infinite gap; use the smallest
                // positive double instead
                double m = Math.max(Randomize.Rand(), Double.MIN_VALUE);
                // compound assignment on purpose: the double sum is narrowed to int,
                // which saturates at Integer.MAX_VALUE instead of overflowing
                Mu_next += Math.ceil(Math.log(m) / Math.log(1.0 - prob_mutacion));
            } else {
                Mu_next += 1;
            }
        }
        // make the pending position relative to the start of the next generation
        Mu_next -= posiciones;
    }

    /**
     * Applies a roulette wheel selection. The population is sorted in descending order and
     * every slot of the new population receives a copy of a chromosome chosen with
     * probability proportional to its fitness. If the sum of the fitness values is not
     * positive the chromosomes are chosen uniformly. The old population is kept in
     * {@code previousPob}.
     */
    public void selection() {
        RuleSet[] temp = new RuleSet[long_poblacion];
        double[] probability = new double[long_poblacion];
        // sort the population in order of fitness
        Arrays.sort(poblacion, Collections.reverseOrder());
        // cumulative fitness
        double total = 0.0;
        for (int i = 0; i < long_poblacion; i++) {
            total += poblacion[i].getFitness();
            probability[i] = total;
        }
        // false when the total is 0, negative or NaN: the wheel cannot be built
        boolean proportional = total > 0.0;
        if (proportional) {
            for (int i = 0; i < long_poblacion; i++) {
                probability[i] /= total;
            }
        }
        int last = long_poblacion - 1;
        for (int i = 0; i < long_poblacion; i++) {
            double prob = Randomize.Rand();
            int sel;
            if (proportional) {
                // falls back to the last chromosome if rounding leaves no
                // cumulative value above the random number
                sel = last;
                for (int j = 0; j < long_poblacion; j++) {
                    if (probability[j] > prob) {
                        sel = j;
                        break;
                    }
                }
            } else {
                sel = Math.min(last, (int) (prob * long_poblacion));
            }
            temp[i] = new RuleSet(poblacion[sel]);
        }
        previousPob = poblacion;
        poblacion = temp;
    }

    /**
     * Applies a tournament selection, with tournament size of 2. Every slot of the new
     * population receives a copy of the fittest of two distinct, randomly chosen chromosomes
     * (of one, if the population has a single chromosome). The old population is kept in
     * {@code previousPob}.
     */
    public void tournament_selection() {
        // distinct contestants cannot outnumber the population, otherwise the
        // search for a non-repeated contestant would never finish
        int tam_torneo = Math.min(2, long_poblacion);
        int[] torneo = new int[tam_torneo];
        RuleSet[] sample = new RuleSet[long_poblacion];
        for (int i = 0; i < long_poblacion; i++) {
            torneo[0] = Randomize.Randint(0, long_poblacion);
            int mejor_torneo = torneo[0];
            for (int j = 1; j < tam_torneo; j++) {
                boolean repetido;
                // draw until the contestant differs from the previous ones
                do {
                    torneo[j] = Randomize.Randint(0, long_poblacion);
                    repetido = false;
                    for (int k = 0; k < j && !repetido; k++) {
                        repetido = (torneo[j] == torneo[k]);
                    }
                } while (repetido);
                if (poblacion[torneo[j]].fitness > poblacion[mejor_torneo].fitness) {
                    mejor_torneo = torneo[j];
                }
            }
            sample[i] = new RuleSet(poblacion[mejor_torneo]);
        }
        previousPob = poblacion;
        poblacion = sample;
    }

    /**
     * It evaluates the NEW population, using a metric which summarizes the train CR and
     * test CR: the fitness of every chromosome is the mean of the values returned by
     * classifying the training set and the test set. Both {@code IS} and {@code IStest}
     * must have been assigned.
     */
    public void evaluate() {
        for (int j = 0; j < long_poblacion; j++) {
            RuleSet individual = poblacion[j];
            double fitness_train = individual.classify(IS);
            // NOTE(review): the evaluated flag is cleared between both calls,
            // presumably so that the second classification is not skipped -
            // confirm against RuleSet.classify.
            individual.setEvaluated(false);
            double fitness_test = individual.classify(IStest);
            individual.fitness = (fitness_train + fitness_test) / 2.0;
        }
    }

    /**
     * It runs the Single-attribute Evolution Module (SEM) algorithm to obtain a rule set of ONE attribute.
     * The evolution stops when {@code generationLimit} generations have been executed, when the
     * best fitness has not changed for more than {@code stagnationLimit} generations, or when
     * the best fitness is exactly 1.0. At least one generation is always executed. On return
     * the population is sorted in descending order and {@code bestCR} holds the fitness of
     * its first chromosome.
     */
    public void run() {
        int gen = 0;
        int stagnation = 0;
        evaluate();
        // sorted best-first so that the elitism of the first generation also
        // preserves the best parents instead of arbitrary ones
        Arrays.sort(poblacion, Collections.reverseOrder());
        boolean endCondition = false;
        while (!endCondition) {
            tournament_selection();
            crossOver();
            mutate();
            elitism();
            evaluate();
            Arrays.sort(poblacion, Collections.reverseOrder());
            gen++;
            double best = poblacion[0].getFitness();
            if (bestCR != best) {
                stagnation = 0;
            } else {
                stagnation++;
            }
            // generationLimit is the maximum number of generations, hence >=
            if (gen >= generationLimit || stagnation > stagnationLimit || best == 1.0) {
                endCondition = true;
            }
            bestCR = best;
        }
    }
}