/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Genetic_Rule_Learning.LogenPro;
/**
* <p>
* @author Written by Alberto Fernández (University of Granada) 01/01/2007
* @author Modified by Francisco José Berlanga (University of Jaén) 09/12/2008
* @version 1.0
* @since JDK 1.6
* </p>
*/
import java.io.IOException;
import java.util.*;
import org.core.*;
public class Algorithm {
/**
* <p>
* It contains the implementation of the algorithm
* </p>
*/
myDataset train, val, test; //training, validation and test datasets
String outputTr, outputTst, output; //output file names
double classProb[];
double attrProb[][][]; //attribute value, attribute position, class
int nClasses; //number of classes
ArrayList<Individual> poblacion; //population of individuals (rules)
long semilla; //random seed
int tamPoblacion, numGeneraciones; //population size and number of generations
double min_support; //minimum support
double w1, w2; //fitness weights (passed to each Individual)
double probCross, probMut, probDrop; //crossover, mutation and dropping probabilities
boolean[] tokensGlobal; //one token per training example (used in Token Competition)
//We may declare here the algorithm's parameters
private boolean somethingWrong = false; //to check if everything is correct.
/**
* <p>
* Default constructor
* </p>
*/
public Algorithm() {
}
/**
* <p>
* It reads the data from the input files (training, validation and test) and parses all the parameters
* from the parameters array.
* </p>
* @param parameters parseParameters It contains the input files, output files and parameters
*/
public Algorithm(parseParameters parameters) {
train = new myDataset();
val = new myDataset();
test = new myDataset();
try {
System.out.println("\nReading the training set: " +
parameters.getTrainingInputFile());
train.readClassificationSet(parameters.getTrainingInputFile(), true);
System.out.println("\nReading the validation set: " +
parameters.getValidationInputFile());
val.readClassificationSet(parameters.getValidationInputFile(), false);
System.out.println("\nReading the test set: " +
parameters.getTestInputFile());
test.readClassificationSet(parameters.getTestInputFile(), false);
} catch (IOException e) {
System.err.println(
"There was a problem while reading the input data-sets: " +
e);
somethingWrong = true;
}
//We may check if there are some numerical attributes, because our algorithm may not handle them:
//somethingWrong = somethingWrong || train.hasNumericalAttributes();
//somethingWrong = somethingWrong || train.hasMissingAttributes();
outputTr = parameters.getTrainingOutputFile();
outputTst = parameters.getTestOutputFile();
output = parameters.getOutputFile(0);
//Now we parse the parameters, for example:
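//The parameters are read in this order: seed, population size, number of generations,
//crossover probability, mutation probability, dropping probability, minimum support
//and the weights w1 and w2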
semilla = Long.parseLong(parameters.getParameter(0));
//...
tamPoblacion = Integer.parseInt(parameters.getParameter(1));
numGeneraciones = Integer.parseInt(parameters.getParameter(2));
probCross = Double.parseDouble(parameters.getParameter(3));
probMut = Double.parseDouble(parameters.getParameter(4));
probDrop = Double.parseDouble(parameters.getParameter(5));
min_support = Double.parseDouble(parameters.getParameter(6));
w1 = Double.parseDouble(parameters.getParameter(7));
w2 = Double.parseDouble(parameters.getParameter(8));
tokensGlobal = new boolean[train.getnData()];
}
/**
* <p>
* It launches the algorithm
* </p>
*/
public void execute() {
if (somethingWrong) { //We do not execute the program
System.err.println("An error was found, either the data-set has numerical values or missing values.");
System.err.println("Aborting the program");
//We should not use the statement: System.exit(-1);
} else {
//We do here the algorithm's operations
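//Overall scheme: initialize and evaluate the population; then, in every generation, fill the
//population up to twice its size with offspring obtained by crossover, mutation or dropping
//(chosen according to probCross, probMut and probDrop), evaluate them, apply Token Competition,
//replace redundant rules and cut the population back to tamPoblacion individuals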
Randomize.setSeed(semilla);
System.out.println("Initializing population");
initializePopulation();
System.out.println("Evaluating population");
evaluatePopulation();
Collections.sort(poblacion);
/*for (int j = 0; j < poblacion.size(); j++) {
System.out.println("Rule[" + (j + 1) + "]: " +
poblacion.get(j).printIndividual());
}
System.out.println("");*/
for (int i = 0; i < numGeneraciones; i++) {
System.out.println("Generation " + (i + 1));
while (poblacion.size() < 2 * tamPoblacion) {
int uno = selection( -1);
double aleatorio = Randomize.Rand();
if (aleatorio <= probCross) {
int dos = selection(uno);
//System.out.println("Applying Crossover");
crossover(uno, dos);
} else if (aleatorio <= (probCross + probMut)) {
//System.out.println("Applying Mutation");
mutation(uno);
} else {
//System.out.println("Applying Dropping");
dropping(uno);
}
}
//System.out.println("Evaluating Population");
evaluatePopulation();
/*for (int j = 0; j < poblacion.size(); j++) {
System.out.println("Rule[" + (j + 1) + "]: " +
poblacion.get(j).printIndividual());
}
System.out.println("");
*/
//System.out.println("Token Competition");
tokenCompetition();
/* for (int j = 0; j < poblacion.size(); j++) {
System.out.println("Rule[" + (j + 1) + "]: " +
poblacion.get(j).printIndividual());
}
System.out.println("");
*/
//System.out.println("Replacing Redundant Rules");
replaceRedundantRules();
//System.out.println("Evaluating Population (again)");
evaluatePopulation(); //in case new individuals were generated
/*
for (int j = 0; j < poblacion.size(); j++) {
System.out.println("Rule[" + (j + 1) + "]: " +
poblacion.get(j).printIndividual());
}
System.out.println("");
*/
Collections.sort(poblacion);
//System.out.println("Cutting Population");
cutPopulation();
/*for (int j = 0; j < poblacion.size(); j++) {
System.out.println("Rule[" + (j + 1) + "]: " +
poblacion.get(j).printIndividual());
}
System.out.println("");
*/
}
Collections.sort(poblacion);
//nClasses = train.getnOutputs();
//Finally we should fill the training and test output files
double accTr = doOutput(this.val, this.outputTr);
double accTst = doOutput(this.test, this.outputTst);
String cadena = new String("");
System.out.println("Training Accuracy: "+accTr);
System.out.println("Test Accuracy: "+accTst);
cadena += "@Training Accuracy: "+accTr+"\nTest Accuracy: "+accTst+"\nRule Base:\n";
for (int i = 0; (i < poblacion.size()) && (poblacion.get(i).getFitness() > 0); i++) {
System.out.println((i + 1) + ":" + poblacion.get(i).printIndividual());
cadena += (i + 1) + ":" + poblacion.get(i).printIndividual() + "\n";
}
System.out.println("Algorithm Finished");
Files.writeFile(output,cadena);
}
}
/**
* <p>
* It generates the classification output for a given dataset and stores it in a file
* </p>
* @param dataset myDataset input dataset
* @param filename String the name of the file
* @return The accuracy
*/
private double doOutput(myDataset dataset, String filename) {
String output = new String("");
int aciertos = 0;
output = dataset.copyHeader(); //we insert the header in the output file
//We write the output for each example
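//Each line of the output contains the expected class followed by the predicted class;
//"aciertos" counts the correctly classified examples in order to compute the accuracy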
for (int i = 0; i < dataset.getnData(); i++) {
//for classification:
String prediccion = this.classificationOutput(dataset.getExample(i), dataset.getMissing(i));
output += dataset.getOutputAsString(i) + " " + prediccion + "\n";
if (dataset.getOutputAsString(i).compareTo(prediccion) == 0) {
aciertos++;
}
}
Files.writeFile(filename, output);
return 1.0*aciertos/dataset.getnData();
}
/**
* <p>
* It returns the algorithm classification output given an input example
* </p>
* @param example double[] The input example
* @param missing boolean [] A boolean array that stores "true" in the positions where the example has a missing value
* @return String the output generated by the algorithm
*/
private String classificationOutput(double[] example, boolean [] missing) {
String output = new String("?");
boolean salir = false;
/**
The population is used as a decision list: the class of the first rule (in fitness order)
that matches the example is returned, or "?" if no rule matches it
*/
for (int i = 0; (i < poblacion.size()) && (!salir); i++) {
if (poblacion.get(i).matching(example, missing)) {
output = poblacion.get(i).getClase();
salir = true;
}
}
return output;
}
/**
* <p>
* It initializes each individual in the population
* </p>
*/
private void initializePopulation() {
poblacion = new ArrayList<Individual>(tamPoblacion);
for (int i = 0; i < tamPoblacion; i++) {
Individual indi = new Individual(train, min_support, w1, w2);
poblacion.add(indi);
}
}
/**
* <p>
* It evaluates each individual in the population
* </p>
*/
private void evaluatePopulation() {
for (int i = 0; i < poblacion.size(); i++) {
if (poblacion.get(i).non_evaluated()) {
poblacion.get(i).evaluate(train);
}
}
}
/**
* <p>
* It selects an individual from the population by rank-based roulette wheel selection. The selected
* individual cannot be the one in position "posicionNo"
* </p>
* @param posicionNo int The individual in position "posicionNo" is not allowed to be selected
* @return int The position of the selected individual in the population
*/
private int selection(int posicionNo) {
int i = 0;
int posicion = 0;
double rank_min = 0.75;
double rank_max = 2.0 - rank_min;
double[] Ruleta = new double[tamPoblacion];
/* Sort the population from highest to lowest rank */
Collections.sort(poblacion);
/* Compute the selection probability of each individual by linear ranking,
according to its position in the ordering, and build the cumulative roulette */
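/* With linear ranking, the individual at position i (0 = best) receives probability
p(i) = (rank_max - (rank_max - rank_min) * i / (tamPoblacion - 1)) / tamPoblacion,
and Ruleta[i] stores the cumulative sum of these probabilities */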
for (i = 0; i < tamPoblacion; i++) {
if (i != 0) {
Ruleta[i] = Ruleta[i - 1] +
(rank_max -
(rank_max - rank_min) * i /
(double) (tamPoblacion - 1)) /
(double) tamPoblacion;
} else {
Ruleta[i] = (rank_max -
(rank_max - rank_min) * i /
(double) (tamPoblacion - 1)) /
(double) tamPoblacion;
}
}
boolean salir = false;
while (!salir) {
double u = Randomize.Rand();
posicion = 0;
//Advance to the first roulette slot whose cumulative probability reaches u
while ((posicion < tamPoblacion - 1) && (Ruleta[posicion] < u)) {
posicion++;
}
if (posicion != posicionNo) {
salir = true;
}
}
return posicion;
}
/**
* <p>
* It applies the crossover genetic operator between the individuals in positions "pos1" and "pos2" of the population.
* The new generated child is added at the end of the population
* </p>
* @param pos1 int Parent number 1 is in position "pos1" in the population
* @param pos2 int Parent number 2 is in position "pos2" in the population
*/
private void crossover(int pos1, int pos2) {
Individual padre = poblacion.get(pos1);
Individual madre = poblacion.get(pos2);
Individual hijo = new Individual(padre);
//System.out.println("Crossover (" + pos1 + "," + pos2 + ") Child (before): " +
// hijo.printIndividual());
int atributo;
boolean salir = false;
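//Pick a random position among the inputs plus the class position (atributo == train.getnInputs()
//means the class is exchanged); positions that are "any" in the second parent are re-drawn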
do {
atributo = Randomize.RandintClosed(0, train.getnInputs() + 1);
if (atributo == train.getnInputs()) {
salir = true;
}
} while (!salir && madre.isAny(atributo));
if (!salir) {
hijo.setCondition(atributo, madre.getCondition(atributo));
} else {
hijo.clase = madre.clase;
}
// int atributo;
// atributo = Randomize.RandintClosed(0, train.getnInputs());
// hijo.setCondition(atributo, madre.getCondition(atributo));
//System.out.println("Crossover (" + pos1 + "," + pos2 + ") Child (after): " +
// hijo.printIndividual());
poblacion.add(hijo);
}
/**
* <p>
* It applies mutation genetic operator to the individual in position "pos" in the population.
* The new generated child is added at the end of the population
* </p>
* @param pos int Parent is in position "pos" in the population
*/
private void mutation(int pos) {
Individual padre = poblacion.get(pos);
Individual hijo = new Individual(padre);
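//Mutate a random position among the inputs plus the class position: the class is reassigned
//when atributo == train.getnInputs(), otherwise the condition at that attribute is replaced
//by a new (non-"any") condition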
//System.out.println("Mutation (" + pos + ") Child (before): " +
// hijo.printIndividual());
int atributo = Randomize.RandintClosed(0, train.getnInputs() + 1);
//System.out.println("It has "+train.getnInputs()+ " inputs ("+atributo+")");
if (atributo == train.getnInputs()) {
hijo.assignNewClass(train);
} else {
hijo.assignConditionNoAny(atributo, train);
}
//System.out.println("Mutation (" + pos + ") Child (after): " +
// hijo.printIndividual());
poblacion.add(hijo);
//padre.printIndividual();
//hijo.printIndividual();
}
/**
* <p>
* It applies dropping condition genetic operator to the individual in position "pos" in the population.
* The new generated child is added at the end of the population
* </p>
* @param pos int Parent is in position "pos" in the population
*/
private void dropping(int pos) {
int atributo = 0;
Individual padre = poblacion.get(pos);
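//Dropping removes one condition: when dropping is applicable to the parent, a randomly chosen
//non-"any" attribute of the child is set to "any"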
if (padre.applicableDropping(train.getnInputs())) {
//if (padre.applicableDropping(0)) {
Individual hijo = new Individual(padre);
//System.out.println("Dropping (" + pos + ") Child (before): " +
// hijo.printIndividual());
do {
atributo = Randomize.RandintClosed(0, train.getnInputs());
} while (hijo.isAny(atributo));
hijo.setAny(atributo, train.nameVar(atributo));
//System.out.println("Dropping (" + pos + ") Child (after): " +
// hijo.printIndividual());
poblacion.add(hijo);
}
}
/**
* <p>
* It applies Token Competition diversity mechanism to the population
* </p>
*/
private void tokenCompetition() {
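//Each training example holds one token. Rules are processed from best to worst; every rule
//seizes the free tokens of the examples it covers, and its fitness is rescaled by the ratio
//between the tokens it seized and the maximum it could seize (ind.ideal()). Rules whose
//ideal count is zero receive a fitness of zero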
Collections.sort(poblacion);
for (int i = 0; i < train.getnData(); i++) {
tokensGlobal[i] = false;
}
for (int i = 0; i < poblacion.size(); i++) {
int count = 0;
Individual ind = poblacion.get(i);
if (ind.ideal() == 0) {
ind.setFitness(0.0);
} else {
for (int j = 0; j < train.getnData(); j++) {
if ((ind.isCovered(j)) && (!tokensGlobal[j])) {
tokensGlobal[j] = true;
count++;
}
}
ind.setFitness(ind.getFitness() * (1.0 * count / ind.ideal()));
}
}
}
/**
* <p>
* It eliminates redundant rules (rules whose fitness equals zero after Token Competition).
* If some of the training examples remain with their tokens free, new rules built from those
* examples are generated to cover them
* </p>
*/
private void replaceRedundantRules() {
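//"libres" gathers the indices of the examples whose token remained free after Token Competition;
//"i" is advanced past the rules with positive fitness, and the redundant rules from that point
//on are rebuilt from those uncovered examples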
int i;
ArrayList<Integer> libres = new ArrayList<Integer>();
for (int j = 0; j < tokensGlobal.length; j++) {
if (!tokensGlobal[j]) {
libres.add(Integer.valueOf(j));
}
}
for (i = 0;
(i < poblacion.size()) && (poblacion.get(i).getFitness() > 0);
i++) {
;
}
for (int j = 0; (j < libres.size()) && (i + j < tamPoblacion); j++) {
poblacion.get(i + j).replace(train.getExample(libres.get(j).intValue()), train);
}
}
/**
* <p>
* It reduces the population to half its size ("tamPoblacion" individuals), removing the second (worst) half of the sorted list
* </p>
*/
private void cutPopulation() {
for (int i = (tamPoblacion * 2) - 1; i >= tamPoblacion; i--) {
poblacion.remove(i);
}
}
}