/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Genetic_Rule_Learning.DMEL;
/**
* <p>Title: Algorithm</p>
*
* <p>Description: It contains the implementation of the algorithm</p>
*
*
* <p>Company: KEEL </p>
*
* @author Alberto Fernández
* @version 1.0
*/
import java.io.IOException;
import java.util.*;
import org.core.*;
public class DMEL {
myDataset train, val, test; //training, validation and test partitions
String outputTr, outputTst, outputRule; //output file names: training results, test results, rule base
int nClasses; //number of classes of the classification problem
//Parameters
long seed; //seed for the random number generator
//We may declare here the algorithm's parameters
int popSize; //population size of the genetic algorithm
double pCross; //crossover probability
double pMut; //mutation probability
int numGenerations; //maximum number of generations per rule level
private boolean somethingWrong = false; //to check if everything is correct.
/**
 * Default constructor. Builds an empty instance; no data is read and no
 * parameters are initialized.
 */
public DMEL () {
}
/**
 * It reads the data from the input files (training, validation and test) and parses all the parameters
 * from the parameters array. If reading fails, or the training set contains real-valued
 * attributes (which this algorithm cannot handle), the {@code somethingWrong} flag is set
 * and {@link #execute()} will abort.
 * @param parameters parseParameters It contains the input files, output files and parameters
 */
public DMEL(parseParameters parameters) {
train = new myDataset();
val = new myDataset();
test = new myDataset();
try {
System.out.println("\nReading the training set: " +
parameters.getTrainingInputFile());
train.readClassificationSet(parameters.getTrainingInputFile(), true);
System.out.println("\nReading the validation set: " +
parameters.getValidationInputFile());
val.readClassificationSet(parameters.getValidationInputFile(), false);
System.out.println("\nReading the test set: " +
parameters.getTestInputFile());
test.readClassificationSet(parameters.getTestInputFile(), false);
} catch (IOException e) {
System.err.println(
"There was a problem while reading the input data-sets: " +
e);
somethingWrong = true;
}
//We may check if there are some numerical attributes, because our algorithm may not handle them:
somethingWrong = somethingWrong || train.hasRealAttributes();
outputTr = parameters.getTrainingOutputFile();
outputTst = parameters.getTestOutputFile();
outputRule = parameters.getOutputFile(0);
//Now we parse the parameters, for example:
seed = Long.parseLong(parameters.getParameter(0));
popSize = Integer.parseInt(parameters.getParameter(1));
pCross = Double.parseDouble(parameters.getParameter(2));
pMut = Double.parseDouble(parameters.getParameter(3));
numGenerations = Integer.parseInt(parameters.getParameter(4));
//...
}
/**
 * It launches the algorithm: encodes the training data as nominal value
 * identifiers, mines interesting first-order rules, evolves higher-order
 * rule sets level by level with a steady-state genetic algorithm, and
 * finally writes the classification outputs and the discovered rules to disk.
 */
public void execute() {
int i, j, k, l;
int t;
int ele;
double prob[];
double aux;
double NUmax = 1.5; //used for lineal ranking
double NUmin = 0.5; //used for lineal ranking
double pos1, pos2;
int sel1, sel2;
int data[][];
int infoAttr[];
int classData[];
Vector <Rule> contenedor = new Vector <Rule> (); //all interesting rules found so far, across every level
Vector <Rule> conjR = new Vector <Rule> (); //interesting rules of the current level; seeds the next level
Rule tmpRule;
Condition tmpCondition[] = new Condition[1];
RuleSet population[];
RuleSet hijo1, hijo2;
if (somethingWrong) { //We do not execute the program
System.err.println("An error was found, the data-set has numerical values.");
System.err.println("Aborting the program");
//We should not use the statement: System.exit(-1);
} else {
Randomize.setSeed (seed);
nClasses = train.getnClasses();
/*Build the nominal data information*/
infoAttr = new int[train.getnInputs()];
for (i=0; i<infoAttr.length; i++) {
infoAttr[i] = train.numberValues(i);
}
//Encode every example as integer value identifiers; -1 marks a missing value
data = new int[train.getnData()][train.getnInputs()];
for (i=0; i<data.length; i++) {
for (j=0; j<data[i].length; j++) {
if (train.isMissing(i, j))
data[i][j] = -1;
else
data[i][j] = train.valueExample(i, j);
}
}
classData = new int[train.getnData()];
for (i=0; i<classData.length; i++) {
classData[i] = train.getOutputAsInteger(i);
}
/*Find first-order rules which result interesting*/
//A one-condition rule is kept when |adjusted residual| > 1.96, the
//two-tailed 5% critical value of the standard normal distribution
for (i=0; i<nClasses; i++) {
for (j=0; j<infoAttr.length; j++) {
for (k=0; k<infoAttr[j]; k++) {
tmpCondition[0] = new Condition(j,k);
tmpRule = new Rule(tmpCondition);
if (Math.abs(computeAdjustedResidual(data, classData, tmpRule, i)) > 1.96) {
if (!contenedor.contains(tmpRule)) {
contenedor.add(tmpRule);
conjR.add(tmpRule);
}
}
}
}
}
//Construct the Baker selection roulette
//(linear ranking; prob[] ends up holding cumulative selection probabilities)
prob = new double[popSize];
for (j=0; j<popSize; j++) {
aux = (double)( NUmax-NUmin)*((double)j/(popSize-1));
prob[j]=(double)(1.0/(popSize)) * (NUmax-aux);
}
for (j=1; j<popSize; j++)
prob[j] = prob[j] + prob[j-1];
/*Steady-State Genetic Algorithm*/
ele = 2; //order (number of conditions) of the rules being produced next
population = new RuleSet[popSize];
while (conjR.size() >= 2) {
t = 0;
System.out.println ("Producing rules of level " + ele);
for (i=0; i<population.length; i++) {
population[i] = new RuleSet(conjR);
population[i].computeFitness(data, classData, infoAttr, contenedor, nClasses);
}
Arrays.sort(population); //best individual first
//Evolve until the generation limit is reached or the population converges
//(best and worst individuals compare equal)
while (t < numGenerations && !population[0].equals(population[popSize-1])) {
System.out.println ("Generation " + t);
t++;
/*Baker's selection*/
pos1 = Randomize.Rand();
pos2 = Randomize.Rand();
for (l=0; l<popSize && prob[l]<pos1; l++);
sel1 = l;
for (l=0; l<popSize && prob[l]<pos2; l++);
sel2 = l;
hijo1 = new RuleSet(population[sel1]);
hijo2 = new RuleSet(population[sel2]);
if (Randomize.Rand() < pCross) {
RuleSet.crossover1(hijo1, hijo2);
} else {
RuleSet.crossover2(hijo1, hijo2);
}
RuleSet.mutation(hijo1, conjR, pMut, data, classData, infoAttr, contenedor, nClasses);
RuleSet.mutation(hijo2, conjR, pMut, data, classData, infoAttr, contenedor, nClasses);
hijo1.computeFitness(data, classData, infoAttr, contenedor, nClasses);
hijo2.computeFitness(data, classData, infoAttr, contenedor, nClasses);
//Steady-state replacement: the two offspring overwrite the two worst individuals
population[popSize-2] = new RuleSet(hijo1);
population[popSize-1] = new RuleSet(hijo2);
Arrays.sort(population);
}
/*Decode function*/
ele++;
conjR.removeAllElements();
//NOTE(review): ele was incremented just above, so the level printed here is
//one higher than the level that was just evolved — confirm this is intended.
System.out.println ("Fitness of the best chromosome in rule level " + ele + ": " + population[0].fitness);
//Harvest the interesting, well-formed rules of the best individual for the next level
for (i=0; i<population[0].getRuleSet().length; i++) {
//NOTE(review): the rule index i is passed as the class argument of
//computeAdjustedResidual here — looks suspicious; verify against the DMEL paper.
if (Math.abs(computeAdjustedResidual(data, classData, population[0].getRule(i), i)) > 1.96 ) {
if (validarRegla(population[0].getRule(i)) && !contenedor.contains(population[0].getRule(i))) {
contenedor.add(population[0].getRule(i));
conjR.add(population[0].getRule(i));
}
}
}
}
//Finally we should fill the training and test output files
doOutput(this.val, this.outputTr, data, classData, infoAttr, contenedor, nClasses);
doOutput(this.test, this.outputTst, data, classData, infoAttr, contenedor, nClasses);
/*Print the rule obtained*/
for (i=contenedor.size()-1; i>=0; i--) {
if (reglaPositiva(this.train, data,classData,infoAttr,nClasses,contenedor.elementAt(i))) {
Fichero.AnadirtoFichero(outputRule, contenedor.elementAt(i).toString(train));
Fichero.AnadirtoFichero(outputRule, " -> " + consecuente(this.train, data,classData,infoAttr,nClasses,contenedor.elementAt(i)) + "\n");
}
}
System.out.println("Algorithm Finished");
}
}
/**
 * Checks that a rule is internally consistent: no attribute may be tested
 * by more than one of its conditions.
 * @param regla rule to validate
 * @return true if every condition refers to a distinct attribute
 */
private boolean validarRegla (Rule regla) {
    int numConds = regla.getRule().length;
    for (int a = 0; a < numConds; a++) {
        int attr = regla.getiCondition(a).getAttribute();
        for (int b = a + 1; b < numConds; b++) {
            if (attr == regla.getiCondition(b).getAttribute()) {
                return false; //duplicated attribute -> invalid rule
            }
        }
    }
    return true;
}
/**
 * Decides whether a rule is worth reporting: it is "positive" when the best
 * (maximum over all classes) accumulated weight of evidence of its conditions
 * is strictly greater than zero.
 * @param dataset dataset the rule refers to (kept for interface compatibility)
 * @param data encoded nominal training examples
 * @param classData class identifier of each example
 * @param infoAttr number of values per attribute
 * @param nClases number of classes
 * @param rule rule under evaluation
 * @return true if the maximum weight of evidence over the classes is positive
 */
private boolean reglaPositiva (myDataset dataset, int data[][], int classData[], int infoAttr[], int nClases, Rule rule) {
    double best = Double.NEGATIVE_INFINITY;
    for (int c = 0; c < nClases; c++) {
        //Accumulate the weight of evidence of every condition for class c
        double evidence = 0;
        for (int k = 0; k < rule.getRule().length; k++) {
            evidence += RuleSet.computeWeightEvidence (data, classData, rule.getiCondition(k), c, infoAttr);
        }
        best = Math.max(best, evidence);
    }
    return best > 0;
}
/**
 * Computes the consequent of a rule: the class with the highest accumulated
 * weight of evidence over the rule's conditions, rendered together with that
 * weight as "className [weight]".
 * @param dataset dataset used to translate the class identifier to its name
 * @param data encoded nominal training examples
 * @param classData class identifier of each example
 * @param infoAttr number of values per attribute
 * @param nClases number of classes
 * @param rule rule whose consequent is requested
 * @return the predicted class name followed by its weight of evidence
 */
private String consecuente (myDataset dataset, int data[][], int classData[], int infoAttr[], int nClases, Rule rule) {
    double best = Double.NEGATIVE_INFINITY;
    int bestClass = 0; //defaults to class 0, as in the original tie handling
    for (int c = 0; c < nClases; c++) {
        double evidence = 0;
        for (int k = 0; k < rule.getRule().length; k++) {
            evidence += RuleSet.computeWeightEvidence (data, classData, rule.getiCondition(k), c, infoAttr);
        }
        if (evidence > best) { //strictly greater: first maximum wins
            best = evidence;
            bestClass = c;
        }
    }
    return dataset.getOutputValue(bestClass) + " [" + Double.toString(best) + "]";
}
/**
 * Computes the adjusted residual of a rule for a given class: the sum, over
 * all conditions, of the standardized residual divided by the square root of
 * the maximum-likelihood estimate of its variance.
 * @param data encoded nominal training examples
 * @param classData class identifier of each example
 * @param regla rule under evaluation
 * @param clase class identifier the residual is computed against
 * @return the adjusted residual of the rule for the class
 */
private double computeAdjustedResidual (int data[][], int classData[], Rule regla, int clase) {
    double adjusted = 0;
    for (int c = 0; c < regla.getRule().length; c++) {
        Condition cond = regla.getiCondition(c);
        adjusted += computeStandarizedResidual(data, classData, cond, clase)
                  / Math.sqrt(computeMaximumLikelohoodEstimate(data, classData, cond, clase));
    }
    return adjusted;
}
/**
 * Computes the standardized residual of a condition for a class:
 * (observed count - expected count) / sqrt(expected count).
 * @param data encoded nominal training examples
 * @param classData class identifier of each example
 * @param cond condition under evaluation
 * @param clase class identifier
 * @return the standardized residual
 */
private double computeStandarizedResidual (int data[][], int classData[], Condition cond, int clase) {
    double expected = computeEAipAjq(data, classData, cond, clase);
    double observed = computeCountAipAjq (data, classData, cond, clase);
    return (observed - expected) / Math.sqrt(expected);
}
/**
 * Maximum-likelihood estimate of the variance term used by the adjusted
 * residual: (1 - n1/n3) * (1 - n2/n3), where n1 counts examples of the class
 * with a known (non-missing) value for the condition's attribute, n2 counts
 * examples whose attribute equals the condition's value, and n3 counts all
 * examples with a known value for that attribute.
 * @param data encoded nominal training examples (-1 marks a missing value)
 * @param classData class identifier of each example
 * @param cond condition under evaluation
 * @param clase class identifier
 * @return the estimated variance factor
 */
private double computeMaximumLikelohoodEstimate (int data[][], int classData[], Condition cond, int clase) {
    int attr = cond.getAttribute();
    //Examples of the target class whose attribute value is known
    double inClassKnown = 0;
    for (int i = 0; i < classData.length; i++) {
        if (classData[i] == clase && data[i][attr] != -1) {
            inClassKnown++;
        }
    }
    //Examples whose attribute matches the condition's value
    double matchingValue = 0;
    for (int i = 0; i < data.length; i++) {
        if (data[i][attr] == cond.getValue()) {
            matchingValue++;
        }
    }
    //Examples whose attribute value is known at all
    double knownValue = 0;
    for (int i = 0; i < data.length; i++) {
        if (data[i][attr] != -1) {
            knownValue++;
        }
    }
    return (1 - inClassKnown / knownValue) * (1 - matchingValue / knownValue);
}
/**
 * Expected count e(Aip, Ajq) used by the standardized residual: the sum of
 * (a) examples of the class with a known value for the condition's attribute
 * and (b) examples whose attribute equals the condition's value, divided by
 * the number of examples with a known value for that attribute.
 * @param data encoded nominal training examples (-1 marks a missing value)
 * @param classData class identifier of each example
 * @param cond condition under evaluation
 * @param clase class identifier
 * @return the expected count
 */
private double computeEAipAjq (int data[][], int classData[], Condition cond, int clase) {
    int attr = cond.getAttribute();
    double numerator = 0;
    //Examples of the target class whose attribute value is known
    for (int i = 0; i < classData.length; i++) {
        if (classData[i] == clase && data[i][attr] != -1) {
            numerator++;
        }
    }
    //Plus examples whose attribute matches the condition's value
    for (int i = 0; i < data.length; i++) {
        if (data[i][attr] == cond.getValue()) {
            numerator++;
        }
    }
    //Normalize by the number of examples with a known value for the attribute
    double knownValue = 0;
    for (int i = 0; i < data.length; i++) {
        if (data[i][attr] != -1) {
            knownValue++;
        }
    }
    return numerator / knownValue;
}
/**
 * Observed count: number of examples that belong to the given class AND whose
 * attribute value equals the condition's value.
 * @param data encoded nominal training examples
 * @param classData class identifier of each example
 * @param cond condition under evaluation
 * @param clase class identifier
 * @return how many examples satisfy both the class and the condition
 */
private int computeCountAipAjq (int data[][], int classData[], Condition cond, int clase) {
    int attr = cond.getAttribute();
    int matches = 0;
    for (int i = 0; i < data.length; i++) {
        if (classData[i] == clase && data[i][attr] == cond.getValue()) {
            matches++;
        }
    }
    return matches;
}
/**
 * It generates the output file from a given dataset and stores it in a file.
 * Each line holds the expected class followed by the predicted class of one example.
 * @param dataset myDataset input dataset
 * @param filename String the name of the file
 * @param data containing integer identifiers of nominal values
 * @param classData containing integer identifiers of classes
 * @param infoAttr containing number of values for each attribute
 * @param contenedor containing all the interesting rules
 * @param nClases indicates number of classes
 */
private void doOutput(myDataset dataset, String filename, int data[][], int classData[], int infoAttr[], Vector <Rule> contenedor, int nClases) {
    //StringBuilder avoids the O(n^2) cost of repeated String concatenation
    StringBuilder output = new StringBuilder();
    output.append(dataset.copyHeader()); //we insert the header in the output file
    //We write the output for each example
    for (int i = 0; i < dataset.getnData(); i++) {
        //for classification: "expected predicted"
        //(fixed: use the nClases parameter instead of silently reading the nClasses field)
        output.append(dataset.getOutputAsString(i)).append(" ")
              .append(this.classificationOutput(dataset, i, data, classData, infoAttr, contenedor, nClases))
              .append("\n");
    }
    Fichero.escribeFichero(filename, output.toString());
}
/**
 * Classifies one example of the given dataset by weight of evidence.
 * Every rule in the container that matches the example proposes the class with
 * the highest accumulated weight of evidence; the proposal of the strongest
 * matching rule wins. Rules are scanned from the last (highest level) to the
 * first, and only strictly positive evidence can produce a prediction.
 * @param dataset dataset the example belongs to
 * @param ex index of the example to classify
 * @param data encoded nominal training examples
 * @param classData class identifier of each training example
 * @param infoAttr number of values per attribute
 * @param contenedor all interesting rules mined so far
 * @param nClases number of classes
 * @return the predicted class name, or "Unclassified" if no rule applies
 */
private String classificationOutput(myDataset dataset, int ex, int data[][], int classData[], int infoAttr[], Vector <Rule> contenedor, int nClases) {
int j, k, l;
boolean match;
double tmp1, tmp2;
int pos = 0, classPredicted;
double Waip;
//Encode the example like the training data; -1 marks a missing value
int ejemplo[] = new int[data[0].length];
for (j=0; j<ejemplo.length; j++) {
if (dataset.isMissing(ex, j))
ejemplo[j] = -1;
else
ejemplo[j] = dataset.valueExample(ex, j);
}
classPredicted = -1;
Waip = 0; //best weight so far; starts at 0, so only positive evidence classifies
/*Search a match of the example (following by the container)*/
for (j=contenedor.size()-1; j>=0; j--) {
//A rule matches only if the example satisfies every one of its conditions
match = true;
for (k=0; k<contenedor.elementAt(j).getRule().length && match; k++) {
if (ejemplo[contenedor.elementAt(j).getiCondition(k).getAttribute()] != contenedor.elementAt(j).getiCondition(k).getValue()) {
match = false;
}
}
if (match) {
//The rule fires: pick the class with the highest accumulated weight of evidence
tmp1 = Double.NEGATIVE_INFINITY;
for (l=0; l<nClases; l++) {
tmp2 = 0;
for (k=0; k<contenedor.elementAt(j).getRule().length; k++) {
tmp2 += RuleSet.computeWeightEvidence (data, classData, contenedor.elementAt(j).getiCondition(k), l, infoAttr);
}
if (tmp2 > tmp1) {
tmp1 = tmp2;
pos = l;
}
}
//Keep the prediction of the strongest firing rule (strictly greater wins)
if (tmp1 > Waip) {
classPredicted = pos;
Waip = tmp1;
}
}
}
if (classPredicted == -1)
return "Unclassified";
return dataset.getOutputValue(classPredicted);
}
}