/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Discretizers.HDD;
import java.util.*;
import keel.Algorithms.Discretizers.Basic.*;
import keel.Algorithms.Genetic_Rule_Learning.Globals.*;
/**
* <p>
* This class implements the HDD discretizer.
* </p>
*
* @author Written by Salvador Garcia (University of Jaén - Jaén) 20/03/2011
* @version 1.0
* @since JDK1.5
*/
public class HDD extends Discretizer {

    /** Coefficient that scales the expected CAIM value (CAIMexp) used by the stopping rule. */
    double R;

    /**
     * Builder.
     *
     * @param _coefficient value stored in {@link #R}
     */
    public HDD (double _coefficient) {
        R = _coefficient;
    }

    /**
     * Greedily selects cut points for one attribute.
     * <p>
     * Candidate cut points are the midpoints between consecutive entries of
     * {@code values[begin..end]}. On every iteration the candidate giving the
     * highest fitness (see {@link #computeFitness}) when added to the already
     * accepted cut points is picked. During the first {@code numClasses - 1}
     * iterations the pick is always accepted; afterwards it is accepted only
     * if it improves the fitness or if the loss stays below
     * {@code (CAIMl - CAIMexp) / (numClasses - 1)}. The search stops at the
     * first rejected pick or when no candidates remain.
     * </p>
     *
     * @param attribute index of the attribute in {@code realValues}
     * @param values instance indexes; assumed to be ordered by ascending
     *        attribute value within {@code [begin, end]} (the ordering is
     *        established by the caller, not here)
     * @param begin first position of {@code values} to use (inclusive)
     * @param end last position of {@code values} to use (inclusive)
     * @return the accepted cut points (a {@code Vector<Double>}), sorted ascending
     */
    protected Vector discretizeAttribute(int attribute, int[] values, int begin, int end) {
        Vector<Double> cp = new Vector<Double>();
        Vector<Double> mejorCP = new Vector<Double>();

        /* First step of HDD algorithm: collect the candidate cut points. */
        // Only positions begin..end are scanned, the same range that is counted
        // below; entries of 'values' outside that range are never dereferenced.
        for (int i = begin; i < end; i++) {
            double actual = realValues[attribute][values[i]];
            double cutPoint = (actual + realValues[attribute][values[i + 1]]) / 2.0;
            // Equal neighbours give a midpoint equal to the value itself: no boundary.
            if (cutPoint != actual) {
                cp.addElement(Double.valueOf(cutPoint));
            }
        }

        /* Second step of HDD algorithm: greedy selection. */
        // NOTE(review): end+1 is used as the instance count, which matches the
        // size of the range only when begin is 0 - confirm against the caller.
        double CAIMexp = (end + 1) * R / Parameters.numClasses;
        // Floating-point division: an int/int quotient would be truncated
        // before being stored in the double.
        double CAIMl = (end + 1) / (double) Parameters.numClasses;

        int[] ordenados = new int[end - begin + 1];
        System.arraycopy(values, begin, ordenados, 0, ordenados.length);

        // Fitness of the scheme without cut points (a single interval).
        int[][] quanta = new int[Parameters.numClasses][mejorCP.size() + 1];
        int[] sumaAbajo = new int[mejorCP.size() + 1];
        int[] sumaDerecha = new int[Parameters.numClasses];
        int[] total = new int[1];
        construyeQuanta(quanta, sumaAbajo, sumaDerecha, total, mejorCP, ordenados, attribute);
        double globalCAIM = computeFitness(quanta, sumaAbajo, sumaDerecha, total[0]);

        int k = 1;
        boolean parar = false;
        while (!parar && cp.size() > 0) {
            // One more interval than the accepted scheme currently has.
            quanta = new int[Parameters.numClasses][mejorCP.size() + 2];
            sumaAbajo = new int[mejorCP.size() + 2];
            sumaDerecha = new int[Parameters.numClasses];
            total = new int[1];

            // Find the candidate with the highest fitness; ties keep the earliest one.
            int mejorPos = 0;
            double CAIMh = evaluaCandidato(mejorCP, cp.elementAt(0), quanta, sumaAbajo,
                    sumaDerecha, total, ordenados, attribute);
            for (int i = 1; i < cp.size(); i++) {
                double fitness = evaluaCandidato(mejorCP, cp.elementAt(i), quanta, sumaAbajo,
                        sumaDerecha, total, ordenados, attribute);
                if (fitness > CAIMh) {
                    CAIMh = fitness;
                    mejorPos = i;
                }
            }

            // Unconditional acceptance while k < numClasses; afterwards the pick
            // must improve the fitness or lose less than the tolerated amount.
            boolean aceptar = true;
            if (k >= Parameters.numClasses) {
                aceptar = CAIMh > globalCAIM
                        || ((globalCAIM - CAIMh) < (CAIMl - CAIMexp) / (Parameters.numClasses - 1));
            }

            if (aceptar) {
                globalCAIM = CAIMh;
                mejorCP.addElement(cp.elementAt(mejorPos));
                Collections.sort(mejorCP);
                cp.removeElementAt(mejorPos);
            } else {
                parar = true;
            }
            k++;
        }

        return mejorCP;
    }

    /**
     * Computes the fitness of the accepted cut points extended with one candidate.
     * The work arrays are overwritten.
     *
     * @param aceptados cut points accepted so far (not modified)
     * @param candidato candidate cut point to add
     * @return fitness of the extended scheme
     */
    private double evaluaCandidato (Vector<Double> aceptados, Double candidato, int quanta[][], int sumaAbajo[], int sumaDerecha[], int total[], int ordenados[], int attribute) {
        Vector<Double> cpTmp = new Vector<Double>(aceptados);
        cpTmp.addElement(candidato);
        Collections.sort(cpTmp);
        construyeQuanta(quanta, sumaAbajo, sumaDerecha, total, cpTmp, ordenados, attribute);
        return computeFitness(quanta, sumaAbajo, sumaDerecha, total[0]);
    }

    /**
     * Fills the class-by-interval count matrix for a set of cut points.
     *
     * @param quanta output, {@code quanta[c][j]} = instances of class c in interval j
     * @param sumaAbajo output, per-interval totals (column sums of {@code quanta})
     * @param sumaDerecha output, per-class totals (row sums of {@code quanta})
     * @param total output, {@code total[0]} = number of counted instances
     * @param cutPoints cut points in ascending order
     * @param ordenados instance indexes to count; assumed ordered by ascending
     *        attribute value
     * @param attribute index of the attribute in {@code realValues}
     */
    private void construyeQuanta (int quanta[][], int sumaAbajo[], int sumaDerecha[], int total[], Vector <Double> cutPoints, int ordenados[], int attribute) {
        for (int i = 0; i < quanta.length; i++) {
            Arrays.fill(quanta[i], 0);
        }
        Arrays.fill(sumaAbajo, 0);
        Arrays.fill(sumaDerecha, 0);
        total[0] = 0;

        // The interval index advances at most one step per instance, so this
        // relies on at least one instance lying between consecutive cut points.
        int intervalo = 0;
        for (int i = 0; i < ordenados.length; i++) {
            if (intervalo < cutPoints.size()
                    && realValues[attribute][ordenados[i]] >= cutPoints.elementAt(intervalo)) {
                intervalo++;
            }
            quanta[classOfInstances[ordenados[i]]][intervalo]++;
        }

        for (int i = 0; i < quanta.length; i++) {
            for (int j = 0; j < quanta[i].length; j++) {
                sumaAbajo[j] += quanta[i][j];
                sumaDerecha[i] += quanta[i][j];
                total[0] += quanta[i][j];
            }
        }
    }

    /**
     * Computes the fitness of a count matrix: for every interval the square of
     * its largest class count divided by the interval total, summed over the
     * intervals and divided by the number of intervals.
     *
     * @param quanta class-by-interval counts
     * @param sumaAbajo per-interval totals
     * @param sumaDerecha per-class totals (not used by the computation)
     * @param total number of counted instances (not used by the computation)
     * @return the fitness value; empty intervals contribute 0
     */
    private double computeFitness (int quanta[][], int sumaAbajo[], int sumaDerecha[], int total) {
        double suma = 0.0;
        for (int i = 0; i < quanta[0].length; i++) {
            int mejorCount = quanta[0][i];
            for (int j = 1; j < quanta.length; j++) {
                if (quanta[j][i] > mejorCount) {
                    mejorCount = quanta[j][i];
                }
            }
            // An empty interval would give 0/0 = NaN, which would make every
            // later fitness comparison false; it simply adds nothing.
            if (sumaAbajo[i] > 0) {
                double temp = (double) mejorCount / (double) sumaAbajo[i];
                temp *= (double) mejorCount;
                suma += temp;
            }
        }
        return suma / (double) quanta[0].length;
    }
}