/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Pedro González (University of Jaen) 15/08/2004
* @version 1.0
* @since JDK1.5
* </p>
*/
package keel.Algorithms.Subgroup_Discovery.NMEEFSD.Calculate;
/**
 * <p>
 * This class is defined to manage the information gain of each attribute of the dataset
 * </p>
 */
public class Gain {

    /**
     * <p>
     * Computes and stores the info gain values
     * </p>
     * <p>
     * For every variable <code>v</code> the value
     * <code>H(class) - sum_k P(k) * H(class | k)</code> is written to
     * <code>StCalculate.GI[v]</code>, where <code>k</code> ranges over the values
     * of a discrete variable (from <code>(int)min</code> to <code>(int)max</code>)
     * or over the <code>n_etiq</code> intervals of a continuous one.
     * </p>
     * <p>
     * <code>P(k)</code> is the number of counted examples taking value <code>k</code>
     * divided by the total number of examples <code>StCalculate.n_eje</code>, so
     * examples skipped as lost values lower the weights of that variable.
     * <code>H(class | k)</code> uses the class frequencies among the examples that
     * take value <code>k</code>.
     * </p>
     * <p>
     * If a continuous value falls in no interval and is not flagged as lost, an
     * error message is printed and the method returns before writing anything to
     * <code>StCalculate.GI</code>.
     * </p>
     **/
    public static void Init () {
        int i, v, k, c, clase;
        boolean encontrado;
        float info_gk, p_clase, entropia_cond;

        // Freshly allocated numeric arrays are zero-filled by the JVM, so the
        // counters need no explicit initialisation loops.
        int num_clase[] = new int[StCalculate.n_clases];
        // n_valor[v][k]: examples whose variable v takes value/interval k
        float n_valor[][] = new float[StCalculate.num_vars][StCalculate.MaxValores];
        // n_clase_valor[c][v][k]: same, restricted to examples of class c
        float n_clase_valor[][][] = new float [StCalculate.n_clases][StCalculate.num_vars][StCalculate.MaxValores];

        // Frequency counting
        for (i=0; i<StCalculate.n_eje; i++) {
            clase = StCalculate.tabla[i].clase;
            num_clase[clase]++;   // distribution by classes
            for (v=0; v<StCalculate.num_vars; v++) {   // distribution by values
                if (!StCalculate.var[v].continua) {
                    // Discrete variable: the value itself is the index; lost values are skipped
                    if (!Calculate.getLost(i,v)) {
                        k = (int)StCalculate.tabla[i].ejemplo[v];
                        n_valor[v][k]++;
                        n_clase_valor[clase][v][k]++;
                    }
                }
                else {
                    // Continuous variable: look for the first interval whose bound
                    // is not below the value
                    encontrado = false;
                    k = 0;
                    while (!encontrado && k<StCalculate.var[v].n_etiq) {
                        if (StCalculate.tabla[i].ejemplo[v]<=StCalculate.intervalos[v][k])
                            encontrado = true;
                        else
                            k++;
                    }
                    if (encontrado) {
                        n_valor[v][k]++;
                        n_clase_valor[clase][v][k]++;
                    }
                    else if (!Calculate.getLost(i,v)) {
                        // No interval holds the value and it is not a lost one: give up
                        System.out.println("Fallo al calcular la ganancia de infor, Variable " + v + " Ejemplo " + i);
                        return;
                    }
                    // otherwise it is a lost value and is simply not counted
                }
            }
        }

        // Info Gk computation: entropy of the class distribution
        info_gk = 0;
        for (c=0; c<StCalculate.n_clases; c++) {
            p_clase = ((float)num_clase[c])/StCalculate.n_eje;
            if (p_clase>0)
                info_gk -= p_clase * log2(p_clase);
        }

        // Information gain computation for each attribute
        for (v=0; v<StCalculate.num_vars; v++) {
            entropia_cond = 0;
            if (!StCalculate.var[v].continua) {   // Discrete variable
                for (k=(int)StCalculate.var[v].min; k<=(int)StCalculate.var[v].max; k++)
                    entropia_cond += weightedEntropy(n_valor, n_clase_valor, v, k);
            }
            else {   // Continuous variable
                for (k=0; k<StCalculate.var[v].n_etiq; k++)
                    entropia_cond += weightedEntropy(n_valor, n_clase_valor, v, k);
            }
            StCalculate.GI[v] = info_gk - entropia_cond;
        }
    }

    /**
     * <p>
     * Returns the contribution of one value (or interval) of a variable to the
     * conditional class entropy: <code>P(value) * H(class | value)</code>
     * </p>
     * @param n_valor           Number of examples per variable and value
     * @param n_clase_valor     Number of examples per class, variable and value
     * @param variable          Index of the variable
     * @param valor             Index of the value or interval
     * @return                  The weighted entropy term; 0 when no example takes the value
     */
    private static float weightedEntropy (float n_valor[][], float n_clase_valor[][][], int variable, int valor) {
        float total = n_valor[variable][valor];
        if (total <= 0)
            return 0;   // unseen value: contributes nothing and avoids a 0/0 division

        float entropia = 0;
        for (int c=0; c<StCalculate.n_clases; c++) {
            float conjunta = n_clase_valor[c][variable][valor];
            if (conjunta > 0) {
                // Conditional probability: normalised by the examples taking
                // this value, not by the whole dataset
                float p = conjunta / total;
                entropia -= p * log2(p);
            }
        }
        return (total / StCalculate.n_eje) * entropia;
    }

    /**
     * <p>
     * Base-2 logarithm in single precision
     * </p>
     * @param x     Value whose logarithm is computed
     * @return      log2(x)
     */
    private static float log2 (float x) {
        return (float)(Math.log((double)x)/Math.log(2));
    }
}