/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Discretizers.CACC;
import java.util.*;
import keel.Algorithms.Discretizers.Basic.*;
import keel.Algorithms.Genetic_Rule_Learning.Globals.Parameters;
/**
 * <p>
 * This class implements the CACC (Class-Attribute Contingency Coefficient) discretizer.
 * </p>
 *
 * @author Written by Jose A. Saez Munoz (SCI2S research group, DECSAI in ETSIIT, University of Granada), 21/12/2009
 * @version 1.0
 * @since JDK1.6
 */
public class CACC extends Discretizer {

    /** Length of {@code realValues[attribute]} for the attribute currently being discretized. */
    private int numInstances;

    /** Candidate boundaries: minimum, midpoints of adjacent distinct values, maximum. */
    private double[] cutpoints;

    /** Parallel to {@link #cutpoints}: 1 when the boundary belongs to the current scheme, 0 otherwise. */
    private int[] selected;

    /** Number of boundaries assumed selected (both outer boundaries included) when the quanta matrix is built. */
    private int numcp;

    /**
     * Quanta matrix: rows {@code 0..numClasses-1} are classes, columns {@code 0..numcp-2} are intervals.
     * Column {@code numcp-1} holds the per-class totals and row {@code numClasses} the per-interval totals.
     */
    private int[][] matrix;

    //******************************************************************************************************

    /**
     * <p>
     * Constructor of the class
     * </p>
     */
    public CACC() {
    }

    //******************************************************************************************************

    /**
     * <p>
     * Returns a vector with the cut points chosen for one attribute.
     * </p>
     * <p>
     * Starting from the single interval [min, max], every round tentatively adds each boundary that is not
     * yet in the scheme, evaluates its cacc value and keeps the best one of that round. The best boundary is
     * accepted while it improves the cacc of the current scheme or while fewer than
     * {@code Parameters.numClasses} rounds have been accepted; otherwise the search stops.
     * </p>
     * <p>
     * NOTE(review): the quanta matrix is built from every entry of {@code realValues[attribute]}, not only
     * from {@code values[begin..end]}; confirm that entries outside that range (if any can exist) are meant
     * to take part in the computation.
     * </p>
     *
     * @param attribute index of the attribute to discretize
     * @param values vector of indexes of the instances sorted from lowest to highest value of attribute
     * @param begin index of the instance with the lowest value of attribute
     * @param end index of the instance with the highest value of attribute
     * @return vector with the selected inner cut points (as {@code Double}), in ascending order; empty when
     *         the range is empty or no boundary is selected
     */
    protected Vector discretizeAttribute(int attribute, int[] values, int begin, int end) {
        Vector<Double> result = new Vector<Double>();

        // An empty range has no values to discretize; the original code failed here with an index error.
        if (values == null || begin > end) {
            return result;
        }

        numInstances = realValues[attribute].length;

        // 1) Form the set of all distinct values in ascending order (values[] is already sorted).
        double[] distinct = new double[end - begin + 1];
        int size = 0;
        double previous = realValues[attribute][values[begin]];
        distinct[size++] = previous;
        for (int i = begin + 1; i <= end; ++i) {
            double current = realValues[attribute][values[i]];
            if (current != previous) {
                distinct[size++] = current;
                previous = current;
            }
        }

        // 2) Candidate boundaries: minimum + midpoints of adjacent distinct values + maximum.
        cutpoints = new double[size + 1];
        selected = new int[cutpoints.length];
        int lastIndex = cutpoints.length - 1;
        cutpoints[0] = realValues[attribute][values[begin]];
        cutpoints[lastIndex] = realValues[attribute][values[end]];
        for (int i = 1; i < lastIndex; ++i) {
            cutpoints[i] = (distinct[i - 1] + distinct[i]) / 2;
        }
        // The initial scheme D = {[d0, dn]} contains only the two outer boundaries.
        selected[0] = 1;
        selected[lastIndex] = 1;

        // 3) Greedy search, starting with Globalcacc = 0.
        numcp = 2;
        double globalCacc = 0;
        int k = 1;
        boolean finished = false;
        while (!finished) {
            // The best candidate must be searched from scratch in every round: carrying the previous
            // round's maximum over would leave a stale (already selected) position as the "winner".
            int bestPos = -1;
            double bestCacc = Double.NEGATIVE_INFINITY;

            numcp++; // every candidate scheme has exactly one boundary more than the current one
            for (int i = 1; i < lastIndex; ++i) {
                if (selected[i] != 0) {
                    continue;
                }
                selected[i] = 1;
                double candidate = caccValue(attribute);
                selected[i] = 0;
                if (candidate > bestCacc) {
                    bestCacc = candidate;
                    bestPos = i;
                }
            }
            numcp--;

            // bestPos < 0 means every inner boundary is already in the scheme: nothing left to add.
            if (bestPos >= 0 && (bestCacc > globalCacc || k < Parameters.numClasses)) {
                selected[bestPos] = 1;
                numcp++;
                globalCacc = bestCacc;
                k++;
            } else {
                finished = true;
            }
        }

        // Only inner boundaries are reported; the minimum and maximum are not cut points.
        for (int i = 1; i < lastIndex; ++i) {
            if (selected[i] == 1) {
                result.add(cutpoints[i]);
            }
        }
        return result;
    }

    //******************************************************************************************************

    /**
     * <p>
     * Computes the cacc value of the discretization described by {@link #selected} and {@link #numcp}.
     * </p>
     * <p>
     * With N = {@code numInstances}, the value is {@code sqrt(y / (y + N))} where
     * {@code y = N * (sum(q_ir^2 / (rowTotal_i * colTotal_r)) - 1) / ln(numcp)}.
     * </p>
     * <p>
     * NOTE(review): {@code numcp} counts boundaries (intervals + 1); the published CACC formula divides by
     * the logarithm of the number of intervals. Kept as in the original implementation - confirm intent.
     * </p>
     *
     * @param attribute index of the attribute to discretize
     * @return the cacc value, never NaN for {@code numcp >= 3}
     */
    private double caccValue(int attribute) {
        createQuantaMatrix(attribute);

        int numClasses = Parameters.numClasses;
        int totalColumn = numcp - 1;
        double y = 0;
        for (int i = 0; i < numClasses; ++i) {
            for (int r = 0; r < totalColumn; ++r) {
                double q = matrix[i][r];
                // An empty cell contributes nothing; skipping it also avoids 0/0 = NaN when a class
                // (or an interval) has no instances at all.
                if (q == 0) {
                    continue;
                }
                // Multiply in double: the int product of two totals overflows for large data sets.
                double denominator = (double) matrix[i][totalColumn] * (double) matrix[numClasses][r];
                y += (q * q) / denominator;
            }
        }
        y--;
        // Mathematically y >= 0 here; clamp rounding residue so the square root cannot yield NaN.
        if (y < 0) {
            y = 0;
        }
        y *= (numInstances / (Math.log(numcp)));
        return Math.sqrt(y / (y + numInstances));
    }

    //******************************************************************************************************

    /**
     * <p>
     * Creates the quanta matrix based on the selected cutpoints.
     * </p>
     * <p>
     * Each entry of {@code realValues[attribute]} is counted in the first interval whose selected upper
     * boundary is greater than or equal to the value; a value above every selected boundary is not counted.
     * Afterwards the per-class and per-interval totals are filled in.
     * </p>
     *
     * @param attribute index of the attribute
     */
    private void createQuantaMatrix(int attribute) {
        int numClasses = Parameters.numClasses;
        int totalColumn = numcp - 1;

        // Java zero-initializes int arrays, so no explicit clearing is needed.
        matrix = new int[numClasses + 1][numcp];

        // Count every instance in the interval it falls into.
        for (int i = 0; i < numInstances; ++i) {
            double value = realValues[attribute][i];
            int interval = 0;
            for (int point = 1; point < cutpoints.length; ++point) {
                if (selected[point] != 1) {
                    continue;
                }
                if (value <= cutpoints[point]) {
                    matrix[classOfInstances[i]][interval]++;
                    break;
                }
                interval++;
            }
        }

        // Totals per class (last column).
        for (int clazz = 0; clazz < numClasses; ++clazz) {
            int sum = 0;
            for (int j = 0; j < totalColumn; ++j) {
                sum += matrix[clazz][j];
            }
            matrix[clazz][totalColumn] = sum;
        }

        // Totals per interval (last row).
        for (int j = 0; j < totalColumn; ++j) {
            int sum = 0;
            for (int clazz = 0; clazz < numClasses; ++clazz) {
                sum += matrix[clazz][j];
            }
            matrix[numClasses][j] = sum;
        }
    }
}