/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.Discretizers.IDD; import java.util.*; import keel.Algorithms.Discretizers.Basic.*; import keel.Algorithms.Genetic_Rule_Learning.Globals.Parameters; import keel.Dataset.Attribute; import keel.Dataset.Attributes; /** * <p> * This class implements the IDD * </p> * * <p> * @author Written by Jose A. Saez Munoz (SCI2S research group, DECSAI in ETSIIT, University of Granada), 21/12/2009 * @version 1.0 * @since JDK1.6 * </p> */ public class IDD extends Discretizer { // rank class is used to sort values private class rank { int pos; double value; public rank(int i, double v){pos=i;value=v;} } // tags private static int HIGHER_FIRST = 0; private static int LOWER_FIRST = 1; // instance variables private double[] cutpoints; // possible cutpoints private int numcp; // number of possible cutpoints private int delta; // neigboorhood size private int windowsSize; // windows size to vote later private String distanceFunction; // kind of distance function private int[][] quantaMatrixOfClasses; // number of examples per class with value = cutpoint private int[] numInterAtt; // number of intervals of each attribute //****************************************************************************************************** /** * <p> * Constructor of the class * </p> */ public IDD(){ int i; numInterAtt = new int[Parameters.numAttributes]; if(Parameters.setConfig){ for(i = 0 ; i < Parameters.numAttributes ; ++i){ Attribute att = Attributes.getAttribute(i); if(att.getType() == Attribute.REAL || att.getType() == Attribute.INTEGER) numInterAtt[i] = Parameters.numIntervals; // default case else numInterAtt[i] = 0; } } else{ String[] inter = Parameters.numIntrvls.split("_"); int cont = 0; for(i = 0 ; i < Parameters.numAttributes ; ++i){ Attribute att = Attributes.getAttribute(i); if(att.getType() == Attribute.REAL || att.getType() == Attribute.INTEGER) numInterAtt[i] = Integer.parseInt(inter[cont++]); // default case else numInterAtt[i] = 0; } } } //****************************************************************************************************** /** * <p> * Returns a vector with the discretized values * </p> * @param attribute index of the attribute to discretize * @param values vector of indexes of the instances sorted from lowest to highest value of attribute * @param begin index of the instance with the lowest value of attribute * @param end index of the instance with the highest value of attribute * @return vector with the discretized values */ protected Vector discretizeAttribute(int attribute, int []values, int begin, int end){ int i, j; // loop indexes windowsSize = Parameters.WindowsSize; delta = Parameters.Neighborhood; distanceFunction = Parameters.DistanceFunction; // inicialize parameters cutpoints = new double[end+1]; numcp = 0; quantaMatrixOfClasses = new int[end+1][Parameters.numClasses]; for(i = 0 ; i < end+1 ; ++i) for(j = 0 ; j < Parameters.numClasses ; ++j) quantaMatrixOfClasses[i][j] = 0; //get the differents values of attribute and his classes quantaMatrixOfClasses[numcp][classOfInstances[values[begin]]]++; double value = realValues[attribute][values[begin]]; cutpoints[numcp++] = value; for(i = begin+1 ; i <= end ; ++i){ if(value != realValues[attribute][values[i]]){ quantaMatrixOfClasses[numcp][classOfInstances[values[i]]]++; cutpoints[numcp++] = realValues[attribute][values[i]]; value = realValues[attribute][values[i]]; } else quantaMatrixOfClasses[numcp-1][classOfInstances[values[i]]]++; } double max; int index; double[] fitness = new double[numcp]; int[] votes = new int[numcp]; for(i = 0 ; i < numcp ; ++i) fitness[i] = votes[i] = 0; // compute fitness for all the cutpoints for(i = delta ; i < numcp-delta ; ++i) fitness[i] = distance(i, attribute); // compute votes for each cutpoint for(i = 1 ; i < numcp-delta ; ++i){ max = fitness[i]; index = i; for(j = i+1 ; j < i+windowsSize ; ++j){ if(fitness[j] > max){ max = fitness[j]; index = j; } } votes[index]++; } //return cutpoints Vector cp = new Vector(); if(numInterAtt[attribute] == 0){ for(i = 0 ; i < numcp ; ++i){ if(votes[i] == windowsSize) cp.add(cutpoints[i]); } } // return the cutpoints with more votes else{ // get the numIntervals highest cutpoints basis on votes rank[] r = new rank[numcp]; for(i = 0 ; i < r.length ; ++i) r[i] = new rank(i,votes[i]*fitness[i]); sortValues(r,0,numcp-1,HIGHER_FIRST); // sort these cutpoints int numfinalcp = numInterAtt[attribute]-1; rank[] r1 = new rank[numfinalcp]; for(i = 0 ; i < numfinalcp ; ++i) r1[i] = new rank(i,cutpoints[r[i].pos]); sortValues(r1,0,numfinalcp-1,LOWER_FIRST); for(i = 0 ; i < numfinalcp ; ++i) cp.add(r1[i].value); } return cp; } //****************************************************************************************************** /** * <p> * Computes the distance between two intervals * </p> * @param i index of the cutpoint * @param attribute index of the attribute * @return the distance value */ private double distance(int i, int attribute){ if(distanceFunction.equals("nominalOutput1")) return NominalClassesDistance_1(i, attribute); if(distanceFunction.equals("nominalOutput2")) return NominalClassesDistance_2(i, attribute); return 0; } //****************************************************************************************************** /** * <p> * Computes the distance between two intervals as follows: first, finds the majority class at left * interval and counts the number of instances at right interval that are not of this class. Later, * finds the majority class at right interval and counts the number of instances at left interval * that are not of this class. At last, returns the minimum of these two values. * </p> * @param i the index of the cutpoint * @param attribute index of the attribute * @return the distance between the two intervals */ private double NominalClassesDistance_1(int i, int attribute){ int j, fi, si, s; // loop indexes int beginFirst = i-delta, beginSecond = i+1; // begin of first and second intervals int[][] numClassesAtInterval = new int[2][Parameters.numClasses]; for(j = 0 ; j < Parameters.numClasses ; ++j){ numClassesAtInterval[0][j] = 0; numClassesAtInterval[1][j] = 0; } // compute the majority class in first interval and second interval for(fi = beginFirst, si = beginSecond ; fi < beginFirst+delta ; ++fi, ++si){ for(s = 0 ; s < Parameters.numClasses ; ++s){ numClassesAtInterval[0][s] += quantaMatrixOfClasses[fi][s]; numClassesAtInterval[1][s] += quantaMatrixOfClasses[si][s]; } } // compute the number of instances for majority class int numFirst = numClassesAtInterval[0][0], numSecond = numClassesAtInterval[1][0]; int totalFirst = numClassesAtInterval[0][0] , totalSecond = numClassesAtInterval[1][0]; int pos1 = 0 , pos2 = 0; for(j = 1 ; j < Parameters.numClasses ; ++j){ totalFirst += numClassesAtInterval[0][j]; totalSecond += numClassesAtInterval[1][j]; if(numClassesAtInterval[0][j] > numFirst){ numFirst = numClassesAtInterval[0][j]; pos1 = j; } if(numClassesAtInterval[1][j] > numSecond){ numSecond = numClassesAtInterval[1][j]; pos2 = j; } } // compute the distance double distance1 = totalSecond-numClassesAtInterval[1][pos1]; double distance2 = totalFirst-numClassesAtInterval[0][pos2]; double distance = distance1<distance2?distance1:distance2; return distance; } //****************************************************************************************************** /** * <p> * Computes the distance between two intervals as follows: first, it computes the number of instances * of each class into each interval. Then, computes the euclidean distance between these arrays of number * of instances * </p> * @param i the index of the cutpoint * @param attribute index of the attribute * @return the distance between the two intervals */ private double NominalClassesDistance_2(int i, int attribute){ int fi, si, s; // loop indexes int beginFirst = i-delta, beginSecond = i+1; // begin of first and second intervals int[][] numClassesAtInterval = new int[2][Parameters.numClasses]; for(int j = 0 ; j < Parameters.numClasses ; ++j){ numClassesAtInterval[0][j] = 0; numClassesAtInterval[1][j] = 0; } // compute the majority class in first interval and second interval for(fi = beginFirst, si = beginSecond ; fi < beginFirst+delta ; ++fi, ++si){ for(s = 0 ; s < Parameters.numClasses ; ++s){ numClassesAtInterval[0][s] += quantaMatrixOfClasses[fi][s]; numClassesAtInterval[1][s] += quantaMatrixOfClasses[si][s]; } } // compute the distance double distance = 0; for(s = 0 ; s < Parameters.numClasses ; ++s) distance += Math.pow(numClassesAtInterval[0][s]-numClassesAtInterval[1][s],2); return Math.sqrt(distance); } //****************************************************************************************************** /** * <p> * Sorts an array * </p> * @param values array to sort * @param begin start position to sort * @param end end position to sort * @param type = HIGHER_FIRST (it sorts from highest to lowest), type = LOWER_FIRST (it sorts from lowest * to higest) */ protected void sortValues(rank[] values, int begin, int end, int type){ double pivot; rank temp; int i, j; i = begin; j = end; pivot = values[(i+j)/2].value; do { if(type == HIGHER_FIRST){ while(values[i].value > pivot) i++; while(values[j].value < pivot) j--; } if(type == LOWER_FIRST){ while(values[i].value < pivot) i++; while(values[j].value > pivot) j--; } if(i <= j){ if(i < j){ temp = values[i]; values[i] = values[j]; values[j] = temp; } i++; j--; } }while(i <= j); if(begin < j) sortValues(values,begin,j,type); if(i < end) sortValues(values,i,end,type); } }