/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. Sánchez (luciano@uniovi.es) J. Alcalá-Fdez (jalcala@decsai.ugr.es) S. García (sglopez@ujaen.es) A. Fernández (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.Discretizers.Id3_Discretizer; import java.util.*; import keel.Algorithms.Discretizers.Basic.*; import keel.Algorithms.Genetic_Rule_Learning.Globals.*; /** * <p> * This class implements the Id3Discretizer discretizer.
* </p> * * <p> * @author Written by Jaume Bacardit (La Salle, Ram�n Llull University - Barcelona) 28/03/2004 * @author Modified by Xavi Sol� (La Salle, Ram�n Llull University - Barcelona) 03/12/2008 * @version 1.1 * @since JDK1.5 * </p> */ public class Id3Discretizer extends Discretizer { protected Vector discretizeAttribute(int attribute,int []values,int begin,int end) { Vector cd=classDistribution(attribute,values,begin,end); if(cd.size()==1) return new Vector(); int numValues=sumValues(cd); double entAll=computeEntropy(cd,numValues); Vector candidateCutPoints = getCandidateCutPoints(attribute,values,begin,end); if(candidateCutPoints.size()==0) return new Vector(); int posMin=((Integer)candidateCutPoints.elementAt(0)).intValue(); double entMin=computePartitionEntropy(attribute,values,begin,posMin,end); for(int i=1,size=candidateCutPoints.size();i<size;i++) { int pos=((Integer)candidateCutPoints.elementAt(i)).intValue(); double ent=computePartitionEntropy(attribute,values,begin,pos,end); if(ent<entMin) { entMin=ent; posMin=pos; } } if(entMin<entAll) { Vector res1=discretizeAttribute(attribute,values,begin,posMin-1); double cutPoint=(realValues[attribute][values[posMin-1]]+realValues[attribute][values[posMin]])/2.0; res1.addElement(new Double(cutPoint)); Vector res2=discretizeAttribute(attribute,values,posMin,end); res1.addAll(res2); return res1; } return new Vector(); } double computePartitionEntropy(int attribute,int []values,int begin,int midPoint,int end) { Vector cd1=classDistribution(attribute,values,begin,midPoint-1); Vector cd2=classDistribution(attribute,values,midPoint,end); int numValues1=sumValues(cd1); int numValues2=sumValues(cd2); double ent1=computeEntropy(cd1,numValues1); double ent2=computeEntropy(cd2,numValues2); return ((double)numValues1*ent1+(double)numValues2*ent2)/(double)(numValues1+numValues2); } double computeEntropy(Vector v,int numValues) { double ent=0; for(int i=0,size=v.size();i<size;i++) { double 
prob=((Integer)v.elementAt(i)).intValue(); prob/=(double)numValues; ent+=prob*Math.log(prob)/Math.log(2); } return -ent; } int sumValues(Vector v) { int sum=0; for(int i=0,size=v.size();i<size;i++) { sum+=((Integer)v.elementAt(i)).intValue(); } return sum; } Vector getCandidateCutPoints(int attribute,int []values,int begin,int end) { Vector cutPoints = new Vector(); double valueAnt=realValues[attribute][values[begin]]; for(int i=begin;i<=end;i++) { double val=realValues[attribute][values[i]]; if(val!=valueAnt) cutPoints.addElement(new Integer(i)); valueAnt=val; } return cutPoints; } Vector classDistribution(int attribute,int []values,int begin,int end) { int []classCount = new int[Parameters.numClasses]; for(int i=0;i<Parameters.numClasses;i++) classCount[i]=0; for(int i=begin;i<=end;i++) classCount[classOfInstances[values[i]]]++; Vector res= new Vector(); for(int i=0;i<Parameters.numClasses;i++) { if(classCount[i]>0) res.addElement(new Integer(classCount[i])); } return res; } }