/***********************************************************************

    This file is part of KEEL-software, the Data Mining tool for regression,
    classification, clustering, pattern mining and so on.

    Copyright (C) 2004-2010

    F. Herrera (herrera@decsai.ugr.es)
    L. Sánchez (luciano@uniovi.es)
    J. Alcalá-Fdez (jalcala@decsai.ugr.es)
    S. García (sglopez@ujaen.es)
    A. Fernández (alberto.fernandez@ujaen.es)
    J. Luengo (julianlm@decsai.ugr.es)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.Discretizers.FUSINTER;

import java.util.*;
import keel.Algorithms.Discretizers.Basic.*;
import keel.Algorithms.Genetic_Rule_Learning.Globals.*;

/**
 * <p>
 * This class implements the FUSINTER discretizer.
 * </p>
 *
 * <p>
 * The attribute range is first split into initial intervals, and then adjacent
 * intervals are merged greedily for as long as a merge lowers the criterion
 * computed by {@link #eval_2(Vector, double, double)}.
 * </p>
 *
 * @author Written by Salvador García (University of Jaén) 4/05/2011
 * @version 1.1
 * @since JDK1.5
 */
public class FUSINTER extends Discretizer {

    /** Smoothing constant added to every class count in the criterion. */
    double lambda;

    /** Weight of the impurity term; the interval-size term is weighted by (1 - alpha). */
    double alpha;

    /**
     * Creates a discretizer with the given criterion parameters.
     *
     * @param _lambda smoothing constant added to the class counts
     * @param _alpha weight of the impurity term of the criterion
     */
    public FUSINTER(double _lambda, double _alpha) {
        lambda = _lambda;
        alpha = _alpha;
    }

    /**
     * <p>
     * Interval class: a contiguous range {@code [begin, end]} of positions in
     * the {@code values} array together with the class distribution of the
     * instances found in that range.
     * </p>
     */
    private class Interval {

        int attribute;
        int begin;
        int end;
        int[] values;
        /** Number of instances of each class inside the interval. */
        int[] cd;

        /**
         * <p>
         * Builds the interval and computes its class distribution.
         * </p>
         * @param _attribute index of the attribute being discretized
         * @param _values array of instance indices the interval refers to
         * @param _begin first position (inclusive) of the interval in {@code _values}
         * @param _end last position (inclusive) of the interval in {@code _values}
         */
        public Interval(int _attribute, int[] _values, int _begin, int _end) {
            attribute = _attribute;
            begin = _begin;
            end = _end;
            values = _values;
            computeIntervalRatios();
        }

        /** Recomputes {@link #cd} for the current {@code [begin, end]} range. */
        void computeIntervalRatios() {
            cd = classDistribution(attribute, values, begin, end);
        }

        /**
         * <p>
         * Enlarge the interval using a new "end"
         * </p>
         * @param newEnd indicates the new end
         */
        public void enlargeInterval(int newEnd) {
            end = newEnd;
            computeIntervalRatios();
        }
    }

    /**
     * Computes the cut points of one attribute.
     *
     * <p>
     * Starting from the intervals produced by
     * {@link #mergeEqualValues(int, int[], int, int)}, the pair of adjacent
     * intervals whose fusion reduces the criterion the most is merged
     * repeatedly, until no fusion yields a strictly positive reduction or a
     * single interval remains. Each remaining boundary becomes a cut point
     * placed halfway between the last value of the left interval and the first
     * value of the right one.
     * </p>
     *
     * @param attribute index of the attribute being discretized
     * @param values array of instance indices; presumably sorted by the value of
     *        {@code attribute} (TODO confirm against the caller)
     * @param begin first position (inclusive) of {@code values} to consider
     * @param end last position (inclusive) of {@code values} to consider
     * @return a vector of {@code Double} cut points; empty when fewer than two
     *         intervals remain
     */
    protected Vector discretizeAttribute(int attribute, int[] values, int begin, int end) {
        Vector<Interval> intervals = mergeEqualValues(attribute, values, begin, end);

        while (intervals.size() > 1) {
            double current = eval_2(intervals, alpha, lambda);

            // Find the adjacent pair whose fusion lowers the criterion the most
            // (first such pair on ties).
            int bestPos = 0;
            double bestGain = current - eval_2(intervals, alpha, lambda, 0);
            for (int i = 1; i < intervals.size() - 1; i++) {
                double gain = current - eval_2(intervals, alpha, lambda, i);
                if (gain > bestGain) {
                    bestPos = i;
                    bestGain = gain;
                }
            }

            // Written as !(x > 0) so that a NaN gain also stops the merging.
            if (!(bestGain > 0)) {
                break;
            }

            Interval left = intervals.elementAt(bestPos);
            Interval right = intervals.elementAt(bestPos + 1);
            left.enlargeInterval(right.end);
            intervals.removeElementAt(bestPos + 1);
        }

        Vector<Double> cutPoints = new Vector<Double>();
        for (int i = 0; i < intervals.size() - 1; i++) {
            Interval left = intervals.elementAt(i);
            Interval right = intervals.elementAt(i + 1);
            double cutPoint = (realValues[attribute][values[left.end]]
                    + realValues[attribute][values[right.begin]]) / 2.0;
            cutPoints.addElement(Double.valueOf(cutPoint));
        }
        return cutPoints;
    }

    /**
     * Builds the initial intervals of an attribute.
     *
     * <p>
     * A new interval is opened at position {@code i} when the attribute value
     * at {@code i} differs from the value at {@code i - 1} and the class at
     * {@code i} differs from the class of the first instance of the interval
     * currently being built. Instances that share the same attribute value are
     * therefore never separated.
     * </p>
     *
     * @param attribute index of the attribute being discretized
     * @param values array of instance indices; presumably sorted by the value of
     *        {@code attribute} (TODO confirm against the caller)
     * @param begin first position (inclusive) of {@code values} to consider
     * @param end last position (inclusive) of {@code values} to consider
     * @return the initial intervals in order; empty when {@code end < begin}
     */
    Vector<Interval> mergeEqualValues(int attribute, int[] values, int begin, int end) {
        Vector<Interval> intervals = new Vector<Interval>();

        // Empty range: nothing to split, and values[begin] must not be read.
        if (end < begin) {
            return intervals;
        }

        int intervalBegin = begin;
        double previousValue = realValues[attribute][values[begin]];
        int intervalClass = classOfInstances[values[begin]];

        for (int i = begin + 1; i <= end; i++) {
            double val = realValues[attribute][values[i]];
            int clas = classOfInstances[values[i]];

            if (val != previousValue && clas != intervalClass) {
                intervals.addElement(new Interval(attribute, values, intervalBegin, i - 1));
                intervalBegin = i;
                intervalClass = clas;
            }
            // Always track the immediately preceding value. Comparing against
            // the first value of the interval instead would allow a boundary
            // between two instances with identical values.
            previousValue = val;
        }

        intervals.addElement(new Interval(attribute, values, intervalBegin, end));
        return intervals;
    }

    /**
     * Counts how many instances of each class lie in a range of {@code values}.
     *
     * @param attribute index of the attribute being discretized (not used by the count)
     * @param values array of instance indices
     * @param begin first position (inclusive) of {@code values} to count
     * @param end last position (inclusive) of {@code values} to count
     * @return an array of length {@code Parameters.numClasses} with the count of each class
     */
    int[] classDistribution(int attribute, int[] values, int begin, int end) {
        // A freshly allocated int array is already zero-filled.
        int[] classCount = new int[Parameters.numClasses];
        for (int i = begin; i <= end; i++) {
            classCount[classOfInstances[values[i]]]++;
        }
        return classCount;
    }

    /**
     * Evaluates the criterion for a set of intervals.
     *
     * <p>
     * Each interval with {@code Nj} instances and class counts {@code n_j}
     * contributes
     * {@code alpha * (Nj / numInstances) * sum_j p_j * (1 - p_j)
     * + (1 - alpha) * (numClasses * lambda / Nj)},
     * where {@code p_j = (n_j + lambda) / (Nj + numClasses * lambda)}.
     * </p>
     *
     * @param intervals intervals to evaluate
     * @param alpha weight of the impurity term
     * @param lambda smoothing constant added to the class counts
     * @return the sum of the contributions of all intervals
     */
    double eval_2(Vector<Interval> intervals, double alpha, double lambda) {
        double total = 0;
        for (Interval interval : intervals) {
            int size = intervalSize(interval.cd);
            // The two terms are accumulated separately on purpose, so the
            // floating-point summation order is fixed.
            total += impurityTerm(interval.cd, size, alpha, lambda);
            total += fragmentationTerm(size, alpha, lambda);
        }
        return total;
    }

    /**
     * Evaluates the criterion as if the intervals at positions {@code merged}
     * and {@code merged + 1} had been fused into one, without modifying
     * {@code intervals}.
     *
     * @param intervals intervals to evaluate
     * @param alpha weight of the impurity term
     * @param lambda smoothing constant added to the class counts
     * @param merged position of the first interval of the pair to fuse
     * @return the sum of the contributions of all intervals after the hypothetical fusion
     */
    double eval_2(Vector<Interval> intervals, double alpha, double lambda, int merged) {
        double total = 0;
        for (int i = 0; i < intervals.size(); i++) {
            if (i == merged + 1) {
                // Already accounted for together with its left neighbour.
                continue;
            }
            int[] counts = intervals.elementAt(i).cd;
            if (i == merged) {
                counts = pooledCounts(counts, intervals.elementAt(i + 1).cd);
            }
            int size = intervalSize(counts);
            total += impurityTerm(counts, size, alpha, lambda)
                    + fragmentationTerm(size, alpha, lambda);
        }
        return total;
    }

    /** Returns the element-wise sum of two class-count arrays. */
    private int[] pooledCounts(int[] first, int[] second) {
        int[] pooled = new int[Parameters.numClasses];
        for (int j = 0; j < Parameters.numClasses; j++) {
            pooled[j] = first[j] + second[j];
        }
        return pooled;
    }

    /** Returns the number of instances described by a class-count array. */
    private int intervalSize(int[] counts) {
        int size = 0;
        for (int j = 0; j < Parameters.numClasses; j++) {
            size += counts[j];
        }
        return size;
    }

    /** Returns alpha * (size / numInstances) * sum_j p_j * (1 - p_j) for one interval. */
    private double impurityTerm(int[] counts, int size, double alpha, double lambda) {
        double denominator = size + Parameters.numClasses * lambda;
        double sum = 0;
        for (int j = 0; j < Parameters.numClasses; j++) {
            double factor = (counts[j] + lambda) / denominator;
            sum += factor * (1 - factor);
        }
        return alpha * ((double) size / (double) Parameters.numInstances) * sum;
    }

    /** Returns (1 - alpha) * (numClasses * lambda / size) for one interval. */
    private double fragmentationTerm(int size, double alpha, double lambda) {
        return (1 - alpha) * (((double) Parameters.numClasses * lambda) / (double) size);
    }
}