/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Discretizers.USD_Discretizer;
import java.util.*;
import keel.Algorithms.Discretizers.Basic.*;
import keel.Algorithms.Genetic_Rule_Learning.Globals.*;
/**
* <p>
* This class implements the USD discretizer.
* </p>
*
* @author Written by Jaume Bacardit (La Salle, Ramón Llull University - Barcelona) 28/03/2004
* @author Modified by Xavi Solé (La Salle, Ramón Llull University - Barcelona) 03/12/2008
* @version 1.1
* @since JDK1.5
*/
public class USDDiscretizer extends Discretizer {

    /**
     * <p>
     * A run of consecutive positions {@code begin..end} (both inclusive) of the
     * {@code values} index array of one attribute, together with the class
     * statistics of the instances those positions refer to.
     * </p>
     * <p>
     * This has to be an inner (non-static) class because the statistics are
     * obtained through the enclosing instance's {@code classDistribution}.
     * </p>
     */
    private class Interval {
        /** Index of the attribute this interval belongs to. */
        int attribute;
        /** First position (inclusive) covered inside {@link #values}. */
        int begin;
        /** Last position (inclusive) covered inside {@link #values}. */
        int end;
        /** Instance indices; the array is shared with the caller, not copied. */
        int []values;
        /** Number of covered instances per class, indexed by class. */
        int []cd;
        /** Class with the strictly largest count, or -1 when the top count is tied. */
        int majority;
        /** {@code max / (1 + (count - max))}: largest class count over one plus the remaining instances. */
        double goodness;
        /** True when the largest class count equals the total count. */
        boolean pure;

        /**
         * <p>
         * Constructor of the class, initializes the object attributes and
         * computes the class statistics of the interval.
         * </p>
         * @param _attribute index of the attribute being discretized
         * @param _values array of instance indices (kept by reference)
         * @param _begin first position of the interval inside {@code _values} (inclusive)
         * @param _end last position of the interval inside {@code _values} (inclusive)
         */
        public Interval(int _attribute, int []_values, int _begin, int _end) {
            attribute=_attribute;
            begin=_begin;
            end=_end;
            values=_values;
            computeIntervalRatios();
        }

        /**
         * <p>
         * Recomputes {@link #cd}, {@link #majority}, {@link #pure} and
         * {@link #goodness} from the current {@code begin..end} range.
         * </p>
         */
        void computeIntervalRatios() {
            cd=classDistribution(attribute,values,begin,end);

            int max=-1;
            int maxC=-1;
            boolean tie=false;
            int count=0;
            for(int i=0;i<Parameters.numClasses;i++) {
                if(cd[i]>max) {
                    // New strict maximum: any earlier tie is no longer relevant.
                    maxC=i;
                    max=cd[i];
                    tie=false;
                } else if(cd[i]==max) {
                    tie=true;
                }
                count+=cd[i];
            }

            majority=tie ? -1 : maxC;
            pure=(max==count);
            goodness=max/(1.0+(count-max));
        }

        /**
         * <p>
         * Enlarge the interval using a new "end" and refresh its statistics.
         * </p>
         * @param newEnd indicates the new end (inclusive position in {@link #values})
         */
        public void enlargeInterval(int newEnd) {
            end=newEnd;
            computeIntervalRatios();
        }
    }

    /**
     * <p>
     * Computes the cut points of one attribute over the positions
     * {@code begin..end} of {@code values}.
     * </p>
     * <p>
     * Steps: (1) group positions holding the same real value into intervals,
     * (2) join adjacent pure intervals of the same class, (3) repeatedly merge
     * the best pair of adjacent intervals until no merge improves goodness,
     * (4) emit the midpoint between each pair of surviving neighbours.
     * </p>
     * <p>
     * NOTE(review): this presumably implements the hook declared by
     * {@code Discretizer}, and {@code values} is presumably ordered by the
     * attribute's real value -- confirm against the base class.
     * </p>
     * @param attribute index of the attribute to discretize
     * @param values array of instance indices
     * @param begin first position of {@code values} to consider (inclusive)
     * @param end last position of {@code values} to consider (inclusive)
     * @return a vector of {@code Double} cut points; empty when {@code begin > end}
     *         or when a single interval remains
     */
    protected Vector discretizeAttribute(int attribute,int []values,int begin,int end) {
        Vector<Double> cutPoints=new Vector<Double>();

        // Empty range: nothing to discretize. Without this guard
        // mergeEqualValues would read values[begin] for a range with no elements.
        if(begin>end) {
            return cutPoints;
        }

        Vector<Interval> intervals=mergeEqualValues(attribute,values,begin,end);
        createInitialIntervals(intervals);

        while(true) {
            int bestUnion=-1;
            double bestGoodness=0;
            Interval bestInterval=null;

            for(int i=0;i<intervals.size()-1;i++) {
                Interval int1=intervals.elementAt(i);
                Interval int2=intervals.elementAt(i+1);

                // Only neighbours that agree on the majority class, or where
                // at least one of them has no clear majority, may be joined.
                if(int1.majority==int2.majority || int1.majority==-1 || int2.majority==-1) {
                    Interval res=new Interval(attribute,values,int1.begin,int2.end);

                    // The union must beat the mean goodness of its two parts;
                    // among those, keep the one with the highest goodness
                    // (the earliest pair wins when goodness is equal).
                    if(res.goodness>(int1.goodness+int2.goodness)/2.0
                            && (bestInterval==null || res.goodness>bestGoodness)) {
                        bestUnion=i;
                        bestGoodness=res.goodness;
                        bestInterval=res;
                    }
                }
            }

            if(bestInterval==null) {
                break;
            }

            // Replace the pair (bestUnion, bestUnion+1) by their union.
            intervals.setElementAt(bestInterval,bestUnion);
            intervals.removeElementAt(bestUnion+1);
        }

        for(int i=0;i<intervals.size()-1;i++) {
            Interval int1=intervals.elementAt(i);
            Interval int2=intervals.elementAt(i+1);
            double cutPoint=(realValues[attribute][values[int1.end]]+realValues[attribute][values[int2.begin]])/2.0;
            cutPoints.addElement(Double.valueOf(cutPoint));
        }
        return cutPoints;
    }

    /**
     * <p>
     * Joins, in place, every run of adjacent intervals that are all pure and
     * share the same (non-tied) majority class.
     * </p>
     * @param intervals list of intervals; the first interval of each run is
     *        enlarged and the following ones are removed from the vector
     */
    void createInitialIntervals(Vector<Interval> intervals) {
        int index=0;
        while(index<intervals.size()-1) {
            Interval current=intervals.elementAt(index);
            Interval next=intervals.elementAt(index+1);

            boolean sameClearMajority=current.majority!=-1 && current.majority==next.majority;
            if(sameClearMajority && current.pure && next.pure) {
                // Stay on the same index: the enlarged interval may also
                // absorb the interval that now follows it.
                current.enlargeInterval(next.end);
                intervals.removeElementAt(index+1);
            } else {
                index++;
            }
        }
    }

    /**
     * <p>
     * Builds one interval per run of consecutive positions whose real value
     * for the attribute is identical.
     * </p>
     * <p>
     * Requires {@code begin <= end}: position {@code begin} is read
     * unconditionally.
     * </p>
     * @param attribute index of the attribute
     * @param values array of instance indices
     * @param begin first position to consider (inclusive)
     * @param end last position to consider (inclusive)
     * @return the intervals, in position order, covering {@code begin..end}
     */
    Vector<Interval> mergeEqualValues(int attribute,int []values,int begin,int end) {
        Vector<Interval> intervals=new Vector<Interval>();
        int runStart=begin;
        double runValue=realValues[attribute][values[begin]];

        for(int i=begin+1;i<=end;i++) {
            double val=realValues[attribute][values[i]];
            if(val!=runValue) {
                // Value changed: close the run that ended at the previous position.
                intervals.addElement(new Interval(attribute,values,runStart,i-1));
                runStart=i;
                runValue=val;
            }
        }
        // Close the last run.
        intervals.addElement(new Interval(attribute,values,runStart,end));
        return intervals;
    }

    /**
     * <p>
     * Counts how many of the instances at positions {@code begin..end} of
     * {@code values} belong to each class.
     * </p>
     * @param attribute index of the attribute (not used by the computation)
     * @param values array of instance indices
     * @param begin first position to count (inclusive)
     * @param end last position to count (inclusive)
     * @return an array of length {@code Parameters.numClasses} with the count per class
     */
    int []classDistribution(int attribute,int []values,int begin,int end) {
        // A freshly allocated int array is already filled with zeros.
        int []classCount=new int[Parameters.numClasses];
        for(int i=begin;i<=end;i++) {
            classCount[classOfInstances[values[i]]]++;
        }
        return classCount;
    }
}