/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. S�nchez (luciano@uniovi.es)
J. Alcal�-Fdez (jalcala@decsai.ugr.es)
S. Garc�a (sglopez@ujaen.es)
A. Fern�ndez (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Discretizers.OneR;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Vector;
import keel.Algorithms.Discretizers.Basic.Discretizer;
import keel.Dataset.*;
/**
* This class implements the OneR discretizer
*
* <p>
* @author Written by Juli�n Luengo Mart�n 28/10/2008
* @version 0.1
* @since JDK 1.5
* </p>
*/
public class OneR extends Discretizer{
PrintStream stdout = System.out; //the standard output stored for further manipulation
int small = 6; //minimum number of explanatory values with the same class in an interval
public OneR(int minimum){
small = minimum;
}
@Override
protected Vector discretizeAttribute(int attribute,int []values,int begin,int end){
int numClasses,_class,count,optClass[];
int optimum[];
double last,value,cp;
double reals[],nonRepReals[];
ArrayList<Opt> opts = new ArrayList<Opt>();
Opt opt;
Vector cps = new Vector();
numClasses = Attributes.getOutputAttribute(0).getNumNominalValues();
reals = realValues[attribute];
//first lets fill the opts vector with the optimal class for each value
//remember that values[] give us the index of the real values SORTED
last = Double.NaN;
for(int i=0;i<values.length;i++){
value = reals[values[i]];
_class = classOfInstances[values[i]];
if(last==Double.NaN || value!=last){
opt = new Opt(value,numClasses);
opt.countClass(_class);
opts.add(opt);
last = value;
}else{
opt = opts.get(opts.size()-1);
opt.countClass(_class);
}
}
optimum = new int[opts.size()];
nonRepReals = new double[opts.size()];
for(int i=0;i<optimum.length;i++){
optimum[i] = opts.get(i).getOptClass();
nonRepReals[i] = opts.get(i).getValue();
}
//create the cutpoints
if(nonRepReals.length > 1){
count = 1;
optClass = new int[numClasses];
optClass[optimum[0]]++;
cp = nonRepReals[1];
_class = optimum[0];
for(int i=1;i<nonRepReals.length-1;i++){
if(count<small){
//displace the cut point, so it includes the present value
optClass[optimum[i]]++;
_class = indexOfMax(optClass);
count = optClass[_class];
}else if(count>=small && optimum[i]==_class){
optClass[classOfInstances[values[i]]]++;
count++;
}else if(count>=small && optimum[i]!=_class){
//add the cut point, since extending the interval has failed
cps.add(new Double(cp));
for(int j=0;j<optClass.length;j++)
optClass[j] = 0;
count = 0;
}
cp = nonRepReals[i+1];
}
//for the last interval...
if(count<small){
//displace the cut point, so it includes the present value
optClass[optimum[nonRepReals.length-1]]++;
_class = indexOfMax(optClass);
count = optClass[_class];
}else if(count>=small && optimum[nonRepReals.length-1]==_class){
optClass[classOfInstances[values[nonRepReals.length-1]]]++;
count++;
}else if(count>=small && optimum[nonRepReals.length-1]!=_class){
//add the cut point, since extending the interval has failed
cps.add(new Double(cp));
for(int j=0;j<optClass.length;j++)
optClass[j] = 0;
count = 0;
}
}
return cps;
}
/**
* Looks for the index of the maximum element in the array
* @param vec the array of elements
* @return the maximum element in vec
*/
public int indexOfMax(int vec[]){
int max = 0;
for(int i=1;i<vec.length;i++){
if(vec[i]>vec[max])
max = i;
}
return max;
}
}