/***********************************************************************

    This file is part of KEEL-software, the Data Mining tool for regression,
    classification, clustering, pattern mining and so on.

    Copyright (C) 2004-2010

    F. Herrera (herrera@decsai.ugr.es)
    L. Sánchez (luciano@uniovi.es)
    J. Alcalá-Fdez (jalcala@decsai.ugr.es)
    S. García (sglopez@ujaen.es)
    A. Fernández (alberto.fernandez@ujaen.es)
    J. Luengo (julianlm@decsai.ugr.es)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.Discretizers.Basic;

import keel.Dataset.*;
import java.util.*;
import keel.Algorithms.Genetic_Rule_Learning.Globals.*;

/**
 * Common skeleton for discretizers: it collects the numeric values of every
 * input attribute, delegates the choice of cut points to
 * {@link #discretizeAttribute(int, int[], int, int)} and can rewrite a data
 * file replacing every numeric value by the index of its interval.
 *
 * <p>All per-attribute arrays are sized with {@code Parameters.numAttributes}
 * and indexed by input-attribute position (presumably the number of input
 * attributes of the data set -- confirm against {@code Parameters}).
 */
public abstract class Discretizer {

    /**
     * Cut points of each input attribute, in the order returned by
     * {@link #discretizeAttribute}. An entry is {@code null} when the
     * attribute is not numeric or no cut point was produced for it.
     */
    protected double[][] cutPoints;

    /**
     * {@code realValues[a][k]} is the value of numeric input attribute
     * {@code a} in instance {@code k}; entries of missing values keep the
     * default 0.0. Rows of non-numeric attributes are {@code null}.
     */
    protected double[][] realValues;

    /** {@code true} for input attributes of type REAL or INTEGER. */
    protected boolean[] realAttributes;

    /** Integer value of the first output attribute of each instance. */
    protected int[] classOfInstances;

    /**
     * Position at which the output attribute is emitted by
     * {@link #applyDiscretization}: the index of the last non-input attribute
     * met by {@link #buildCutPoints}, or {@code Parameters.numAttributes}
     * when none was met (the output is then written after all inputs).
     */
    protected int iClassIndex;

    /**
     * Computes the cut points of every numeric input attribute of the given
     * instance set and records where the output attribute sits among the
     * attributes.
     *
     * @param is instances used to learn the discretization
     */
    public void buildCutPoints(InstanceSet is) {
        Instance[] instances = is.getInstances();

        classOfInstances = new int[instances.length];
        for (int k = 0; k < instances.length; k++) {
            classOfInstances[k] = instances[k].getOutputNominalValuesInt(0);
        }

        cutPoints = new double[Parameters.numAttributes][];
        realAttributes = new boolean[Parameters.numAttributes];
        realValues = new double[Parameters.numAttributes][];

        // 'a' walks over all attributes, 'i' counts the input attributes seen
        // so far; the walk stops as soon as every input attribute was handled.
        int i = 0;
        boolean bHit = false;
        for (int a = 0; i < Parameters.numAttributes; a++) {
            Attribute at = Attributes.getAttribute(a);
            if (at.getDirectionAttribute() == Attribute.INPUT) {
                if (at.getType() == Attribute.REAL || at.getType() == Attribute.INTEGER) {
                    buildCutPointsOfAttribute(i, instances);
                } else {
                    realAttributes[i] = false;
                }
                i++;
            } else {
                iClassIndex = a;
                bHit = true;
            }
        }

        if (!bHit) {
            iClassIndex = Parameters.numAttributes;
        }
    }

    /**
     * Gathers the known values of one numeric input attribute, sorts the
     * instance indexes by value and stores the cut points chosen by
     * {@link #discretizeAttribute}.
     *
     * @param i         input-attribute index
     * @param instances instances providing the values
     */
    private void buildCutPointsOfAttribute(int i, Instance[] instances) {
        realAttributes[i] = true;
        realValues[i] = new double[instances.length];

        // Indexes of the instances whose value for this attribute is known.
        int[] points = new int[instances.length];
        int numPoints = 0;
        for (int j = 0; j < instances.length; j++) {
            if (!instances[j].getInputMissingValues(i)) {
                points[numPoints++] = j;
                realValues[i][j] = instances[j].getInputRealValues(i);
            }
        }

        // With no known value there is nothing to sort or split: treat the
        // attribute as having no cut points instead of indexing values[-1].
        Vector cp;
        if (numPoints > 0) {
            sortValues(i, points, 0, numPoints - 1);
            cp = discretizeAttribute(i, points, 0, numPoints - 1);
        } else {
            cp = new Vector();
        }

        if (cp.size() > 0) {
            cutPoints[i] = new double[cp.size()];
            for (int j = 0; j < cutPoints[i].length; j++) {
                cutPoints[i][j] = ((Double) cp.elementAt(j)).doubleValue();
                LogManager.println("Cut point " + j + " of attribute " + i + " : " + cutPoints[i][j]);
            }
        } else {
            cutPoints[i] = null;
        }
        LogManager.println("Number of cut points of attribute " + i + " : " + cp.size());
    }

    /**
     * Reads the data set stored in {@code in} and writes it to {@code out}
     * with every numeric input value replaced by its interval index, as given
     * by {@link #discretize(int, double)}. Missing values are written as
     * {@code ?}. Any failure is reported and terminates the JVM with exit
     * status 1.
     *
     * @param in  path of the data set to read
     * @param out path of the discretized data set to write
     */
    public void applyDiscretization(String in, String out) {
        InstanceSet is = new InstanceSet();
        try {
            is.readSet(in, false);
        } catch (Exception e) {
            LogManager.printErr(e.toString());
            System.exit(1);
        }

        FileManagement fm = new FileManagement();
        Instance[] instances = is.getInstances();
        Attribute[] att = Attributes.getInputAttributes();

        // True when the output attribute has to be emitted in between the
        // input attributes; otherwise it is appended after the last input.
        boolean classInline = iClassIndex >= 0 && iClassIndex < Parameters.numAttributes;

        try {
            fm.initWrite(out);

            // ---- header -------------------------------------------------
            fm.writeLine("@relation " + Attributes.getRelationName() + "\n");
            for (int i = 0; i < Parameters.numAttributes; i++) {
                if (i == iClassIndex) {
                    fm.writeLine(Attributes.getOutputAttributes()[0].toString() + "\n");
                }
                if (realAttributes[i]) {
                    fm.writeLine(discretizedAttributeDefinition(i, att[i].getName()));
                } else {
                    fm.writeLine(att[i].toString() + "\n");
                }
            }
            if (!classInline) {
                fm.writeLine(Attributes.getOutputAttributes()[0].toString() + "\n");
            }

            fm.writeLine("@inputs ");
            for (int i = 0; i < Parameters.numAttributes - 1; i++) {
                fm.writeLine(att[i].getName() + ",");
            }
            fm.writeLine(att[Parameters.numAttributes - 1].getName() + "\n");
            fm.writeLine("@outputs " + Attributes.getOutputAttributes()[0].getName() + "\n");
            fm.writeLine("@data\n");

            // ---- data ---------------------------------------------------
            for (int i = 0; i < instances.length; i++) {
                boolean[] missing = instances[i].getInputMissingValues();
                StringBuilder row = new StringBuilder();

                for (int j = 0; j < Parameters.numAttributes; j++) {
                    if (j == iClassIndex) {
                        row.append(instances[i].getOutputNominalValues(0)).append(",");
                    }

                    if (missing[j]) {
                        row.append("?");
                    } else if (realAttributes[j]) {
                        row.append(discretize(j, instances[i].getInputRealValues(j)));
                    } else {
                        row.append(instances[i].getInputNominalValues(j));
                    }

                    // The row ends here only if the class was already written.
                    if (classInline && j == Parameters.numAttributes - 1) {
                        row.append("\n");
                    } else {
                        row.append(",");
                    }
                }

                if (!classInline) {
                    row.append(instances[i].getOutputNominalValues(0)).append("\n");
                }
                fm.writeLine(row.toString());
            }

            fm.closeWrite();
        } catch (Exception e) {
            LogManager.printErr("Exception in doDiscretize");
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Builds the {@code @attribute} line of a discretized attribute: its
     * domain is the list of interval indexes {@code 0..numCutPoints}, or just
     * {@code 0} when the attribute has no cut points.
     *
     * @param attribute input-attribute index
     * @param name      attribute name to print
     * @return the definition line, terminated by a newline
     */
    private String discretizedAttributeDefinition(int attribute, String name) {
        StringBuilder def = new StringBuilder("@attribute " + name + " {");
        if (cutPoints[attribute] != null) {
            int numCuts = cutPoints[attribute].length;
            for (int j = 0; j <= numCuts; j++) {
                def.append(j);
                if (j < numCuts) {
                    def.append(",");
                }
            }
        } else {
            def.append(0);
        }
        def.append("}\n");
        return def.toString();
    }

    /**
     * Sorts {@code values[begin..end]} in place (quicksort) so that the
     * instance indexes it holds are ordered by increasing
     * {@code realValues[attribute][index]}.
     *
     * @param attribute input-attribute index whose values drive the order
     * @param values    instance indexes to reorder
     * @param begin     first position of the range (inclusive)
     * @param end       last position of the range (inclusive)
     */
    protected void sortValues(int attribute, int[] values, int begin, int end) {
        // Ranges with fewer than two elements are already sorted; returning
        // early also avoids reading values[-1] when the range is empty.
        if (begin >= end) {
            return;
        }

        int i = begin;
        int j = end;
        // Overflow-safe midpoint.
        double pivot = realValues[attribute][values[begin + (end - begin) / 2]];

        do {
            while (realValues[attribute][values[i]] < pivot) {
                i++;
            }
            while (realValues[attribute][values[j]] > pivot) {
                j--;
            }
            if (i <= j) {
                if (i < j) {
                    int temp = values[i];
                    values[i] = values[j];
                    values[j] = temp;
                }
                i++;
                j--;
            }
        } while (i <= j);

        if (begin < j) {
            sortValues(attribute, values, begin, j);
        }
        if (i < end) {
            sortValues(attribute, values, i, end);
        }
    }

    /**
     * Returns the number of intervals of an attribute: its number of cut
     * points plus one. An attribute without cut points has a single interval,
     * matching {@link #discretize(int, double)}, which maps every value of
     * such an attribute to interval 0.
     *
     * @param attribute input-attribute index
     * @return number of intervals (at least 1)
     */
    public int getNumIntervals(int attribute) {
        if (cutPoints[attribute] == null) {
            return 1;
        }
        return cutPoints[attribute].length + 1;
    }

    /**
     * Returns one cut point of an attribute.
     *
     * @param attribute input-attribute index
     * @param cp        cut point position, {@code 0 <= cp < getNumIntervals(attribute) - 1}
     * @return the requested cut point
     * @throws NullPointerException           if the attribute has no cut points
     * @throws ArrayIndexOutOfBoundsException if {@code cp} is out of range
     */
    public double getCutPoint(int attribute, int cp) {
        return cutPoints[attribute][cp];
    }

    /**
     * Chooses the cut points of one numeric attribute.
     *
     * @param attribute input-attribute index
     * @param values    instance indexes sorted by the attribute value (see
     *                  {@link #sortValues}); only instances with a known
     *                  value are included
     * @param begin     first position of {@code values} to consider
     * @param end       last position of {@code values} to consider (inclusive)
     * @return the cut points as {@code Double} elements; may be empty
     */
    protected abstract Vector discretizeAttribute(int attribute, int[] values, int begin, int end);

    /**
     * Maps a value to the index of its interval: the position of the first
     * cut point strictly greater than the value, or the number of cut points
     * when no cut point is greater.
     *
     * @param attribute input-attribute index
     * @param value     value to discretize
     * @return interval index; always 0 for attributes without cut points
     */
    public int discretize(int attribute, double value) {
        if (cutPoints[attribute] == null) {
            return 0;
        }
        for (int i = 0; i < cutPoints[attribute].length; i++) {
            if (value < cutPoints[attribute][i]) {
                return i;
            }
        }
        return cutPoints[attribute].length;
    }
}