/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.PSO_Learning.CPSO; /** * <p>Title: Algorithm CPSO</p> * * <p>Description: It contains the implementation of the algorithm</p> * * * <p>Company: KEEL </p> * * @author Jose A. Saez Munoz * @version 1.0 */ import java.io.IOException; import java.util.Vector; import keel.Dataset.Attributes; import org.core.*; public class CPSO { static public myDataset train, val, test; String outputTr, outputTst, outputRules; //parameters private long semilla; private int NumParticles; private int NumAttributes; private int NumInstances; private double ConvergenceRadius; private double WeightsUpperLimit; private double maxUncoveredInstances; private double indifferenceThreshold; private double constrictionCoefficient; private int numDimensions; private int numCenters; private int ConvergencePlatformWidth; private Crono cronometro; private Vector<Particle> ruleSet; private boolean somethingWrong = false; //to check if everything is correct. /** * Default constructor */ public CPSO(){ } /** * It reads the data from the input files (training, validation and test) and parse all the parameters * from the parameters array. * @param parameters parseParameters It contains the input files, output files and parameters */ public CPSO(parseParameters parameters) { train = new myDataset(); val = new myDataset(); test = new myDataset(); try { System.out.println("\nReading the training set: "+parameters.getTrainingInputFile()); train.readClassificationSet(parameters.getTrainingInputFile(), true); System.out.println("\nReading the validation set: "+parameters.getValidationInputFile()); val.readClassificationSet(parameters.getValidationInputFile(), false); System.out.println("\nReading the test set: "+parameters.getTestInputFile()); test.readClassificationSet(parameters.getTestInputFile(), false); } catch (IOException e) { System.err.println("There was a problem while reading the input data-sets: " + e); somethingWrong = true; } outputTr = parameters.getTrainingOutputFile(); outputTst = parameters.getTestOutputFile(); outputRules = parameters.getOutputFile(0); //Now we parse the parameters semilla = Long.parseLong(parameters.getParameter(0)); NumParticles=Integer.parseInt(parameters.getParameter(1)); ConvergenceRadius = Double.parseDouble(parameters.getParameter(2)); WeightsUpperLimit = Double.parseDouble(parameters.getParameter(3)); maxUncoveredInstances = Double.parseDouble(parameters.getParameter(4)); indifferenceThreshold = Double.parseDouble(parameters.getParameter(5)); constrictionCoefficient = Double.parseDouble(parameters.getParameter(6)); ConvergencePlatformWidth = Integer.parseInt(parameters.getParameter(7)); //inicializar la semilla Randomize.setSeed(semilla); NumAttributes=train.getnInputs(); numDimensions=NumAttributes*2; numCenters=NumAttributes; NumInstances=train.getnData(); ruleSet=new Vector<Particle>(25,10); Particle.InitializeParameters(indifferenceThreshold, constrictionCoefficient, WeightsUpperLimit); cronometro=new Crono(); } /** * It launches the algorithm */ public void execute() { if (somethingWrong) { //We do not execute the program System.err.println("An error was found, either the data-set have numerical values or missing values."); System.err.println("Aborting the program"); } else { train.normalize(indifferenceThreshold); val.normalize(indifferenceThreshold); test.normalize(indifferenceThreshold); cronometro.inicializa(); CPSO_Method(); cronometro.fin(); //Finally we should fill the training and test output files double accTrain=doOutput(val, outputTr); double accTest=doOutput(test, outputTst); PrintOutputRules(); double mediaAtts=0; for(int i=0 ; i<ruleSet.size() ; ++i) mediaAtts+=ruleSet.get(i).presentAttsBest(); mediaAtts/=ruleSet.size(); System.out.print("\n\n************************************************"); System.out.print("\nPorcertanje acierto train:\t"+accTrain); System.out.print("\nPorcertanje acierto test:\t"+accTest); System.out.print("\nNumero de reglas:\t\t"+ruleSet.size()); System.out.print("\nNumero atributos inicial:\t"+NumAttributes); System.out.print("\nMedia de atributos/regla:\t"+mediaAtts); System.out.print("\nTiempo:\t\t\t\t"+cronometro.tiempoTotal()); System.out.print("\n************************************************\n\n"); System.out.println("Algorithm Finished"); } } //********************************************************************* //***************** CPSO method *************************************** //********************************************************************* private void CPSO_Method(){ Particle bestRule; System.out.println("Total de instancias sin clasificar = "+train.noClasificadas()); for(int classChosen=train.ClasePredominante() ; train.QuedanMasInstancias(maxUncoveredInstances)&&train.NumClassesNotRemoved()>1 ; classChosen=train.ClasePredominante()){ bestRule=GetRule(classChosen); //get bestRule RemoveUnnecesaryVariables(bestRule,0); ruleSet.add(bestRule); //add bestRule to ruleSet EliminarInstanciasClasificadas(bestRule); //remove matched instances System.out.println("Total de instancias sin clasificar = "+train.noClasificadas()); } //add default rule Particle defaultRule=new Particle(numDimensions, train.ClasePredominante()); defaultRule.setAsDefaultRule(); ruleSet.add(defaultRule); //remove unnecesary rules EliminarReglasInnecesarias(); } //********************************************************************* //***************** PSO algorithm to get a rule *********************** //********************************************************************* private Particle GetRule(int classChosen){ Particle[] P=new Particle[NumParticles]; Particle bestActual=new Particle(numDimensions,classChosen); Particle bestPrevious=new Particle(numDimensions,classChosen); boolean mejoraItActual; //inicializo las posiciones y velocidades aleatoriamente for(int i=0 ; i<NumParticles ; ++i){ P[i]=new Particle(numDimensions,classChosen); P[i].randomInitialization(); } int ItActOpt=0; int iter=0; do{ mejoraItActual=false; bestActual.bestEvaluation=-1; //the first particle will be the best of the swarm at start //1) evaluar el fitness de cada particula for(int i=0 ; i<NumParticles ; ++i){ //1) evaluar P P[i].lastEvaluation=P[i].evaluation(); //2) actualizar Bp if(P[i].lastEvaluation>P[i].bestEvaluation) P[i].setB(P[i].X,P[i].lastEvaluation); //3) actualizar Bg if(P[i].isBetter(bestActual)) bestActual=P[i].cloneParticle(); } if(bestActual.isBetter(bestPrevious)) mejoraItActual=true; //2) mover cada particula a su siguiente posicion for(int i=0 ; i<NumParticles ; ++i){ P[i].updateV(bestActual); P[i].updateX(); } //ver si en esta iteracion se mejoro el global if(mejoraItActual){ ItActOpt=0; } else{ ItActOpt++; } iter++; bestPrevious=bestActual.cloneParticle(); }while(ItActOpt<ConvergencePlatformWidth && !ParticulasCercanas(P,bestActual)); Particle bestParticle=bestActual.cloneParticle(); bestParticle.lastEvaluation=bestParticle.bestEvaluation; bestParticle.setX(bestParticle.B); //set attribute presence bestParticle.fixAttributePresence(); return bestParticle; } //********************************************************************* //***************** Distance between particles ************************ //********************************************************************* private Boolean ParticulasCercanas(Particle[] P, Particle G){ for(int i=0 ; i<NumParticles ; ++i) if(EuclideanDistance(P[i],G)>(ConvergenceRadius*indifferenceThreshold)) return false; return true; } private double EuclideanDistance(Particle p1, Particle p2){ double distance=0; //distance for centers for(int d=0 ; d<numCenters ; ++d) distance+=Math.pow(p1.B[d]-p2.B[d], 2); //distance for radius for(int d=numCenters ; d<numDimensions ; ++d) if(train.getTipo(d-(NumAttributes))!=myDataset.NOMINAL) distance+=Math.pow(p1.B[d]-p2.B[d], 2); distance=Math.sqrt(distance); //count real number of dimensions int cont=0; for(int d=0 ; d<NumAttributes ; ++d) if(train.getTipo(d)!=myDataset.NOMINAL) cont+=2; else cont++; distance=distance/(Math.sqrt(cont)); return distance; } //********************************************************************* //***************** Remove unnecesary attributes of a rule ************ //********************************************************************* private double RemoveUnnecesaryVariables(Particle rule, int pos){ double bestEvaluation=rule.evaluation(); double newEvaluation; for(int i=pos ; i<NumAttributes ; ++i){ if(rule.GetAttributePresence(i)){ rule.SetAttributePresence(i, false); newEvaluation=RemoveUnnecesaryVariables(rule, i+1); if(newEvaluation>=bestEvaluation) bestEvaluation=newEvaluation; else rule.SetAttributePresence(i, true); } } return bestEvaluation; } //********************************************************************* //***************** Remove unnecesary rules *************************** //********************************************************************* private void EliminarReglasInnecesarias(){ Boolean continuar=true; //elimino reglas que son mas amplias que otras for(int i=ruleSet.size()-2 ; i>=0 ; --i){ continuar=true; for(int j=i-1 ; j>=0&&continuar ; --j){ if(IsSubSet(ruleSet.get(j),ruleSet.get(i))){//veo si alguna j, es menor que i (j incluido en i) ruleSet.remove(i); continuar=false; } } } //elimino reglas que predicen misma clase que la regla por defecto y estan justo antes que ella continuar=true; for(int i=ruleSet.size()-2 ; i>=0&&continuar ; --i){ if(ruleSet.get(i).clase==ruleSet.get(ruleSet.size()-1).clase) ruleSet.remove(i); else//en el momento que no elimine una, salgo del bucle continuar=false; } //elimino si hay reglas despues de una regla por defecto continuar=false; for(int i=0 ; i<ruleSet.size() ; ++i){ if(continuar) ruleSet.remove(i--); if(!continuar && ruleSet.get(i).presentAttsBest()==0) continuar=true; } } private Boolean IsSubSet(Particle Rule1, Particle Rule2){ //para los atributos presentes for(int d=0 ; d<NumAttributes ; ++d){ if(Rule1.GetAttributePresence(d)){ //ATRIBUTOS NUMERICOS if(train.getTipo(d)!=myDataset.NOMINAL){ if( !(Rule2.GetAttributePresence(d)) || !((Rule1.X[d]-Rule1.X[NumAttributes+d])<=(Rule2.X[d]-Rule2.X[NumAttributes+d])) || !((Rule1.X[d]+Rule1.X[NumAttributes+d])>=(Rule2.X[d]+Rule2.X[NumAttributes+d]))) return false; } //atributos nominales else{ int rango=(int)train.devuelveRangos()[d][1]+1; int valueR1=(int)((Rule1.B[d]*rango)/indifferenceThreshold); int valueR2=(int)((Rule2.B[d]*rango)/indifferenceThreshold); if( !(Rule2.GetAttributePresence(d)) || !(valueR1==valueR2)) return false; } } } return true; } //********************************************************************* //***************** Remove classified instances *********************** //********************************************************************* public void EliminarInstanciasClasificadas(Particle p){ for(int i=0 ; i<NumInstances ; ++i){ //si satisface la clase y el antecedente se remueve if(p.CoverInstance(train.getExample(i))&&train.getOutputAsInteger(i)==p.clase) train.setRemoved(i,true); } } //********************************************************************* //***************** To do outputs files ******************************* //********************************************************************* public void PrintOutputRules(){ double valor1, valor2; double min[]=train.getemin(); double max[]=train.getemax(); String cad=""; //atributos presentes for(int i=0 ; i<ruleSet.size() ; ++i){ cad+="\n\n\nIF\t"; if(ruleSet.get(i).presentAttsBest()==0) cad+="TRUE\n\tTHEN CLASS = "+train.getOutputValue(ruleSet.get(i).clase); else{ for(int j=0 ; j<NumAttributes ; ++j){ if(ruleSet.get(i).GetAttributePresence(j)){ String nombreAtt=Attributes.getInputAttribute(j).getName(); valor1=(ruleSet.get(i).B[j]-ruleSet.get(i).B[j+numCenters]); valor2=(ruleSet.get(i).B[j]+ruleSet.get(i).B[j+numCenters]); valor1=train.Desnormalizar(valor1, j, indifferenceThreshold); valor2=train.Desnormalizar(valor2, j, indifferenceThreshold); if(valor1<min[j]) valor1=min[j]; if(valor2>max[j]) valor2=max[j]; if(train.getTipo(j)!=myDataset.NOMINAL) cad+="\t"+nombreAtt+"\tin\t["+valor1+" , "+valor2+"]\n\tAND"; else{ int rango=(int)train.devuelveRangos()[j][1]+1; int value=(int)((ruleSet.get(i).B[j]*rango)/indifferenceThreshold); cad+="\t"+nombreAtt+" = "+Attributes.getInputAttribute(j).getNominalValue(value)+"\n\tAND"; } } } cad=cad.substring(0, cad.length()-4); cad+="\tTHEN CLASS = "+train.getOutputValue(ruleSet.get(i).clase); } } Fichero.escribeFichero(outputRules, cad); } /** * It generates the output file from a given dataset and stores it in a file * @param dataset myDataset input dataset * @param filename String the name of the file */ private double doOutput(myDataset dataset, String filename) { double aciertos=0; String output = new String(""); output = dataset.copyHeader(); //we insert the header in the output file //We write the output for each example for (int i = 0; i < dataset.getnData(); i++) { output += dataset.getOutputAsString(i) + " " + this.classificationOutput(dataset.getExample(i)) + "\n"; if(dataset.getOutputAsString(i).equals(this.classificationOutput(dataset.getExample(i)))) aciertos++; } Fichero.escribeFichero(filename, output); return aciertos/dataset.getnData(); } /** * It returns the algorithm classification output given an input example * @param example double[] The input example * @return String the output generated by the algorithm */ private String classificationOutput(double[] example){ String output = ""; for(int i=0 ; i<ruleSet.size() ; ++i){ //veo si coincide el antecedente if(ruleSet.get(i).CoverInstance(example)){//coincide el antecedente, devuelvo la primera clase que lo cumple output=train.getOutputValue(ruleSet.get(i).clase); return output; } } return output; } }