/***********************************************************************

    This file is part of KEEL-software, the Data Mining tool for regression,
    classification, clustering, pattern mining and so on.

    Copyright (C) 2004-2010

    J. Alcalá-Fdez (jalcala@decsai.ugr.es)
    A. Fernández (alberto.fernandez@ujaen.es)
    S. García (sglopez@ujaen.es)
    F. Herrera (herrera@decsai.ugr.es)
    L. Sánchez (luciano@uniovi.es)
    J. Luengo (julianlm@decsai.ugr.es)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

/**
 *
 * File: CIW_NN.java
 *
 * The CIW-NN Algorithm.
 * It makes use of three different preprocessing techniques in order to
 * improve the KNN classification. Instance Selection, feature weighting
 * and instance weighting are considered within the evolutionary framework
 *
 * @author Written by Joaquín Derrac (University of Granada) 13/1/2010
 * @version 1.0
 * @since JDK1.5
 *
 */

package keel.Algorithms.Coevolution.CIW_NN;

import java.util.Arrays;
import java.util.StringTokenizer;

import keel.Algorithms.Coevolution.CoevolutionAlgorithm;
import keel.Dataset.Attribute;

import org.core.Files;
import org.core.Randomize;

public class CIW_NN extends CoevolutionAlgorithm{

    /** Initial number of entries reserved for the convergence log; the log grows if needed. */
    private static final int INITIAL_LOG_CAPACITY = 10000;

    private int sizePop;

    private ChromosomeIS ISPopulation[];
    private ChromosomeFW FWPopulation[];
    private ChromosomeIW IWPopulation[];

    private int MAX_EVALUATIONS;
    private int evaluations;
    private int K;

    // Convergence log: one entry per iteration of the coevolution loop.
    // fitA/fitB/fitC hold the fitness of the first IS/FW/IW chromosome,
    // evs the accumulated number of evaluations, counter the entries in use.
    double fitA[];
    double fitB[];
    double fitC[];
    int evs[];
    int counter;

    private ChromosomeIS bestIS;
    private ChromosomeFW bestFW;
    private ChromosomeIW bestIW;

    private double bestFitnessIS;
    private double bestFitnessFW;
    private double bestFitnessIW;

    private double alpha;
    private double prob0to1;
    private double prob1;
    private double mutProb;

    private int epochFW;
    private int epochIW;

    private int trainRealClass[][];
    private int trainPrediction[][];
    private int testRealClass[][];
    private int testPrediction[][];

    private int testUnclassified;
    private int trainUnclassified;

    private int testConfMatrix[][];
    private int trainConfMatrix[][];

    /**
     * Builds the algorithm: loads the data and the configuration, creates the
     * three populations and configures the WKNN classifier and the CHC
     * search procedures.
     *
     * The parameters used below (sizePop, K, alpha, ...) are expected to have
     * been set by the time readDataFiles returns (presumably through
     * readParameters; confirm in CoevolutionAlgorithm).
     *
     * @param script Name of the configuration script
     *
     */
    public CIW_NN (String script) {

        readDataFiles(script);

        //Naming the algorithm
        name="CIW_NN";

        //Initialization of random generator
        Randomize.setSeed(seed);

        evaluations=0;

        //create populations
        ChromosomeIS.setSize(trainData.length);
        ChromosomeFW.setSize(inputAtt);
        ChromosomeIW.setSize(nClasses);

        ChromosomeIS.setProb(prob0to1);
        ChromosomeIS.setprob1(prob1);

        ISPopulation= new ChromosomeIS [sizePop];
        FWPopulation= new ChromosomeFW [sizePop];
        IWPopulation= new ChromosomeIW [sizePop];

        //initialize populations
        for(int i=0;i<sizePop;i++){
            ISPopulation[i]= new ChromosomeIS();
            FWPopulation[i]= new ChromosomeFW();
            IWPopulation[i]= new ChromosomeIW();
        }

        //prepare weighted KNN classifier
        WKNN.setData(trainData);
        WKNN.setOutput(trainOutput);
        WKNN.setK(K);
        WKNN.setNClasses(nClasses);

        //prepare algorithms
        BinaryCHC.setThreshold((trainData.length/4));
        BinaryCHC.setAlpha(alpha);
        BinaryCHC.setprob0to1R(prob0to1);

        RealCHC.setMAX_EVALS(epochFW);
        RealCHC.setMutation(mutProb);

        RealIWCHC.setMAX_EVALS(epochIW);
        RealIWCHC.setMutation(mutProb);

        //Initialization stuff ends here. So, we can start time-counting
        setInitialTime();

    } //end-method

    /**
     * Reads configuration script, to extract the parameter's values.
     *
     * After three lines of in/out file definitions, the script must contain
     * one "name = value" line per parameter, in this order: seed, K,
     * maximum evaluations, population size, alpha, prob0to1, prob1,
     * mutation probability, epoch length (FW) and epoch length (IW).
     *
     * @param script Name of the configuration script
     *
     */
    protected void readParameters (String script) {

        String file = Files.readFile (script);
        StringTokenizer fileLines = new StringTokenizer (file,"\n\r");

        //Discard in/out files definition
        fileLines.nextToken();
        fileLines.nextToken();
        fileLines.nextToken();

        //Getting the seed
        seed = Long.parseLong(nextValue(fileLines));

        //Getting the K parameter (number of neighbors)
        K = Integer.parseInt(nextValue(fileLines));

        //Getting the MAX EVALUATIONS parameter
        MAX_EVALUATIONS = Integer.parseInt(nextValue(fileLines));

        //Getting the sizePop parameter
        sizePop = Integer.parseInt(nextValue(fileLines));

        //Getting the alpha parameter
        alpha = Double.parseDouble(nextValue(fileLines));

        //Getting the r parameter
        prob0to1 = Double.parseDouble(nextValue(fileLines));

        //Getting the prob1
        prob1 = Double.parseDouble(nextValue(fileLines));

        //Getting the mutation probability
        mutProb = Double.parseDouble(nextValue(fileLines));

        //Getting the epoch length (FW)
        epochFW = Integer.parseInt(nextValue(fileLines));

        //Getting the epoch length (IW)
        epochIW = Integer.parseInt(nextValue(fileLines));

    }//end-method

    /**
     * Consumes the next "name = value" line of the script and returns its value.
     *
     * The value is trimmed instead of blindly dropping its first character,
     * so both "name = value" and "name=value" are read correctly.
     *
     * @param fileLines Tokenizer positioned on the lines of the script
     *
     * @return Text found after the '=' sign, without surrounding whitespace
     */
    private static String nextValue(StringTokenizer fileLines){

        StringTokenizer tokens = new StringTokenizer (fileLines.nextToken(), "=");

        tokens.nextToken(); //skip the parameter name

        return tokens.nextToken().trim();

    }//end-method

    /**
     * Performs the coevolutionary search.
     *
     * Each iteration evolves the IS, FW and IW populations in turn, every one
     * of them evaluated against the best individuals currently known for the
     * other two, until MAX_EVALUATIONS evaluations have been spent. The best
     * chromosomes found are left in bestIS, bestFW and bestIW, and the
     * progress of every iteration is stored in the convergence log.
     */
    public void coevolution(){

        fitA= new double [INITIAL_LOG_CAPACITY];
        fitB= new double [INITIAL_LOG_CAPACITY];
        fitC= new double [INITIAL_LOG_CAPACITY];
        evs= new int [INITIAL_LOG_CAPACITY];
        counter=0;

        //-1 marks unused entries of the log
        Arrays.fill(fitA, -1.0);
        Arrays.fill(fitB, -1.0);
        Arrays.fill(fitC, -1.0);
        Arrays.fill(evs, -1);

        //initial evaluations
        initialEvaluation();

        bestIS=ISPopulation[0].clone();
        bestFW=FWPopulation[0].clone();
        bestIW=IWPopulation[0].clone();

        //the evaluations spent by the initial evaluation are not counted
        evaluations=0;

        //Double.MIN_VALUE is the smallest POSITIVE double, so it is not a valid
        //"lowest fitness" sentinel: a fitness of 0.0 would never replace it
        bestFitnessIS=Double.NEGATIVE_INFINITY;
        bestFitnessFW=Double.NEGATIVE_INFINITY;
        bestFitnessIW=Double.NEGATIVE_INFINITY;

        while(evaluations<MAX_EVALUATIONS){

            //IS generation
            WKNN.setFeatureWeights(bestFW.getAll());
            WKNN.setInstanceWeights(bestIW.getAll());
            evaluations+=BinaryCHC.generation(ISPopulation);

            //FW generation
            WKNN.setInstances(bestIS.getAll());
            WKNN.setInstanceWeights(bestIW.getAll());
            evaluations+=RealCHC.generation(FWPopulation);

            //IW generation
            WKNN.setInstances(bestIS.getAll());
            WKNN.setFeatureWeights(bestFW.getAll());
            evaluations+=RealIWCHC.generation(IWPopulation);

            //Update best individuals
            if(ISPopulation[0].getFitness()>bestFitnessIS){
                bestIS=ISPopulation[0].clone();
                bestFitnessIS=ISPopulation[0].getFitness();
                System.out.println("-----"+bestFitnessIS);
            }

            if(FWPopulation[0].getFitness()>bestFitnessFW){
                bestFW=FWPopulation[0].clone();
                bestFitnessFW=FWPopulation[0].getFitness();
                System.out.println("*****"+bestFitnessFW);
            }

            if(IWPopulation[0].getFitness()>bestFitnessIW){
                bestIW=IWPopulation[0].clone();
                bestFitnessIW=IWPopulation[0].getFitness();
                System.out.println();
                System.out.println("+++"+bestFitnessIW);
            }

            recordConvergence();
        }

        System.out.println(bestIS);
        System.out.println(bestFW);
        System.out.println(bestIW);
    }

    /**
     * Appends the state of the current iteration to the convergence log,
     * enlarging the log first if it is already full.
     */
    private void recordConvergence(){

        if(counter==evs.length){
            growConvergenceLog();
        }

        fitA[counter]=ISPopulation[0].getFitness();
        fitB[counter]=FWPopulation[0].getFitness();
        fitC[counter]=IWPopulation[0].getFitness();
        evs[counter]=evaluations;
        counter++;
    }

    /**
     * Doubles the capacity of the convergence log, keeping its contents and
     * marking the new entries as unused (-1).
     */
    private void growConvergenceLog(){

        int newLength=evs.length*2;

        fitA=extend(fitA,newLength);
        fitB=extend(fitB,newLength);
        fitC=extend(fitC,newLength);

        int widerEvs []=new int [newLength];
        Arrays.fill(widerEvs, -1);
        System.arraycopy(evs, 0, widerEvs, 0, evs.length);
        evs=widerEvs;
    }

    /**
     * Copies an array into a longer one whose extra entries are set to -1.0
     *
     * @param source Array to copy
     * @param newLength Length of the new array (not shorter than source)
     *
     * @return The enlarged copy
     */
    private static double [] extend(double source [],int newLength){

        double wider []=new double [newLength];

        Arrays.fill(wider, -1.0);
        System.arraycopy(source, 0, wider, 0, source.length);

        return wider;
    }

    /**
     * Performs an initial evaluation of the populations.
     *
     * Each population is evaluated against a neutral configuration of the
     * other two components: every instance selected, every feature weight
     * set to 1.0 and every instance weight set to 0.5. The populations are
     * sorted afterwards.
     */
    private void initialEvaluation(){

        double acc,red;
        int fullIS [];
        double fullFW [];
        double fullIW [];
        double fitness;

        fullIS=new int [trainData.length];
        fullFW=new double [inputAtt];
        fullIW=new double [nClasses];

        Arrays.fill(fullIS, 1);
        Arrays.fill(fullFW, 1.0);
        Arrays.fill(fullIW, 0.5);

        //evaluateISPopulation
        WKNN.setFeatureWeights(fullFW);
        WKNN.setInstanceWeights(fullIW);

        for(int i=0;i<sizePop;i++){
            WKNN.setInstances(ISPopulation[i].getAll());
            acc=evaluate();
            red=ISPopulation[i].computeRed();
            fitness=ISFitness(acc,red);
            ISPopulation[i].setFitness(fitness);
        }

        //evaluateFWPopulation
        WKNN.setInstances(fullIS);
        WKNN.setInstanceWeights(fullIW);

        for(int i=0;i<sizePop;i++){
            WKNN.setFeatureWeights(FWPopulation[i].getAll());
            acc=evaluate();
            fitness=FWFitness(acc);
            FWPopulation[i].setFitness(fitness);
        }

        //evaluateIWPopulation
        WKNN.setInstances(fullIS);
        WKNN.setFeatureWeights(fullFW);

        for(int i=0;i<sizePop;i++){
            WKNN.setInstanceWeights(IWPopulation[i].getAll());
            acc=evaluate();
            fitness=IWFitness(acc);
            IWPopulation[i].setFitness(fitness);
        }

        //sort populations
        Arrays.sort(ISPopulation);
        Arrays.sort(FWPopulation);
        Arrays.sort(IWPopulation);
    }

    /**
     * Performs an evaluation with the current WKNN configuration and counts it
     *
     * @return Accuracy obtained
     */
    private double evaluate(){

        double value=WKNN.accuracy();

        evaluations++;

        return value;
    }

    /**
     * Computes the fitness of a IS individual, as the alpha-weighted
     * combination of accuracy and reduction rate
     *
     * @param acc Accuracy computed
     * @param red Reduction rate computed
     *
     * @return Fitness computed
     */
    private double ISFitness(double acc,double red){

        return (alpha*acc)+((1.0-alpha)*red);
    }

    /**
     * Computes the fitness of a FW individual (its accuracy)
     *
     * @param acc Accuracy computed
     *
     * @return Fitness computed
     */
    private double FWFitness(double acc){

        return acc;
    }

    /**
     * Computes the fitness of a IW individual (its accuracy)
     *
     * @param acc Accuracy computed
     *
     * @return Fitness computed
     */
    private double IWFitness(double acc){

        return acc;
    }

    /**
     * Configures the WKNN classifier with the best chromosomes found
     */
    private void loadBestSolution(){

        WKNN.setInstances(bestIS.getAll());
        WKNN.setFeatureWeights(bestFW.getAll());
        WKNN.setInstanceWeights(bestIW.getAll());
    }

    /**
     * Classifies the training set with the best solution found
     *
     * @return Output computed
     */
    public int [] classifyTraining(){

        int result []= new int [trainData.length];

        loadBestSolution();

        for(int i=0;i<trainData.length;i++){
            result[i]=WKNN.classifyTrainInstance(i);
        }

        return result;
    }

    /**
     * Classifies the test set with the best solution found
     *
     * @return Output computed
     */
    public int [] classifyTestSet(){

        int result []= new int [testData.length];

        loadBestSolution();

        for(int i=0;i<testData.length;i++){
            result[i]=WKNN.classifyNewInstance(testData[i]);
        }

        return result;
    }

    /**
     * Executes the classification of train dataset.
     *
     * Also closes the model-building time measurement, reports the model and
     * training times on standard output and writes the training output file.
     *
     */
    public void classifyTrain(){

        modelTime=((double)System.currentTimeMillis()-initialTime)/1000.0;
        System.out.println(name+" "+ relation + " Model " + modelTime + "s");

        //Check time
        setInitialTime();

        int [] clasResult;

        trainRealClass = new int[trainData.length][1];
        trainPrediction = new int[trainData.length][1];

        clasResult=classifyTraining();

        for (int i=0; i<trainRealClass.length; i++) {
            trainRealClass[i][0]= trainOutput[i];
            trainPrediction[i][0]= clasResult[i];
        }

        trainingTime=((double)System.currentTimeMillis()-initialTime)/1000.0;

        //Writing results
        writeOutput(outFile[0], trainRealClass, trainPrediction);
        System.out.println(name+" "+ relation + " Training " + trainingTime + "s");

    }//end-method

    /**
     * Executes the classification of test dataset, reports the test time on
     * standard output and writes the test output file.
     *
     */
    public void classifyTest(){

        //Check time
        setInitialTime();

        int [] clasResult;

        testRealClass = new int[testData.length][1];
        testPrediction = new int[testData.length][1];

        clasResult=classifyTestSet();

        for (int i=0; i<testRealClass.length; i++) {
            testRealClass[i][0]= testOutput[i];
            testPrediction[i][0]= clasResult[i];
        }

        testTime=((double)System.currentTimeMillis()-initialTime)/1000.0;

        //Writing results
        writeOutput(outFile[1], testRealClass, testPrediction);
        System.out.println(name+" "+ relation + " Test " + testTime + "s");

    }//end-method

    /**
     * Prints the additional output file: accuracy, kappa, unclassified
     * instances, reduction rate, times, final chromosomes, confusion matrices
     * and the convergence log.
     *
     * Requires classifyTrain and classifyTest to have been run before, since
     * it reads the predictions they store.
     */
    public void printExitValues(){

        double redIS;
        StringBuilder text=new StringBuilder();

        computeConfussionMatrixes();

        //Accuracy
        text.append("Accuracy: ").append(getAccuracy()).append("\n");
        text.append("Accuracy (Training): ").append(getTrainAccuracy()).append("\n");

        //Kappa
        text.append("Kappa: ").append(getKappa()).append("\n");
        text.append("Kappa (Training): ").append(getTrainKappa()).append("\n");

        //Unclassified
        text.append("Unclassified instances: ").append(testUnclassified).append("\n");
        text.append("Unclassified instances (Training): ").append(trainUnclassified).append("\n");

        //Reduction
        redIS=bestIS.computeRed();

        //Reduction IS
        text.append("Reduction (IS): ").append(redIS).append("\n");

        //Model time
        text.append("Model time: ").append(modelTime).append(" s\n");

        //Training time
        text.append("Training time: ").append(trainingTime).append(" s\n");

        //Test time
        text.append("Test time: ").append(testTime).append(" s\n");

        //Print final chromosomes
        text.append("Final solution:\n");
        text.append(bestIS).append("\n");
        text.append(bestFW).append("\n");
        text.append(bestIW).append("\n");
        text.append("\n");

        //Confusion matrix
        text.append("Confussion Matrix:\n");
        appendMatrix(text,testConfMatrix);
        text.append("\n");

        text.append("Training Confussion Matrix:\n");
        appendMatrix(text,trainConfMatrix);
        text.append("\n");

        text.append("Convergence\n\n");
        for(int i=0;i<counter;i++){
            text.append(evs[i]).append(" ").append(fitA[i]).append(" ")
                .append(fitB[i]).append(" ").append(fitC[i]).append("\n");
        }

        //Finish additional output file
        Files.writeFile (outFile[2], text.toString());

    }//end-method

    /**
     * Appends a nClasses x nClasses matrix to a text, one row per line and
     * with a tab after every value
     *
     * @param text Text being built
     * @param matrix Matrix to print
     */
    private void appendMatrix(StringBuilder text,int matrix [][]){

        for(int i=0;i<nClasses;i++){
            for(int j=0;j<nClasses;j++){
                text.append(matrix[i][j]).append("\t");
            }
            text.append("\n");
        }
    }

    /**
     * Computes the confusion matrixes of test and training sets, together
     * with their number of unclassified instances
     *
     */
    private void computeConfussionMatrixes(){

        //freshly allocated int arrays are already filled with zeros
        testConfMatrix= new int [nClasses][nClasses];
        trainConfMatrix= new int [nClasses][nClasses];

        testUnclassified=fillConfusionMatrix(testConfMatrix,testPrediction,testRealClass);
        trainUnclassified=fillConfusionMatrix(trainConfMatrix,trainPrediction,trainRealClass);

    }//end-method

    /**
     * Accumulates a set of predictions into a confusion matrix, indexed as
     * [predicted class][real class]. Predictions equal to -1 are counted as
     * unclassified and left out of the matrix.
     *
     * @param matrix Matrix to fill (must be zeroed)
     * @param prediction Predicted output for instances
     * @param realClass Real output of instances
     *
     * @return Number of unclassified instances
     */
    private static int fillConfusionMatrix(int matrix [][],int prediction [][],int realClass [][]){

        int unclassified=0;

        for(int i=0;i<prediction.length;i++){
            int predicted=prediction[i][0];

            if(predicted==-1){
                unclassified++;
            }else{
                matrix[predicted][realClass[i][0]]++;
            }
        }

        return unclassified;
    }

    /**
     * Computes the accuracy obtained on test set
     *
     * @return Accuracy on test set
     */
    private double getAccuracy(){

        return accuracy(testConfMatrix,(double)test.getNumInstances());

    }//end-method

    /**
     * Computes the accuracy obtained on the training set
     *
     * @return Accuracy on training set
     */
    private double getTrainAccuracy(){

        return accuracy(trainConfMatrix,(double)train.getNumInstances());

    }//end-method

    /**
     * Computes the Kappa obtained on test set
     *
     * @return Kappa on test set
     */
    private double getKappa(){

        return kappa(testConfMatrix,(double)test.getNumInstances());

    }//end-method

    /**
     * Computes the Kappa obtained on the training set
     *
     * @return Kappa on training set
     */
    private double getTrainKappa(){

        return kappa(trainConfMatrix,(double)train.getNumInstances());

    }//end-method

    /**
     * Computes the rate of instances lying on the diagonal of a confusion matrix
     *
     * @param matrix Confusion matrix
     * @param total Number of instances of the data set
     *
     * @return Accuracy
     */
    private static double accuracy(int matrix [][],double total){

        int hits=0;

        for(int i=0;i<matrix.length;i++){
            hits+=matrix[i][i];
        }

        return (double)hits/total;
    }

    /**
     * Computes Cohen's kappa from a confusion matrix, as
     * (agreement-expected)/(1-expected). The result is NaN when the expected
     * agreement is exactly 1.
     *
     * @param matrix Confusion matrix
     * @param total Number of instances of the data set
     *
     * @return Kappa
     */
    private static double kappa(int matrix [][],double total){

        double agreement=accuracy(matrix,total);
        double expected=0.0;

        for(int i=0;i<matrix.length;i++){
            int rowSum=0;
            int columnSum=0;

            for(int j=0;j<matrix.length;j++){
                rowSum+=matrix[i][j];
                columnSum+=matrix[j][i];
            }

            expected+=((double)rowSum/total)*((double)columnSum/total);
        }

        return (agreement-expected)/(1.0-expected);
    }

    /**
     * Prints output files.
     *
     * The header (relation, attributes and data mark) is written first; the
     * real/predicted pairs are then appended to the file in groups of ten
     * instances.
     *
     * @param filename Name of output file
     * @param realClass Real output of instances
     * @param prediction Predicted output for instances
     */
    private void writeOutput(String filename, int [][] realClass, int [][] prediction) {

        StringBuilder text = new StringBuilder();

        /*Printing input attributes*/
        text.append("@relation ").append(relation).append("\n");

        for (int i=0; i<inputs.length; i++) {
            appendAttribute(text, inputs[i], true);
        }

        /*Printing output attribute*/
        //a non nominal output is always declared as integer
        appendAttribute(text, output, false);

        /*Printing data*/
        text.append("@data\n");

        Files.writeFile(filename, text.toString());
        text.setLength(0);

        boolean numeric = (output.getType() == Attribute.INTEGER);

        for (int i=0; i<realClass.length; i++) {

            for (int j=0; j<realClass[0].length; j++){
                if (numeric) {
                    text.append(realClass[i][j]);
                }
                else {
                    text.append((String)output.getNominalValuesList().elementAt(realClass[i][j]));
                }
                text.append(" ");
            }

            for (int j=0; j<realClass[0].length; j++){
                if (numeric) {
                    text.append(prediction[i][j]);
                }
                else if (prediction[i][j]>-1) {
                    text.append((String)output.getNominalValuesList().elementAt(prediction[i][j]));
                }
                else {
                    text.append("Unclassified");
                }
                text.append(" ");
            }

            text.append("\n");

            //flush every ten instances
            if((i%10)==9){
                Files.addToFile(filename, text.toString());
                text.setLength(0);
            }
        }

        //flush the last, incomplete group
        if(text.length()>0){
            Files.addToFile(filename, text.toString());
        }

    }//end-method

    /**
     * Appends the "@attribute" declaration of an attribute to a text
     *
     * @param text Text being built
     * @param attribute Attribute to declare
     * @param allowReal Whether a non nominal, non integer attribute is declared
     *                  as "real" (true) or every non nominal attribute is
     *                  declared as "integer" (false)
     */
    private static void appendAttribute(StringBuilder text, Attribute attribute, boolean allowReal) {

        text.append("@attribute ").append(attribute.getName()).append(" ");

        if (attribute.getType() == Attribute.NOMINAL) {

            int nValues = attribute.getNominalValuesList().size();

            text.append("{");
            for (int j=0; j<nValues; j++) {
                text.append((String)attribute.getNominalValuesList().elementAt(j));
                if (j < nValues -1) {
                    text.append(", ");
                }
            }
            text.append("}\n");
        }
        else {
            if (!allowReal || attribute.getType() == Attribute.INTEGER) {
                text.append("integer");
            }
            else {
                text.append("real");
            }
            text.append(" [").append(String.valueOf(attribute.getMinAttribute()))
                .append(", ").append(String.valueOf(attribute.getMaxAttribute())).append("]\n");
        }
    }

} //end-class