/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * * File: Subpopulation.java * * This class manage the subpopulations of selectors of the CoCoIS model * * @author Written by Joaquin Derrac (University of Granada) 3/3/2010 * @version 1.1 * @since JDK1.5 * */ package keel.Algorithms.Preprocess.Instance_Selection.CoCoIS; import java.util.Arrays; import org.core.Randomize; public class Subpopulation{ private int ID; //identifier of the population private double trainData [][]; //subset of training data assigned to the population private int trainOutput[]; //training data output private int nClasses; //number of classes private int IDs[]; //identifier for each member of the population private int population[][]; //population chromosomes private double fitness[]; //fitness value of the population individuals private int newPopulation[][]; //copy of the population for each new generation private double cache[][]; //distances cache private static int K; // K parameter for K-NN private static int size; //size of the population private static double WError; //Weight for Error in fitness function private static double WReduction; //Weight for Reduction in fitness function private static double WDifference; //Weight for Difference in fitness function private static double Elitism; //Percentage of member affected by elitism private static double PRnn; //Probability of application of RNN mutation private static double PRandom; //Probability of application of random mutation private static double PBit; //Probability of bit change in random mutation private int ISSelection[]; //Current instance selection vector for K-NN private double minDist[]; //auxiliary vector for K-NN classifier private int nearestN[];//auxiliary vector for K-NN classifier private int selectedClasses[];//auxiliary vector for K-NN classifier /** * Sets the K parameter * * @param value Value for the K parameter * */ public static void setK(int value){ K=value; }//end-method /** * Sets the size of the population * * @param value Size of the population * */ public static void setSize(int value){ size=value; }//end-method /** * Sets the WError parameter * * @param value Value for the WError parameter * */ public static void setWError(double value){ WError=value; }//end-method /** * Sets the WReduction parameter * * @param value Value for the WReduction parameter * */ public static void setWReduction(double value){ WReduction=value; }//end-method /** * Sets the WDifference parameter * * @param value Value for the WDifference parameter * */ public static void setWDifference(double value){ WDifference=value; }//end-method /** * Sets the Elitism percentage * * @param value Value for the Elitism percentage * */ public static void setElitism(double value){ Elitism=value; }//end-method /** * Sets the RNN mutation probability * * @param value Value for the RNN mutation probability * */ public static void setPRnn(double value){ PRnn=value; }//end-method /** * Sets the random mutation probability * * @param value Value for the random mutation probability * */ public static void setPRandom(double value){ PRandom=value; }//end-method /** * Sets the bit flip probability * * @param value Value for the bit flip probability * */ public static void setPBit(double value){ PBit=value; }//end-method /** * Builder. Generates a new subpopulation from a subset of the entire training set * * @param id Identifier of the population * @param train Subset of training data * @param out Output attribute of the subset of training data */ public Subpopulation(int id, double train[][],int out[]){ //identify it ID=id; IDs=new int[size]; for(int i=0;i<size;i++){ IDs[i]=i; } //set data trainData=new double [train.length][train[0].length]; trainOutput=new int [out.length]; for(int i=0;i< trainData.length;i++){ for(int j=0;j<trainData[0].length;j++){ trainData[i][j]=train[i][j]; } trainOutput[i]=out[i]; } //Getting the number of different classes nClasses = 0; for (int i=0; i<trainOutput.length; i++){ if (trainOutput[i] > nClasses){ nClasses = trainOutput[i]; } } nClasses++; //create population population= new int [size][trainData.length]; for(int i=0;i<size;i++){ for(int j=0;j<trainData.length;j++){ if(Randomize.Rand()<0.5){ population[i][j]=1; } else{ population[i][j]=0; } } } fitness= new double [size]; Arrays.fill(fitness, -1.0); //initialize cache of distances generateCache(); //initialize structures ISSelection=new int [trainData.length]; selectedClasses= new int[nClasses]; nearestN = new int[K]; minDist = new double[K]; }//end-method /** * Performs a new generation of the subpopulation * */ public void doGeneration(){ int notSave; int father, mother; //evaluate population for(int i=0;i<size;i++){ fitness[i]=evaluateFitness(i); } sortPopulation(); //apply elitism notSave=size-((int)((double)size*Elitism)); newPopulation=new int [size][trainData.length]; //generate new population for(int i=0;i<notSave;i+=2){ //selection of parents father = Randomize.RandintClosed(0, size-notSave-1); do { mother = Randomize.RandintClosed(0, size-notSave-1); } while (mother == father); //crossover HUX(father,mother,i); } //merge new population int basis=size-notSave; for(int i=0;i<notSave;i++){ for(int j=0; j<population[i].length;j++){ population[basis+i][j]=newPopulation[i][j]; } fitness[basis+i]=-1.0; CoCoIS.RequestReevaluation(ID, getKey(basis+i)); } //apply random mutation for(int i=0; i<size;i++){ if(Randomize.Rand()<PRandom){ for(int j=0; j<population[i].length;j++){ if(Randomize.Rand()<PBit){ population[i][j]=(population[i][j]+1)%2; } } fitness[i]=-1.0; CoCoIS.RequestReevaluation(ID, getKey(i)); } } //apply Rnn mutation for(int i=0; i<size;i++){ if(Randomize.Rand()<PRnn){ rnnMutation(i); fitness[i]=-1.0; CoCoIS.RequestReevaluation(ID, getKey(i)); } } }//end-method /** * Sorts population by descending fitness value * */ private void sortPopulation(){ for(int i=0;i<population.length;i++){ for(int j=i+1;j<population.length;j++){ if(fitness[j]>fitness[i]){ swapSelectors(i,j); } } } }//end-method /** * Swaps two selectors of the subpopulation * * @param a First selector * @param B Second selector */ private void swapSelectors(int a,int b){ double aux; int auxN; //swap data for(int i=0;i<population[0].length;i++){ auxN=population[a][i]; population[a][i]=population[b][i]; population[b][i]=auxN; } //swap fitness aux=fitness[a]; fitness[a]=fitness[b]; fitness[b]=aux; //swap IDs auxN=IDs[a]; IDs[a]=IDs[b]; IDs[b]=auxN; }//end-method /** * Crosses two selectors of the subpopulation and generates two new childs * * @param a First selector * @param b Second selector * @param newIndex Base index for the offspring */ public void HUX(int a, int b,int newIndex){ int index=0; int aux; int diff []=new int [population[a].length]; //copy parents for(int i=0;i<population[a].length;i++){ newPopulation[newIndex][i]=population[a][i]; newPopulation[newIndex+1][i]=population[b][i]; } //mark non matching alleles for(int i=0;i<population[a].length;i++){ if(population[a][i]!=population[b][i]){ diff[index]=i; index++; } } //shuffle differences shuffleDiff(diff,index); index=index/2; //Exchange half of the differences randomly for(int i=0;i<index;i++){ aux=newPopulation[newIndex][diff[i]]; newPopulation[newIndex][diff[i]]=newPopulation[newIndex+1][diff[i]]; newPopulation[newIndex+1][diff[i]]=aux; } }//end-method /** * Shuffles a vector of differences * * @param diff Vector of differences * @param index Final position of the vector */ private void shuffleDiff(int diff [], int index){ int pos,tmp; for (int i=0; i<index; i++) { pos = Randomize.Randint (0, index); tmp = diff[i]; diff[i] = diff[pos]; diff[pos] = tmp; } }//end-method /** * Performs a Rnn mutation on the selected chromosome * * @param individual Chromosome selected */ private void rnnMutation(int individual){ int initialAcc, actualAcc; //compute initial accuracy initialAcc=computeHits(individual); for(int i=0;i<trainData.length;i++){ //test instance if(population[individual][i]==1){ //remove instance population[individual][i]=0; //compute accuracy actualAcc=computeHits(individual); //decide if removing the instance definitively if(initialAcc>actualAcc){ population[individual][i]=1; } else{ initialAcc=actualAcc; } } } }//end-method /** * Fitness function of the subpopulations * * @param index Individual to evaluate */ private double evaluateFitness(int index){ double fitness; double acc, red, dif; int hits; //compute reduction rate red=computeRed(index); acc=0.0; dif=0.0; //a void chromosome gets a low fitness if(red==1.0){ fitness=WReduction; } else{ //compute difference rate dif=CoCoIS.getContribution(ID,index); //compute accuracy hits=computeHits(index); acc=(double)((double)hits/(double)trainData.length); fitness=(WError*acc)+(WReduction*red)+(WDifference*dif); } return fitness; }//end-method /** * Compute reduction rate of an individual * * @param index Individual to evaluate */ private double computeRed(int index){ double red; int count; //count number of instances selected count=0; for(int i=0;i<population[index].length;i++){ if(population[index][i]==1){ count++; } } red= 1.0-((double)count/(double)population[index].length); return red; }//end-method /** * Compute number of hits in K-NN classification of an individual * * @param individual Individual to evaluate */ private int computeHits(int individual){ int hits; int test; int old; hits=0; //copy member to the K-NN classifier for(int i=0;i<trainData.length;i++){ ISSelection[i]=population[individual][i]; } //perform classification for (int i=0; i<trainData.length; i++) { //leave-one-out old=ISSelection[i]; ISSelection[i]=0; test=knnClassify(i); if(test==trainOutput[i]){ hits++; } ISSelection[i]=old; } return hits; }//end-method /** * K-NN classifier * * @param index Training instance to classify * @return Class predicted */ private int knnClassify(int index){ double dist; int prediction; int predictionValue; boolean stop; Arrays.fill(minDist, Double.MAX_VALUE); //KNN Method starts here for (int i=0; i<trainData.length; i++) { if(ISSelection[i]==1){ dist = distance(index,i); //see if it's nearer than our previous selected neigbours stop=false; for(int j=0;j<K && !stop;j++){ if (dist < minDist[j]) { for (int l = K - 1; l >= j+1; l--) { minDist[l] = minDist[l - 1]; nearestN[l] = nearestN[l - 1]; } minDist[j] = dist; nearestN[j] = i; stop=true; } } } } //we have check all the instances... see what is the most present class if(K==1){ return trainOutput[nearestN[0]]; } Arrays.fill(selectedClasses, 0); for (int i=0; i<K; i++) { selectedClasses[trainOutput[nearestN[i]]]++; } prediction=-1; predictionValue=0; for (int i=0; i<nClasses; i++) { if (predictionValue < selectedClasses[i]) { predictionValue = selectedClasses[i]; prediction = i; } } return prediction; }//end-method /** * Generates a cache of distances to speed up the method * */ private void generateCache(){ cache= new double [trainData.length][trainData.length]; for(int i=0;i<trainData.length;i++){ Arrays.fill(cache[i], -1.0); cache[i][i]=0.0; } }//end-method /** * Distance between two training instances * * @param a First instance * @param b Second instance * @return Euclidean distance */ private double distance(int a, int b){ double dist; //use cache if(cache[a][b]!=-1.0){ dist=cache[a][b]; } else{ //compute distance and store it in the cache dist= euclideanDistance(a,b); cache[a][b]=dist; cache[b][a]=dist; } return dist; }//end-method /** * Euclidean distance between two training instances * * @param a First instance * @param b Second instance * @return Euclidean distance */ private double euclideanDistance(int a, int b){ double dist=0.0; double aux; for(int i=0;i<trainData[0].length;i++){ aux=trainData[a][i]-trainData[b][i]; aux=aux*aux; dist+=aux; } //sqrt avoided to speed up the algorithm return dist; }//end-method /** * Returns the body of an individual, given its ID * * @param key ID of the individual * @return body of the individual */ public int [] getBody(int key){ int body []; int index; //search real index index=searchKey(key); body=new int[trainData.length]; for(int i=0;i<trainData.length;i++){ body[i]=population[index][i]; } return body; }//end-method /** * Search the real index of an individual, given its ID * * @param value ID of the individual * @return index of the individual */ private int searchKey(int value){ boolean found=false; int index=0; for(int i=0; i<size && !found;i++){ if(IDs[i]==value){ found=true; index=i; } } return index; }//end-method /** * Returns the ID assigned to an individual * @param value Index of the individual * @return ID */ private int getKey(int value){ return IDs[value]; }//end-method /** * Performs a roulette selection process * @return Individual selected */ public int rouletteSelection(){ int selected; double uniform; double sum[]; if(fitness[0]==-1.0){ return Randomize.RandintClosed(0, size-1); } sum=new double[size]; sum[0]=fitness[0]; for(int i=1;i<size;i++){ sum[i]=sum[i-1]+fitness[i]; } uniform = Randomize.Randdouble(0.0, sum[size-1]); selected = 0; while (uniform > sum[selected]){ selected++; } //selected is the method. We must return its ID return getKey(selected); } /** * Prints the population * @return String with the contents of the population */ public String print(){ String text=""; for(int i=0;i<size;i++){ for(int j=0;j<trainData.length;j++){ text+=population[i][j]; } text+="\n"; } return text; }//end-method /** * Prints an individual of the population * * @param val index of the individual * @return String with the contents of the individual */ public String printIndividual(int val){ String text=""; for(int j=0;j<trainData.length;j++){ text+=population[val][j]; } text+="\n"; return text; }//end-method }//end-class