/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/***********************************************************************
This file is part of the Fuzzy Instance Based Learning package, a
Java package implementing Fuzzy Nearest Neighbor Classifiers as
complementary material for the paper:
Fuzzy Nearest Neighbor Algorithms: Taxonomy, Experimental analysis and Prospects
Copyright (C) 2012
J. Derrac (jderrac@decsai.ugr.es)
S. García (sglopez@ujaen.es)
F. Herrera (herrera@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
*
* File: GAFuzzyKNN.java
*
* The GAFuzzyKNN algorithm.
*
* @author Written by Joaquín Derrac (University of Granada) 13/11/2011
* @version 1.0
* @since JDK1.5
*
*/
package keel.Algorithms.Fuzzy_Instance_Based_Learning.GAFuzzyKNN;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Arrays;
import java.util.StringTokenizer;
import org.core.Files;
import org.core.Randomize;
import keel.Algorithms.Fuzzy_Instance_Based_Learning.FuzzyIBLAlgorithm;
import keel.Algorithms.Fuzzy_Instance_Based_Learning.ReportTool;
import keel.Algorithms.Fuzzy_Instance_Based_Learning.Timer;
import keel.Algorithms.Fuzzy_Instance_Based_Learning.Util;
public class GAFuzzyKNN extends FuzzyIBLAlgorithm {

    /** Upper bound applied to the inverse-distance weight of a single neighbour. */
    private static final double MAX_NORM = 100000000;

    /** Number of chromosome bits encoding the M parameter. */
    private static final int M_BITS = 16;

    /** Number of chromosome bits encoding the k used for membership assignment. */
    private static final int K_BITS = 5;

    /** Total length of a chromosome (M bits followed by k bits). */
    private static final int CHROMOSOME_LENGTH = M_BITS + K_BITS;

    /** Roulette retries allowed before the second parent is drawn uniformly. */
    private static final int MAX_SELECTION_ATTEMPTS = 100;

    private int K; //K value for Fuzzy K-NN
    private double M; //M value for Fuzzy K-NN norm
    private int kInit; //k value for K-NN in membership assignment

    private double membership [][];
    private double referenceMembership [][];
    private double testMembership [][];

    private int population [][];
    private int elite [];
    private double eliteFitness;
    private double fitness [];

    private int popSize;
    private int maxGenerations;
    private int generations;
    private double crossProb;
    private double mutProb;

    private double sumFitness;
    private double bestFitness;
    private int indexBest;

    /**
     * Reads the parameters of the algorithm.
     *
     * After the three in/out file lines, the script is expected to contain
     * one "name = value" line for each of: seed, K, population size,
     * maximum generations, crossover probability and mutation probability,
     * in that order.
     *
     * @param script Configuration script
     *
     * @throws IllegalArgumentException if K or the population size is lower than 1
     */
    @Override
    protected void readParameters(String script) {

        String file = Files.readFile(script);
        StringTokenizer fileLines = new StringTokenizer(file, "\n\r");

        //Discard in/out files definition
        fileLines.nextToken();
        fileLines.nextToken();
        fileLines.nextToken();

        seed = Long.parseLong(readValue(fileLines));
        K = Integer.parseInt(readValue(fileLines));
        popSize = Integer.parseInt(readValue(fileLines));
        maxGenerations = Integer.parseInt(readValue(fileLines));
        crossProb = Double.parseDouble(readValue(fileLines));
        mutProb = Double.parseDouble(readValue(fileLines));

        if (K < 1) {
            throw new IllegalArgumentException("K must be at least 1, found " + K);
        }
        if (popSize < 1) {
            throw new IllegalArgumentException("Population size must be at least 1, found " + popSize);
        }

    } //end-method

    /**
     * Consumes the next line of the script and returns the text found after
     * its "=" sign, stripped of surrounding whitespace.
     *
     * @param fileLines Tokenizer positioned on a "name = value" line
     *
     * @return Value part of the line
     */
    private static String readValue(StringTokenizer fileLines) {

        StringTokenizer tokens = new StringTokenizer(fileLines.nextToken(), "=");

        tokens.nextToken(); //parameter name

        //trim() copes with "name = value", "name=value" and trailing blanks
        return tokens.nextToken().trim();

    } //end-method

    /**
     * Main builder. Initializes the methods' structures
     *
     * @param script Configuration script
     */
    public GAFuzzyKNN(String script){

        readDataFiles(script);

        //Naming the algorithm
        //NOTE(review): the label is "FuzzyKNN" rather than "GAFuzzyKNN"; kept
        //unchanged because other code may rely on it - confirm.
        name="FuzzyKNN";

        membership = new double [trainData.length][nClasses];
        for (int i = 0; i < trainData.length; i++) {
            Arrays.fill(membership[i], -1.0);
        }

        referenceMembership = new double [referenceData.length][nClasses];
        testMembership = new double [testData.length][nClasses];

        //Initialization of random generator
        Randomize.setSeed(seed);

        //Initialization of Reporting tool
        ReportTool.setOutputFile(outFile[2]);

    } //end-method

    /**
     * Generates the model of the algorithm: runs the genetic search for the
     * M and k parameters and reports the time spent.
     */
    public void generateModel (){

        //Start of model time
        Timer.resetTime();

        evolve();

        //End of model time
        Timer.setModelTime();

        //Showing results
        System.out.println(name+" "+ relation + " Model " + Timer.getModelTime() + "s");

    } //end-method

    /**
     * Obtains k and m parameters using a binary GA.
     *
     * Each chromosome holds 16 bits for M followed by 5 bits for k. Every
     * generation builds a new population through fitness-proportionate
     * selection, one-point crossover and bit-flip mutation; the best
     * chromosome found so far overwrites a random individual (elitism).
     * On exit, M, kInit and the training memberships correspond to the
     * best chromosome found.
     */
    private void evolve(){

        population = new int [popSize][CHROMOSOME_LENGTH];
        elite = new int [CHROMOSOME_LENGTH];
        fitness = new double [popSize];

        //Step 1: Initialization
        for (int i = 0; i < popSize; i++) {
            for (int j = 0; j < CHROMOSOME_LENGTH; j++) {
                population[i][j] = (Randomize.Rand() < 0.5) ? 0 : 1;
            }
        }

        evaluatePopulation();

        System.arraycopy(population[indexBest], 0, elite, 0, CHROMOSOME_LENGTH);
        eliteFitness = bestFitness;

        generations = 0;

        while (generations < maxGenerations) {

            int newPopulation [][] = new int [popSize][CHROMOSOME_LENGTH];

            for (int i = 0; i < popSize; i += 2) {

                //with an odd population the last pair only yields one child
                boolean hasSibling = (i + 1 < popSize);

                //Step 2: Parents selection
                int first = selectParent();
                int second = selectMate(first);

                //Step 3: Crossover
                int cross = Randomize.RandintClosed(1, CHROMOSOME_LENGTH - 2);
                boolean swapTails = Randomize.Rand() < crossProb;

                int tailForFirst [] = swapTails ? population[second] : population[first];
                int tailForSecond [] = swapTails ? population[first] : population[second];
                int tailLength = CHROMOSOME_LENGTH - cross;

                System.arraycopy(population[first], 0, newPopulation[i], 0, cross);
                System.arraycopy(tailForFirst, cross, newPopulation[i], cross, tailLength);

                if (hasSibling) {
                    System.arraycopy(population[second], 0, newPopulation[i + 1], 0, cross);
                    System.arraycopy(tailForSecond, cross, newPopulation[i + 1], cross, tailLength);
                }

                //Step 4: Mutation
                for (int j = 0; j < CHROMOSOME_LENGTH; j++) {
                    if (Randomize.Rand() < mutProb) {
                        newPopulation[i][j] = 1 - newPopulation[i][j];
                    }
                    if (hasSibling && Randomize.Rand() < mutProb) {
                        newPopulation[i + 1][j] = 1 - newPopulation[i + 1][j];
                    }
                }
            }

            //Step 5: Elitist Strategy
            int replace = Randomize.RandintClosed(0, popSize - 1);
            System.arraycopy(elite, 0, newPopulation[replace], 0, CHROMOSOME_LENGTH);

            //Evaluation of new Population
            population = newPopulation;
            evaluatePopulation();

            if (bestFitness > eliteFitness) {
                System.arraycopy(population[indexBest], 0, elite, 0, CHROMOSOME_LENGTH);
                eliteFitness = bestFitness;
            }

            generations += 1;
        }

        //get M and K values from the best chromosome and rebuild memberships
        decode(elite);
        assignMembership();

    } //end-method

    /**
     * Evaluates every individual of the current population and refreshes
     * sumFitness, bestFitness and indexBest accordingly.
     */
    private void evaluatePopulation(){

        sumFitness = 0.0;
        bestFitness = 0.0;
        indexBest = 0;

        for (int i = 0; i < popSize; i++) {
            evaluate(i);
            sumFitness += fitness[i];
            if (fitness[i] > bestFitness) {
                bestFitness = fitness[i];
                indexBest = i;
            }
        }

    } //end-method

    /**
     * Selects an individual with probability proportional to its fitness
     * (roulette wheel). If the population has no positive fitness at all,
     * the individual is drawn uniformly.
     *
     * @return Index of the selected individual
     */
    private int selectParent(){

        if (!(sumFitness > 0.0)) {
            return Randomize.RandintClosed(0, popSize - 1);
        }

        double target = Randomize.Rand() * sumFitness;
        double cumulative = 0.0;

        for (int i = 0; i < popSize; i++) {
            cumulative += fitness[i];
            if (target < cumulative) {
                return i;
            }
        }

        //only reachable through floating point rounding
        return popSize - 1;

    } //end-method

    /**
     * Selects a second parent different from the first one whenever the
     * population allows it.
     *
     * @param first Index of the parent already selected
     *
     * @return Index of the second parent
     */
    private int selectMate(int first){

        if (popSize < 2) {
            return first;
        }

        for (int attempt = 0; attempt < MAX_SELECTION_ATTEMPTS; attempt++) {
            int candidate = selectParent();
            if (candidate != first) {
                return candidate;
            }
        }

        //the roulette keeps returning the same individual (e.g. it holds all
        //the fitness): draw uniformly among the remaining ones instead
        int candidate = Randomize.RandintClosed(0, popSize - 2);

        return (candidate >= first) ? candidate + 1 : candidate;

    } //end-method

    /**
     * Decodes a chromosome into the M and kInit fields.
     *
     * M starts at 1 and the 16 M bits add 2, 1, 0.5, ... respectively;
     * kInit starts at 1 and the 5 k bits add 1, 2, 4, 8 and 16.
     *
     * @param chromosome Chromosome to decode
     */
    private void decode(int chromosome[]){

        //get M value
        M = 1;
        double increment = 2.0;

        for (int i = 0; i < M_BITS; i++) {
            if (chromosome[i] == 1) {
                M += increment;
            }
            increment /= 2.0;
        }

        //get K value
        kInit = 1;
        int inc = 1;

        for (int i = M_BITS; i < CHROMOSOME_LENGTH; i++) {
            if (chromosome[i] == 1) {
                kInit += inc;
            }
            inc *= 2;
        }

    } //end-method

    /**
     * Computes the fitness of an individual: the leave-one-out accuracy
     * obtained over the training set with the M and k values it encodes.
     *
     * @param index Index of the individual in the population
     */
    private void evaluate(int index){

        decode(population[index]);
        assignMembership();

        int hits = 0;

        for (int i = 0; i < trainData.length; i++) {
            computeTrainMembership(i, referenceData[i]);
            if (computeClass(referenceMembership[i]) == trainOutput[i]) {
                hits++;
            }
        }

        //an empty training set would otherwise produce a NaN fitness
        fitness[index] = (trainData.length == 0) ? 0.0 : (double)hits / (double)trainData.length;

    } //end-method

    /**
     * Finds the nearest training instances to an example.
     *
     * The selected neighbours are stored by increasing distance in the
     * leading positions of the output arrays. Positions left unused keep
     * index 0 and distance Double.MAX_VALUE.
     *
     * @param example Instance whose neighbours are searched
     * @param excluded Index of a training instance to skip (leave-one-out), or -1 for none
     * @param k Maximum number of neighbours
     * @param nearestN Output: indexes of the neighbours (length k)
     * @param minDist Output: distances to the neighbours (length k)
     *
     * @return Number of neighbours actually found
     */
    private int findNeighbours(double example[], int excluded, int k, int nearestN[], double minDist[]){

        Arrays.fill(nearestN, 0);
        Arrays.fill(minDist, Double.MAX_VALUE);

        int found = 0;

        for (int i = 0; i < trainData.length; i++) {

            if (i == excluded) {
                continue;
            }

            double dist = Util.euclideanDistance(trainData[i], example);

            //first slot holding a strictly greater distance
            int slot = 0;
            while (slot < k && !(dist < minDist[slot])) {
                slot++;
            }

            if (slot < k) {
                for (int l = k - 1; l > slot; l--) {
                    minDist[l] = minDist[l - 1];
                    nearestN[l] = nearestN[l - 1];
                }
                minDist[slot] = dist;
                nearestN[slot] = i;
                if (found < k) {
                    found++;
                }
            }
        }

        return found;

    } //end-method

    /**
     * Assign class membership to each instance of the training set.
     *
     * For every instance, its kInit nearest neighbours (leave-one-out) are
     * found and the fraction of them belonging to each class is computed.
     * The instance's own class receives 0.51 + 0.49 * fraction and every
     * other class 0.49 * fraction.
     */
    private void assignMembership(){

        int nearestN [] = new int [kInit];
        double minDist [] = new double [kInit];
        int selectedClasses [] = new int [nClasses];

        for (int instance = 0; instance < trainData.length; instance++) {

            int found = findNeighbours(trainData[instance], instance, kInit, nearestN, minDist);

            //count only the neighbours really found, so that empty slots do
            //not vote on behalf of training instance 0
            Arrays.fill(selectedClasses, 0);
            for (int i = 0; i < found; i++) {
                selectedClasses[trainOutput[nearestN[i]]]++;
            }

            for (int c = 0; c < nClasses; c++) {
                double term = (found > 0) ? (double)selectedClasses[c] / (double)found : 0.0;
                if (trainOutput[instance] == c) {
                    membership[instance][c] = 0.51 + 0.49 * term;
                } else {
                    membership[instance][c] = 0.49 * term;
                }
            }
        }

    } //end-method

    /**
     * Classifies the training set (leave-one-out)
     */
    public void classifyTrain(){

        //Start of training time
        Timer.resetTime();

        classifyTrainSet();

        //End of training time
        Timer.setTrainingTime();

        //Showing results
        System.out.println(name+" "+ relation + " Training " + Timer.getTrainingTime() + "s");

    } //end-method

    /**
     * Classifies the test set
     */
    public void classifyTest(){

        //Start of test time
        Timer.resetTime();

        classifyTestSet();

        //End of test time
        Timer.setTestTime();

        //Showing results
        System.out.println(name+" "+ relation + " Test " + Timer.getTestTime() + "s");

    } //end-method

    /**
     * Classifies the training set
     */
    public void classifyTrainSet(){

        for (int i = 0; i < trainData.length; i++) {
            computeTrainMembership(i, referenceData[i]);
            trainPrediction[i] = computeClass(referenceMembership[i]);
        }

    } //end-method

    /**
     * Classifies the test set
     */
    public void classifyTestSet(){

        for (int i = 0; i < testData.length; i++) {
            computeTestMembership(i, testData[i]);
            testPrediction[i] = computeClass(testMembership[i]);
        }

    } //end-method

    /**
     * Computes the class of a instance given its membership array
     *
     * @param pertenence Membership array
     *
     * @return Class with the highest strictly positive membership (the
     * first one in case of ties), or -1 if no class has a positive membership
     */
    private int computeClass(double pertenence[]){

        double max = 0.0;
        int output = -1;

        for (int i = 0; i < pertenence.length; i++) {
            if (pertenence[i] > max) {
                max = pertenence[i];
                output = i;
            }
        }

        return output;

    } //end-method

    /**
     * Evaluates a training instance to predict its class membership
     * (leave-one-out). The result is stored in referenceMembership[index].
     *
     * @param index Index of the instance in the training set
     * @param example Instance evaluated
     *
     */
    private void computeTrainMembership(int index, double example[]) {

        computeMembership(referenceMembership[index], example, index);

    } //end-method

    /**
     * Evaluates a test instance to predict its class membership.
     * The result is stored in testMembership[index].
     *
     * @param index Index of the instance in the test set
     * @param example Instance evaluated
     *
     */
    private void computeTestMembership(int index, double example[]) {

        computeMembership(testMembership[index], example, -1);

    } //end-method

    /**
     * Computes the fuzzy class membership of an example from its K nearest
     * training instances. Each neighbour contributes its own membership
     * vector, weighted by 1 / distance^(2/(M-1)) (capped at MAX_NORM) and
     * normalized by the sum of the weights.
     *
     * @param target Membership vector to fill; any previous content is discarded
     * @param example Instance evaluated
     * @param excluded Index of a training instance to skip (leave-one-out), or -1 for none
     */
    private void computeMembership(double target[], double example[], int excluded) {

        //start from scratch so that repeated calls do not accumulate
        Arrays.fill(target, 0.0);

        int nearestN [] = new int [K];
        double minDist [] = new double [K];

        int found = findNeighbours(example, excluded, K, nearestN, minDist);

        if (found == 0) {
            return;
        }

        //compute membership
        double norm [] = new double [found];
        double sum = 0.0;
        double exponent = 2.0 / (M - 1.0);

        for (int i = 0; i < found; i++) {
            if (minDist[i] == 0.0) {
                norm[i] = MAX_NORM;
            } else {
                norm[i] = Math.min(1.0 / Math.pow(minDist[i], exponent), MAX_NORM);
            }
            sum += norm[i];
        }

        if (!(sum > 0.0)) {
            //every weight underflowed to zero or is undefined (M equal or
            //very close to 1): the nearest neighbour takes all the weight
            Arrays.fill(norm, 0.0);
            norm[0] = 1.0;
            sum = 1.0;
        }

        for (int i = 0; i < found; i++) {
            double weight = norm[i] / sum;
            for (int c = 0; c < nClasses; c++) {
                target[c] += membership[nearestN[i]][c] * weight;
            }
        }

    } //end-method

    /**
     * Reports the results obtained
     */
    public void printReport(){

        writeOutput(outFile[0], trainOutput, trainPrediction);
        writeOutput(outFile[1], testOutput, testPrediction);

        ReportTool.setResults(trainOutput,trainPrediction,testOutput,testPrediction,nClasses);

        ReportTool.printReport();

        //the reported "K value" is the evolved k used for membership assignment
        String text="\n";
        text+= "K value = "+kInit+"\n";
        text+= "M value = "+M;

        ReportTool.addToReport(text);

    } //end-method

} //end-class