/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/***********************************************************************
This file is part of the Fuzzy Instance Based Learning package, a
Java package implementing Fuzzy Nearest Neighbor Classifiers as
complementary material for the paper:
Fuzzy Nearest Neighbor Algorithms: Taxonomy, Experimental analysis and Prospects
Copyright (C) 2012
J. Derrac (jderrac@decsai.ugr.es)
S. García (sglopez@ujaen.es)
F. Herrera (herrera@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
*
* File: JFKNN.java
*
* The JFKNN algorithm.
*
* @author Written by Joaquín Derrac (University of Granada) 13/11/2011
* @version 1.0
* @since JDK1.5
*
*/
package keel.Algorithms.Fuzzy_Instance_Based_Learning.JFKNN;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Arrays;
import java.util.StringTokenizer;
import org.core.Files;
import keel.Algorithms.Fuzzy_Instance_Based_Learning.FuzzyIBLAlgorithm;
import keel.Algorithms.Fuzzy_Instance_Based_Learning.ReportTool;
import keel.Algorithms.Fuzzy_Instance_Based_Learning.Timer;
import keel.Algorithms.Fuzzy_Instance_Based_Learning.Util;
public class JFKNN extends FuzzyIBLAlgorithm {
// Symmetric matrix of pairwise distances, sized totalInstances x totalInstances.
// Indices [0, trainInstances) address training instances; indices
// [trainInstances, totalInstances) address test instances.
private double distances[][];
// Number of training instances (trainData.length).
private int trainInstances;
// Number of test instances (testData.length).
private int testInstances;
// trainInstances + testInstances.
private int totalInstances;
// Iteration counter of the stage currently running in generateModel().
private int H;
// Lowest leave-one-out error found so far in the current stage.
private double bestError;
// Value of bestError at the start of the current iteration; the stage stops
// when an iteration does not improve on it.
private double globalError;
// Candidate triplets evaluated in one iteration; element i is given k = i + 1.
private Triplet tripletArray [];
// Copy of the best candidate found so far in the current stage.
private Triplet bestTriplet;
// Triplet whose membership matrix seeds the candidates of every iteration.
private Triplet baseTriplet;
// Copy of bestTriplet taken at the end of the first stage.
private Triplet bestTripletPhase1;
// Error recorded at the end of the first stage.
private double phase1Error;
// Copy of bestTriplet taken at the end of the second stage.
private Triplet bestTripletPhase2;
// Error recorded at the end of the second stage.
private double phase2Error;
// Class memberships of the test instances (testInstances x nClasses) computed
// by VClassification() after the first stage.
private double Vbase[][];
/**
 * Reads the configuration script of the algorithm.
 *
 * The script is loaded and its first three lines (the in/out files
 * definition) are consumed. No further values are read from it here.
 *
 * @param script Configuration script
 */
@Override
protected void readParameters(String script) {
    final String contents = Files.readFile(script);
    final StringTokenizer scriptLines = new StringTokenizer(contents, "\n\r");

    //Discard in/out files definition (three lines)
    for (int skipped = 0; skipped < 3; skipped++) {
        scriptLines.nextToken();
    }
} //end-method
/**
* Main builder. Initializes the methods' structures
*
* @param script Configuration script
*/
public JFKNN(String script){
readDataFiles(script);
//Naming the algorithm
name="Jozwik Fuzzy K-NN";
trainInstances=trainData.length;
testInstances=testData.length;
totalInstances=trainInstances+testInstances;
distances= new double [totalInstances][totalInstances];
//Initialization of Reporting tool
ReportTool.setOutputFile(outFile[2]);
} //end-method
/**
 * Generates the model of the algorithm.
 *
 * Steps performed:
 * <ol>
 * <li>Pre-computes the distance between every pair of instances. The matrix
 *     is indexed jointly: training instances occupy [0, trainInstances) and
 *     test instances occupy [trainInstances, totalInstances).</li>
 * <li>First stage (training set only): starting from memberships that put 1.0
 *     on each instance's own class, every iteration evaluates one candidate
 *     triplet per k in [1, trainInstances-1] with a leave-one-out pass, keeps
 *     the best one and updates the base memberships. It stops as soon as an
 *     iteration does not lower the best error.</li>
 * <li>Computes Vbase, the memberships of the test instances, from the best
 *     first-stage triplet.</li>
 * <li>Second stage (training + test sets): same iterative scheme, with the
 *     test instances seeded from Vbase and k in [1, totalInstances-1].</li>
 * </ol>
 * The best triplet and error of each stage are kept in
 * bestTripletPhase1/phase1Error and bestTripletPhase2/phase2Error.
 * Progress and the elapsed model time are printed to standard output.
 */
public void generateModel() {

    //Start of model time
    Timer.resetTime();

    //compute all possible distances (for optimizing)
    for (int i = 0; i < totalInstances; i++) {
        distances[i][i] = 0.0;
        for (int j = i + 1; j < totalInstances; j++) {
            double d;
            if (j < trainInstances) {
                //i < j < trainInstances: both are training instances
                d = Util.euclideanDistance(trainData[i], trainData[j]);
            } else if (i < trainInstances) {
                //training instance vs. test instance
                d = Util.euclideanDistance(trainData[i], testData[j - trainInstances]);
            } else {
                //both are test instances. Only the data lookup uses the local
                //test index; the matrix cell must stay at the joint (global)
                //index, otherwise train/test cells get overwritten and the
                //test/test cells are left at 0.
                d = Util.euclideanDistance(testData[i - trainInstances], testData[j - trainInstances]);
            }
            distances[i][j] = d;
            distances[j][i] = d;
        }
    }

    //initial triplet: each training instance belongs fully to its own class
    //(assumes a new Triplet starts with an all-zero membership matrix - TODO confirm)
    bestTriplet = new Triplet(trainInstances, nClasses);
    bestTriplet.error = 1.0;
    bestTriplet.k = 0;
    for (int i = 0; i < trainInstances; i++) {
        bestTriplet.w[i][trainOutput[i]] = 1.0;
    }

    baseTriplet = new Triplet(bestTriplet);
    tripletArray = new Triplet[trainInstances - 1];

    //First Stage
    bestError = 1.0;
    H = 0;
    //best candidate seen so far; deliberately kept across iterations
    double partialBestError = 1.0;
    int partialIndex = -1;

    do {
        globalError = bestError;
        H++;

        //one candidate per k value, all seeded with the base memberships
        for (int i = 0; i < trainInstances - 1; i++) {
            tripletArray[i] = new Triplet(baseTriplet.w);
            tripletArray[i].k = i + 1;
        }

        for (int i = 0; i < tripletArray.length; i++) {
            leaveOneOutTriplet(tripletArray[i]);
            if (partialBestError > tripletArray[i].error) {
                partialBestError = tripletArray[i].error;
                partialIndex = i;
            }
        }

        //partialIndex is valid here: the condition only holds after an update above
        if (bestError > partialBestError) {
            bestError = partialBestError;
            generateNewTriplets(tripletArray[partialIndex]);
        }

        System.out.println("Iteration number "+H+" Current Error: "+globalError+" Best Error: "+bestError);
    } while (globalError > bestError);

    phase1Error = globalError;
    //end-first-stage

    //obtaining of the V array
    Vbase = new double[testInstances][nClasses];
    VClassification(bestTriplet.w, bestTriplet.k);

    //generating new base and best triplets
    bestTripletPhase1 = new Triplet(bestTriplet);

    bestTriplet = new Triplet(totalInstances, nClasses);
    bestTriplet.error = 1.0;
    bestTriplet.k = 0;
    for (int i = 0; i < trainInstances; i++) {
        bestTriplet.w[i][trainOutput[i]] = 1.0;
    }
    //test instances start from the memberships estimated after the first stage
    for (int i = 0; i < testInstances; i++) {
        for (int j = 0; j < nClasses; j++) {
            bestTriplet.w[i + trainInstances][j] = Vbase[i][j];
        }
    }

    baseTriplet = new Triplet(bestTriplet);
    tripletArray = new Triplet[totalInstances - 1];

    //Second Stage
    bestError = 1.0;
    H = 0;
    partialBestError = 1.0;
    partialIndex = -1;

    do {
        globalError = bestError;
        H++;

        for (int i = 0; i < totalInstances - 1; i++) {
            tripletArray[i] = new Triplet(baseTriplet.w);
            tripletArray[i].k = i + 1;
        }

        for (int i = 0; i < tripletArray.length; i++) {
            leaveOneOutTripletSecond(tripletArray[i]);
            if (partialBestError > tripletArray[i].error) {
                partialBestError = tripletArray[i].error;
                partialIndex = i;
            }
        }

        if (bestError > partialBestError) {
            bestError = partialBestError;
            generateNewTripletsSecond(tripletArray[partialIndex]);
        }

        System.out.println("Iteration number "+H+" Current Error: "+globalError+" Best Error: "+bestError);
    } while (globalError > bestError);

    phase2Error = globalError;
    bestTripletPhase2 = new Triplet(bestTriplet);

    //End of model time
    Timer.setModelTime();

    //Showing results
    System.out.println(name+" "+ relation + " Model " + Timer.getModelTime() + "s");
} //end-method
/**
 * Performs the preliminary classification of the test set using the
 * memberships obtained at the first stage.
 *
 * For every test instance the k nearest training instances are located
 * through the pre-computed distance matrix, and the instance's row of Vbase
 * is set to the average of the membership rows of those neighbours.
 *
 * @param memberships matrix of membership of the training set
 * @param k best k value found
 */
private void VClassification(double memberships [][],int k){

    for (int test = 0; test < testInstances; test++) {

        //neighbour list kept sorted by ascending distance; ids default to 0
        final int[] neighbourIds = new int[k];
        final double[] neighbourDist = new double[k];
        Arrays.fill(neighbourDist, Double.MAX_VALUE);

        //column of this test instance in the joint distance matrix
        final int column = trainInstances + test;

        //KNN Method starts here
        for (int candidate = 0; candidate < trainData.length; candidate++) {
            final double d = distances[candidate][column];

            //first slot whose stored distance is strictly greater than d
            int slot = 0;
            while (slot < k && !(d < neighbourDist[slot])) {
                slot++;
            }

            if (slot < k) {
                //shift the tail one position right (dropping the last entry) and insert
                final int tail = k - 1 - slot;
                System.arraycopy(neighbourDist, slot, neighbourDist, slot + 1, tail);
                System.arraycopy(neighbourIds, slot, neighbourIds, slot + 1, tail);
                neighbourDist[slot] = d;
                neighbourIds[slot] = candidate;
            }
        }

        //average the membership rows of the selected neighbours
        final double[] row = Vbase[test];
        for (int c = 0; c < nClasses; c++) {
            double sum = 0.0;
            for (int n = 0; n < neighbourIds.length; n++) {
                sum += memberships[neighbourIds[n]][c];
            }
            row[c] = sum / k;
        }
    }
}//end-method
/**
 * Prepares the next step of the first-stage iterative process.
 *
 * Stores a copy of the given triplet as the current best and blends its
 * memberships into the base triplet, weighting the best triplet by its k:
 * base = (best * k + base) / (k + 1). Only training rows are updated.
 *
 * @param best current best triplet found
 */
private void generateNewTriplets(Triplet best){

    bestTriplet = new Triplet(best);

    final double weight = (double) best.k;
    for (int row = 0; row < trainInstances; row++) {
        final double[] baseRow = baseTriplet.w[row];
        final double[] bestRow = best.w[row];
        for (int c = 0; c < nClasses; c++) {
            baseRow[c] = ((bestRow[c] * weight) + baseRow[c]) / (weight + 1.0);
        }
    }
}//end-method
/**
 * Prepares the next step of the second-stage iterative process.
 * This time, both training and test instances are considered.
 *
 * Stores a copy of the given triplet as the current best and blends its
 * memberships into the base triplet, weighting the best triplet by its k:
 * base = (best * k + base) / (k + 1).
 *
 * @param best current best triplet found
 */
private void generateNewTripletsSecond(Triplet best){

    bestTriplet = new Triplet(best);

    final double weight = (double) best.k;
    for (int row = 0; row < totalInstances; row++) {
        final double[] baseRow = baseTriplet.w[row];
        final double[] bestRow = best.w[row];
        for (int c = 0; c < nClasses; c++) {
            baseRow[c] = ((bestRow[c] * weight) + baseRow[c]) / (weight + 1.0);
        }
    }
}//end-method
/**
 * Performs a LOO process on a triplet, to estimate its error.
 *
 * For each training instance, its set.k nearest training neighbours
 * (excluding itself) are found; the instance's membership row in set.w is
 * replaced by the average of the neighbours' rows as they were on entry, and
 * the class with the largest resulting membership is taken as the prediction.
 * set.error receives the fraction of training instances whose prediction
 * differs from their actual class.
 *
 * @param set triplet to analyze
 */
private void leaveOneOutTriplet(Triplet set){

    final int k = set.k;

    //snapshot of the memberships: set.w is overwritten while we iterate
    final double[][] previousW = new double[trainInstances][nClasses];
    for (int row = 0; row < trainInstances; row++) {
        System.arraycopy(set.w[row], 0, previousW[row], 0, previousW[row].length);
    }

    final double[] votes = new double[nClasses];
    int errors = 0;

    for (int query = 0; query < trainInstances; query++) {

        //neighbour list kept sorted by ascending distance; ids default to 0
        final int[] neighbourIds = new int[k];
        final double[] neighbourDist = new double[k];
        Arrays.fill(neighbourDist, Double.MAX_VALUE);

        //KNN Method starts here
        for (int candidate = 0; candidate < trainData.length; candidate++) {
            if (candidate == query) {
                continue; //leave-one-out
            }
            final double d = distances[candidate][query];

            //first slot whose stored distance is strictly greater than d
            int slot = 0;
            while (slot < k && !(d < neighbourDist[slot])) {
                slot++;
            }

            if (slot < k) {
                //shift the tail one position right (dropping the last entry) and insert
                final int tail = k - 1 - slot;
                System.arraycopy(neighbourDist, slot, neighbourDist, slot + 1, tail);
                System.arraycopy(neighbourIds, slot, neighbourIds, slot + 1, tail);
                neighbourDist[slot] = d;
                neighbourIds[slot] = candidate;
            }
        }

        //accumulate the old memberships of the selected neighbours
        Arrays.fill(votes, 0.0);
        for (int n = 0; n < k; n++) {
            final double[] neighbourRow = previousW[neighbourIds[n]];
            for (int c = 0; c < nClasses; c++) {
                votes[c] += neighbourRow[c];
            }
        }

        //new memberships of the instance and its predicted class
        int predicted = -1;
        double highest = 0.0;
        for (int c = 0; c < nClasses; c++) {
            final double share = votes[c] / (double) k;
            set.w[query][c] = share;
            if (share > highest) {
                highest = share;
                predicted = c;
            }
        }

        if (trainOutput[query] != predicted) {
            errors++;
        }
    }

    //compute LOO error
    set.error = (double) errors / (double) trainInstances;
}//end-method
/**
 * Performs a LOO process on a triplet, to estimate its error.
 * This time, both training and test instances are considered.
 *
 * For each instance (training or test), its set.k nearest neighbours among
 * all other instances are found; the instance's membership row in set.w is
 * replaced by the average of the neighbours' rows as they were on entry.
 * Only training instances contribute to the error: set.error receives the
 * fraction of training instances whose predicted class differs from their
 * actual class.
 *
 * @param set triplet to analyze
 */
private void leaveOneOutTripletSecond(Triplet set){

    final int k = set.k;

    //snapshot of the memberships: set.w is overwritten while we iterate
    final double[][] previousW = new double[totalInstances][nClasses];
    for (int row = 0; row < totalInstances; row++) {
        System.arraycopy(set.w[row], 0, previousW[row], 0, previousW[row].length);
    }

    final double[] votes = new double[nClasses];
    int errors = 0;

    for (int query = 0; query < totalInstances; query++) {

        //neighbour list kept sorted by ascending distance; ids default to 0
        final int[] neighbourIds = new int[k];
        final double[] neighbourDist = new double[k];
        Arrays.fill(neighbourDist, Double.MAX_VALUE);

        //KNN Method starts here
        for (int candidate = 0; candidate < totalInstances; candidate++) {
            if (candidate == query) {
                continue; //leave-one-out
            }
            final double d = distances[candidate][query];

            //first slot whose stored distance is strictly greater than d
            int slot = 0;
            while (slot < k && !(d < neighbourDist[slot])) {
                slot++;
            }

            if (slot < k) {
                //shift the tail one position right (dropping the last entry) and insert
                final int tail = k - 1 - slot;
                System.arraycopy(neighbourDist, slot, neighbourDist, slot + 1, tail);
                System.arraycopy(neighbourIds, slot, neighbourIds, slot + 1, tail);
                neighbourDist[slot] = d;
                neighbourIds[slot] = candidate;
            }
        }

        //accumulate the old memberships of the selected neighbours
        Arrays.fill(votes, 0.0);
        for (int n = 0; n < k; n++) {
            final double[] neighbourRow = previousW[neighbourIds[n]];
            for (int c = 0; c < nClasses; c++) {
                votes[c] += neighbourRow[c];
            }
        }

        //new memberships of the instance and its predicted class
        int predicted = -1;
        double highest = 0.0;
        for (int c = 0; c < nClasses; c++) {
            final double share = votes[c] / (double) k;
            set.w[query][c] = share;
            if (share > highest) {
                highest = share;
                predicted = c;
            }
        }

        //only training instances have a known class to compare against
        if (query < trainInstances && trainOutput[query] != predicted) {
            errors++;
        }
    }

    //compute LOO error
    set.error = (double) errors / (double) trainInstances;
}//end-method
/**
 * Predicts the class of an instance given its class membership array.
 *
 * The first class holding the largest strictly positive membership wins.
 * If no membership is greater than 0.0, -1 is returned.
 *
 * @param classArray class membership array
 * @return class of the instance, or -1 when no class has positive membership
 */
private int predictClass(double classArray[]){

    int winner = -1;
    double highest = 0.0;

    for (int c = 0; c < nClasses; c++) {
        final double value = classArray[c];
        if (value > highest) {
            highest = value;
            winner = c;
        }
    }

    return winner;
}//end-method
/**
 * Classifies the training set (leave-one-out) and measures the time taken.
 *
 * The timer is reset, classifyTrainSet() fills the training predictions,
 * the elapsed time is stored as training time and a summary line is printed
 * to standard output.
 */
public void classifyTrain(){
//Start of training time
Timer.resetTime();
classifyTrainSet();
//End of training time
Timer.setTrainingTime();
//Showing results
System.out.println(name+" "+ relation + " Training " + Timer.getTrainingTime() + "s");
} //end-method
/**
 * Classifies the test set and measures the time taken.
 *
 * The timer is reset, classifyTestSet() fills the test predictions, the
 * elapsed time is stored as test time and a summary line is printed to
 * standard output.
 */
public void classifyTest(){
//Start of test time
Timer.resetTime();
classifyTestSet();
//End of test time
Timer.setTestTime();
//Showing results
System.out.println(name+" "+ relation + " Test " + Timer.getTestTime() + "s");
} //end-method
/**
 * Classifies the training set.
 *
 * The memberships of the stage with the lower recorded error are used
 * (the first stage wins ties); each training instance is assigned the class
 * chosen by predictClass() from its membership row.
 */
public void classifyTrainSet(){

    final Triplet chosen = (phase1Error <= phase2Error) ? bestTripletPhase1 : bestTripletPhase2;

    for (int i = 0; i < trainData.length; i++) {
        trainPrediction[i] = predictClass(chosen.w[i]);
    }
} //end-method
/**
 * Classifies the test set.
 *
 * When the first stage has the lower (or equal) recorded error, the
 * memberships in Vbase are used; otherwise the test rows of the second-stage
 * triplet (stored after the training rows) are used. Each test instance is
 * assigned the class chosen by predictClass() from its membership row.
 */
public void classifyTestSet(){

    final boolean usePhase1 = phase1Error <= phase2Error;

    for (int i = 0; i < testData.length; i++) {
        final double[] memberships = usePhase1
                ? Vbase[i]
                : bestTripletPhase2.w[trainInstances + i];
        testPrediction[i] = predictClass(memberships);
    }
} //end-method
/**
 * Reports the results obtained.
 *
 * Passes the actual and predicted classes of the training set to
 * writeOutput() with outFile[0], and those of the test set with outFile[1].
 * The same four arrays plus the number of classes are then handed to
 * ReportTool, which is asked to print its report.
 * NOTE(review): the output destination of ReportTool is presumably the file
 * set in the constructor (outFile[2]) - confirm against ReportTool.
 */
public void printReport(){
// training set: actual vs. predicted classes
writeOutput(outFile[0], trainOutput, trainPrediction);
// test set: actual vs. predicted classes
writeOutput(outFile[1], testOutput, testPrediction);
ReportTool.setResults(trainOutput,trainPrediction,testOutput,testPrediction,nClasses);
ReportTool.printReport();
} //end-method
} //end-class