/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
J. Alcal�-Fdez (jalcala@decsai.ugr.es)
A. Fern�ndez (alberto.fernandez@ujaen.es)
S. Garc�a (sglopez@ujaen.es)
F. Herrera (herrera@decsai.ugr.es)
L. S�nchez (luciano@uniovi.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
*
* File: KNNClassifier.java
*
* A KNN classifier with the capabilities of selecting instances and features.
* For efficiency, employs unsquared euclidean distance.
*
* @author Written by Joaqu�n Derrac (University of Granada) 20/04/2010
* @version 1.0
* @since JDK1.5
*
*/
package keel.Algorithms.RST_Learning;
import java.util.Arrays;
public class KNNClassifier{
private static int K;
private static double data[][];
private static int output[];
private static int instances;
private static int features;
private static int nClasses;
private static int IS[];
private static int FS[];
private static int nearestN [];
private static double minDist [];
/**
* Sets the number of classes in the data
*
* @param value Number of classes
*/
public static void setClasses(int value){
nClasses=value;
}//end-method
/**
* Sets the K value
*
* @param value K value
*/
public static void setK(int value){
K=value;
nearestN = new int[K];
minDist = new double[K];
}//end-method
/**
* Loads the training data into the classifier
*
* @param newData Data represented with continuous values
*/
public static void setData(double newData[][]){
instances = newData.length;
features = newData[0].length;
data = new double [instances][features];
for(int i=0;i<instances;i++){
for(int j=0;j<features;j++){
data[i][j]=newData[i][j];
}
}
IS = new int [instances];
FS = new int [features];
Arrays.fill(IS, 1);
Arrays.fill(FS, 1);
}//end-method
/**
* Loads the training output into the classifier
*
* @param newOutput Output attribute of the training data
*/
public static void setOutput(int newOutput[]){
output=new int [data.length];
System.arraycopy(newOutput,0,output, 0, data.length);
}//end-method
/**
* Sets the vector of instances selected
*
* @param selected Vector of instances selected
*/
public static void setInstances(int selected []){
for(int i=0; i< instances;i++){
IS[i]=selected[i];
}
}//end-method
/**
* Sets the vector of features selected
*
* @param selected Vector of features selected
*/
public static void setFeatures(int selected []){
for(int i=0; i< features ;i++){
FS[i]=selected[i];
}
}//end-method
public static void setAllInstances(){
Arrays.fill(IS,1);
}//end-method
public static void setAllFeatures(){
Arrays.fill(FS,1);
}//end-method
/**
* Estimates the LVO (Leave-one-out) accuracy of the classifier
* over the training data.
*
* @return Accuracy estimated
*/
public static double accuracy(){
int hits;
int test;
double acc;
hits=0;
for (int i=0; i<data.length; i++) {
test=classifyTrainingInstance(i);
if(test==output[i]){
hits++;
}
}
acc=(double)((double)hits/(double)data.length);
return acc;
}//end-method
/**
* Classifies a new example
*
* @param example Example to classify
* @return Class of the example
*/
public static int classifyNewInstance(double example[]){
int value;
if(K==1){
value=classifyInstance(example);
}
else{
value=classifyInstanceK(example);
}
return value;
}//end-method
/**
* Classifies an instance by means of a 1-NN classifier
*
* @param example Example to classify
* @return Class of the example
*/
private static int classifyInstance(double example[]){
double dist;
int near=-1;
double minD=Double.MAX_VALUE;
//1-NN Method starts here
for (int i=0; i<data.length; i++) {
if(IS[i]==1){
dist = newEuclideanDistance(example,i);
//see if it's nearer than our previous selected neigbours
if (dist < minD) {
minD = dist;
near = i;
}
}
}
if(near==-1){
return -1;
}
return output[near];
}//end-method
/**
* Classifies an instance by means of a K-NN classifier
*
* @param example Example to classify
* @return Class of the example
*/
private static int classifyInstanceK(double example[]){
int selectedClasses[];
double dist;
int prediction;
int predictionValue;
boolean stop;
Arrays.fill(nearestN, -1);
Arrays.fill(minDist, Double.MAX_VALUE);
//KNN Method starts here
for (int i=0; i<instances; i++) {
if(IS[i]==1){
dist = newEuclideanDistance(example,i);
//see if it's nearer than our previous selected neighbors
stop=false;
for(int j=0;j<K && !stop;j++){
if (dist < minDist[j]) {
for (int l = K - 1; l >= j+1; l--) {
minDist[l] = minDist[l - 1];
nearestN[l] = nearestN[l - 1];
}
minDist[j] = dist;
nearestN[j] = i;
stop=true;
}
}
}
}
//we have check all the instances... see what is the most present class
selectedClasses= new int[nClasses];
for (int i=0; i<nClasses; i++) {
selectedClasses[i] = 0;
}
for (int i=0; i<K; i++) {
selectedClasses[output[nearestN[i]]]+=1;
}
prediction=0;
predictionValue=selectedClasses[0];
for (int i=1; i<nClasses; i++) {
if (predictionValue < selectedClasses[i]) {
predictionValue = selectedClasses[i];
prediction = i;
}
}
return prediction;
}//end-method
/**
* Classifies a training example
*
* @param index Training example to classify
* @return Class of the example
*/
public static int classifyTrainingInstance(int index){
int value;
int aux;
//leave-one-out
aux=IS[index];
IS[index]=0;
if(K==1){
value=classifyTrainInstance(index);
}
else{
value=classifyTrainInstanceK(index);
}
IS[index]=aux;
return value;
}//end-method
/**
* Classifies a training instance by means of a 1-NN classifier
*
* @param index Example to classify
* @return Class of the example
*/
private static int classifyTrainInstance(int index){
double dist;
int near=-1;
double minD=Double.MAX_VALUE;
//1-NN Method starts here
for (int i=0; i<data.length; i++) {
if(IS[i]==1){
dist = euclideanDistance(index,i);
//see if it's nearer than our previous selected neigbours
if (dist < minD) {
minD = dist;
near = i;
}
}
}
if(near==-1){
return -1;
}
return output[near];
}//end-method
/**
* Classifies an instance by means of a K-NN classifier
*
* @param instance Example to classify
* @return Class of the example
*/
private static int classifyTrainInstanceK(int index){
int selectedClasses[];
double dist;
int prediction;
int predictionValue;
boolean stop;
Arrays.fill(nearestN, -1);
Arrays.fill(minDist, Double.MAX_VALUE);
//KNN Method starts here
for (int i=0; i<instances; i++) {
if(IS[i]==1){
dist = euclideanDistance(index,i);
//see if it's nearer than our previous selected neighbors
stop=false;
for(int j=0;j<K && !stop;j++){
if (dist < minDist[j]) {
for (int l = K - 1; l >= j+1; l--) {
minDist[l] = minDist[l - 1];
nearestN[l] = nearestN[l - 1];
}
minDist[j] = dist;
nearestN[j] = i;
stop=true;
}
}
}
}
//we have check all the instances... see what is the most present class
selectedClasses= new int[nClasses];
for (int i=0; i<nClasses; i++) {
selectedClasses[i] = 0;
}
for (int i=0; i<K; i++) {
selectedClasses[output[nearestN[i]]]+=1;
}
prediction=0;
predictionValue=selectedClasses[0];
for (int i=1; i<nClasses; i++) {
if (predictionValue < selectedClasses[i]) {
predictionValue = selectedClasses[i];
prediction = i;
}
}
return prediction;
}//end-method
/**
* Euclidean instance between two training instances
*
* @param a First instance
* @param b Second instance
*
* @return Unsquared euclidean distance
*/
private static double euclideanDistance(int a,int b){
double length=0.0;
double value;
for (int i=0; i<data[b].length; i++) {
if(FS[i]==1){
value = data[a][i]-data[b][i];
length += value*value;
}
}
return length;
}//end-method
/**
* Euclidean instance between a training instance and a new example
*
* @param example New example
* @param b Training instance
*
* @return Unsquared euclidean distance
*/
private static double newEuclideanDistance(double example [],int b){
double length=0.0;
double value;
for (int i=0; i<data[b].length; i++) {
if(FS[i]==1){
value = example[i]-data[b][i];
length += value*value;
}
}
return length;
}//end-method
/**
* Computes reduction rates over instances
*
* @return Reduction rate
*/
public static double computeISReduction(){
double count=0;
double result;
for(int i=0;i<instances;i++){
if(IS[i]==1){
count+=1.0;
}
}
result=(double)(count/(double)instances);
result=1.0-result;
return result;
}//end-method
/**
* Computes reduction rates over features
*
* @return Reduction rate
*/
public static double computeFSReduction(){
double count=0;
double result;
for(int i=0;i<features;i++){
if(FS[i]==1){
count+=1.0;
}
}
result=(double)(count/(double)features);
result=1.0-result;
return result;
}//end-method
/**
* Get a vector with the instances currently selected
*
* @return A vector with the instances currently selected
*/
public static int [] getIS(){
return IS;
}//end-method
/**
* Get a vector with the features currently selected
*
* @return A vector with the features currently selected
*/
public static int [] getFS(){
int newFS [];
newFS= new int [FS.length];
for(int i=0;i<FS.length;i++){
newFS[i]=FS[i];
}
return newFS;
}//end-method
public static String printFS(){
String aux="";
for(int i=0;i<FS.length;i++){
aux+=FS[i];
}
return aux;
}//end-method
}//end-class