/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Semi_Supervised_Learning.utilities.KNN;
import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeSet;
import keel.Algorithms.Semi_Supervised_Learning.Basic.Prototype;
import keel.Algorithms.Semi_Supervised_Learning.*;
import keel.Algorithms.Semi_Supervised_Learning.utilities.*;
import java.util.*;
import keel.Dataset.*;
import org.core.*;
/**
 * Implements the K-Nearest Neighbors (KNN) classification algorithm over prototype sets.
 * @author diegoj
 */
public class KNN
{
/** Number of nearest neighbors (the k of KNN) to be searched. */
protected static int K = 1;
// Index of the neighbors of an element (currently unused).
//protected static HashSet<Integer> neighborsIndex = null;
/**
 * Sets the number of neighbors to be used in the KNN algorithm. Must be an integer greater than 0.
 * @param n Number of neighbors
 */
public static void setK(int n)
{
K = n;
}
/**
* Returns the current value of K.
* @return Number of neighbors used with each prototype in the KNN.
*/
public static int k()
{
return K;
}
/**
* Returns the current value of K.
* @return Number of neighbors used with each prototype in the KNN.
*/
public static int getK()
{
return k();
}
/**
 * Implements the 1NN algorithm.
 * @param current Prototype for which the algorithm will find its nearest neighbor.
 * @param dataSet Prototype set in which the algorithm will search.
 * @return Nearest prototype to current in the prototype set dataSet.
 */
public static Prototype _1nn(Prototype current, PrototypeSet dataSet)
{
Prototype nearestNeighbor = dataSet.get(0);
int indexNN = 0;
//double minDist = Distance.dSquared(current, nearestNeighbor);
//double minDist = Distance.euclideanDistance(current, nearestNeighbor);
double minDist = Double.POSITIVE_INFINITY;
double currDist;
int _size = dataSet.size();
// System.out.println("****************");
// current.print();
for (int i=0; i<_size; i++)
{
Prototype pi = dataSet.get(i);
//if(!current.equals(pi))
//{
// double currDist = Distance.dSquared(current, pi);
currDist = Distance.euclideanDistance(pi, current);
// System.out.println(currDist);
// Distances of exactly 0 are skipped, so the query prototype (or an exact
// duplicate of it) is never returned as its own nearest neighbor.
if(currDist > 0){
if (currDist < minDist)
{
minDist = currDist;
// nearestNeighbor = pi;
indexNN = i;
}
}
//}
}
// System.out.println("Min dist = " + minDist + ", nearest neighbor index = " + indexNN);
return dataSet.get(indexNN);
}
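/*
 * Usage sketch for _1nn (illustrative only; assumes `query` and `trainingSet`
 * are an already-built Prototype and PrototypeSet, as used elsewhere in this
 * package):
 *
 *   Prototype nearest = KNN._1nn(query, trainingSet);
 *   double predictedLabel = nearest.label();
 *
 * Note that prototypes at distance exactly 0 from the query are skipped, so
 * calling _1nn with a query that belongs to trainingSet will not return the
 * query itself.
 */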
/**
 * Implements the KNN algorithm.
 * @param current Prototype for which the algorithm will find its nearest neighbors.
 * @param dataSet Prototype set in which the algorithm will search.
 * @param k Size of the neighborhood to be returned.
 * @return Nearest prototypes to current in the prototype set dataSet.
 */
public static PrototypeSet knn(Prototype current, PrototypeSet dataSet, int k)
{
PrototypeSet nneighbors = new PrototypeSet(k);
PrototypeSet sorted = dataSet.sort(current);
for(int i=0; i<k; ++i)
nneighbors.add(sorted.get(i));
return nneighbors;
}
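/*
 * Usage sketch for the explicit-k search (illustrative; `query` and
 * `trainingSet` are assumed to be populated as in the sketch above):
 *
 *   PrototypeSet threeNearest = KNN.knn(query, trainingSet, 3);
 *
 * The method sorts the whole set by distance to `query` and keeps the first
 * k elements, so k must not exceed trainingSet.size().
 */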
/**
 * Implements the KNN algorithm using the static K parameter of the class.
 * @param current Prototype for which the algorithm will find its nearest neighbors.
 * @param dataSet Prototype set in which the algorithm will search.
 * @return Nearest prototypes to current in the prototype set dataSet.
 */
public static PrototypeSet knn(Prototype current, PrototypeSet dataSet)
{
return KNN.knn(current,dataSet,K);
}
/**
 * Counts the test prototypes whose class is correctly predicted by the 1NN rule over the training set.
 * @param training Training data prototype set
 * @param test Test data prototype set
 * @return Number of test prototypes correctly classified
 */
public static int classficationAccuracy1NN(PrototypeSet training, PrototypeSet test)
{
int wellClassificated = 0;
for(Prototype p : test)
{
Prototype nearestNeighbor = _1nn(p, training);
if(p.getOutput(0) == nearestNeighbor.getOutput(0))
wellClassificated++;
}
return wellClassificated;
}
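/*
 * Usage sketch: turning the raw hit count into an accuracy rate (illustrative;
 * `trainingSet` and `testSet` are assumed to be populated PrototypeSets):
 *
 *   int hits = KNN.classficationAccuracy1NN(trainingSet, testSet);
 *   double accuracy = hits / (double) testSet.size();
 */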
/**
 * Counts the test prototypes correctly and incorrectly classified by the 1NN rule over the training set.
 * @param training Training data prototype set
 * @param test Test data prototype set
 * @return Pair with the number of correctly classified prototypes (first) and misclassified prototypes (second)
 */
public static Pair<Integer,Integer> classficationAccuracyAndError1NN(PrototypeSet training, PrototypeSet test)
{
int wellClassificated = 0;
int notWellClassif = 0;
for(Prototype p : test)
{
Prototype nearestNeighbor = _1nn(p, training);
++notWellClassif;
if(p.label() == nearestNeighbor.label())
{
++wellClassificated;
--notWellClassif;
}
}
return new Pair<Integer,Integer>(wellClassificated, notWellClassif);
}
/**
 * Counts the test prototypes correctly classified by the KNN rule. The training set must contain at least k prototypes, and k must be at least 1.
 * @param training Training data prototype set
 * @param test Test data prototype set
 * @param k Number of prototype-neighbors to be compared with each prototype of the test data.
 * @return Number of test prototypes correctly classified
 */
public static int classficationAccuracy(PrototypeSet training, PrototypeSet test, int k)
{
int wellClassificated = 0;
//int index = 0;
for(Prototype p: test)
{
//System.out.println("PROTOTIPO " + (index++) + " DE TEST ");
PrototypeSet neighbors = knn(p, training, k);
// Maps each class label to its number of occurrences among the k neighbors.
//TreeMap<Double, Integer> classes = new TreeMap<Double, Integer>();
HashMap<Double, Integer> classes = new HashMap<Double, Integer>();
double maximumKey = -1;
int maximumTimes = -1;
int i=0;
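// Majority vote: count how often each class label appears among the k
// neighbors; the most frequent label (ties broken at random below) becomes
// the predicted class for p.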
for(Prototype n: neighbors)
{
//double d = Distance.d(p, n);
double class_n = n.firstOutput();
//System.out.println("Neighbor " + i + " at distance " + d + ", class " + class_n);
i++;
if(!classes.containsKey(class_n))
{
if(maximumKey==-1)// Initial assignment: the first class seen becomes the provisional winner
maximumKey = class_n;
classes.put(class_n, 1);
}
else
{
int num_n = classes.get(class_n) + 1;
classes.put(class_n, num_n);
if(num_n > maximumTimes)
{
maximumTimes = num_n;
maximumKey = class_n;
//System.out.println("maximumKey: " + maximumKey);
}
// Break ties at random so that the same class is not always chosen
else if(num_n == maximumTimes)
{
if(RandomGenerator.RanddoubleClosed(0.0, 1.0) > 0.5)
maximumKey = class_n;
}
}
}
/*double m = _1nn(p,training).firstOutput();
if(maximumKey == m)
System.out.println("SUCCESS");
else
System.out.println("FAIL " + maximumKey + " (correct class is " + m + ")");*/
//System.out.println(classes.toString());
//System.out.println(maximumKey);
//int chosen_class = classes.get(maximumKey);
if(maximumKey == p.firstOutput())
++wellClassificated;
}
return wellClassificated;
}
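/*
 * Usage sketch for the k-NN accuracy count (illustrative; assumes populated
 * PrototypeSets and that class labels are whole numbers, as in the rest of
 * this package):
 *
 *   KNN.setK(5);
 *   int hits = KNN.classficationAccuracy(trainingSet, testSet); // uses K = 5
 *   double accuracy = hits / (double) testSet.size();
 */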
/**
 * Counts the test prototypes correctly classified by the KNN rule, using the static K parameter of the class.
 * @param training Training data prototype set
 * @param test Test data prototype set
 * @return Number of test prototypes correctly classified
 */
public static int classficationAccuracy(PrototypeSet training, PrototypeSet test)
{
return classficationAccuracy(training, test, K);
}
/**
 * Returns the nearest prototype to another with the same or a different class.
 * @param current Prototype for which the algorithm will find its nearest neighbor.
 * @param dataSet Prototype set in which the algorithm will search.
 * @param isSameClass If true, the returned prototype has the same class as current; otherwise it has a different class.
 * @return Nearest prototype to current with the same or a different class (according to isSameClass).
 */
public static Prototype getNearest(Prototype current, PrototypeSet dataSet, boolean isSameClass)
{
if(isSameClass)
return getNearestWithSameClassAs(current,dataSet);
return getNearestWithDifferentClassAs(current,dataSet);
}
/**
 * Returns the nearest prototype to another in a set.
 * @param current Prototype for which the algorithm will find its nearest neighbor.
 * @param dataSet Prototype set in which the algorithm will search.
 * @return Nearest prototype to current in dataSet.
 */
public static Prototype getNearest(Prototype current, PrototypeSet dataSet)
{
return dataSet.nearestTo(current);
}
/**
 * Returns the nearest prototype to another with the same class.
 * @param current Prototype for which the algorithm will find its nearest neighbor.
 * @param dataSet Prototype set in which the algorithm will search.
 * @return Nearest prototype to current with the same class, or null if there is none.
 */
public static Prototype getNearestWithSameClassAs(Prototype current, PrototypeSet dataSet)
{
double label = current.label();
PrototypeSet dataSetOfLabel = dataSet.getFromClass(label);
if(dataSetOfLabel.isEmpty())
{
Debug.errorln("There are no prototypes of class " + label);
return null;
}
//Debug.errorln("Size of dataSetOfLabel " + dataSetOfLabel.size());
double dMin = Double.POSITIVE_INFINITY;
Prototype nearest = null;
for(Prototype p : dataSetOfLabel)
{
double d = Distance.d(current, p);
if(d < dMin && current != p)
{
dMin = d;
nearest = p;
}
}
return nearest;
}
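/*
 * Usage sketch (illustrative): finding the nearest same-class and nearest
 * different-class prototypes of `query`, e.g. to inspect class overlap:
 *
 *   Prototype friend = KNN.getNearestWithSameClassAs(query, trainingSet);
 *   Prototype enemy  = KNN.getNearestWithDifferentClassAs(query, trainingSet);
 *
 * getNearestWithSameClassAs returns null (and reports an error) when the set
 * contains no prototype of the query's class.
 */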
/**
 * Returns the nearest prototypes to another with the same class.
 * @param current Prototype for which the algorithm will find its nearest neighbors.
 * @param dataSet Prototype set in which the algorithm will search.
 * @return PrototypeSet containing all the prototypes of the same class, in increasing order of distance.
 */
public static PrototypeSet getNearestNeighborsWithSameClassAs(Prototype current, PrototypeSet dataSet)
{
PrototypeSet neighborsWithSameClass = new PrototypeSet();
PrototypeSet sorted = dataSet.sort(current);
double class_current = current.label();
for(Prototype p : sorted)
if(p.label() == class_current)
neighborsWithSameClass.add(p);
return neighborsWithSameClass;
}
/**
 * Returns some nearest prototypes to another with the same class.
 * @param current Prototype for which the algorithm will find its nearest neighbors.
 * @param dataSet Prototype set in which the algorithm will search.
 * @param numberOfNeighbors Number of neighbors to be returned.
 * @return PrototypeSet containing at most numberOfNeighbors prototypes of the same class, in increasing order of distance.
 */
public static PrototypeSet getNearestNeighborsWithSameClassAs(Prototype current, PrototypeSet dataSet, int numberOfNeighbors)
{
PrototypeSet neighborsWithSameClass = new PrototypeSet();
PrototypeSet sorted = dataSet.sort(current);
double class_current = current.label();
int _size = sorted.size();
boolean full = false;
int counter = 0;
for(int i=0; i<_size && !full; i++)
if(sorted.get(i).label() == class_current)
{
neighborsWithSameClass.add(sorted.get(i));
++counter;
full = (counter == numberOfNeighbors); // stop once the requested number of neighbors has been collected
}
return neighborsWithSameClass;
}
/**
 * Returns some nearest prototypes to another with a different class.
 * @param current Prototype for which the algorithm will find its nearest neighbors.
 * @param dataSet Prototype set in which the algorithm will search.
 * @param numberOfNeighbors Number of neighbors to be returned.
 * @return PrototypeSet containing at most numberOfNeighbors prototypes of a different class, in increasing order of distance.
 */
public static PrototypeSet getNearestNeighborsWithDifferentClassAs(Prototype current, PrototypeSet dataSet, int numberOfNeighbors)
{
PrototypeSet nn = new PrototypeSet();
PrototypeSet sorted = dataSet.sort(current);
double class_current = current.label();
int _size = sorted.size();
boolean full = false;
int counter = 0;
for(int i=0; i<_size && !full; i++)
if(sorted.get(i).label() != class_current)
{
nn.add(sorted.get(i));
++counter;
full = (counter == numberOfNeighbors); // stop once the requested number of neighbors has been collected
}
return nn;
}
/**
 * Returns the nearest prototype to another with a different class.
 * @param current Prototype for which the algorithm will find its nearest neighbor.
 * @param dataSet Prototype set in which the algorithm will search.
 * @return Nearest prototype to current with a different class, or null if there is none.
 */
public static Prototype getNearestWithDifferentClassAs(Prototype current, PrototypeSet dataSet)
{
double label = current.label();
double dMin = Double.POSITIVE_INFINITY;
Prototype nearest = null;
for(Prototype p : dataSet)
{
double d = Distance.d(current, p);
if(d < dMin && p.label()!=label && current != p && !current.equals(p))
{
dMin = d;
nearest = p;
}
}
return nearest;
}
/**
 * Returns some nearest prototypes to another, regardless of class.
 * @param current Prototype for which the algorithm will find its nearest neighbors.
 * @param dataSet Prototype set in which the algorithm will search.
 * @param numberOfNeighbors Number of neighbors to be returned.
 * @return PrototypeSet containing at most numberOfNeighbors nearest prototypes (excluding current itself), in increasing order of distance.
 */
public static PrototypeSet getNearestNeighbors(Prototype current, PrototypeSet dataSet, int numberOfNeighbors)
{
PrototypeSet nn = new PrototypeSet();
PrototypeSet sorted = dataSet.sort(current);
int _size = sorted.size();
boolean full = false;
int counter = 0;
for(int i=0; i<_size && !full; i++)
{
if(!current.equals(sorted.get(i))){
nn.add(sorted.get(i));
++counter;
full = (counter == numberOfNeighbors); // stop once the requested number of neighbors has been collected
}
}
return nn;
}
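/*
 * Usage sketch (illustrative): collecting the 5 nearest neighbors of `query`
 * regardless of class, excluding `query` itself:
 *
 *   PrototypeSet hood = KNN.getNearestNeighbors(query, trainingSet, 5);
 */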
/**
 * Classifies the test set with the KNN rule learned from the training set, also computing class vote probabilities.
 * @param training Training data prototype set
 * @param test Test data prototype set
 * @param k Number of prototype-neighbors to be compared with each prototype of the test data.
 * @param probabilities Output matrix, one row per test prototype, filled with the fraction of the k neighbors that vote for each class label.
 * @return Array with the predicted class label of each test prototype.
 */
public static int [] classify(PrototypeSet training, PrototypeSet test, int k, double probabilities[][])
{
int predicted [] = new int[test.size()];
int index = 0;
for(Prototype p: test)
{
PrototypeSet neighbors = knn(p, training, k);
HashMap<Double, Integer> classes = new HashMap<Double, Integer>();
double maximumKey = -1;
int maximumTimes = -1;
int i=0;
Arrays.fill(probabilities[index], 0); // reset the vote fractions for this test prototype
for(Prototype n: neighbors)
{
double class_n = n.firstOutput();
probabilities[index][(int) class_n] += 1./k;
i++;
if(!classes.containsKey(class_n))
{
if(maximumKey==-1){// Initial assignment: the first class seen becomes the provisional winner
maximumKey = class_n;
predicted[index] = (int)maximumKey;
}
classes.put(class_n, 1);
}
else
{
int num_n = classes.get(class_n) + 1;
classes.put(class_n, num_n);
if(num_n > maximumTimes)
{
maximumTimes = num_n;
maximumKey = class_n;
predicted[index] = (int) maximumKey;
//System.out.println("maximumKey: " + maximumKey);
}
// Break ties at random so that the same class is not always chosen
else if(num_n == maximumTimes)
{
if(RandomGenerator.RanddoubleClosed(0.0, 1.0) > 0.5){
maximumKey = class_n;
predicted[index] = (int) maximumKey;
}
}
}
}
index++;
}
return predicted;
}
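/*
 * Usage sketch for classify (illustrative; `numberOfClasses` is a hypothetical
 * variable holding the number of distinct class labels, which the caller must
 * know in advance because the probability matrix is indexed by class label):
 *
 *   double[][] probs = new double[testSet.size()][numberOfClasses];
 *   int[] predictions = KNN.classify(trainingSet, testSet, 3, probs);
 *   // predictions[i] is the voted label for testSet.get(i);
 *   // probs[i][c] is the fraction of the 3 neighbors voting for class c.
 */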
/**
 * Classifies the test set with the KNN rule learned from the training set, without computing class probabilities.
 * @param training Training data prototype set
 * @param test Test data prototype set
 * @param k Number of prototype-neighbors to be compared with each prototype of the test data.
 * @return Array with the predicted class label of each test prototype.
 */
public static int [] classify2(PrototypeSet training, PrototypeSet test, int k)
{
int predicted [] = new int[test.size()];
int index = 0;
for(Prototype p: test)
{
PrototypeSet neighbors = knn(p, training, k);
HashMap<Double, Integer> classes = new HashMap<Double, Integer>();
double maximumKey = -1;
int maximumTimes = -1;
int i=0;
for(Prototype n: neighbors)
{
double class_n = n.firstOutput();
i++;
if(!classes.containsKey(class_n))
{
if(maximumKey==-1){// Initial assignment: the first class seen becomes the provisional winner
maximumKey = class_n;
predicted[index] = (int)maximumKey;
}
classes.put(class_n, 1);
}
else
{
int num_n = classes.get(class_n) + 1;
classes.put(class_n, num_n);
if(num_n > maximumTimes)
{
maximumTimes = num_n;
maximumKey = class_n;
predicted[index] = (int) maximumKey;
//System.out.println("maximumKey: " + maximumKey);
}
// Break ties at random so that the same class is not always chosen
else if(num_n == maximumTimes)
{
if(RandomGenerator.RanddoubleClosed(0.0, 1.0) > 0.5){
maximumKey = class_n;
predicted[index] = (int) maximumKey;
}
}
}
}
index++;
}
return predicted;
}
}//end KNN.java