/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. S�nchez (luciano@uniovi.es)
J. Alcal�-Fdez (jalcala@decsai.ugr.es)
S. Garc�a (sglopez@ujaen.es)
A. Fern�ndez (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
*
* File: CamNN.java
*
* The Cam NN Algorithm.
* It makes use of Cam distance to improve the KNN classification.
*
* @author Written by Joaqu�n Derrac (University of Granada) 13/11/2008
* @version 1.0
* @since JDK1.5
*
*/
package keel.Algorithms.Lazy_Learning.CamNN;
import keel.Algorithms.Lazy_Learning.LazyAlgorithm;
import org.core.*;
import java.util.StringTokenizer;
public class CamNN extends LazyAlgorithm{
//Parameters
int K;
//Adictional structures
double V[][];
double W[][];
double G[];
double L;
double c1;
double c2;
double A[];
double B[];
double TAU[][];
int N;
double gammaValue;
double gammaNextValue;
/**
* The main method of the class
*
* @param script Name of the configuration script
*
*/
public CamNN (String script) {
readDataFiles(script);
//Naming the algorithm
name="Cam NN";
//Inicialization of auxiliar structures
V= new double[inputAtt][K];
W= new double[inputAtt][K];
G= new double[inputAtt];
A= new double[trainData.length];
B= new double[trainData.length];
TAU= new double[trainData.length][inputAtt];
//Initialization stuff ends here. So, we can start time-counting
setInitialTime();
} //end-method
/**
* Reads configuration script, to extract the parameter's values.
*
* @param script Name of the configuration script
*
*/
protected void readParameters (String script) {
String file;
String line;
StringTokenizer fileLines, tokens;
file = Files.readFile (script);
fileLines = new StringTokenizer (file,"\n\r");
//Discard in/out files definition
fileLines.nextToken();
fileLines.nextToken();
fileLines.nextToken();
//Getting the number of neighbors
line = fileLines.nextToken();
tokens = new StringTokenizer (line, "=");
tokens.nextToken();
K = Integer.parseInt(tokens.nextToken().substring(1));
}//end-method
/**
* Calculates A, B and TAU Values for each training instance.
*
*/
public void precalculateParameters(){
double minDist[];
int nearestN[];
double dist;
boolean stop;
double module;
int dimension;
if(inputAtt>16){
dimension=16;
}else{
dimension=inputAtt;
}
//Calculate gamma values
if(dimension%2==0){
N=dimension/2;
gammaValue=fact(N-1);
gammaNextValue=Math.sqrt(Math.PI)/Math.pow(2.0,(double)N);
gammaNextValue*=doubleFact((2*N)-1);
}
else{
N=dimension/2;
gammaValue=Math.sqrt(Math.PI)/Math.pow(2.0,(double)N);
gammaValue*=doubleFact((2*N)-1);
gammaNextValue=fact(N+1);
}
nearestN = new int[K];
minDist = new double[K];
//We need to find K Nearest Neighbors to estimate V and W
for(int instance=0;instance<trainData.length;instance++){
//KNN Method starts here
for (int i=0; i<K; i++) {
nearestN[i] = -1;
minDist[i] = Double.MAX_VALUE;
}
for (int i=0; i<trainData.length; i++) {
dist = euclideanDistance(trainData[instance],trainData[i]);
if (dist > 0.0){ //leave-one-out
//see if it's nearer than our previous selected neigbours
stop=false;
for(int j=0;j<K && !stop;j++){
if (dist < minDist[j]) {
for (int l = K - 1; l >= j+1; l--) {
minDist[l] = minDist[l - 1];
nearestN[l] = nearestN[l - 1];
}
minDist[j] = dist;
nearestN[j] = i;
stop=true;
}
}
}
}
//neigbours have been found
//calc V vector
for(int i=0;i<inputAtt;i++){
for(int j=0;j<K;j++){
V[i][j]=trainData[nearestN[j]][i]-trainData[instance][i];
}
}
//calc W vector
for(int i=0;i<inputAtt;i++){
for(int j=0;j<K;j++){
if(trainOutput[instance]==trainOutput[nearestN[j]]){
W[i][j]=V[i][j];
}
else{
W[i][j]=V[i][j]*(-0.5);
}
}
}
//calc G vector
for(int i=0;i<inputAtt;i++){
G[i]=0.0;
for(int j=0;j<K;j++){
G[i]+=W[i][j];
}
G[i]/=K;
}
//calc L value
L=0.0;
for(int j=0;j<K;j++){
module=0.0;
for(int i=0;i<inputAtt;i++){
module+=(W[i][j]*W[i][j]);
}
L+=Math.sqrt(module);
}
L/=K;
//calc c1 and c2 value
c2=Math.sqrt(2.0)*(gammaNextValue)/gammaValue;
c1=c2/(double)inputAtt;
//calc A and B values
A[instance]=L/c2;
module=0.0;
for(int i=0;i<inputAtt;i++){
module+=(G[i]*G[i]);
}
module=Math.sqrt(module);
B[instance]=module/c1;
//calc TAU vector
for(int i=0;i<inputAtt;i++){
TAU[instance][i]=G[i]/module;
}
}
}//end-method
/**
* Evaluates a instance to predict its class.
*
* @param example Instance evaluated
* @return Class predicted
*
*/
protected int evaluate (double example[]) {
double minDist[];
int nearestN[];
int selectedClasses[];
double dist;
int prediction;
int predictionValue;
boolean stop;
nearestN = new int[K];
minDist = new double[K];
for (int i=0; i<K; i++) {
nearestN[i] = -1;
minDist[i] = Double.MAX_VALUE;
}
//KNN Method starts here
for (int i=0; i<trainData.length; i++) {
dist = camDistance(example,i);
if (dist > 0.0){ //leave-one-out
//see if it's nearer than our previous selected neigbours
stop=false;
for(int j=0;j<K && !stop;j++){
if (dist < minDist[j]) {
for (int l = K - 1; l >= j+1; l--) {
minDist[l] = minDist[l - 1];
nearestN[l] = nearestN[l - 1];
}
minDist[j] = dist;
nearestN[j] = i;
stop=true;
}
}
}
}
//we have check all the instances... see what is the most present class
selectedClasses= new int[nClasses];
for (int i=0; i<nClasses; i++) {
selectedClasses[i] = 0;
}
for (int i=0; i<K; i++) {
selectedClasses[trainOutput[nearestN[i]]]+=1;
}
prediction=0;
predictionValue=selectedClasses[0];
for (int i=1; i<nClasses; i++) {
if (predictionValue < selectedClasses[i]) {
predictionValue = selectedClasses[i];
prediction = i;
}
}
return prediction;
}//end-method
/**
* Calculates Cam Weigthed distance
*
* @param example New instance
* @param instance Index of train instance
* @return Distance calculated
*
*/
private double camDistance(double example[],int instance){
double length=0.0;
double factor;
double cosine;
double angleSum;
//calculate their euclidean distance
length=euclideanDistance(example,trainData[instance]);
//calculate the Cam Weight
angleSum=0.0;
for (int i=0; i<example.length; i++) {
angleSum += (example[i]-trainData[instance][i])*TAU[instance][i];
}
cosine= angleSum/length;
factor=A[instance];
factor+=B[instance]*cosine;
//apply the Cam Weight
length/=factor;
return length;
} //end-method
/**
* Factorial function
*
* @param n Number to get its factorial
* @return Value of factorial
*
*/
private double fact(int n){
int value;
if(n<2){
return 1.0;
}
else{
value=1;
for(int i=2;i<=n;i++){
value*=i;
}
return (double)value;
}
}
/**
* Double factorial function ( n*(n-2)*(n-4)* .... )
*
* @param n Number to get its double factorial
* @return Value of double factorial
*
*/
private double doubleFact(int n){
int value;
if(n<2){
return 1.0;
}
else{
value=1;
for(int i=n;i>1;i-=2){
value*=i;
}
return (double)value;
}
} //end-method
} //end-class