/***********************************************************************

  This file is part of KEEL-software, the Data Mining tool for regression,
  classification, clustering, pattern mining and so on.

  Copyright (C) 2004-2010

  F. Herrera (herrera@decsai.ugr.es)
  L. Sánchez (luciano@uniovi.es)
  J. Alcalá-Fdez (jalcala@decsai.ugr.es)
  S. García (sglopez@ujaen.es)
  A. Fernández (alberto.fernandez@ujaen.es)
  J. Luengo (julianlm@decsai.ugr.es)

  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.Fuzzy_Rule_Learning.Genetic.ClassifierFuzzySGERD;

import java.lang.String;
import java.util.StringTokenizer;
import keel.Algorithms.Preprocess.Basic.*;
import keel.Dataset.*;

/**
 * K-nearest-neighbour helper used to count, per class, how many instances are
 * surrounded mostly by instances of other classes.
 *
 * For every instance of the data set, the k nearest instances (Euclidean
 * distance on min-max normalised inputs) are located, and the instance is
 * counted as an outlier of its class when at least <code>k_dif</code> of those
 * neighbours carry a different class label.
 *
 * Only the Euclidean (L2) distance is implemented in this class.
 *
 * @author Written by Salvador García López (University of Granada) 11/07/2004
 * @author Modified by Joaquin Derrac (University of Granada) 3/11/2009
 * @version 1.1
 * @since JDK1.4
 */
public class KNN extends Metodo {

  /* Own parameters of the algorithm */

  /** Number of nearest neighbours inspected for every instance. */
  private int k;

  /** Number of differently-labelled neighbours from which an instance counts as an outlier. */
  private int k_dif;

  /* Data structures */

  /** Instance set in which the neighbours are searched. */
  protected InstanceSet referencia;

  /* Data matrix */

  /** Normalised input values of the reference instances, one row per instance. */
  double datosReferencia[][];

  /** Class label of every reference instance (parallel to datosReferencia). */
  int clasesReferencia[];

  /* Extra */

  /** Marks the input values that were missing in the reference instances. */
  boolean nulosReferencia[][];

  /** Input values of nominal attributes, truncated to int, before normalisation. */
  int nominalReferencia[][];

  /** Input values of non-nominal attributes before normalisation. */
  double realReferencia[][];

  /**
   * Builds the classifier: stores the parameters and loads the given data set
   * both as training set and as reference set.
   *
   * Any exception raised while loading is printed to standard error and the
   * JVM is terminated with exit code 1.
   *
   * @param dataset instances used both as examples and as neighbour candidates
   * @param value_k number of neighbours to inspect
   */
  public KNN(InstanceSet dataset, int value_k) {

    configuracion(value_k);

    /* Load and normalise the training view of the data */
    try {
      training = dataset;
      normalizarTrain();
    } catch (Exception e) {
      System.err.println(e);
      System.exit(1);
    }

    /* Load and normalise the reference view of the data */
    try {
      referencia = dataset;
      normalizarReferencia();
    } catch (Exception e) {
      System.err.println(e);
      System.exit(1);
    }
  }

  /**
   * Builds the training data matrices and normalises the input values.
   *
   * @throws CheckException if the data set does not describe a
   *         single-output classification problem
   */
  private void normalizarTrain() throws CheckException {

    comprobarClasificacion();

    entradas = Attributes.getInputAttributes();
    salida = Attributes.getOutputAttribute(0);
    nEntradas = Attributes.getInputNumAttributes();

    /* The relation name is the second token of the header */
    StringTokenizer tokens = new StringTokenizer(training.getHeader(), " \n\r");
    tokens.nextToken();
    relation = tokens.nextToken();

    int numInstancias = training.getNumInstances();
    int numEntradas = Attributes.getInputNumAttributes();

    datosTrain = new double[numInstancias][numEntradas];
    clasesTrain = new int[numInstancias];
    nulosTrain = new boolean[numInstancias][numEntradas];
    nominalTrain = new int[numInstancias][numEntradas];
    realTrain = new double[numInstancias][numEntradas];

    for (int i = 0; i < numInstancias; i++) {
      cargarFila(training.getInstance(i), i, datosTrain, clasesTrain,
                 nulosTrain, nominalTrain, realTrain);
    }
  }

  /**
   * Builds the data matrices of the reference set and normalises the input
   * values.
   *
   * @throws CheckException if the data set does not describe a
   *         single-output classification problem
   */
  private void normalizarReferencia() throws CheckException {

    comprobarClasificacion();

    int numInstancias = referencia.getNumInstances();
    int numEntradas = Attributes.getInputNumAttributes();

    datosReferencia = new double[numInstancias][numEntradas];
    clasesReferencia = new int[numInstancias];
    nulosReferencia = new boolean[numInstancias][numEntradas];
    nominalReferencia = new int[numInstancias][numEntradas];
    realReferencia = new double[numInstancias][numEntradas];

    for (int i = 0; i < numInstancias; i++) {
      cargarFila(referencia.getInstance(i), i, datosReferencia, clasesReferencia,
                 nulosReferencia, nominalReferencia, realReferencia);
    }
  }

  /**
   * Checks that the loaded attributes describe a classification problem:
   * exactly one output attribute, and that attribute is not real-valued.
   *
   * @throws CheckException if any of those conditions does not hold
   */
  private static void comprobarClasificacion() throws CheckException {
    if (Attributes.getOutputNumAttributes() < 1) {
      throw new CheckException("This dataset haven't outputs, so it not corresponding to a classification problem.");
    } else if (Attributes.getOutputNumAttributes() > 1) {
      throw new CheckException("This dataset have more of one output.");
    }

    /* The attribute inspected here is the output one, and the message says so */
    if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
      throw new CheckException("This dataset have an output attribute with float values, so it not corresponding to a classification problem.");
    }
  }

  /**
   * Copies one instance into row <code>fila</code> of the given matrices and
   * normalises its input values.
   *
   * Missing inputs are stored as 0.0 and flagged in <code>nulos</code>.
   * Nominal inputs are divided by (number of nominal values - 1); the other
   * inputs are min-max scaled with the bounds of their attribute.
   *
   * @param instancia instance to read
   * @param fila      row to fill in every matrix
   * @param datos     receives the normalised input values
   * @param clases    receives the class label (first output value, as int)
   * @param nulos     receives the missing-value flags
   * @param nominales receives the raw value of nominal inputs, as int
   * @param reales    receives the raw value of the remaining inputs
   */
  private static void cargarFila(Instance instancia, int fila, double datos[][],
                                 int clases[], boolean nulos[][],
                                 int nominales[][], double reales[][]) {

    boolean ausentes[] = instancia.getInputMissingValues();

    /* Work on a private copy: the row is rewritten in place below, and this
       getter may hand out the instance's own storage (not verifiable from
       this file). Without the copy, loading the same data set twice would
       normalise the values twice and modify the caller's data. */
    double fuente[] = instancia.getAllInputValues();
    double valores[] = (double[]) fuente.clone();
    datos[fila] = valores;

    for (int j = 0; j < ausentes.length; j++) {
      if (ausentes[j]) {
        valores[j] = 0.0;
        nulos[fila][j] = true;
      }
    }

    clases[fila] = (int) instancia.getAllOutputValues()[0];

    for (int a = 0; a < valores.length; a++) {
      Attribute atributo = Attributes.getInputAttribute(a);

      if (atributo.getType() == Attribute.NOMINAL) {
        nominales[fila][a] = (int) valores[a];
        valores[a] = escalar(valores[a], atributo.getNominalValuesList().size() - 1);
      } else {
        reales[fila][a] = valores[a];
        double minimo = atributo.getMinAttribute();
        valores[a] = escalar(valores[a] - minimo, atributo.getMaxAttribute() - minimo);
      }
    }
  }

  /**
   * Divides <code>valor</code> by <code>rango</code>, mapping a zero range to 0.0.
   *
   * A zero range occurs for a constant attribute or a nominal attribute with a
   * single value. A plain division would yield NaN or infinity there, which
   * makes every distance NaN and therefore every neighbour be rejected.
   *
   * @param valor value already shifted to start at 0
   * @param rango width of the attribute domain
   * @return the scaled value, or 0.0 when <code>rango</code> is 0
   */
  private static double escalar(double valor, double rango) {
    if (rango == 0.0) {
      return 0.0;
    }
    return valor / rango;
  }

  /**
   * Counts, per class, the instances and the outliers of the training data.
   *
   * Both arrays are indexed by class label and are only incremented, never
   * reset; an index outside either array raises
   * ArrayIndexOutOfBoundsException.
   *
   * @param outliers      incremented at the class of every instance having at
   *                      least <code>k_dif</code> neighbours of another class
   * @param ExamplesClass incremented at the class of every instance
   */
  public void ejecutar(int[] outliers, int[] ExamplesClass) {

    for (int i = 0; i < datosTrain.length; i++) {
      int clase = clasesTrain[i];

      ExamplesClass[clase]++;

      /* The neighbours' labels are passed along so that classes, not row
         indices, are compared against the class of the instance */
      int distintos = differentClass(k, clase, datosReferencia,
                                     clasesReferencia, datosTrain[i]);
      if (distintos >= this.k_dif) {
        outliers[clase]++;
      }
    }
  }

  /**
   * Sets the number of neighbours and derives the outlier threshold from it:
   * 80% of k, truncated, but never below 1.
   *
   * @param value_k number of neighbours to inspect
   */
  public void configuracion(int value_k) {

    this.k = value_k;

    /* Without the lower bound, k = 1 gives a threshold of 0, which every
       instance reaches trivially */
    this.k_dif = Math.max(1, (int) (this.k * 0.8));
  }

  /* STATIC Methods */

  /**
   * Counts how many of the nearest neighbours of <code>ejemplo</code> belong to
   * a class other than <code>classE</code>.
   *
   * Rows of <code>conj</code> at distance 0 from the example are not considered
   * neighbours. When fewer than <code>nvec</code> neighbours exist, only the
   * existing ones are counted.
   *
   * @param nvec    number of neighbours to inspect (capped at conj.length)
   * @param classE  class label of the example
   * @param conj    candidate neighbours, one row per instance
   * @param clases  class label of every row of <code>conj</code>
   * @param ejemplo example whose neighbourhood is analysed
   * @return number of inspected neighbours labelled differently from classE
   */
  public static int differentClass(int nvec, int classE, double conj[][],
                                   int clases[], double ejemplo[]) {

    int vecinos[] = indicesVecinos(nvec, conj, ejemplo);
    int distintos = 0;

    for (int j = 0; j < vecinos.length; j++) {
      /* -1 marks a slot for which no neighbour was found */
      if (vecinos[j] >= 0 && clases[vecinos[j]] != classE) {
        distintos++;
      }
    }

    return distintos;
  }

  /**
   * Legacy variant that has no access to the class labels of the neighbours.
   *
   * It compares the row index of each neighbour (or -1 for an empty slot)
   * with <code>classE</code>, so its result does not reflect class
   * membership. It is kept unchanged for source compatibility only.
   *
   * @param nvec    number of neighbours to inspect (capped at conj.length)
   * @param classE  value compared against the neighbour row indices
   * @param conj    candidate neighbours, one row per instance
   * @param ejemplo example whose neighbourhood is analysed
   * @return number of neighbour slots whose row index differs from classE
   * @deprecated use {@link #differentClass(int, int, double[][], int[], double[])},
   *             which compares class labels
   */
  public static int differentClass(int nvec, int classE, double conj[][], double ejemplo[]) {

    int vecinos[] = indicesVecinos(nvec, conj, ejemplo);
    int distintos = 0;

    for (int j = 0; j < vecinos.length; j++) {
      if (vecinos[j] != classE) {
        distintos++;
      }
    }

    return distintos;
  }

  /**
   * Finds the rows of <code>conj</code> closest to <code>ejemplo</code>, ordered
   * by increasing distance. Rows at distance 0 are skipped.
   *
   * @param nvec    number of neighbours wanted (capped at conj.length)
   * @param conj    candidate neighbours, one row per instance
   * @param ejemplo query point
   * @return row indices of the neighbours; unused trailing slots hold -1
   */
  private static int[] indicesVecinos(int nvec, double conj[][], double ejemplo[]) {

    if (nvec > conj.length) {
      nvec = conj.length;
    }

    int indices[] = new int[nvec];
    double distancias[] = new double[nvec];

    for (int s = 0; s < nvec; s++) {
      indices[s] = -1;
      distancias[s] = Double.POSITIVE_INFINITY;
    }

    for (int i = 0; i < conj.length; i++) {
      double dist = distancia(conj[i], ejemplo);

      if (dist > 0) {
        /* Insert into the sorted list at the first slot holding a larger
           distance, pushing the tail one position to the right */
        for (int j = 0; j < nvec; j++) {
          if (dist < distancias[j]) {
            int cola = nvec - 1 - j;
            System.arraycopy(distancias, j, distancias, j + 1, cola);
            System.arraycopy(indices, j, indices, j + 1, cola);
            distancias[j] = dist;
            indices[j] = i;
            break;
          }
        }
      }
    }

    return indices;
  }

  /**
   * Euclidean distance between two points; the number of coordinates compared
   * is the length of <code>ej1</code>.
   *
   * @param ej1 first point
   * @param ej2 second point, with at least as many coordinates as ej1
   * @return the Euclidean (L2) distance
   */
  public static double distancia(double ej1[], double ej2[]) {

    double suma = 0;

    for (int i = 0; i < ej1.length; i++) {
      double diferencia = ej1[i] - ej2[i];
      suma += diferencia * diferencia;
    }

    return Math.sqrt(suma);
  }
}