/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ // // LVQ.java // // Juli�n Luengo Mart�n // // Created by Juli�n Luengo Mart�n Julio 2007 // Proyecto KEEL // package keel.Algorithms.Neural_Networks.LVQ; import keel.Algorithms.Preprocess.Basic.*; import keel.Dataset.*; import org.core.*; import java.util.StringTokenizer; import java.util.Vector; public class LVQ extends Metodo { String ficheroReferencia; double datosReferencia[][]; int clasesReferencia[]; InstanceSet referencia; /* Own parameters of the algorithm */ private long semilla; double alpha = 0, nu = 0; int n_p, T, nSel; /** Neurons*/ double conjS[][]; double datosTest[][]; int clasesTest[]; /** Neurons' associated class */ int clasesS[]; public LVQ(String ficheroScript) { super(ficheroScript); try { referencia = new InstanceSet(); referencia.readSet(ficheroReferencia, false); /*Normalize the data*/ normalizarReferencia(); } catch (Exception e) { System.err.println(e); System.exit(1); } } public void ejecutar() { int i, j, l, m; double alfai; int nClases; int claseObt; boolean marcas[]; boolean notFound; int init; int clasSel[]; int baraje[]; int pos, tmp; String instanciasIN[]; String instanciasOUT[]; long tiempo = System.currentTimeMillis(); /* Getting the number of differents classes */ nClases = 0; for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i]; nClases++; /* Shuffle the train set */ baraje = new int[datosTrain.length]; Randomize.setSeed(semilla); for (i = 0; i < datosTrain.length; i++) baraje[i] = i; for (i = 0; i < datosTrain.length; i++) { pos = Randomize.Randint(i, datosTrain.length - 1); tmp = baraje[i]; baraje[i] = baraje[pos]; baraje[pos] = tmp; } /* * Inicialization of the flagged instaces vector for a posterior * elimination */ marcas = new boolean[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) marcas[i] = false; if (datosTrain.length > 0) { // marcas[baraje[0]] = true; //the first instance is included always nSel = n_p; if (nSel < nClases) nSel = nClases; } else { System.err.println("Input dataset is empty"); nSel = 0; } clasSel = new int[nClases]; System.out.print("Selecting initial neurons... "); // at least, there must be 1 neuron of each class at the beginning init = nClases; for (i = 0; i < nClases && i < datosTrain.length; i++) { pos = Randomize.Randint(0, datosTrain.length - 1); tmp = 0; while ((clasesTrain[pos] != i || marcas[pos]) && tmp < datosTrain.length) { pos = (pos + 1) % datosTrain.length; tmp++; } if (tmp < datosTrain.length) marcas[pos] = true; else init--; // clasSel[i] = i; } for (i = init; i < Math.min(nSel, datosTrain.length); i++) { tmp = 0; pos = Randomize.Randint(0, datosTrain.length - 1); while (marcas[pos]) { pos = (pos + 1) % datosTrain.length; tmp++; } // if(i<nClases){ // notFound = true; // do{ // for(j=i-1;j>=0 && notFound;j--){ // if(clasSel[j] == clasesTrain[pos]) // notFound = false; // } // if(!notFound) // pos = Randomize.Randint (0, datosTrain.length-1); // }while(!notFound); // } // clasSel[i] = clasesTrain[pos]; marcas[pos] = true; init++; } nSel = init; System.out.println("Initial neurons selected: " + nSel); /* Building of the S set from the flags */ conjS = new double[nSel][datosTrain[0].length]; clasesS = new int[nSel]; for (m = 0, l = 0; m < datosTrain.length; m++) { if (marcas[m]) { // the instance must be copied to the solution for (j = 0; j < datosTrain[0].length; j++) { conjS[l][j] = datosTrain[m][j]; } clasesS[l] = clasesTrain[m]; l++; } } alfai = alpha; boolean change = true; /* Body of the LVQ algorithm. */ // Train the network for (int it = 0; it < T && change; it++) { change = false; alpha = alfai; for (i = 1; i < datosTrain.length; i++) { // search for the nearest neuron to training instance pos = NN(nSel, conjS, datosTrain[baraje[i]]); // nearest neuron labels correctly the class of training // instance? if (clasesS[pos] != clasesTrain[baraje[i]]) { // NO - repel // the neuron for (j = 0; j < conjS[pos].length; j++) { conjS[pos][j] = conjS[pos][j] - alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]); } change = true; } else { // YES - migrate the neuron towards the input vector for (j = 0; j < conjS[pos].length; j++) { conjS[pos][j] = conjS[pos][j] + alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]); } } alpha = nu * alpha; } // Shuffle again the training partition baraje = new int[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) baraje[i] = i; for (i = 0; i < datosTrain.length; i++) { pos = Randomize.Randint(i, datosTrain.length - 1); tmp = baraje[i]; baraje[i] = baraje[pos]; baraje[pos] = tmp; } } System.out .println("LVQ " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); // Classify the train data set instanciasIN = new String[datosReferencia.length]; instanciasOUT = new String[datosReferencia.length]; for (i = 0; i < datosReferencia.length; i++) { /* Classify the instance selected in this iteration */ Attribute a = Attributes.getOutputAttribute(0); int tipo = a.getType(); claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosReferencia[i], nClases); if(tipo!=Attribute.NOMINAL){ instanciasIN[i] = new String(String.valueOf(clasesReferencia[i])); instanciasOUT[i] = new String(String.valueOf(claseObt)); } else{ instanciasIN[i] = new String(a.getNominalValue(clasesReferencia[i])); instanciasOUT[i] = new String(a.getNominalValue(claseObt)); } } escribeSalida(ficheroSalida[0], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation); // Classify the test data set normalizarTest(); instanciasIN = new String[datosTest.length]; instanciasOUT = new String[datosTest.length]; for (i = 0; i < datosTest.length; i++) { /* Classify the instance selected in this iteration */ Attribute a = Attributes.getOutputAttribute(0); int tipo = a.getType(); claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosTest[i], nClases); if(tipo!=Attribute.NOMINAL){ instanciasIN[i] = new String(String.valueOf(clasesTest[i])); instanciasOUT[i] = new String(String.valueOf(claseObt)); } else{ instanciasIN[i] = new String(a.getNominalValue(clasesTest[i])); instanciasOUT[i] = new String(a.getNominalValue(claseObt)); } } escribeSalida(ficheroSalida[1], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation); //Print the network to a file printNetworkToFile(ficheroSalida[2],referencia.getHeader()); } protected int NN(int nSel, double conj[][], double ejemplo[]) { double mindist, dist; int nneigh = -1; mindist = Double.POSITIVE_INFINITY; for (int i = 0; i < nSel; i++) { dist = KNN.distancia(conj[i], ejemplo); if (dist < mindist) { mindist = dist; nneigh = i; } } return nneigh; } public void leerConfiguracion(String ficheroScript) { String fichero, linea, token; StringTokenizer lineasFichero, tokens; byte line[]; int i, j; ficheroSalida = new String[3]; fichero = Fichero.leeFichero(ficheroScript); lineasFichero = new StringTokenizer(fichero, "\n\r"); lineasFichero.nextToken(); linea = lineasFichero.nextToken(); tokens = new StringTokenizer(linea, "="); tokens.nextToken(); token = tokens.nextToken(); /* Getting the names of the training and test files */ line = token.getBytes(); for (i=0; line[i]!='\"'; i++); i++; for (j=i; line[j]!='\"'; j++); ficheroTraining = new String (line,i,j-i); for (i=j+1; line[i]!='\"'; i++); i++; for (j=i; line[j]!='\"'; j++); ficheroReferencia = new String (line,i,j-i); for (i=j+1; line[i]!='\"'; i++); i++; for (j=i; line[j]!='\"'; j++); ficheroTest = new String (line,i,j-i); /* Getting the path and base name of the results files */ linea = lineasFichero.nextToken(); tokens = new StringTokenizer(linea, "="); tokens.nextToken(); token = tokens.nextToken(); /* Getting the names of output files */ line = token.getBytes(); for (i = 0; line[i] != '\"'; i++) ; i++; for (j = i; line[j] != '\"'; j++) ; ficheroSalida[0] = new String(line, i, j - i); for (i = j + 1; line[i] != '\"'; i++) ; i++; for (j = i; line[j] != '\"'; j++) ; ficheroSalida[1] = new String(line, i, j - i); for (i = j + 1; line[i] != '\"'; i++) ; i++; for (j = i; line[j] != '\"'; j++) ; ficheroSalida[2] = new String(line, i, j - i); /* Getting the seed */ linea = lineasFichero.nextToken(); tokens = new StringTokenizer(linea, "="); tokens.nextToken(); semilla = Long.parseLong(tokens.nextToken().substring(1)); /* Getting the number of iterations */ linea = lineasFichero.nextToken(); tokens = new StringTokenizer(linea, "="); tokens.nextToken(); T = Integer.parseInt(tokens.nextToken().substring(1)); /* Getting the number of neurons */ linea = lineasFichero.nextToken(); tokens = new StringTokenizer(linea, "="); tokens.nextToken(); n_p = Integer.parseInt(tokens.nextToken().substring(1)); /* Getting the alpha factor */ linea = lineasFichero.nextToken(); tokens = new StringTokenizer(linea, "="); tokens.nextToken(); alpha = Double.parseDouble(tokens.nextToken().substring(1)); /* Getting the nu factor */ linea = lineasFichero.nextToken(); tokens = new StringTokenizer(linea, "="); tokens.nextToken(); nu = Double.parseDouble(tokens.nextToken().substring(1)); } protected static void escribeSalida(String nombreFichero, String instanciasIN[], String instanciasOUT[], Attribute entradas[], Attribute salida, int nEntradas, String relation) { String cadena = ""; int i, j, k; int aux; /* Printing input attributes */ cadena += "@relation " + relation + "\n"; for (i = 0; i < nEntradas; i++) { cadena += "@attribute " + entradas[i].getName() + " "; if (entradas[i].getType() == Attribute.NOMINAL) { cadena += "{"; for (j = 0; j < entradas[i].getNominalValuesList().size(); j++) { cadena += (String) entradas[i].getNominalValuesList() .elementAt(j); if (j < entradas[i].getNominalValuesList().size() - 1) { cadena += ", "; } } cadena += "}\n"; } else { if (entradas[i].getType() == Attribute.INTEGER) { cadena += "integer"; cadena += " [" + String.valueOf((int) entradas[i] .getMinAttribute()) + ", " + String.valueOf((int) entradas[i] .getMaxAttribute()) + "]\n"; } else { cadena += "real"; cadena += " [" + String.valueOf(entradas[i].getMinAttribute()) + ", " + String.valueOf(entradas[i].getMaxAttribute()) + "]\n"; } } } /* Printing output attribute */ cadena += "@attribute " + salida.getName() + " "; if (salida.getType() == Attribute.NOMINAL) { cadena += "{"; for (j = 0; j < salida.getNominalValuesList().size(); j++) { cadena += (String) salida.getNominalValuesList().elementAt(j); if (j < salida.getNominalValuesList().size() - 1) { cadena += ", "; } } cadena += "}\n"; } else { cadena += "integer [" + String.valueOf((int) salida.getMinAttribute()) + ", " + String.valueOf((int) salida.getMaxAttribute()) + "]\n"; } /* Printing the data */ cadena += "@data\n"; Fichero.escribeFichero(nombreFichero, cadena); cadena = ""; for (i = 0; i < instanciasIN.length; i++) { cadena += instanciasIN[i] + " " + instanciasOUT[i]; cadena += "\n"; } Fichero.AnadirtoFichero(nombreFichero, cadena); } /** * <p> * Save network weights to a file * </p> * @param file_name Output file name * @param header header of the data set for which the network has been adjusted to */ protected void printNetworkToFile(String file_name, String header){ //write the header to the file Files.writeFile(file_name, header); Files.addToFile(file_name, "Number of neurons: " + nSel + "\n"); for(int i = 0; i < nSel; i++){ Files.addToFile(file_name, "\nNeuron " + i + "\n"); for(int j = 0; j < conjS[i].length;j++){ Files.addToFile(file_name, Double.toString(conjS[i][j])+" " ); } Files.addToFile(file_name, " Class = " + clasesS[i] + "\n"); } } private void normalizarTest() { int i, j, cont = 0, k; Instance temp; boolean hecho; double caja[]; StringTokenizer tokens; boolean nulls[]; /* Check if dataset corresponding with a classification problem */ if (Attributes.getOutputNumAttributes() < 1) { System.err .println("This dataset haven�t outputs, so it not corresponding to a classification problem."); System.exit(-1); } else if (Attributes.getOutputNumAttributes() > 1) { System.err.println("This dataset have more of one output."); System.exit(-1); } if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) { System.err .println("This dataset have an input attribute with floating values, so it not corresponding to a classification problem."); System.exit(-1); } datosTest = new double[test.getNumInstances()][Attributes .getInputNumAttributes()]; clasesTest = new int[test.getNumInstances()]; caja = new double[1]; for (i = 0; i < test.getNumInstances(); i++) { temp = test.getInstance(i); nulls = temp.getInputMissingValues(); datosTest[i] = test.getInstance(i).getAllInputValues(); for (j = 0; j < nulls.length; j++) if (nulls[j]) datosTest[i][j] = 0.0; caja = test.getInstance(i).getAllOutputValues(); clasesTest[i] = (int) caja[0]; for (k = 0; k < datosTest[i].length; k++) { if (Attributes.getInputAttribute(k).getType() == Attribute.NOMINAL) { datosTest[i][k] /= Attributes.getInputAttribute(k) .getNominalValuesList().size() - 1; } else { datosTest[i][k] -= Attributes.getInputAttribute(k) .getMinAttribute(); datosTest[i][k] /= Attributes.getInputAttribute(k) .getMaxAttribute() - Attributes.getInputAttribute(k).getMinAttribute(); } } } } /*This function builds the data matrix for classification reference and normalizes inputs values*/ private void normalizarReferencia () throws CheckException { int i, j, cont = 0, k; Instance temp; boolean hecho; double caja[]; StringTokenizer tokens; boolean nulls[]; /*Check if dataset corresponding with a classification problem*/ if (Attributes.getOutputNumAttributes() < 1) { throw new CheckException ("This dataset haven�t outputs, so it not corresponding to a classification problem."); } else if (Attributes.getOutputNumAttributes() > 1) { throw new CheckException ("This dataset have more of one output."); } if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) { throw new CheckException ("This dataset have an input attribute with floating values, so it not corresponding to a classification problem."); } datosReferencia = new double[referencia.getNumInstances()][Attributes.getInputNumAttributes()]; clasesReferencia = new int[referencia.getNumInstances()]; caja = new double[1]; /*Get the number of instances that have a null value*/ for (i=0; i<referencia.getNumInstances(); i++) { temp = referencia.getInstance(i); nulls = temp.getInputMissingValues(); datosReferencia[i] = referencia.getInstance(i).getAllInputValues(); for (j=0; j<nulls.length; j++) if (nulls[j]) datosReferencia[i][j]=0.0; caja = referencia.getInstance(i).getAllOutputValues(); clasesReferencia[i] = (int)caja[0]; for (k=0; k<datosReferencia[i].length; k++) { if (Attributes.getInputAttribute(k).getType() == Attribute.NOMINAL) { datosReferencia[i][k] /= Attributes.getInputAttribute(k).getNominalValuesList().size()-1; } else { datosReferencia[i][k] -= Attributes.getInputAttribute(k).getMinAttribute(); datosReferencia[i][k] /= Attributes.getInputAttribute(k).getMaxAttribute() - Attributes.getInputAttribute(k).getMinAttribute(); } } } } }