/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * <p> * @author Written by Manuel Chica Serrano (University of Jaen) 01/09/2005 * @author Modified by Jose Joaquin Aguilera Garcia (University of Jaen) 19/12/2008 * @author Modified by Cristobal Jose Carmona del Jesus (University of Jaen) 19/12/2008 * @author Modified by Jose Joaquin Aguilera Garcia (University of Jaen) 03/02/2009 * @version 1.0 * @since JDK1.5 * </p> */ package keel.Algorithms.Preprocess.Feature_Selection.nonevolutionary_algorithms.FOCUS; import org.core.Files; import java.util.*; import keel.Dataset.*; import keel.Algorithms.Preprocess.Feature_Selection.Datos; public class FocusIncon { /** * <p> * Main class of focus method for feature selection using inconsistency ratio as evaluation measure. * * This class implements FOCUS for features selection. The running process consists of generate * all posible features subsets. The stopping criteria will be satisfy a determinate inconsistency ratio * (read as parameter) * </p> */ /** Datos class with all information about datasets and feature selection methods */ private Datos data; /** needed parameters for backward method */ private Parametros params; /** a boolean array with selected features */ private boolean features[]; /** interior class using for reading all parameters */ private class Parametros{ /** algorithm name */ String nameAlgorithm; /** number of nearest neighbours for KNN Classifier */ int paramKNN; /** pathname of training dataset */ String trainFileNameInput; /** pathname of test dataset */ String testFileNameInput; /** pathname of test dataset only with selected features */ String testFileNameOutput; /** pathname of training dataset only with selected features */ String trainFileNameOutput; /** pathname of an extra file with additional information about the algorithm results */ String extraFileNameOutput; /** allowed inconsistency ratio */ double inconAllow; /** * <p> * Constructor of the Parametros Class * </p> * @param nombreFileParametros is the pathname of input parameter file */ Parametros (String nombreFileParametros){ try{ int i; String fichero, linea, tok; StringTokenizer lineasFile, tokens; /* read the parameter file using Files class */ fichero = Files.readFile(nombreFileParametros); fichero += "\n"; /* remove all \r characters. it is neccesary for a correct use in Windows and UNIX */ fichero = fichero.replace('\r', ' '); /* extract the differents tokens of the file */ lineasFile = new StringTokenizer(fichero, "\n"); i=0; while(lineasFile.hasMoreTokens()) { linea = lineasFile.nextToken(); i++; tokens = new StringTokenizer(linea, " ,\t"); if(tokens.hasMoreTokens()){ tok = tokens.nextToken(); if(tok.equalsIgnoreCase("algorithm")) nameAlgorithm = getParamString(tokens); else if(tok.equalsIgnoreCase("inputdata")) getInputFiles(tokens); else if(tok.equalsIgnoreCase("outputdata")) getOutputFiles(tokens); else if(tok.equalsIgnoreCase("paramKNN")) paramKNN = getParamInt(tokens); else if(tok.equalsIgnoreCase("inconAllow")) inconAllow = getParamFloat(tokens); else throw new java.io.IOException("Syntax error on line " + i + ": [" + tok + "]\n"); } } } catch(java.io.FileNotFoundException e){ System.err.println(e + "Parameter file"); }catch(java.io.IOException e){ System.err.println(e + "Aborting program"); System.exit(-1); } /** show the read parameter in the standard output */ String contents = "-- Parameters echo --- \n"; contents += "Algorithm name: " + nameAlgorithm +"\n"; contents += "Input Train File: " + trainFileNameInput +"\n"; contents += "Input Test File: " + testFileNameInput +"\n"; contents += "Output Train File: " + trainFileNameOutput +"\n"; contents += "Output Test File: " + testFileNameOutput +"\n"; contents += "Parameter k of KNN Algorithm: " + paramKNN + "\n"; contents += "Ratio of Inconsistency: " + inconAllow + "\n"; System.out.println(contents); } /** obtain a float value from the parameter file @param s is the StringTokenizer */ private double getParamFloat(StringTokenizer s){ String val = s.nextToken(); val = s.nextToken(); return Float.parseFloat(val); } /** obtain an integer value from the parameter file @param s is the StringTokenizer */ private int getParamInt(StringTokenizer s){ String val = s.nextToken(); val = s.nextToken(); return Integer.parseInt(val); } /** obtain a string value from the parameter file @param s is the StringTokenizer */ private String getParamString(StringTokenizer s){ String contenido = ""; String val = s.nextToken(); while(s.hasMoreTokens()) contenido += s.nextToken() + " "; return contenido.trim(); } /**obtain the names of the input files from the parameter file @param s is the StringTokenizer */ private void getInputFiles(StringTokenizer s){ String val = s.nextToken(); trainFileNameInput = s.nextToken().replace('"', ' ').trim(); testFileNameInput = s.nextToken().replace('"', ' ').trim(); } /** obtain the names of the output files from the parameter file @param s is the StringTokenizer */ private void getOutputFiles(StringTokenizer s){ String val = s.nextToken(); trainFileNameOutput = s.nextToken().replace('"', ' ').trim(); testFileNameOutput = s.nextToken().replace('"', ' ').trim(); extraFileNameOutput = s.nextToken().replace('"', ' ').trim(); } } /** * <p> * Creates a new instance of FocusIncon * </p> * @param ficParametros is the name of the param file */ public FocusIncon(String ficParametros) { /* loads the parameter file */ params = new Parametros(ficParametros); /* loads both of training and test datasets */ data = new Datos (params.trainFileNameInput, params.testFileNameInput, params.paramKNN); } /** * <p> * neccesary method for FOCUS Algorithm. Sets a boolean array from an integer pointers array (a pointer * value, e.g. 5, will set 5th position, in this case, of the boolean array as true) * </p> * @param fv, a boolean array result of method's calling * @param pointers, is the integer pointers array * @param tamPoiters, is the size of pointers array */ private static void establecerValoresBooleanos(boolean fv[], int pointers[], int tamPointers){ for(int i=0; i<fv.length; i++) fv[i] = false; for(int i=0; i<tamPointers; i++) fv[pointers[i]-1] = true; } /** * <p> * initializes the pointers array to 1...N (N is the logical size of array, passed as argument) * </p> * @param pointers is the integer pointers array * @param tam is the size of pointers array */ private static void inicializarMascara(int pointers[], int tam){ for(int i=0; i<tam; i++) pointers[i] = i+1; } /** * <p> * cleans the integer pointers array (0 as new value) * </p> * @param pointers is the integer pointers array */ private static void limpiarMascara(int pointers[]){ for(int i=0; i<pointers.length; i++) pointers[i] = 0; } /** * <p> * generates the next possible combination of selected features with a specified size * </p> * @param pointers is the pointers array that will be changed * @param tam is the logical size of the pointers array (also, it represents the number of features * to be selected in the next combination) * @return returns the number of failed attempts produced in the method */ private static int siguienteCombinacion(int pointers[], int tam){ int FCnt, i, j, val, intentosFallidos=0; for(i = tam-1; i >= 0; i--) if(pointers[i]!=0) if(pointers[i] + tam - i == pointers.length + 1) intentosFallidos++; else{ pointers[i]++; if(intentosFallidos!=0){ intentosFallidos = 0; val = pointers[i]; for(j = 1, FCnt = i+1; FCnt < tam; FCnt++, j++) pointers[FCnt] = val + j; while(FCnt < pointers.length){ pointers[FCnt] = 0; FCnt++; } } return intentosFallidos; } return intentosFallidos; } /** * <p> * main method for FocusIncon. This is an EXHAUSTIVE SEARCH ALGORITHM. Begins with subsets of size 1, * and searchs a subset which performs the inconsistency ratio (specified as input parameter) * </p> */ private void focus(){ boolean fv[]; int mascara[]; int i,j, intentosFallidos; /* allocates memory for fv & the pointers array. these structures help us to try with the different generated subsets */ fv = new boolean[data.returnNumFeatures()]; mascara = new int[data.returnNumFeatures()]; for(i=1; i<=data.returnNumFeatures(); i++){ intentosFallidos = 0; limpiarMascara(mascara); inicializarMascara(mascara, i); establecerValoresBooleanos(fv, mascara, i); while(intentosFallidos != i){ establecerValoresBooleanos(fv, mascara, i); /* calculates the inconsistency ratio from the generated features vector */ if(data.medidaInconsistencia(fv) <= params.inconAllow ){ /* the consistent subset has been found */ features = fv; return; } /* if the generated subset doesn't perform the inconsistency ratio, it will try with an other subset, modifying its pointers array with the next combination */ intentosFallidos += siguienteCombinacion(mascara, i); } } System.err.println("ERROR: It couldn't be possible to find any solution with this inconsistency ratio."); System.err.println("Please to reduce inconsistency ratio parameter"); System.exit(0); } /** * <p> * Method interface for FOCUS algorithm * </p> */ public void ejecutar(){ String resultado; int i, numFeatures; Date d; d = new Date(); resultado = "RESULTS generated at " + String.valueOf((Date)d) + " \n--------------------------------------------------\n"; resultado += "Algorithm Name: " + params.nameAlgorithm + "\n"; /* call of FOCUS algorithm */ focus(); resultado += "\nPARTITION Filename: "+ params.trainFileNameInput +"\n---------------\n\n"; resultado += "Features selected: \n"; for(i=numFeatures=0; i<features.length; i++) if(features[i] == true){ resultado += Attributes.getInputAttribute(i).getName() + " - "; numFeatures++; } resultado += "\n\n" + String.valueOf(numFeatures) + " features of " + Attributes.getInputNumAttributes() + "\n\n" ; resultado += "Error in test (using train for prediction): " + String.valueOf(data.validacionCruzada(features)) + "\n"; resultado += "Error in test (using test for prediction): " + String.valueOf(data.LVOTest(features)) + "\n"; resultado += "---------------\n"; System.out.println("Experiment completed successfully"); /* creates the new training and test datasets only with the selected features */ Files.writeFile(params.extraFileNameOutput, resultado); data.generarFicherosSalida(params.trainFileNameOutput, params.testFileNameOutput, features); } }