/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.Fuzzy_Rule_Learning.Genetic.ClassifierMOGUL; import java.io.*; import keel.Dataset.*; /** * <p> * It contains the methods to read a Dataset * </p> * * <p> * @author Written by Jesus Alcala Fernandez (University of Granada) 01/01/2004 * @author Modified by Francisco Jos� Berlanga (University of Ja�n) 09/12/2008 * @version 1.0 * @since JDK 1.6 * </p> */ public class MyDataset { public T_Table[] datos; public int n_variables, n_inputs, long_tabla; public int nClasses; public int no_cubiertos; public String[] output; public String[] clases; public T_Interval[] extremos; public String fichero; public InstanceSet IS; public boolean noOutputs; /** * <p> * Stores in memory the contents of the data file "f" * </p> * @param f String The name containing the Data Set * @param train boolean TRUE is the Data Set contains the training data. FALSE if it contains the test data */ public MyDataset(String f, boolean train) { fichero = f; IS = new InstanceSet(); try { processModelDataset(f, train); } catch (Exception e) { System.out.println("DBG: Exception in readSet"); e.printStackTrace(); } } /** * <p> * Reads the Data Sets * </p> * @param nfejemplos String The name containing the Data Set * @param train boolean TRUE is the Data Set contains the training data. FALSE if it contains the test data */ public void processModelDataset(String nfejemplos, boolean train) throws IOException { int i, j, k, pos; try { // Load in memory a dataset that contains a regression problem IS.readSet(nfejemplos, train); // We read the number of instances and variables long_tabla = IS.getNumInstances(); n_inputs = Attributes.getInputNumAttributes(); no_cubiertos = long_tabla; // Check that there is only one output variable and // it is nominal if (Attributes.getOutputNumAttributes() > 1) { System.out.println( "This algorithm can not process MIMO datasets"); System.out.println( "All outputs but the first one will be removed"); } boolean noOutputs = false; if (Attributes.getOutputNumAttributes() < 1) { System.out.println( "This algorithm can not process datasets without outputs"); System.out.println("Zero-valued output generated"); noOutputs = true; } n_variables = n_inputs + Attributes.getOutputNumAttributes(); output = new String[long_tabla]; clases = new String[long_tabla]; // Initialice and fill our own tables datos = new T_Table[long_tabla]; // Maximum and minimum of inputs/output data extremos = new T_Interval[n_variables]; for (i = 0; i < n_variables; i++) { extremos[i] = new T_Interval(); } // All values are casted into double/integer nClasses = 0; for (i = 0, k = 0; i < long_tabla; i++) { Instance inst = IS.getInstance(i); if (inst.existsAnyMissingValue() == true) { System.out.println( "This algorithm can not process missing values"); System.out.println("This algorithm don't use the instance " + (i + 1) + ". You have to apply before a preprocess method"); } else { datos[k] = new T_Table(n_variables); for (j = 0; j < n_inputs; j++) { datos[k].ejemplo[j] = IS.getInputNumericValue(i, j); if (datos[k].ejemplo[j] > extremos[j].max || k == 0) { extremos[j].max = datos[k].ejemplo[j]; } if (datos[k].ejemplo[j] < extremos[j].min || k == 0) { extremos[j].min = datos[k].ejemplo[j]; } } if (noOutputs) { datos[k].ejemplo[j] = 0; output[k] = ""; } else { datos[k].ejemplo[j] = IS.getOutputNumericValue(i, 0); output[k] = IS.getOutputNominalValue(i, 0); } if ((int) datos[k].ejemplo[j] > nClasses) { nClasses = (int) datos[k].ejemplo[j]; } pos = (int) datos[k].ejemplo[j]; clases[pos] = output[k]; k++; } } nClasses++; System.out.println("Number of classes=" + nClasses); long_tabla = k; } catch (Exception e) { System.out.println("DBG: Exception in readSet"); e.printStackTrace(); } } /** * <p> * It returns the header * </p> * @return String The Header of the Data Set */ public String getHeader() { return (IS.getHeader()); } /** * <p> * It create a new table with the examples from the Data Set * </p> */ public void newTable() { int pos; // Initialice and fill our own tables datos = new T_Table[long_tabla]; // All values are casted into double/integer nClasses = 0; for (int i = 0, k = 0, j = 0; i < long_tabla; i++) { Instance inst = IS.getInstance(i); datos[k] = new T_Table(n_variables); for (j = 0; j < n_inputs; j++) { datos[k].ejemplo[j] = IS.getInputNumericValue(i, j); if (datos[k].ejemplo[j] > extremos[j].max || k == 0) { extremos[j].max = datos[k].ejemplo[j]; } if (datos[k].ejemplo[j] < extremos[j].min || k == 0) { extremos[j].min = datos[k].ejemplo[j]; } } if (noOutputs) { datos[k].ejemplo[j] = 0; output[k] = ""; } else { datos[k].ejemplo[j] = IS.getOutputNumericValue(i, 0); output[k] = IS.getOutputNominalValue(i, 0); } if ((int) datos[k].ejemplo[j] > nClasses) { nClasses = (int) datos[k].ejemplo[j]; } pos = (int) datos[k].ejemplo[j]; clases[pos] = output[k]; k++; } nClasses++; System.out.println("Number of classes=" + nClasses); } /** * <p> * Return the output value of the example in position "pos" * </p> * @return String The output value for example in position "pos" */ public String getOutputAsString(int pos) { return output[pos]; } /** * <p> * Return the name of the class in position "pos" * </p> * @return String The name of the class in position "pos" */ public String getClassAsString(int pos) { return clases[pos]; } }