/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.Decision_Trees.C45_Binarization;

import java.io.IOException;
import keel.Dataset.*;
import java.util.Vector;

/**
 * <p>Title: Dataset</p>
 *
 * <p>Description: It contains the methods to read a Classification/Regression Dataset</p>
 * <p>Company: KEEL </p>
 *
 * @author Alberto Fernández (University of Jaén - 27-09-2010)
 * @version 1.2
 */
public class myDataset {

  public static final int REAL = Attribute.REAL;
  public static final int INTEGER = Attribute.INTEGER;
  public static final int NOMINAL = Attribute.NOMINAL;

  protected double[][] X = null; //examples array
  private double[][] X_normalized = null; //normalized copy of X, filled by normalize_statistics()
  private boolean[][] missing = null; //possible missing values (only allocated by readClassificationSet)
  private int[] outputInteger = null; //output of the data-set as integer values
  private double[] outputReal = null; //output of the data-set as double values
  private String[] output = null; //output of the data-set as string values

  private double[] emax; //max value of an attribute
  private double[] emin; //min value of an attribute

  private int nData; // Number of examples
  private int nVars; // Number of variables
  private int nInputs; // Number of inputs
  private int nClasses; // Number of outputs
  private int[] list_of_classes; // ids of the classes handled by this data-set

  private InstanceSet IS; //The whole instance set
  private Attribute[] inputs_att;
  private Attribute output_att;

  private double stdev[], average[]; //standard deviation and average of each attribute
  private double stdevPerClass[][], averagePerClass[][];
  private int instancesCl[]; // number of examples per class id
  private int[] types; // type (REAL / INTEGER / NOMINAL) of each input
  private String[] classes; // class names, indexed by class id
  private String[] variables; // attribute names (inputs first, then the output)
  private String[][] nominals; // nominal values of each input ("?" for numeric inputs)
  private String[][] nominales_head; // nominal values written in the header by doHeader()

  /**
   * Init a new set of instances
   */
  public myDataset() {
    IS = new InstanceSet();
  }

  /**
   * It generates a new binary dataset for the OVO scheme: only the examples
   * of <code>class_1</code> and <code>class_2</code> are kept, and the
   * attribute ranges and the nominal values of the header are recomputed
   * from the kept examples.
   *
   * @param copy the original training dataset
   * @param class_1 first class
   * @param class_2 second class
   */
  public myDataset(myDataset copy, int class_1, int class_2) {
    nVars = copy.getnVars();
    nInputs = copy.getnInputs();
    nClasses = 2;
    classes = copy.classes.clone();
    variables = copy.variables.clone();
    types = copy.types.clone();
    nominals = new String[nInputs][];
    for (int i = 0; i < nominals.length; i++) {
      nominals[i] = copy.nominals[i].clone();
    }

    double[][] X_aux = new double[copy.size()][copy.getnInputs()];
    int[] outputInteger_aux = new int[copy.size()];
    String[] output_aux = new String[copy.size()];
    nData = 0;

    emax = new double[copy.getnInputs()];
    emin = new double[copy.getnInputs()];
    for (int i = 0; i < emax.length; i++) {
      // Double.MIN_VALUE is the smallest POSITIVE double, so it cannot be used
      // as the seed of a maximum search when the attribute values are negative.
      emax[i] = -Double.MAX_VALUE;
      emin[i] = Double.MAX_VALUE;
    }

    for (int i = 0; i < copy.size(); i++) {
      int clas = copy.getOutputAsInteger(i);
      if ((clas == class_1) || (clas == class_2)) {
        double[] example = copy.getExample(i);
        for (int j = 0; j < emax.length; j++) {
          X_aux[nData][j] = example[j];
          if (emax[j] < example[j]) {
            emax[j] = example[j];
          }
          if (emin[j] > example[j]) {
            emin[j] = example[j];
          }
        }
        outputInteger_aux[nData] = clas;
        output_aux[nData] = copy.getOutputAsString(i);
        nData++;
      }
    }

    // The header only lists the nominal values that appear in the kept examples
    nominales_head = new String[nominals.length][];
    for (int i = 0; i < nominals.length; i++) {
      if (types[i] == NOMINAL) {
        boolean[] used = new boolean[nominals[i].length];
        int count = 0;
        for (int j = 0; j < nData; j++) {
          int value = (int) X_aux[j][i];
          if (!used[value]) {
            used[value] = true;
            count++;
          }
        }
        nominales_head[i] = new String[count];
        int next = 0;
        for (int j = 0; j < used.length; j++) {
          if (used[j]) {
            nominales_head[i][next++] = nominals[i][j];
          }
        }
      }
      else {
        nominales_head[i] = new String[] {"?"};
      }
    }

    // Shrink the auxiliary arrays to the number of kept examples
    X = new double[nData][];
    outputInteger = new int[nData];
    output = new String[nData];
    System.arraycopy(X_aux, 0, X, 0, nData);
    System.arraycopy(outputInteger_aux, 0, outputInteger, 0, nData);
    System.arraycopy(output_aux, 0, output, 0, nData);
    outputReal = toReal(outputInteger);

    list_of_classes = new int[] {class_1, class_2};

    copy.computeInstancesPerClass(); // for cost-sensitive learning
    instancesCl = new int[copy.getnClasses()];
    for (int i = 0; i < instancesCl.length; i++) {
      instancesCl[i] = copy.numberInstances(i);
    }
  }

  /**
   * It generates a new binary dataset for the OVA scheme: every example is
   * kept, the examples of class <code>positiva</code> are relabelled as
   * "positive" (class id 0) and the rest as "negative" (class id 1).
   * The examples are deep-copied, so modifying this data-set (for instance
   * with {@link #normalize()} or {@link #discretize(int)}) does not alter
   * the original one.
   *
   * @param copia the original training dataset
   * @param positiva primary class (against the rest)
   */
  public myDataset(myDataset copia, int positiva) {
    nVars = copia.getnVars();
    nInputs = copia.getnInputs();
    nClasses = 2;
    classes = copia.classes.clone();
    variables = copia.variables.clone();
    types = copia.types.clone();
    nominals = new String[nInputs][];
    for (int i = 0; i < nominals.length; i++) {
      nominals[i] = copia.nominals[i].clone();
    }
    nominales_head = deepCopy(nominals);
    emax = copia.getemax().clone();
    emin = copia.getemin().clone();

    nData = copia.size();
    // clone() on a double[][] only copies the row references: copy every row
    X = deepCopy(copia.getX());
    outputInteger = new int[nData];
    output = new String[nData];
    int positivos = 0;
    for (int i = 0; i < nData; i++) {
      if (copia.getOutputAsInteger(i) == positiva) {
        positivos++;
        outputInteger[i] = 0;
        output[i] = "positive";
      }
      else {
        outputInteger[i] = 1;
        output[i] = "negative";
      }
    }
    outputReal = toReal(outputInteger);

    list_of_classes = new int[] {0, 1};

    instancesCl = new int[2];
    instancesCl[0] = positivos;
    instancesCl[1] = nData - positivos;

    classes[0] = "positive";
    classes[1] = "negative";
  }

  /**
   * It generates a new binary dataset for the OVO scheme (NESTING): only the
   * examples of <code>class_1</code> or <code>class_2</code> whose entry in
   * <code>ties</code> is 1 are kept. The number of classes, the attribute
   * ranges and the attribute descriptions are those of the original data-set.
   *
   * @param copy the original training dataset
   * @param class_1 first class
   * @param class_2 second class
   * @param ties one flag per example of <code>copy</code>; an example is a
   * candidate only when its flag is 1
   */
  public myDataset(myDataset copy, int class_1, int class_2, int[] ties) {
    nVars = copy.getnVars();
    nInputs = copy.getnInputs();
    nClasses = copy.getnClasses();
    // Keep the attribute descriptions so that getTipo(), className(), getMax(), ...
    // also work on the nested data-set
    copyMetadataFrom(copy);

    double[][] X_aux = new double[copy.size()][];
    int[] outputInteger_aux = new int[copy.size()];
    String[] output_aux = new String[copy.size()];
    nData = 0;
    for (int i = 0; i < copy.size(); i++) {
      int clas = copy.getOutputAsInteger(i);
      if (((clas == class_1) || (clas == class_2)) && ties[i] == 1) {
        X_aux[nData] = copy.getExample(i).clone();
        outputInteger_aux[nData] = clas;
        output_aux[nData] = copy.getOutputAsString(i);
        nData++;
      }
    }

    X = new double[nData][];
    outputInteger = new int[nData];
    output = new String[nData];
    System.arraycopy(X_aux, 0, X, 0, nData);
    System.arraycopy(outputInteger_aux, 0, outputInteger, 0, nData);
    System.arraycopy(output_aux, 0, output, 0, nData);
    outputReal = toReal(outputInteger);

    list_of_classes = new int[] {class_1, class_2};

    copy.computeInstancesPerClass(); // for cost-sensitive learning
    instancesCl = new int[copy.getnClasses()];
    for (int i = 0; i < instancesCl.length; i++) {
      instancesCl[i] = copy.numberInstances(i);
    }
  }

  /**
   * Outputs an array of examples with their corresponding attribute values.
   * The internal array is returned (no copy is made).
   * @return double[][] an array of examples with their corresponding attribute values
   */
  public double[][] getX() {
    return X;
  }

  /**
   * Output a specific example. The internal row is returned (no copy is made).
   * @param pos int position (id) of the example in the data-set
   * @return double[] the attributes of the given example
   */
  public double[] getExample(int pos) {
    return X[pos];
  }

  /**
   * Returns the output of the data-set as integer values
   * @return int[] a copy of the integer values corresponding to the output values of the dataset
   */
  public int[] getOutputAsInteger() {
    return outputInteger.clone();
  }

  /**
   * Returns the output of the data-set as real values
   * @return double[] a copy of the real values corresponding to the output values of the dataset
   */
  public double[] getOutputAsReal() {
    return outputReal.clone();
  }

  /**
   * Returns the output of the data-set as nominal values
   * @return String[] a copy of the nominal values corresponding to the output values of the dataset
   */
  public String[] getOutputAsString() {
    return output.clone();
  }

  /**
   * It returns the output value of the example "pos"
   * @param pos int the position (id) of the example
   * @return String a string containing the output value
   */
  public String getOutputAsString(int pos) {
    return output[pos];
  }

  /**
   * It returns the output value of the example "pos"
   * @param pos int the position (id) of the example
   * @return int an integer containing the output value
   */
  public int getOutputAsInteger(int pos) {
    return outputInteger[pos];
  }

  /**
   * It returns the output value of the example "pos"
   * @param pos int the position (id) of the example
   * @return double a real containing the output value
   */
  public double getOutputAsReal(int pos) {
    return outputReal[pos];
  }

  /**
   * It returns an array with the maximum values of the attributes
   * @return double[] an array with the maximum values of the attributes
   */
  public double[] getemax() {
    return emax;
  }

  /**
   * It returns an array with the minimum values of the attributes
   * @return double[] an array with the minimum values of the attributes
   */
  public double[] getemin() {
    return emin;
  }

  /**
   * It returns the maximum value of a given input attribute
   * @param variable attribute id
   * @return the maximum value of the attribute
   */
  public double getMax(int variable) {
    return emax[variable];
  }

  /**
   * It returns the minimum value of a given input attribute
   * @param variable attribute id
   * @return the minimum value of the attribute
   */
  public double getMin(int variable) {
    return emin[variable];
  }

  /**
   * It gets the size of the data-set
   * @return int the number of examples in the data-set
   */
  public int getnData() {
    return nData;
  }

  /**
   * It gets the number of variables of the data-set (including the output)
   * @return int the number of variables of the data-set (including the output)
   */
  public int getnVars() {
    return nVars;
  }

  /**
   * It gets the number of input attributes of the data-set
   * @return int the number of input attributes of the data-set
   */
  public int getnInputs() {
    return nInputs;
  }

  /**
   * It gets the number of output attributes of the data-set (for example number of classes in classification)
   * @return int the number of different output values of the data-set
   */
  public int getnClasses() {
    return nClasses;
  }

  /**
   * This function checks if the attribute value is missing. Data-sets that
   * were not read from a file (built from another data-set) hold no
   * missing-value table; for them this method returns false.
   * @param i int Example id
   * @param j int Variable id
   * @return boolean True is the value is missing, else it returns false
   */
  public boolean isMissing(int i, int j) {
    return (missing != null) && missing[i][j];
  }

  /**
   * It reads the whole input data-set and it stores each example and its associated output value in
   * local arrays to ease their use. Examples with at least one missing input
   * value are discarded.
   * @param datasetFile String name of the file containing the dataset
   * @param train boolean It must have the value "true" if we are reading the training data-set
   * @throws IOException If there occurs any problem with the reading of the data-set
   */
  public void readClassificationSet(String datasetFile, boolean train) throws
      IOException {
    try {
      // Load in memory a dataset that contains a classification problem
      IS = new InstanceSet();
      IS.readSet(datasetFile, train);
      IS.setAttributesAsNonStatic();
      inputs_att = IS.getAttributeDefinitions().getInputAttributes();
      output_att = IS.getAttributeDefinitions().getOutputAttribute(0);

      nData = IS.getNumInstances();
      nInputs = IS.getAttributeDefinitions().getInputNumAttributes();
      nVars = nInputs + IS.getAttributeDefinitions().getOutputNumAttributes();

      // Check that there is only one output variable
      if (Attributes.getOutputNumAttributes() > 1) {
        System.out.println(
            "This algorithm can not process MIMO datasets");
        System.out.println(
            "All outputs but the first one will be removed");
        System.exit(1);
      }
      boolean noOutputs = false;
      if (Attributes.getOutputNumAttributes() < 1) {
        System.out.println(
            "This algorithm can not process datasets without outputs");
        System.out.println("Zero-valued output generated");
        noOutputs = true;
        System.exit(1);
      }

      // Initialize and fill our own tables
      X = new double[nData][nInputs];
      missing = new boolean[nData][nInputs];
      outputInteger = new int[nData];
      outputReal = new double[nData];
      output = new String[nData];

      // Maximum and minimum of inputs
      emax = new double[nInputs];
      emin = new double[nInputs];
      for (int i = 0; i < nInputs; i++) {
        emax[i] = Attributes.getAttribute(i).getMaxAttribute();
        emin[i] = Attributes.getAttribute(i).getMinAttribute();
      }

      // All values are casted into double/integer
      nClasses = 0;
      int datosMal = 0; // number of discarded examples
      int datos = 0; // next free position in the local tables
      for (int i = 0; i < nData; i++) {
        Instance inst = IS.getInstance(i);
        boolean discard = false;
        // NOTE(review): "missing" is indexed by the position of the instance in
        // the file whereas X is indexed by the compacted position "datos";
        // kept as it was because hasMissingAttributes() depends on it.
        for (int j = 0; (j < nInputs) && !discard; j++) {
          X[datos][j] = IS.getInputNumericValue(i, j);
          missing[i][j] = inst.getInputMissingValues(j);
          discard = missing[i][j]; // the example is ignored (missing value)
        }

        if (noOutputs) {
          outputInteger[datos] = 0;
          outputReal[datos] = 0;
          output[datos] = "";
        }
        else {
          double numericOutput = IS.getOutputNumericValue(i, 0);
          outputInteger[datos] = (int) numericOutput;
          outputReal[datos] = numericOutput;
          output[datos] = IS.getOutputNominalValue(i, 0);
        }
        if (outputInteger[datos] > nClasses) {
          nClasses = outputInteger[datos];
        }
        if (discard) {
          datosMal++; // position "datos" will be overwritten by the next example
        }
        else {
          datos++;
        }
      }
      nData -= datosMal;
      if (datosMal > 0) {
        // Drop the unused trailing positions so that every table has nData entries
        double[][] keptX = new double[nData][];
        int[] keptInteger = new int[nData];
        double[] keptReal = new double[nData];
        String[] keptOutput = new String[nData];
        System.arraycopy(X, 0, keptX, 0, nData);
        System.arraycopy(outputInteger, 0, keptInteger, 0, nData);
        System.arraycopy(outputReal, 0, keptReal, 0, nData);
        System.arraycopy(output, 0, keptOutput, 0, nData);
        X = keptX;
        outputInteger = keptInteger;
        outputReal = keptReal;
        output = keptOutput;
      }
      nClasses++;
      System.out.println("Number of classes=" + nClasses);

    }
    catch (Exception e) {
      // Going on with half-initialized tables only hides the real problem:
      // report it through the exception type declared by this method.
      IOException wrapped = new IOException("Error while reading the data-set '"
                                            + datasetFile + "': " + e);
      wrapped.initCause(e);
      throw wrapped;
    }

    list_of_classes = new int[nClasses];
    for (int i = 0; i < nClasses; i++) {
      list_of_classes[i] = i;
    }

    computeStatistics();
    computeInstancesPerClass();

    variables = new String[nVars];
    classes = new String[nClasses];
    types = new int[nInputs];
    nominals = new String[nInputs][];
    nominales_head = new String[nInputs][];
    for (int i = 0; i < nInputs; i++) {
      variables[i] = inputs_att[i].getName();
      types[i] = inputs_att[i].getType();
      int numValues = inputs_att[i].getNumNominalValues();
      if (numValues > 0) {
        nominals[i] = new String[numValues];
        nominales_head[i] = new String[numValues];
        for (int j = 0; j < numValues; j++) {
          nominals[i][j] = inputs_att[i].getNominalValue(j);
          nominales_head[i][j] = inputs_att[i].getNominalValue(j);
        }
      }
      else {
        nominals[i] = new String[] {"?"};
        nominales_head[i] = new String[] {"?"};
      }
    }
    variables[nInputs] = output_att.getName();
    for (int i = 0; i < nClasses; i++) {
      classes[i] = output_att.getNominalValue(i);
    }
  }

  /**
   * It transforms the input space into the [0,1] range (in place).
   * Missing values are left untouched. Attributes with a single possible
   * value (null range) are mapped to 0.
   */
  public void normalize() {
    normalizeInto(X);
  }

  /**
   * It stores in an internal table the examples normalized into the [0,1]
   * range, leaving the original examples untouched. Missing values are
   * stored as 0. Attributes with a single possible value (null range) are
   * mapped to 0.
   */
  public void normalize_statistics() {
    X_normalized = new double[nData][nInputs];
    normalizeInto(X_normalized);
  }

  /**
   * It computes the average and standard deviation of the normalized input
   * attributes for each class. When a class has no examples its average is
   * replaced by the global average of the attribute.
   */
  public void computeStatisticsPerClass() {
    this.normalize_statistics();
    int inputs = this.getnInputs();
    stdevPerClass = new double[nClasses][inputs];
    averagePerClass = new double[nClasses][inputs];

    // The class sizes do not depend on the attribute: count them only once
    int[] examplesPerClass = new int[nClasses];
    for (int c = 0; c < nClasses; c++) {
      examplesPerClass[c] = this.numberOfExamples(c);
    }

    for (int i = 0; i < inputs; i++) {
      for (int j = 0; j < this.getnData(); j++) {
        if (!this.isMissing(j, i)) {
          averagePerClass[this.outputInteger[j]][i] += X_normalized[j][i];
        }
      }
      for (int c = 0; c < nClasses; c++) {
        averagePerClass[c][i] /= examplesPerClass[c];
        if (Double.isNaN(averagePerClass[c][i])) {
          averagePerClass[c][i] = this.average(i);
        }
      }
    }

    for (int i = 0; i < inputs; i++) {
      double[] sum = new double[nClasses];
      for (int j = 0; j < this.getnData(); j++) {
        if (!this.isMissing(j, i)) {
          int c = this.outputInteger[j];
          double diff = X_normalized[j][i] - averagePerClass[c][i];
          sum[c] += diff * diff;
        }
      }
      for (int c = 0; c < nClasses; c++) {
        stdevPerClass[c][i] = Math.sqrt(sum[c] / examplesPerClass[c]);
      }
    }
  }

  /**
   * It returns the per-class averages computed by computeStatisticsPerClass()
   * @return the averages, indexed by [class][attribute]
   */
  public double[][] getAveragePerClass() {
    return this.averagePerClass;
  }

  /**
   * It returns the per-class standard deviations computed by computeStatisticsPerClass()
   * @return the standard deviations, indexed by [class][attribute]
   */
  public double[][] getStdPerClass() {
    return this.stdevPerClass;
  }

  /**
   * It checks if the data-set has any real value
   * @return boolean True if it has some real values, else false.
   */
  public boolean hasRealAttributes() {
    return Attributes.hasRealAttributes();
  }

  /**
   * It checks if the data-set has any numerical (integer or real) attribute
   * @return boolean True if it has some numerical attribute, else false.
   */
  public boolean hasNumericalAttributes() {
    return (Attributes.hasIntegerAttributes() ||
            Attributes.hasRealAttributes());
  }

  /**
   * It checks if the data-set has any missing value
   * @return boolean True if it has some missing values, else false.
   */
  public boolean hasMissingAttributes() {
    return (this.sizeWithoutMissing() < this.getnData());
  }

  /**
   * It return the size of the data-set without having account the missing values
   * @return int the size of the data-set without having account the missing values
   */
  public int sizeWithoutMissing() {
    int tam = 0;
    for (int i = 0; i < nData; i++) {
      boolean complete = true;
      // every attribute is checked, the first one (index 0) included
      for (int j = 0; (j < nInputs) && complete; j++) {
        complete = !isMissing(i, j);
      }
      if (complete) {
        tam++;
      }
    }
    return tam;
  }

  /**
   * It gets the size of the data-set
   * @return int the number of examples in the data-set
   */
  public int size() {
    return nData;
  }

  /**
   * It computes the average and standard deviation of the input attributes
   * and of the output (stored in the last position).
   */
  private void computeStatistics() {
    stdev = new double[this.getnVars()];
    average = new double[this.getnVars()];
    int outputPos = average.length - 1;

    for (int i = 0; i < this.getnInputs(); i++) {
      double total = 0;
      for (int j = 0; j < this.getnData(); j++) {
        if (!this.isMissing(j, i)) {
          total += X[j][i];
        }
      }
      average[i] = total / this.getnData();
    }
    double totalOutput = 0;
    for (int j = 0; j < outputReal.length; j++) {
      totalOutput += outputReal[j];
    }
    average[outputPos] = totalOutput / outputReal.length;

    for (int i = 0; i < this.getnInputs(); i++) {
      double sum = 0;
      for (int j = 0; j < this.getnData(); j++) {
        if (!this.isMissing(j, i)) {
          double diff = X[j][i] - average[i];
          sum += diff * diff;
        }
      }
      stdev[i] = Math.sqrt(sum / this.getnData());
    }
    double sumOutput = 0;
    for (int j = 0; j < outputReal.length; j++) {
      double diff = outputReal[j] - average[outputPos];
      sumOutput += diff * diff;
    }
    stdev[stdev.length - 1] = Math.sqrt(sumOutput / outputReal.length);
  }

  /**
   * It return the standard deviation of an specific attribute
   * @param position int attribute id (position of the attribute)
   * @return double the standard deviation of the attribute
   */
  public double stdDev(int position) {
    return stdev[position];
  }

  /**
   * It return the average of an specific attribute
   * @param position int attribute id (position of the attribute)
   * @return double the average of the attribute
   */
  public double average(int position) {
    return average[position];
  }

  /**
   * It counts the number of examples of each class (see numberInstances)
   */
  public void computeInstancesPerClass() {
    instancesCl = new int[nClasses];
    for (int i = 0; i < this.getnData(); i++) {
      instancesCl[this.outputInteger[i]]++;
    }
  }

  /**
   * It returns the stored number of examples of a given class
   * @param clas class id
   * @return the number of examples stored for that class
   */
  public int numberInstances(int clas) {
    return instancesCl[clas];
  }

  /**
   * It returns the number of nominal values of a given input attribute
   * @param attribute attribute id
   * @return the number of nominal values of the attribute
   */
  public int numberValues(int attribute) {
    return Attributes.getInputAttribute(attribute).getNumNominalValues();
  }

  /**
   * It returns the name of a class
   * @param intValue class id
   * @return the name of the class
   */
  public String getOutputValue(int intValue) {
    return classes[intValue];
  }

  /**
   * It returns the type of an input attribute
   * @param variable attribute id
   * @return the type of the attribute (REAL, INTEGER or NOMINAL)
   */
  public int getTipo(int variable) {
    return types[variable];
  }

  /**
   * It returns the universe of discourse of the input and output variables
   * @return double[][] The minimum [0] and maximum [1] of each variable
   */
  public double[][] getRanges() {
    double[][] rangos = new double[this.getnVars()][2];
    for (int i = 0; i < this.getnInputs(); i++) {
      rangos[i][0] = emin[i];
      rangos[i][1] = emax[i];
    }
    rangos[this.getnVars() - 1][0] = 0;
    rangos[this.getnVars() - 1][1] = nClasses - 1;
    return rangos;
  }

  /**
   * It returns the attribute name of a given variable
   * @param pos var id
   * @return the name of the input attribute
   */
  public String varName(int pos) {
    return Attributes.getInputAttribute(pos).getName();
  }

  /**
   * It returns the nominal value that corresponds to the class with numeric value "clase"
   * @param clase int class id
   * @return String the name of the class
   */
  public String className(int clase) {
    return classes[clase];
  }

  /**
   * Uniform width discretization of the REAL attributes (in place): each
   * value is replaced by the index of its interval. Values that are not lower
   * than the upper bound of the last interval (for instance the maximum of
   * the attribute) are assigned to the last interval. Nothing is done when
   * the number of intervals is not positive.
   * @param intervalos int Number of intervals
   */
  public void discretize(int intervalos) {
    if (intervalos <= 0) {
      return;
    }
    for (int i = 0; i < nInputs; i++) {
      if (this.getTipo(i) != REAL) {
        continue;
      }
      double corte = (emax[i] - emin[i]) / intervalos;
      for (int j = 0; j < this.size(); j++) {
        if (Double.isNaN(X[j][i])) {
          continue; // nothing sensible can be assigned
        }
        int interval = intervalos - 1; // default: the last interval
        double acum = emin[i] + corte;
        for (int k = 0; k < intervalos - 1; k++) {
          if (X[j][i] < acum) {
            interval = k;
            break;
          }
          acum += corte;
        }
        X[j][i] = interval;
      }
    }
  }

  /**
   * It returns the names for all input variables
   * @return the names for all input variables
   */
  public String[] names() {
    String nombres[] = new String[nInputs];
    for (int i = 0; i < nInputs; i++) {
      nombres[i] = Attributes.getInputAttribute(i).getName();
    }
    return nombres;
  }

  /**
   * It returns the position of a nominal value in the list of values of an input attribute
   * @param atributo attribute id
   * @param valorNominal nominal value
   * @return the position of the value as a real number (-1 if it is not found)
   */
  public static double realValue(int atributo, String valorNominal) {
    Vector nominales = Attributes.getInputAttribute(atributo).
        getNominalValuesList();
    int aux = nominales.indexOf(valorNominal);
    return 1.0 * aux;
  }

  /**
   * It returns the position of a nominal value in the list of values of the output attribute
   * @param valorNominal name of the class
   * @return the position of the class (-1 if it is not found)
   */
  public int numericClass(String valorNominal) {
    Vector nominales = Attributes.getOutputAttribute(0).getNominalValuesList();
    return nominales.indexOf(valorNominal);
  }

  /**
   * It returns the nominal value stored in a given position of an input attribute
   * @param atributo attribute id
   * @param valorReal position of the value (it is truncated to an integer)
   * @return the nominal value
   */
  public static String nominalValue(int atributo, double valorReal) {
    Vector nominales = Attributes.getInputAttribute(atributo).
        getNominalValuesList();
    return (String) nominales.get((int) valorReal);
  }

  /**
   * It returns the number of nominal values for a given variable
   * @param attribute var id
   * @return the number of nominal values for a given variable
   */
  public int totalNominals(int attribute) {
    return Attributes.getInputAttribute(attribute).getNumNominalValues();
  }

  /**
   * It returns the most frequent class in the dataset, among the classes
   * handled by it. In case of a tie the first class in the list is returned.
   * @return the most frequent class in the dataset
   */
  public String mostFrequentClass() {
    int claseMayoritaria = list_of_classes[0];
    for (int i = 1; i < list_of_classes.length; i++) {
      int candidate = list_of_classes[i];
      if (instancesCl[claseMayoritaria] < instancesCl[candidate]) {
        claseMayoritaria = candidate;
      }
    }
    return this.getOutputValue(claseMayoritaria);
  }

  /**
   * It returns the number of minority class examples (considering the first
   * two classes handled by the data-set)
   * @return the number of examples of the smallest class
   */
  public int n_minoritaria() {
    return Math.min(this.numberInstances(list_of_classes[0]),
                    this.numberInstances(list_of_classes[1]));
  }

  /**
   * It computes the Imbalance Ratio
   * @return the ratio between negative and positive examples (max/min)
   */
  public double ir() {
    int first = this.numberInstances(list_of_classes[0]);
    int second = this.numberInstances(list_of_classes[1]);
    if (first > second) {
      return ((double) first / second);
    }
    return ((double) second / first);
  }

  /**
   * It obtains the number of examples for the i-th class
   * @param clase int class id
   * @return int number of examples belonging to that class
   */
  public int numberOfExamples(int clase) {
    int ejemplos = 0;
    for (int i = 0; i < outputInteger.length; i++) {
      if (clase == outputInteger[i]) {
        ejemplos++;
      }
    }
    return ejemplos;
  }

  /**
   * To compute whether the dataset is empty
   * @return true if any of the first two classes handled by the data-set has no examples
   */
  public boolean empty() {
    return (this.numberInstances(list_of_classes[0]) == 0) ||
        (this.numberInstances(list_of_classes[1]) == 0);
  }

  /**
   * It prints the header of a dataset into KEEL format
   * @return the header of a dataset into KEEL format
   */
  public String doHeader() {
    StringBuilder cadena = new StringBuilder();
    cadena.append("@relation unknown\n");
    for (int i = 0; i < this.nInputs; i++) {
      cadena.append("@attribute ").append(variables[i]);
      if (types[i] == INTEGER) {
        cadena.append(" integer [").append((int) emin[i]).append(",")
            .append((int) emax[i]).append("]\n");
      }
      else if (types[i] == REAL) {
        cadena.append(" real [").append(emin[i]).append(",")
            .append(emax[i]).append("]\n");
      }
      else {
        cadena.append(" {").append(join(nominales_head[i], ",")).append("}\n");
      }
    }
    // Only the classes handled by this data-set are listed
    String[] classNames = new String[list_of_classes.length];
    for (int i = 0; i < classNames.length; i++) {
      classNames[i] = classes[list_of_classes[i]];
    }
    cadena.append("@attribute ").append(variables[nInputs]);
    cadena.append(" {").append(join(classNames, ", ")).append("}\n");
    cadena.append("@data\n");
    return cadena.toString();
  }

  /**
   * It copies the header of the dataset
   * @return String A string containing all the data-set information
   */
  public String copyHeader() {
    String p = "@relation " + Attributes.getRelationName() + "\n";
    p += Attributes.getInputAttributesHeader();
    p += Attributes.getOutputAttributesHeader();
    p += Attributes.getInputHeader() + "\n";
    p += Attributes.getOutputHeader() + "\n";
    p += "@data\n";
    return p;
  }

  /**
   * It prints the dataset into an string
   * @param ova a boolean variable for the OVO or OVA case. In the latter the attributes will be removed from static (new classes!)
   * @return a string containing the dataset into KEEL format
   */
  public String printDataSet(boolean ova) {
    StringBuilder cadena = new StringBuilder(doHeader());
    if (ova) {
      Attributes.clearAll();
    }
    for (int i = 0; i < size(); i++) {
      double[] ejemplo = this.getExample(i);
      for (int j = 0; j < this.getnInputs(); j++) {
        int tipo = this.getTipo(j);
        if (tipo == NOMINAL) {
          cadena.append(nominals[j][(int) ejemplo[j]]);
        }
        else if (tipo == INTEGER) {
          cadena.append((int) ejemplo[j]);
        }
        else {
          cadena.append(ejemplo[j]);
        }
        cadena.append(", ");
      }
      cadena.append(this.getOutputAsString(i)).append("\n");
    }
    return cadena.toString();
  }

  /**
   * It writes into "target" the examples of X scaled into the [0,1] range.
   * Positions that hold a missing value are not written.
   * @param target table that receives the normalized values (it may be X itself)
   */
  private void normalizeInto(double[][] target) {
    int atts = this.getnInputs();
    double[] scale = new double[atts];
    double[] offset = new double[atts];
    for (int j = 0; j < atts; j++) {
      double range;
      if (Attributes.getAttribute(j).getType() == Attribute.NOMINAL) {
        // nominal values are coded as 0 .. (number of values - 1)
        range = Attributes.getAttribute(j).getNumNominalValues() - 1;
        offset[j] = 0;
      }
      else {
        range = emax[j] - emin[j];
        offset[j] = emin[j];
      }
      // a null range would produce Infinity/NaN: map such attributes to 0
      scale[j] = (range != 0) ? 1.0 / range : 0.0;
    }
    for (int i = 0; i < this.getnData(); i++) {
      for (int j = 0; j < atts; j++) {
        if (!isMissing(i, j)) { // this process ignores missing values
          target[i][j] = (X[i][j] - offset[j]) * scale[j];
        }
      }
    }
  }

  /**
   * It copies the attribute descriptions (names, types, nominal values,
   * class names and ranges) of another data-set. Tables that the source
   * data-set does not hold are left as null.
   * @param source the data-set whose descriptions are copied
   */
  private void copyMetadataFrom(myDataset source) {
    classes = (source.classes == null) ? null : source.classes.clone();
    variables = (source.variables == null) ? null : source.variables.clone();
    types = (source.types == null) ? null : source.types.clone();
    nominals = deepCopy(source.nominals);
    nominales_head = deepCopy(source.nominales_head);
    emax = (source.emax == null) ? null : source.emax.clone();
    emin = (source.emin == null) ? null : source.emin.clone();
  }

  /**
   * It converts an array of integers into an array of doubles
   * @param values the integer values
   * @return a new array with the same values as doubles
   */
  private static double[] toReal(int[] values) {
    double[] result = new double[values.length];
    for (int i = 0; i < values.length; i++) {
      result[i] = values[i];
    }
    return result;
  }

  /**
   * It copies a table row by row
   * @param source the table to copy (it may be null)
   * @return a new table with its own rows, or null if the source is null
   */
  private static double[][] deepCopy(double[][] source) {
    if (source == null) {
      return null;
    }
    double[][] result = new double[source.length][];
    for (int i = 0; i < source.length; i++) {
      result[i] = (source[i] == null) ? null : source[i].clone();
    }
    return result;
  }

  /**
   * It copies a table row by row
   * @param source the table to copy (it may be null)
   * @return a new table with its own rows, or null if the source is null
   */
  private static String[][] deepCopy(String[][] source) {
    if (source == null) {
      return null;
    }
    String[][] result = new String[source.length][];
    for (int i = 0; i < source.length; i++) {
      result[i] = (source[i] == null) ? null : source[i].clone();
    }
    return result;
  }

  /**
   * It concatenates the given values, placing the separator between them
   * @param values the values to concatenate (it may be empty)
   * @param separator the text placed between two consecutive values
   * @return the concatenation (an empty string if there are no values)
   */
  private static String join(String[] values, String separator) {
    StringBuilder joined = new StringBuilder();
    for (int i = 0; i < values.length; i++) {
      if (i > 0) {
        joined.append(separator);
      }
      joined.append(values[i]);
    }
    return joined.toString();
  }

}