/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.GraphInterKeel.datacf.util; import java.util.Vector; import keel.Dataset.*; /** * <p> * @author Jesus Alcala Fernandez 24-6-2004 * @author Modified by Pedro Antonio Gutiérrez and Juan Carlos Fernández (University of Córdoba) 23/10/2008 * @version 1.0 * @since JDK1.5 * </p> */ public class Dataset { /** * Class for representing the information contained in a Dataset */ /** Example number */ private int nData; /** Variable number */ private int nVariables; /** Input number */ protected int nInputs; /** Output number */ protected int nOutputs; /** Relation name */ private String relation; /** Data matrix */ private Vector dataVector; /** Column names */ private Vector attributes; /** Data types */ private Vector types; /** Type element names */ private Vector ranges; /** Selected input variables */ private Vector selInputs; /** Selected output variables */ private Vector selOutputs; /** Debuggin flag */ final static boolean debug = true; /** * <p> * Return the relation name * </p> * @return relation name */ public String getRelacion() { return (relation); } /** * <p> * Returns a vector of vectors in which each vector is an example/pattern * Note: each value is stored as a String (must be converted) * </p> * @return Vector of vector for patterns */ public Vector getDataVector() { return (dataVector); } /** * <p> * Returns a vector with variable names * </p> * @return Vector with variable names */ public Vector getAttributes() { return (attributes); } /** * <p> * Return a vector with variable types (integer, real, nominal) * </p> * @return Vector with variable types (integer, real, nominal) */ public Vector getTypes() { return (types); } /** * <p> * Return a vector of vectors in which each vector contains the ranges * for the variable * </p> * @return Vector of vectors in which each vector contains the ranges * for the variable */ public Vector getRanges() { return (ranges); } /** * <p> * Return a vector that contains input variables * </p> * @return Vector that contains input variables */ public Vector getInputs() { return (selInputs); } /** * <p> * Return a vector that contains output variables * </p> * @return Vector that contains output variables */ public Vector getOutputs() { return (selOutputs); } /** * <p> * Return variable number * </p> * * @return Variable number */ public int getNVariables() { return (nVariables); } /** * <p> * Sets input variable number * </p> * @param nVariables Input variable number */ public void setNVariables(int nVariables) { this.nVariables = nVariables; } /** * <p> * Return input variable number * </p> * @return Input variable number */ public int getNInputs() { return (nInputs); } /** * <p> * Set input variable number * </p> * @param nInputs New Input Variable Number */ public void setNentradas(int nInputs) { this.nInputs = nInputs; } /** * <p> * Return output variable number * </p> * @return Output variable number */ public int getNOutputs() { return (nOutputs); } /** * <p> * Set output variable number * </p> * @param nOutputs New output variable number */ public void setNOutputs(int nOutputs) { this.nOutputs = nOutputs; } /** * <p> * Return example/patterns number * </p> * @return Example/patterns number */ public int getNData() { return (nData); } /** * <p> * Return attribute name at index position * </p> * @param index Index position * @return Attribute name at index position */ public String getAttributeIndex(int index) { return ((String) attributes.elementAt(index)); } /** * <p> * Return attribute type at index position * </p> * @param index Index position * @return Attribute type at index position */ public String getAttributeTypeIndex(int index) { return ((String) types.elementAt(index)); } /** * <p> * Return example/pattern at index position * </p> * @param index Index position * @return Example/pattern at index position */ public Vector getPatternIndex(int index) { return ((Vector) dataVector.elementAt(index)); } /** * <p> * Return data at position (i,j) * </p> * @param i position i * @param j position j * @return Data at position (i,j) */ public String getDataIndex(int i, int j) { return ((String) ((Vector) dataVector.elementAt(i)).elementAt(j)); } /** * <p> * Return range of index variable * </p> * @param index Index position * @return Range of index variable */ public Vector getRange(int index) { return ((Vector) ranges.elementAt(index)); } /** * <p> * Return range value at index of var variable * </p> * @param var Number of int variable * @param index Index position * @return Range value at index of var variable */ public Integer getRangesInt(int var, int index) { return ((Integer) ((Vector) ranges.elementAt(var)).elementAt(index)); } /** * <p> * Return range value at index of var variable * </p> * @param var Number of double variable * @param index Index position * @return Range value at index of var variable */ public Double getRangesReal(int var, int index) { return ((Double) ((Vector) ranges.elementAt(var)).elementAt(index)); } /** * <p> * Returns range value at index of var variable * </p> * @param var Number of enum variable * @param index Index position * @return Range value at index of var variable */ public String getRangesEnum(int var, int index) { return ((String) ((Vector) ranges.elementAt(var)).elementAt(index)); } /** * <p> * Return the range of a variable * </p> * @param variableName Name of the variable * @return Range of the variable */ public Vector getRangesVar(String variableName) { int i; for (i = 0; i < nVariables; i++) { if (variableName.equals(attributes.elementAt(i))) { return ((Vector) ranges.elementAt(i)); } } return (null); } /** * <p> * Read a DataSet * </p> * @param fileName File Name with a dataset */ public Dataset(String fileName) { try { dataVector = new Vector(); attributes = new Vector(); types = new Vector(); ranges = new Vector(); selInputs = new Vector(); selOutputs = new Vector(); nVariables = nInputs = nOutputs = nData = 0; Attributes.clearAll(); InstanceSet data = new InstanceSet(); System.out.println("Before reading the dataset"); data.readSet(fileName, true); System.out.println("After reading the dataset"); data.setAttributesAsNonStatic(); InstanceAttributes attrib = data.getAttributeDefinitions(); relation = fileName.replaceAll(".dat", ""); relation=relation.substring(relation.lastIndexOf("\\")+1, relation.length()); nData = data.getNumInstances(); nVariables = attrib.getNumAttributes(); nInputs = attrib.getInputNumAttributes(); nOutputs = attrib.getOutputNumAttributes(); for (int i = 0; i < nVariables; i++) { keel.Dataset.Attribute a = attrib.getAttribute(i); // name attributes.addElement(a.getName()); // type int tipo = a.getType(); if (tipo == keel.Dataset.Attribute.NOMINAL) { types.addElement(new String("nominal")); } else if (tipo == keel.Dataset.Attribute.REAL) { types.addElement(new String("real")); } else if (tipo == keel.Dataset.Attribute.INTEGER) { types.addElement(new String("integer")); } // rank if (tipo == keel.Dataset.Attribute.NOMINAL) { ranges.addElement(a.getNominalValuesList()); } else if (tipo == keel.Dataset.Attribute.REAL) { Vector r = new Vector(); r.addElement(new Double(a.getMinAttribute())); r.addElement(new Double(a.getMaxAttribute())); ranges.addElement(r); } else if (tipo == keel.Dataset.Attribute.INTEGER) { Vector r = new Vector(); r.addElement(new Integer((int) a.getMinAttribute())); r.addElement(new Integer((int) a.getMaxAttribute())); ranges.addElement(r); } // function int funcion = a.getDirectionAttribute(); if (funcion == keel.Dataset.Attribute.INPUT) { selInputs.addElement(a.getName()); } else if (funcion == keel.Dataset.Attribute.OUTPUT) { selOutputs.addElement(a.getName()); } } // data for (int i = 0; i < nData; i++) { Instance inst = data.getInstance(i); Vector fila = new Vector(); int ent = 0; int sal = 0; for (int j = 0; j < nVariables; j++) { int funcion = attrib.getAttribute(j).getDirectionAttribute(); if (funcion == keel.Dataset.Attribute.INPUT) { int tipo = attrib.getAttribute(j).getType(); if (tipo == keel.Dataset.Attribute.NOMINAL) { if (inst.getInputMissingValues(ent) == false) { fila.addElement(inst.getInputNominalValues(ent)); } else { fila.addElement(null); } } else if (tipo == keel.Dataset.Attribute.REAL) { if (inst.getInputMissingValues(ent) == false) { fila.addElement(Double.toString(inst.getInputRealValues(ent))); } else { fila.addElement(null); } } else if (tipo == keel.Dataset.Attribute.INTEGER) { if (inst.getInputMissingValues(ent) == false) { fila.addElement(Integer.toString((int) inst.getInputRealValues(ent))); } else { fila.addElement(null); } } ent++; } else if (funcion == keel.Dataset.Attribute.OUTPUT) { int tipo = attrib.getAttribute(j).getType(); if (tipo == keel.Dataset.Attribute.NOMINAL) { fila.addElement(inst.getOutputNominalValues(sal)); } else if (tipo == keel.Dataset.Attribute.REAL) { fila.addElement(Double.toString(inst.getOutputRealValues(sal))); } else if (tipo == keel.Dataset.Attribute.INTEGER) { fila.addElement(Integer.toString((int) inst.getOutputRealValues(sal))); } sal++; } } dataVector.addElement(fila); } } catch (Exception e) { } } }