/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /* * Exporter.java */ package keel.Algorithms.Preprocess.Converter; import keel.Dataset.*; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileReader; import java.io.FileWriter; import java.io.File; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; import com.Ostermiller.util.CSVParser; /** * <p> * <b> Exporter </b> * </p> * * Clase abstracta que contiene los métodos para exportar un fichero de * datos con formato keel a ficheros de distintos formatos. * * @author Teresa Prieto López (UCO) * @version 1.0 */ public abstract class Exporter { // Almacena la definición de los atributos del fichero. keel.Dataset.Attribute attribute[]; // Almacena los datos del fichero. Vector data[]; //Almacena el tipo de cada dato del fichero. Vector types[]; // Almacena el valor nulo del fichero de datos a crear. String nullValue = new String(); // Almacena el nombre de la relación del fichero String nameRelation = new String(); // Almacena el separador de los datos para el nuevo fichero generado. String separator = new String(); // Almacena el número de atributos existentes en el fichero de datos int numAttributes = 0; // Etiqueta para valores nominales. int NOMINAL = 0; // Etiqueta para valores enteros. int INTEGER = 1; // Etiqueta para valores reales. int REAL = 2; /* * Este método lee los datos almacenados en un fichero con formato keel * correspondiente al parámetro de entrada pathnameInput y * cargar la definición de los atributos en un vector de objetos de la * clase Attribute (del paquete keel.Dataset), los datos en el vector * data[], y el nombre de la relación en la variable miembro * nameRelation y el número de atributos en la variable miembro * numAttributes. * * @param String pathnameOutput Indica la ruta del fichero de entrada con formato Keel. * * @throws Exception */ public void Start(String pathnameInput) throws Exception { BufferedReader reader; Pattern p; Matcher m; File f; StringTokenizer token; String line = new String(); String tokenInitial = new String(); String nameAttribute = new String(); String typeAttribute = new String(); String element = new String(); String lineReduced = new String(); String filename = "tempOf"; int i; int j; int indexInitial = 0; int indexSecond = 0; int type = -1; File fileInput = new File(pathnameInput); filename = filename.concat(fileInput.getName()); reader = new BufferedReader(new FileReader(pathnameInput)); BufferedWriter auxFile = new BufferedWriter(new FileWriter(filename)); while ((line = reader.readLine()) != null) { p = Pattern.compile("\\s*,\\s*"); m = p.matcher(line); line = m.replaceAll(","); p = Pattern.compile("^\\s+"); m = p.matcher(line); line = m.replaceAll(""); p = Pattern.compile("\\s+$"); m = p.matcher(line); line = m.replaceAll(""); p = Pattern.compile("\\s+"); m = p.matcher(line); line = m.replaceAll(" "); auxFile.write(line + "\n"); } auxFile.close(); reader.close(); reader = new BufferedReader(new FileReader(filename)); /* Contamos el número de atributos que existen*/ line = reader.readLine(); token = new StringTokenizer(line, " "); while (!(line.equalsIgnoreCase("@data"))) { if (line.startsWith("@")) { tokenInitial = token.nextToken().toLowerCase(); if (tokenInitial.equals("@attribute")) { numAttributes++; } if (tokenInitial.equals("@relation")) { nameRelation = token.nextToken(); } } line = reader.readLine(); token = new StringTokenizer(line, " "); }// end while() reader.close(); /* Reservamos memoria para guardar la informacion de los atributos*/ attribute = new Attribute[numAttributes]; data = new Vector[numAttributes]; for (i = 0; i < numAttributes; i++) { attribute[i] = new Attribute(); data[i] = new Vector(); } // Insertamos la definición de los atributos en Attribute reader = new BufferedReader(new FileReader(filename)); line = reader.readLine(); i = -1; while (!(line.equalsIgnoreCase("@data"))) { if (line.startsWith("@")) { token = new StringTokenizer(line, " "); tokenInitial = token.nextToken(); if (tokenInitial.equalsIgnoreCase("@attribute")) { i++; nameAttribute = token.nextToken(); if (nameAttribute.startsWith("'")) { indexInitial = line.indexOf("\'"); indexSecond = line.indexOf("\'", indexInitial + 1); nameAttribute = line.substring(indexInitial, indexSecond + 1); } else { if (nameAttribute.contains("{")) { nameAttribute = nameAttribute.substring(0, nameAttribute.indexOf("{")); } } indexSecond = line.indexOf(nameAttribute) + nameAttribute.length(); if (nameAttribute.contains(" ") && !nameAttribute.startsWith("\'")) { nameAttribute = "'" + nameAttribute + "'"; } attribute[i].setName(nameAttribute); lineReduced = line.substring(indexSecond, line.length()); p = Pattern.compile("^\\s+"); m = p.matcher(lineReduced); lineReduced = m.replaceAll(""); p = Pattern.compile("\\s+$"); m = p.matcher(lineReduced); lineReduced = m.replaceAll(""); token = new StringTokenizer(lineReduced, " "); if (token.hasMoreTokens()) { typeAttribute = token.nextToken().toLowerCase(); if (typeAttribute.startsWith("real")) { attribute[i].setType(REAL); } else { if (typeAttribute.startsWith("integer")) { attribute[i].setType(INTEGER); } else { attribute[i].setType(NOMINAL); if (line.contains("{") && line.contains("}")) { lineReduced = line.substring(line.indexOf("{") + 1, line.indexOf("}")); p = Pattern.compile("^\\s+"); m = p.matcher(lineReduced); lineReduced = m.replaceAll(""); p = Pattern.compile("\\s+$"); m = p.matcher(lineReduced); lineReduced = m.replaceAll(""); if (lineReduced != "") { StringTokenizer listValues = new StringTokenizer(lineReduced, ","); while (listValues.hasMoreTokens()) { attribute[i].addNominalValue(listValues.nextToken()); } } } }//end else }//end else }//end if type = attribute[i].getType(); if (type == REAL || type == INTEGER) { if (line.contains("[") && line.contains("]")) { lineReduced = line.substring(line.indexOf("[") + 1, line.indexOf("]")); p = Pattern.compile("^\\s+"); m = p.matcher(lineReduced); lineReduced = m.replaceAll(""); p = Pattern.compile("\\s+$"); m = p.matcher(lineReduced); lineReduced = m.replaceAll(""); if (lineReduced != "") { StringTokenizer range = new StringTokenizer(lineReduced, ","); if (type == REAL) { attribute[i].setBounds(Double.valueOf(range.nextToken()), Double.valueOf(range.nextToken())); } if (type == INTEGER) { attribute[i].setBounds(Integer.valueOf(range.nextToken()), Integer.valueOf(range.nextToken())); } } } } }//end if() }//end if() line = reader.readLine(); }//end while() /* Almacenamos los datos en un fichero temporal para luego poder ser parseado con CSVParser por ',' */ BufferedWriter writer = new BufferedWriter(new FileWriter("temp")); while ((line = reader.readLine()) != null) { // Saltamos las líneas comentadas if (!line.startsWith("%") && !line.equals("\n") && !line.equals("\r") && !line.equals("")) { line = line.replace("'", "\""); writer.write(line + "\n"); } } writer.close(); reader.close(); FileReader filereader = new FileReader("temp"); String[][] values = CSVParser.parse(filereader, ','); filereader.close(); for (i = 0; i < values.length; i++) { for (j = 0; j < numAttributes; j++) { element = values[i][j]; element = element.replace("\r", " "); element = element.replace("\n", " "); type = attribute[j].getType(); if (element.equalsIgnoreCase("?") || element.equalsIgnoreCase("<null>") || element.equals("")) { data[j].addElement(nullValue); } else { data[j].addElement(element); } } } /* Recogemos la lista de valores nominales de los datos, para aquellos atributos que no hayan definido la lista en la declaración */ for (i = 0; i < numAttributes; i++) { type = attribute[i].getType(); if (type == NOMINAL && attribute[i].getNumNominalValues() == 0) { for (j = 0; j < data[0].size(); j++) { element = (String) data[i].elementAt(j); if (!(attribute[i].isNominalValue(element))) { attribute[i].addNominalValue(element); } } } } f = new File(filename); f.delete(); f = new File("temp"); f.delete(); }//end Start() }//end Class Exporter