/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see http://www.gnu.org/licenses/

**********************************************************************/

/**
 * <p>
 * @author Written by Manuel Moreno (Universidad de Córdoba) 01/07/2008
 * @version 0.1
 * @since JDK 1.5
 * </p>
 */

package keel.Algorithms.Decision_Trees.CART.dataset;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import keel.Algorithms.Neural_Networks.NNEP_Common.data.AttributeType;
import keel.Algorithms.Neural_Networks.NNEP_Common.data.CategoricalAttribute;
import keel.Algorithms.Neural_Networks.NNEP_Common.data.DatasetException;
import keel.Algorithms.Neural_Networks.NNEP_Common.data.IAttribute;
import keel.Algorithms.Neural_Networks.NNEP_Common.data.IMetadata;
import keel.Algorithms.Neural_Networks.NNEP_Common.data.IntegerNumericalAttribute;
import keel.Algorithms.Neural_Networks.NNEP_Common.data.KeelDataSet;
import keel.Algorithms.Neural_Networks.NNEP_Common.data.RealNumericalAttribute;

import net.sf.jclec.util.intset.Interval;

/**
 * This class helps to manage the conversion from KeelDataset to DoubleTransposedDataset.
 */
public class DataSetManager {

	/** Data set header */
	private static String header;

	/** Output attribute */
	private static IAttribute outputAttribute;

	/**
	 * It returns the header
	 *
	 * @return the header
	 */
	public static String getHeader() {
		return header;
	}

	/**
	 * It returns the output attribute
	 *
	 * @return the outputAttribute
	 */
	public static IAttribute getOutputAttribute() {
		return outputAttribute;
	}

	/**
	 * Reads the schema from the KEEL data set file
	 *
	 * @param fileName Name of the KEEL dataset file
	 * @return role assigned to each attribute: 1 for input, 2 for output, -1 for ignored
	 * @throws IOException      if the file cannot be read
	 * @throws DatasetException if the data set is not well formed
	 */
	public static byte[] readSchema(String fileName) throws IOException, DatasetException {

		KeelDataSet dataset = new KeelDataSet(fileName);
		dataset.open();

		File file = new File(fileName);

		List<String> inputIds = new ArrayList<String>();
		List<String> outputIds = new ArrayList<String>();

		// Parse the file header to collect the @inputs and @outputs attribute names
		Reader reader = new BufferedReader(new FileReader(file));
		String line = ((BufferedReader) reader).readLine();

		StringTokenizer elementLine = new StringTokenizer(line);
		String element = elementLine.nextToken();

		while (!element.equalsIgnoreCase("@data")) {
			if (element.equalsIgnoreCase("@inputs")) {
				while (elementLine.hasMoreTokens()) {
					StringTokenizer commaTokenizer = new StringTokenizer(elementLine.nextToken(), ",");
					while (commaTokenizer.hasMoreTokens())
						inputIds.add(commaTokenizer.nextToken());
				}
			}
			else if (element.equalsIgnoreCase("@outputs")) {
				while (elementLine.hasMoreTokens()) {
					StringTokenizer commaTokenizer = new StringTokenizer(elementLine.nextToken(), ",");
					while (commaTokenizer.hasMoreTokens())
						outputIds.add(commaTokenizer.nextToken());
				}
			}

			// Next line of the file, skipping comment and blank lines
			line = ((BufferedReader) reader).readLine();
			while (line.startsWith("%") || line.equalsIgnoreCase(""))
				line = ((BufferedReader) reader).readLine();

			elementLine = new StringTokenizer(line);
			element = elementLine.nextToken();
		}

		// The header has been parsed; the reader is no longer needed
		reader.close();

		IMetadata metadata = dataset.getMetadata();
		byte[] schema = new byte[metadata.numberOfAttributes()];

		if (inputIds.isEmpty() || outputIds.isEmpty()) {
			// No explicit @inputs/@outputs: the last attribute is the output, the rest are inputs
			for (int i = 0; i < schema.length; i++) {
				if (i != (schema.length - 1))
					schema[i] = 1;
				else {
					outputAttribute = metadata.getAttribute(i);
					schema[i] = 2;
					//consoleReporter.setOutputAttribute(outputAttribute);
				}
			}
		}
		else {
			for (int i = 0; i < schema.length; i++) {
				if (inputIds.contains(metadata.getAttribute(i).getName()))
					schema[i] = 1;
				else if (outputIds.contains(metadata.getAttribute(i).getName())) {
					outputAttribute = metadata.getAttribute(i);
					schema[i] = 2;
					//consoleReporter.setOutputAttribute(outputAttribute);
				}
				else
					schema[i] = -1;
			}
		}

		// Rebuild the KEEL header of the data set from its metadata
		StringBuffer sheader = new StringBuffer();
		sheader.append("@relation " + dataset.getName() + "\n");
		for (int i = 0; i < metadata.numberOfAttributes(); i++) {
			IAttribute attribute = metadata.getAttribute(i);
			sheader.append("@attribute " + attribute.getName() + " ");
			if (attribute.getType() == AttributeType.Categorical) {
				CategoricalAttribute catAtt = (CategoricalAttribute) attribute;
				Interval interval = catAtt.intervalValues();
				sheader.append("{");
				for (int j = (int) interval.getLeft(); j <= interval.size() + 1; j++) {
					sheader.append(catAtt.show(j) + (j != interval.size() + 1 ? "," : "}\n"));
				}
			}
			else if (attribute.getType() == AttributeType.IntegerNumerical) {
				IntegerNumericalAttribute intAtt = (IntegerNumericalAttribute) attribute;
				sheader.append("integer[" + (int) intAtt.intervalValues().getLeft() + ","
						+ (int) intAtt.intervalValues().getRight() + "]\n");
			}
			else if (attribute.getType() == AttributeType.DoubleNumerical) {
				RealNumericalAttribute doubleAtt = (RealNumericalAttribute) attribute;
				sheader.append("real[" + doubleAtt.intervalValues().getLeft() + ","
						+ doubleAtt.intervalValues().getRight() + "]\n");
			}
		}
		sheader.append("@data\n");
		header = sheader.toString();
		//consoleReporter.setHeader(header.toString());

		dataset.close();

		return schema;
	}
}
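/*
 * Usage sketch (illustrative only, not part of the original KEEL class): it shows
 * how readSchema is typically combined with the accessors above. The data set file
 * name below is a hypothetical placeholder; in KEEL it would come from the
 * experiment configuration. The role codes (1 = input, 2 = output, -1 = ignored)
 * are the values assigned by readSchema.
 *
 *   byte[] schema = DataSetManager.readSchema("data/iris-10-1tra.dat"); // hypothetical path
 *   String keelHeader = DataSetManager.getHeader();          // KEEL header rebuilt by readSchema
 *   IAttribute output = DataSetManager.getOutputAttribute(); // output attribute of the data set
 *   for (int i = 0; i < schema.length; i++)
 *       System.out.println("attribute " + i + " role = " + schema[i]);
 */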