/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Neural_Networks.NNEP_Common.data;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.StringTokenizer;
/**
 * <p>
 * ArffDataSet: FileDataset implementation for ARFF (Weka) data files.
 * </p>
 * <p>
 * @author Written by Amelia Zafra, Sebastian Ventura (University of Cordoba) 17/07/2007
 * @version 0.1
 * @since JDK1.5
 * </p>
 */
public class ArffDataSet extends FileDataset{
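/*
 * A minimal usage sketch (illustrative only; the file name "data/iris.arff" and the
 * surrounding code are assumptions, not part of the KEEL sources). It relies solely on
 * the public methods of this class; DatasetException thrown by next() and close() must
 * be handled by the caller.
 *
 *   ArffDataSet dataset = new ArffDataSet("data/iris.arff");
 *   dataset.open();                          // parse the ARFF header and buffer the first data row
 *   while (dataset.next()) {                 // advance the cursor; returns false when no rows remain
 *       AbstractDataset.Instance instance = dataset.read();
 *       // ... use the instance ...
 *   }
 *   dataset.close();
 */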
/////////////////////////////////////////////////////////////////
// --------------------------------------- Serialization constant
/////////////////////////////////////////////////////////////////
/** Generated by Eclipse */
private static final long serialVersionUID = 1L;
/////////////////////////////////////////////////////////////////////////
// ------------------------------------------------- Internal Variables
/////////////////////////////////////////////////////////////////////////
/** The keyword used to denote the relation name */
static String ARFF_RELATION = "@relation";
/** The keyword used to denote the attribute description */
static String ARFF_ATTRIBUTE = "@attribute";
/** The keyword used to denote the start of the ARFF data section */
static String ARFF_DATA = "@data";
/** Symbol that represents missing values */
protected String missedValue;
/** Symbol that marks a comment line */
protected String commentedValue;
/** Symbol that separates the values of an instance */
protected String separationValue;
/** Buffer holding the next unparsed instance line */
protected String bufferInstance = "";
/////////////////////////////////////////////////////////////////
// ------------------------------------------------ Constructor
/////////////////////////////////////////////////////////////////
/**
* <p>
* Constructor with the filename and the specification file
* </p>
* @param fileName Name of the dataset file
* @param specificationFile Specification file (optional; not used by this implementation, since the metadata is read from the ARFF header when the dataset is opened)
*/
public ArffDataSet(String fileName, String... specificationFile){
super(fileName);
missedValue = "?";
separationValue = ",";
commentedValue = "%";
//Generate the specification from header of data source file
//obtainMetadata(fileName);
}
/**
* <p>
* Constructor without arguments
* </p>
*/
public ArffDataSet( ){
super();
missedValue = "?";
separationValue = ",";
commentedValue = "%";
//Generate the specification from header of data source file
//obtainMetadata(fileName);
}
/////////////////////////////////////////////////////////////////
// ------------------------- Overwriting FileDataset methods
/////////////////////////////////////////////////////////////////
/**
* <p>
* Open dataset
* </p>
* @throws DatasetException If dataset can't be opened
*/
@Override
public void open(){
// Generate the specification from header of data source file
obtainMetadata(fileName);
// Initialize variables
cursorPosition = 0;
cursorInstance = new AbstractDataset.Instance();
}
/**
* <p>
* Reset dataset
* </p>
* @throws DatasetException if a source access error occurs
*/
@Override
public void reset(){
    try {
        fileReader.close();
        fileReader = new BufferedReader(new FileReader(new File(fileName)));
        //Read until the @data sentence is found
        String line = ((BufferedReader) fileReader).readLine();
        while (line != null && !line.equalsIgnoreCase(ARFF_DATA)){
            line = ((BufferedReader) fileReader).readLine();
        }
        //Buffer the first instance, skipping comment and empty lines
        bufferInstance = ((BufferedReader) fileReader).readLine();
        while(bufferInstance != null &&
                (bufferInstance.startsWith(commentedValue) || bufferInstance.isEmpty())){
            bufferInstance = ((BufferedReader) fileReader).readLine();
        }
        cursorPosition = 0;
    } catch (FileNotFoundException e) {
        //The dataset file could not be reopened
        e.printStackTrace();
    } catch (Exception e){
        e.printStackTrace();
    }
}
/**
 * <p>
 * Advance the cursor to the next instance
 * </p>
 * @return true if a next instance was available, false otherwise
 * @throws DatasetException if a source access error occurs
 */
@Override
public boolean next() throws DatasetException {
    if(bufferInstance != null){
        try{
            cursorPosition++;
            //Parse the attribute values of the buffered instance
            StringTokenizer token = new StringTokenizer(bufferInstance, separationValue);
            int numAttributes = 0;
            while(token.hasMoreTokens()){
                IAttribute attribute = metadata.getAttribute(numAttributes);
                double value = attribute.parse(token.nextToken());
                cursorInstance.setValue(numAttributes, value);
                numAttributes++;
            }
            //Buffer the next instance line (null when the end of file is reached)
            prepareNextInstance();
        }catch(Exception e){
            e.printStackTrace();
        }
        return true;
    }
    else
        return false;
}
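/*
 * Illustrative sketch of what next() does with a buffered row such as
 * "5.1,3.5,1.4,0.2,Iris-setosa" (a hypothetical line, not taken from any shipped file):
 * the row is split on separationValue (",") and each token is handed to the matching
 * attribute's parse() method, which is assumed to map numeric text to its double value
 * and a categorical label to the index of that value.
 *
 *   cursorInstance.setValue(0, metadata.getAttribute(0).parse("5.1"));
 *   cursorInstance.setValue(1, metadata.getAttribute(1).parse("3.5"));
 *   cursorInstance.setValue(2, metadata.getAttribute(2).parse("1.4"));
 *   cursorInstance.setValue(3, metadata.getAttribute(3).parse("0.2"));
 *   cursorInstance.setValue(4, metadata.getAttribute(4).parse("Iris-setosa"));
 */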
/**
* <p>
* Returns the current cursor instance
* </p>
* @return Current instance (if it exists)
* @throws DatasetException if a source access error occurs
*/
@Override
public AbstractDataset.Instance read() throws DatasetException {
return cursorInstance;
}
/**
* <p>
* Close dataset
* </p>
* @throws DatasetException If dataset can't be closed
*/
@Override
public void close() throws DatasetException {
try {
fileReader.close();
} catch (IOException e) {
//The dataset file could not be closed properly
e.printStackTrace();
}
}
/////////////////////////////////////////////////////////////////
// ----------------------------------------------------- Methods
/////////////////////////////////////////////////////////////////
/**
* <p>
* Generate the dataset specification
* </p>
* @param file Name of data source file
*/
private void obtainMetadata(String file){
    File f1 = new File(file);
    metadata = new Metadata();
    try {
        fileReader = new BufferedReader(new FileReader(f1));
        //Read the header, skipping comment and empty lines, until the @data sentence is found
        String line = ((BufferedReader) fileReader).readLine();
        while(line != null && (line.startsWith(commentedValue) || line.isEmpty()))
            line = ((BufferedReader) fileReader).readLine();
        StringTokenizer elementLine = new StringTokenizer(line);
        String element = elementLine.nextToken();
        while (!element.equalsIgnoreCase(ARFF_DATA)){
            if(element.equalsIgnoreCase(ARFF_ATTRIBUTE)){
                //Declare the next attribute
                String name = elementLine.nextToken();
                String type = elementLine.nextToken();
                addAttributeToSpecification(type, name);
            }
            if(element.equalsIgnoreCase(ARFF_RELATION)){
                setName(elementLine.nextToken());
            }
            //Next non-comment, non-empty line of the header
            line = ((BufferedReader) fileReader).readLine();
            while(line != null && (line.startsWith(commentedValue) || line.isEmpty()))
                line = ((BufferedReader) fileReader).readLine();
            elementLine = new StringTokenizer(line);
            element = elementLine.nextToken();
        }
        //Buffer the first instance, skipping comment and empty lines
        bufferInstance = ((BufferedReader) fileReader).readLine();
        while(bufferInstance != null &&
                (bufferInstance.startsWith(commentedValue) || bufferInstance.isEmpty())){
            bufferInstance = ((BufferedReader) fileReader).readLine();
        }
    } catch (FileNotFoundException e) {
        //The dataset file could not be found
        e.printStackTrace();
    } catch (Exception e){
        e.printStackTrace();
    }
}
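/*
 * Sketch of the header layout obtainMetadata expects (an illustrative example, not a
 * file shipped with KEEL): "%" lines and empty lines are skipped, and header parsing
 * stops as soon as the @data keyword is reached.
 *
 *   % optional comment lines
 *   @relation iris
 *   @attribute sepallength REAL
 *   @attribute sepalwidth REAL
 *   @attribute petallength REAL
 *   @attribute petalwidth REAL
 *   @attribute class {Iris-setosa,Iris-versicolor,Iris-virginica}
 *   @data
 *   5.1,3.5,1.4,0.2,Iris-setosa
 */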
/**
* <p>
* Store the next instance in bufferInstance
* </p>
*/
private void prepareNextInstance(){
    try
    {
        //Read the next instance line, skipping comment and empty lines
        String lineInstance = ((BufferedReader) fileReader).readLine();
        while(lineInstance != null &&
                (lineInstance.startsWith(commentedValue) || lineInstance.isEmpty())){
            lineInstance = ((BufferedReader) fileReader).readLine();
        }
        //lineInstance is null when the end of the file has been reached
        bufferInstance = lineInstance;
    }catch(Exception e){
        //Read error: no more instances available
        bufferInstance = null;
    }
}
/**
* <p>
* Add new attribute to the dataset specification
* </p>
* @param type Attribute type
* @param name Attribute name
*/
private void addAttributeToSpecification(String type, String name){
    // If the attribute is numerical
    if(type.equalsIgnoreCase("REAL") || type.equalsIgnoreCase("NUMERIC") || type.equalsIgnoreCase("INTEGER")){
        RealNumericalAttribute attribute = new RealNumericalAttribute();
        attribute.setName(name);
        //Add the new attribute to the specification
        metadata.addAttribute(attribute);
    }
    else if(type.equalsIgnoreCase("DATE")){
        // Date attributes are not supported and are ignored
    }
    else
    {
        //Obtain the categorical values enclosed in braces
        int minIndex = type.indexOf("{");
        int maxIndex = type.indexOf("}");
        if(minIndex != -1 && minIndex < maxIndex){
            type = type.substring(minIndex+1, maxIndex);
            CategoricalAttribute attribute = new CategoricalAttribute();
            attribute.setName(name);
            StringTokenizer categories = new StringTokenizer(type, ",");
            while(categories.hasMoreTokens())
                attribute.addValue(categories.nextToken());
            //Add the new attribute to the specification
            metadata.addAttribute(attribute);
        }
    }
}
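/*
 * Mapping applied by addAttributeToSpecification (illustrative, using hypothetical
 * attribute declarations):
 *
 *   "@attribute petalwidth REAL"   -> RealNumericalAttribute named "petalwidth"
 *   "@attribute class {yes,no}"    -> CategoricalAttribute named "class" with values yes, no
 *   "@attribute moment DATE"       -> ignored (date attributes are not supported)
 *
 * Note that the header is tokenized on whitespace in obtainMetadata, so a nominal value
 * set is only recognized when it contains no spaces (e.g. "{yes,no}").
 */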
}