/*
* Encog(tm) Core v2.5 - Java Version
* http://www.heatonresearch.com/encog/
* http://code.google.com/p/encog-java/
* Copyright 2008-2010 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.neural.data.csv;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.encog.neural.data.NeuralData;
import org.encog.neural.data.NeuralDataError;
import org.encog.neural.data.NeuralDataPair;
import org.encog.neural.data.NeuralDataSet;
import org.encog.neural.data.basic.BasicNeuralData;
import org.encog.neural.data.basic.BasicNeuralDataPair;
import org.encog.util.csv.ReadCSV;
import org.encog.util.csv.CSVFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * An implementation of the NeuralDataSet interface designed to provide a CSV
 * file to the neural network. This implementation uses the BasicNeuralData to
 * hold the data being read. This class has no ability to write CSV files. The
 * columns of the CSV file will specify both the input and ideal columns.
 *
 * This class is not memory based, so very long files can be used, without
 * running out of memory.
 *
 * This class can be slow, as it can take a while to parse all of the numbers
 * in a really long CSV file. Because of this it is best not to use this set
 * directly with a training algorithm. Rather use it to load a
 * BufferedNeuralDataSet.
 *
 * Every iterator handed out by {@link #iterator()} opens its own reader on
 * the file. The reader is released when the iterator runs out of rows, when
 * the iterator is closed, or when {@link #close()} is called on the data set.
 * This class performs no synchronization of its own.
 *
 * @author jheaton
 */
public class CSVNeuralDataSet implements NeuralDataSet {

    /**
     * An iterator designed to read from CSV files. Each instance owns one
     * ReadCSV reader, which it releases as soon as the rows are exhausted or
     * {@link #close()} is called.
     *
     * @author jheaton
     */
    public class CSVNeuralIterator implements Iterator<NeuralDataPair> {

        /**
         * A ReadCSV object used to parse the CSV file. Set to null once this
         * iterator has been closed.
         */
        private ReadCSV reader;

        /**
         * Is there data that has been read and is ready?
         */
        private boolean dataReady;

        /**
         * Default constructor. Create a new iterator from the parent class,
         * opening a reader with the parent's filename, header flag and format.
         */
        public CSVNeuralIterator() {
            this.reader = new ReadCSV(CSVNeuralDataSet.this.filename,
                    CSVNeuralDataSet.this.headers,
                    CSVNeuralDataSet.this.format);
            this.dataReady = false;
        }

        /**
         * Close the iterator, and the underlying CSV file. Calling this more
         * than once is harmless; after the first call {@link #hasNext()}
         * always returns false.
         */
        public void close() {
            if (this.reader == null) {
                return;
            }
            // Detach the reader first so a failure inside ReadCSV.close()
            // cannot leave this iterator pointing at a half-closed reader.
            final ReadCSV open = this.reader;
            this.reader = null;
            this.dataReady = false;
            // Stop tracking this iterator so the parent's list does not grow
            // with iterators that are already closed.
            CSVNeuralDataSet.this.iterators.remove(this);
            open.close();
        }

        /**
         * Determine if there is more data to be read. When the reader reports
         * that no further row is available the iterator closes itself.
         *
         * @return True if there is more data to be read.
         */
        public boolean hasNext() {
            if (this.reader == null) {
                return false;
            }
            if (this.dataReady) {
                // A row was already fetched by an earlier call and has not
                // been consumed by next() yet.
                return true;
            }
            if (this.reader.next()) {
                this.dataReady = true;
                return true;
            }
            // Nothing left to read: release the file right away.
            close();
            return false;
        }

        /**
         * Read the next record from the CSV file. The first inputSize columns
         * become the input data; when idealSize is greater than zero the
         * following idealSize columns become the ideal data, otherwise the
         * ideal part of the pair is null.
         *
         * @return The next data pair read.
         * @throws java.util.NoSuchElementException
         *             If there are no more rows, or the iterator was closed.
         */
        public NeuralDataPair next() {
            // hasNext() is what actually advances the reader, so it must run
            // here for callers that invoke next() without calling it first.
            if (!hasNext()) {
                throw new java.util.NoSuchElementException(
                        "No more data to read from the CSV file.");
            }

            final int inputCount = CSVNeuralDataSet.this.inputSize;
            final int idealCount = CSVNeuralDataSet.this.idealSize;

            final NeuralData input = new BasicNeuralData(inputCount);
            for (int i = 0; i < inputCount; i++) {
                input.setData(i, this.reader.getDouble(i));
            }

            NeuralData ideal = null;
            if (idealCount > 0) {
                ideal = new BasicNeuralData(idealCount);
                for (int i = 0; i < idealCount; i++) {
                    // Ideal columns sit directly after the input columns.
                    ideal.setData(i, this.reader.getDouble(i + inputCount));
                }
            }

            // The current row is consumed; the next hasNext() must advance.
            this.dataReady = false;
            return new BasicNeuralDataPair(input, ideal);
        }

        /**
         * Removes are not supported.
         *
         * @throws UnsupportedOperationException
         *             Always.
         */
        public void remove() {
            CSVNeuralDataSet.this.logger
                    .error("Called remove, unsupported operation.");
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Error message indicating that adds are not supported.
     */
    public static final String ADD_NOT_SUPPORTED =
            "Adds are not supported with this dataset, it is read only.";

    /**
     * The logging object.
     */
    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    /**
     * The CSV filename to read from.
     */
    private final String filename;

    /**
     * The number of columns of input data.
     */
    private final int inputSize;

    /**
     * The number of columns of ideal data.
     */
    private final int idealSize;

    /**
     * The format of this CSV file.
     */
    private final CSVFormat format;

    /**
     * Specifies if headers are present on the first row.
     */
    private final boolean headers;

    /**
     * The iterators created by {@link #iterator()} that are still open.
     */
    private final List<CSVNeuralIterator> iterators =
            new ArrayList<CSVNeuralIterator>();

    /**
     * Construct this data set using the CSVFormat.ENGLISH format.
     *
     * @param filename
     *            The CSV filename to read.
     * @param inputSize
     *            The number of columns that make up the input set.
     * @param idealSize
     *            The number of columns that make up the ideal set.
     * @param headers
     *            True if headers are present on the first line.
     */
    public CSVNeuralDataSet(final String filename, final int inputSize,
            final int idealSize, final boolean headers) {
        this(filename, inputSize, idealSize, headers, CSVFormat.ENGLISH);
    }

    /**
     * Construct this data set using the specified CSV format.
     *
     * @param filename
     *            The CSV filename to read.
     * @param inputSize
     *            The number of columns that make up the input set.
     * @param idealSize
     *            The number of columns that make up the ideal set.
     * @param headers
     *            True if headers are present on the first line.
     * @param format
     *            What CSV format to use.
     */
    public CSVNeuralDataSet(final String filename, final int inputSize,
            final int idealSize, final boolean headers,
            final CSVFormat format) {
        this.filename = filename;
        this.inputSize = inputSize;
        this.idealSize = idealSize;
        this.format = format;
        this.headers = headers;
    }

    /**
     * Adds are not supported.
     *
     * @param data1
     *            Not used.
     * @throws NeuralDataError
     *             Always, this data set is read only.
     */
    public void add(final NeuralData data1) {
        this.logger.error(CSVNeuralDataSet.ADD_NOT_SUPPORTED);
        throw new NeuralDataError(CSVNeuralDataSet.ADD_NOT_SUPPORTED);
    }

    /**
     * Adds are not supported.
     *
     * @param inputData
     *            Not used.
     * @param idealData
     *            Not used.
     * @throws NeuralDataError
     *             Always, this data set is read only.
     */
    public void add(final NeuralData inputData, final NeuralData idealData) {
        this.logger.error(CSVNeuralDataSet.ADD_NOT_SUPPORTED);
        throw new NeuralDataError(CSVNeuralDataSet.ADD_NOT_SUPPORTED);
    }

    /**
     * Adds are not supported.
     *
     * @param inputData
     *            Not used.
     * @throws NeuralDataError
     *             Always, this data set is read only.
     */
    public void add(final NeuralDataPair inputData) {
        this.logger.error(CSVNeuralDataSet.ADD_NOT_SUPPORTED);
        throw new NeuralDataError(CSVNeuralDataSet.ADD_NOT_SUPPORTED);
    }

    /**
     * Close any iterators from this dataset that are still open.
     */
    public void close() {
        // Work on a snapshot: each iterator removes itself from the tracking
        // list while closing, which must not disturb this loop.
        final List<CSVNeuralIterator> open =
                new ArrayList<CSVNeuralIterator>(this.iterators);
        this.iterators.clear();
        for (final CSVNeuralIterator iterator : open) {
            iterator.close();
        }
    }

    /**
     * @return The filename of the CSV file being read.
     */
    public String getFilename() {
        return this.filename;
    }

    /**
     * @return The CSV format used to parse the file.
     */
    public CSVFormat getFormat() {
        return this.format;
    }

    /**
     * @return The size of the ideal data.
     */
    public int getIdealSize() {
        return this.idealSize;
    }

    /**
     * @return The size of the input data.
     */
    public int getInputSize() {
        return this.inputSize;
    }

    /**
     * Get an iterator to use with the CSV data. The iterator is tracked by
     * this data set so that {@link #close()} can release its reader.
     *
     * @return An iterator.
     */
    public Iterator<NeuralDataPair> iterator() {
        final CSVNeuralIterator result = new CSVNeuralIterator();
        this.iterators.add(result);
        return result;
    }

    /**
     * @return True if this training data is supervised, that is, if it has
     *         at least one ideal column.
     */
    @Override
    public boolean isSupervised() {
        return this.idealSize > 0;
    }
}