/*
* Encog(tm) Core v3.4 - Java Version
* http://www.heatonresearch.com/encog/
* https://github.com/encog/encog-java-core
* Copyright 2008-2016 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.util.csv;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.encog.EncogError;
/**
* Read and parse CSV format files.
*/
public class ReadCSV {
/**
* Format a date.
*
* @param date
* The date to format.
* @return The formatted date.
*/
public static String displayDate(final Date date) {
final DateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
return sdf.format(date);
}
/**
* Parse a date.
*
* @param when
* The date string.
* @return The parsed date.
*/
public static Date parseDate(final String when) {
try {
final DateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
return sdf.parse(when);
} catch (final ParseException e) {
return null;
}
}
/**
* The standard date format to be used.
*/
private final DateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
/**
* The CSV format to use.
*/
private CSVFormat format;
/**
* The buffered reader to read the file.
*/
private final BufferedReader reader;
/**
* The names of the columns.
*/
private final Map<String, Integer> columns = new HashMap<String, Integer>();
/**
* The data.
*/
private String[] data;
/**
* The column names.
*/
private List<String> columnNames = new ArrayList<String>();
/**
* Used to parse the CSV.
*/
private ParseCSVLine parseLine;
/**
* Construct a CSV reader from an input stream. Allows a delimiter character
* to be specified. Numbers will be parsed using the current locale.
*
* @param is
* The InputStream to read from.
* @param headers
* Are headers present?
* @param delim
* What is the delimiter.
*/
public ReadCSV(final InputStream is, final boolean headers,
final char delim) {
final CSVFormat format = new CSVFormat(CSVFormat.getDecimalCharacter(),
delim);
this.parseLine = new ParseCSVLine(format);
this.reader = new BufferedReader(new InputStreamReader(is));
begin(headers, format);
}
/**
* Construct a CSV reader from an input stream. The format parameter
* specifies the separator character to use, as well as the number format.
*
* @param is
* The InputStream to read from.
* @param headers
* Are headers present?
* @param format
* What is the CSV format.
*/
public ReadCSV(final InputStream is, final boolean headers,
final CSVFormat format) {
this.reader = new BufferedReader(new InputStreamReader(is));
begin(headers, format);
}
/**
* Construct a CSV reader from a filename. The format parameter specifies
* the separator character to use, as well as the number format.
*
* @param file
* The file.
* @param headers
* The headers.
* @param delim
* The delimiter.
*/
public ReadCSV(final File file, final boolean headers,
final char delim) {
try {
final CSVFormat format = new CSVFormat(CSVFormat
.getDecimalCharacter(), delim);
this.parseLine = new ParseCSVLine(format);
this.reader = new BufferedReader(new FileReader(file));
begin(headers, format);
} catch (final IOException e) {
throw new EncogError(e);
}
}
public ReadCSV(final String filename, final boolean headers,
final char delim) {
this(new File(filename),headers,delim);
}
public ReadCSV(final String filename, final boolean headers,
final CSVFormat format) {
this(new File(filename),headers,format);
}
/**
* Construct a CSV reader from a filename. Allows a delimiter character to
* be specified.
*
* @param file
* The file.
* @param headers
* The headers.
* @param format
* The format.
*/
public ReadCSV(final File file, final boolean headers,
final CSVFormat format) {
try {
this.reader = new BufferedReader(new FileReader(file));
this.parseLine = new ParseCSVLine(format);
begin(headers, format);
} catch (final IOException e) {
throw new EncogError(e);
}
}
/**
* Reader the headers.
*
* @param headers
* Are headers present.
* @param format The format to use.
*/
private void begin(final boolean headers, final CSVFormat format) {
try {
this.parseLine = new ParseCSVLine(format);
this.format = format;
// read the column heads
if (headers) {
final String line = this.reader.readLine();
// Are we trying to parse an empty file?
if( line==null ) {
this.columnNames.clear();
return;
}
final List<String> tok = this.parseLine.parse(line);
int i = 0;
this.columnNames.clear();
for (final String header : tok) {
this.columnNames.add(header.toLowerCase());
this.columns.put(header.toLowerCase(), i++);
}
}
this.data = null;
} catch (final IOException e) {
throw new EncogError(e);
}
}
/**
* Close the file.
*
*/
public void close() {
try {
this.reader.close();
} catch (final IOException e) {
throw new EncogError(e);
}
}
/**
* Get the specified column as a string.
*
* @param i
* The column index, starting at zero.
* @return The column as a string.
*/
public String get(final int i) {
if( i>=this.data.length ) {
throw new EncogError("Can't access column " + i + " in a file that has only " + data.length + " columns.");
}
return this.data[i];
}
/**
* Get the column by its string name, as a string. This will only work if
* column headers were defined that have string names.
*
* @param column
* The column name.
* @return The column data as a string.
*/
public String get(final String column) {
final Integer i = this.columns.get(column.toLowerCase());
if (i == null) {
return null;
}
return this.data[i.intValue()];
}
/**
* Get the column count.
*
* @return The column count.
*/
public int getColumnCount() {
if (this.data == null) {
return 0;
}
return this.data.length;
}
/**
* Get the column as a date.
*
* @param column
* The column header name.
* @return The column as a date.
*/
public Date getDate(final String column) {
try {
final String str = get(column);
return this.sdf.parse(str);
} catch (final ParseException e) {
throw new EncogError(e);
}
}
/**
* Get the column as a double specified by index.
*
* @param index
* The column index, starting at zero.
* @return The data at the specified column.
*/
public double getDouble(final int index) {
final String str = get(index);
return this.format.parse(str);
}
/**
* Get the specified column as a double.
*
* @param column
* The column name that we want to get.
* @return The column data as a double.
*/
public double getDouble(final String column) {
final String str = get(column);
return this.format.parse(str);
}
/**
* Obtain a column as an integer referenced by a string.
*
* @param i
* The column header name being read.
* @return The column data as an integer.
*/
public int getInt(final int i) {
final String str = get(i);
try {
return this.format.getNumberFormatter().parse(str).intValue();
} catch (final ParseException e) {
throw new CSVError(e);
}
}
/**
* Count the columns and create a an array to hold them.
*
* @param line
* One line from the file
*/
private void initData(final String line) {
final List<String> tok = this.parseLine.parse(line);
this.data = new String[tok.size()];
}
/**
* Read the next line.
*
* @return True if there are more lines to read.
*/
public boolean next() {
try {
String line = null;
do {
line = this.reader.readLine();
} while( (line!=null) && line.trim().length()==0 );
if (line == null) {
return false;
}
if (this.data == null) {
initData(line);
}
final List<String> tok = this.parseLine.parse(line);
int i = 0;
for (final String str : tok) {
if (i < this.data.length) {
this.data[i++] = str;
}
}
return true;
} catch (final IOException e) {
throw new EncogError(e);
}
}
public List<String> getColumnNames() {
return this.columnNames;
}
public CSVFormat getFormat() {
return this.format;
}
public boolean hasMissing() {
for(int i=0;i<this.data.length;i++ ) {
String s = this.data[i].trim();
if( s.length()==0 || s.equals("?") ) {
return true;
}
}
return false;
}
}