package jeql.command.io;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import jeql.api.error.IllegalValueException;
import jeql.api.error.InvalidInputException;
import jeql.api.error.JeqlException;
import jeql.api.row.BasicRow;
import jeql.api.row.Row;
import jeql.api.row.RowIterator;
import jeql.api.row.RowList;
import jeql.api.row.RowSchema;
/**
 * A {@link RowList} providing rows from a CSV file.
 * <p>
 * The schema is determined once, at construction time, by reading the first
 * line of the file. Every column is typed as {@link String}. Each call to
 * {@link #iterator()} opens the file again and streams its lines as rows.
 *
 * @author Martin Davis
 */
public class CSVRowList
  implements RowList
{
  /** The first line of the file is data; default column names are generated. */
  public static final int NO_COL_NAMES = 1;
  /** The first line of the file is skipped when iterating; default column names are generated. */
  public static final int SKIP_COL_NAMES = 2;
  /** The first line of the file is skipped when iterating and supplies the column names. */
  public static final int USE_COL_NAMES = 3;

  /** Column count meaning "use as many columns as the first line has". */
  private static final int ALL_COLUMNS = -1;
  /** Separator value meaning "keep the parser's own default separator". */
  private static final char DEFAULT_SEPARATOR = 0;

  private final String filename;
  private final int colNameStrategy;
  private final RowSchema schema;
  private final char colSeparator;
  private final int numColumns;

  /**
   * Creates a row list over a file with no header line, taking the number of
   * columns from the first line and using the parser's default separator.
   *
   * @param filename the path of the CSV file
   * @throws IOException if the file cannot be opened or its first line cannot be read
   */
  public CSVRowList(String filename)
    throws IOException
  {
    this(filename, NO_COL_NAMES, ALL_COLUMNS, DEFAULT_SEPARATOR);
  }

  /**
   * Creates a row list over a CSV file.
   *
   * @param filename the path of the CSV file
   * @param colNameStrategy one of {@link #NO_COL_NAMES}, {@link #SKIP_COL_NAMES}
   *          or {@link #USE_COL_NAMES}
   * @param numColumns the number of columns in the schema, or a negative value
   *          to use the number of columns found in the first line
   * @param separator the column separator, or 0 to keep the parser's default
   * @throws IOException if the file cannot be opened or its first line cannot be read
   */
  public CSVRowList(String filename, int colNameStrategy, int numColumns, char separator)
    throws IOException
  {
    this.filename = filename;
    this.colNameStrategy = colNameStrategy;
    this.numColumns = numColumns;
    this.colSeparator = separator;
    this.schema = readSchema(filename, numColumns);
  }

  /**
   * Gets the schema extracted from the file when this list was constructed.
   *
   * @return the schema of the rows
   */
  public RowSchema getSchema()
  {
    return schema;
  }

  /**
   * Builds the schema by inspecting the first line of the file.
   *
   * @param filename the path of the CSV file
   * @param numColumns the required column count, or a negative value to use
   *          the count found in the first line
   * @return the extracted schema
   * @throws IOException if the file cannot be opened or read
   */
  private RowSchema readSchema(String filename, int numColumns) throws IOException {
    SchemaExtracter se = new SchemaExtracter(filename, colNameStrategy == USE_COL_NAMES, colSeparator);
    se.setNumColumns(numColumns);
    return se.getSchema();
  }

  /**
   * Opens the file and returns an iterator over its data rows.
   * The first line is skipped unless the strategy is {@link #NO_COL_NAMES}.
   *
   * @return a new iterator positioned before the first data row
   * @throws JeqlException if the file cannot be opened or the header line cannot be read
   */
  public RowIterator iterator()
  {
    CSVRowIterator it = new CSVRowIterator(schema, filename, colNameStrategy != NO_COL_NAMES);
    if (colSeparator > 0) {
      it.setColSep(colSeparator);
    }
    return it;
  }

  /**
   * Streams the lines of a CSV file as {@link Row}s.
   * The underlying reader is closed when the end of the file is reached
   * or when reading fails.
   */
  private static class CSVRowIterator
    implements RowIterator
  {
    private final CSVRecordParser csvLineParser = new CSVRecordParser();
    private final String filename;
    private final boolean hasColNames;
    private final RowSchema schema;

    private LineNumberReader lineReader = null;
    private boolean isClosed = false;
    // number of data rows read so far (the header line is not counted)
    private int rowCount = 0;

    /**
     * Creates the iterator and opens the file immediately.
     *
     * @param schema the schema of the rows to produce
     * @param filename the path of the CSV file
     * @param hasColNames true if the first line is a header which must be skipped
     */
    public CSVRowIterator(RowSchema schema, String filename, boolean hasColNames)
    {
      this.schema = schema;
      this.filename = filename;
      this.hasColNames = hasColNames;
      init();
    }

    /**
     * Sets the column separator used to parse each line.
     *
     * @param separator the separator character
     */
    public void setColSep(char separator)
    {
      csvLineParser.setColSep(separator);
    }

    public RowSchema getSchema()
    {
      return schema;
    }

    /**
     * Opens the reader (once only) and skips the header line if there is one.
     * Does nothing if the reader is already open or has been closed.
     */
    private void init()
    {
      if (isClosed || lineReader != null) {
        return;
      }
      try {
        lineReader = new LineNumberReader(new FileReader(filename));
      }
      catch (FileNotFoundException ex) {
        throw new JeqlException(ex);
      }
      if (hasColNames) {
        // discard the header line
        readLine();
      }
    }

    /**
     * Reads the next raw line.
     *
     * @return the line, or null at end of file
     * @throws JeqlException if the read fails; the reader is closed first
     */
    private String readLine()
    {
      try {
        return lineReader.readLine();
      }
      catch (IOException ex) {
        // release the file handle - no further rows can be read reliably
        close();
        throw new JeqlException(ex);
      }
    }

    /**
     * Gets the next row.
     *
     * @return the next row, or null if the input is exhausted.
     *         Further calls after exhaustion keep returning null.
     * @throws InvalidInputException if the line cannot be converted to a row;
     *           the exception carries the 1-based count of the offending data row
     */
    public Row next()
    {
      // once closed there is no reader to read from
      if (isClosed) {
        return null;
      }
      init();
      String line = readLine();
      // if at end, can close input
      if (line == null) {
        close();
        return null;
      }
      rowCount++;
      try {
        return createRow(line);
      }
      catch (InvalidInputException e) {
        throw new InvalidInputException(e, rowCount);
      }
    }

    /**
     * Closes the reader, if open, and marks this iterator as finished.
     */
    private void close()
    {
      if (lineReader != null) {
        try {
          lineReader.close();
        }
        catch (IOException ex) {
          // eat this exception - nothing we can do about it anyway
        }
      }
      lineReader = null;
      isClosed = true;
    }

    /**
     * Parses a line into a row with one value slot per schema column.
     * Values beyond the schema width are dropped; if the line has fewer
     * values than the schema, the remaining slots are not assigned.
     *
     * @param line the raw CSV line
     * @return the populated row
     */
    private Row createRow(String line)
    {
      BasicRow row = new BasicRow(schema.size());
      String[] vals = csvLineParser.parse(line);
      int nToCopy = Math.min(vals.length, row.size());
      for (int i = 0; i < nToCopy; i++) {
        row.setValue(i, vals[i]);
      }
      return row;
    }
  }

  //=====================================================

  /**
   * Derives a {@link RowSchema} from the first line of a CSV file.
   */
  private static class SchemaExtracter
  {
    private final String filename;
    // -1 = use number of cols in CSV file schema or first line
    private int numColumns = ALL_COLUMNS;
    private final boolean useColNames;
    private final CSVRecordParser csvLineParser = new CSVRecordParser();

    /**
     * @param filename the path of the CSV file
     * @param useColNames true if the values in the first line are to be used as column names
     * @param fieldSeparator the column separator, or 0 to keep the parser's default
     */
    public SchemaExtracter(String filename, boolean useColNames, char fieldSeparator) {
      this.filename = filename;
      this.useColNames = useColNames;
      if (fieldSeparator > 0) {
        csvLineParser.setColSep(fieldSeparator);
      }
    }

    /**
     * Sets the number of columns the schema should have.
     *
     * @param numColumns -1 if all columns should be read
     */
    public void setNumColumns(int numColumns)
    {
      this.numColumns = numColumns;
    }

    /**
     * Opens the file, reads its first line and builds the schema from it.
     * The column names are collected afresh on every call, so calling this
     * more than once does not accumulate columns.
     *
     * @return a schema in which every column has type {@link String}
     * @throws IOException if the file cannot be opened, read or closed
     * @throws IllegalValueException if a name from the header line is not a valid column name
     */
    public RowSchema getSchema()
      throws IOException
    {
      List<String> colNames;
      LineNumberReader lineReader = null;
      try {
        lineReader = new LineNumberReader(new FileReader(filename));
        colNames = readCols(lineReader);
      }
      finally {
        if (lineReader != null) {
          lineReader.close();
        }
      }
      return buildSchema(colNames);
    }

    /**
     * Creates a schema of String-typed columns from a list of column names.
     *
     * @param colNames the column names, in order
     * @return the schema
     */
    private RowSchema buildSchema(List<String> colNames)
    {
      int n = colNames.size();
      String[] names = new String[n];
      Class[] types = new Class[n];
      for (int i = 0; i < n; i++) {
        // replace blanks with underscores
        names[i] = colNames.get(i).replace(" ", "_");
        types[i] = String.class;
      }
      return new RowSchema(names, types);
    }

    /**
     * Reads the first line and determines the name of each schema column.
     * Columns for which the first line provides no name (or when header
     * names are not in use) get the default name for their 1-based position.
     *
     * @param reader the reader positioned at the start of the file
     * @return the column names, in order
     * @throws IOException if the line cannot be read
     * @throws IllegalValueException if a name from the header line is not a valid column name
     */
    private List<String> readCols(LineNumberReader reader)
      throws IOException
    {
      String line = reader.readLine();
      // an empty file has no first line: treat it as containing no columns
      // rather than handing null to the parser
      String[] cols = (line == null) ? new String[0] : csvLineParser.parse(line);

      int schemaColSize = (numColumns >= 0) ? numColumns : cols.length;
      List<String> colNames = new ArrayList<String>(schemaColSize);
      for (int i = 0; i < schemaColSize; i++) {
        // start from a standard column name
        String colName = RowSchema.getDefaultColumnName(i + 1);
        if (useColNames && i < cols.length) {
          colName = cols[i];
          if (! RowSchema.isValidColumnName(colName)) {
            throw new IllegalValueException("column name", colName);
          }
        }
        colNames.add(colName);
        //TODO: check for duplicate col names and throw error if found (prevents very long rows)
      }
      return colNames;
    }
  }
}