/*
* CSVReader.java
*
*/
package org.smartly.commons.csv;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
 * Simple CSV reader that tokenizes character data line by line.
 * <p>
 * The separator character, the quote character and the number of leading
 * lines to skip are configurable; their defaults come from
 * {@link ICSVConstants}. A quoted field may span several physical lines, and a
 * doubled quote character inside a quoted field stands for one literal quote.
 * <p>
 * A reader must be supplied (through a constructor or
 * {@link #setReader(Reader)}) before any read method is called. Instances keep
 * mutable parsing state and perform no synchronization.
 */
public class CSVReader implements ICSVConstants {

    private Reader _reader;
    private char _separator = DEFAULT_SEPARATOR;
    private char _quotechar = DEFAULT_QUOTE_CHARACTER;
    private int _skipLines = DEFAULT_SKIP_LINES;
    //-- late initialized --//
    private BufferedReader __reader;
    // internal fields
    private boolean _hasNext = true;
    private boolean _linesSkipped = false;

    //<editor-fold defaultstate="collapsed" desc=" Constructors ">
    /**
     * Constructs a CSVReader without a source. {@link #setReader(Reader)} must
     * be called before reading.
     */
    public CSVReader() {
    }

    /**
     * Constructs CSVReader using the default separator, quote character and
     * number of skipped lines.
     *
     * @param reader the reader to an underlying CSV source.
     */
    public CSVReader(final Reader reader) {
        this(reader, DEFAULT_SEPARATOR);
    }

    /**
     * Constructs CSVReader with supplied separator.
     *
     * @param reader the reader to an underlying CSV source.
     * @param separator the delimiter to use for separating entries.
     */
    public CSVReader(final Reader reader, char separator) {
        this(reader, separator, DEFAULT_QUOTE_CHARACTER);
    }

    /**
     * Constructs CSVReader with supplied separator and quote char.
     *
     * @param reader the reader to an underlying CSV source.
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     */
    public CSVReader(final Reader reader, char separator, char quotechar) {
        this(reader, separator, quotechar, DEFAULT_SKIP_LINES);
    }

    /**
     * Constructs CSVReader with supplied separator, quote char and number of
     * lines to skip.
     *
     * @param reader the reader to an underlying CSV source.
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     * @param line the number of leading lines to skip before reading starts
     */
    public CSVReader(final Reader reader, char separator, char quotechar, int line) {
        _reader = reader;
        _separator = separator;
        _quotechar = quotechar;
        _skipLines = line;
    }

    /**
     * Closes this reader when the instance is garbage collected.
     *
     * @throws Throwable if closing fails
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            this.close();
        } finally {
            // always give the superclass its chance, even if close() failed
            super.finalize();
        }
    }
    //</editor-fold>

    public Reader getReader() {
        return _reader;
    }

    /**
     * Sets the source to read from.
     * <p>
     * When a different reader is supplied, the internal buffered wrapper and
     * the end-of-input / skipped-lines flags are reset so that the next read
     * starts on the new source. The previously wrapped reader is not closed
     * here.
     *
     * @param reader the reader to an underlying CSV source.
     */
    public void setReader(Reader reader) {
        if (reader != this._reader) {
            // drop state bound to the previous source, otherwise reads would
            // keep using the stale buffered wrapper
            this.__reader = null;
            this._hasNext = true;
            this._linesSkipped = false;
        }
        this._reader = reader;
    }

    public char getSeparator() {
        return _separator;
    }

    public void setSeparator(char separator) {
        this._separator = separator;
    }

    public char getQuotechar() {
        return _quotechar;
    }

    public void setQuotechar(char quotechar) {
        this._quotechar = quotechar;
    }

    public int getSkipLines() {
        return _skipLines;
    }

    /**
     * Sets the number of leading lines to skip. The value is consulted only
     * on the first read of a source.
     *
     * @param skipLines number of lines to skip before the first returned row
     */
    public void setSkipLines(int skipLines) {
        this._skipLines = skipLines;
    }

    // ------------------------------------------------------------------------
    //                      p u b l i c
    // ------------------------------------------------------------------------
    /**
     * Reads the entire file into a List with each element being a String[] of
     * tokens.
     *
     * @return a List of String[], with each String[] representing a line of the
     * file; empty when the source has no more lines.
     * @throws java.io.IOException if bad things happen during the read
     */
    public List<String[]> readAll() throws IOException {
        final List<String[]> result = new LinkedList<String[]>();
        while (_hasNext) {
            final String[] nextLineAsTokens = this.readNext();
            if (nextLineAsTokens != null) {
                result.add(nextLineAsTokens);
            }
        }
        return result;
    }

    /**
     * Reads the entire file and converts every row into an ordered map of
     * column name to value.
     *
     * @param headerOnFirstRow if true the first row supplies the column names
     * and is not returned as data; otherwise columns are named "1", "2", ...
     * @return one map per data row, in file order; empty when there is no data
     * @throws Exception if bad things happen during the read
     */
    public List<Map<String, String>> readAllAsMap(boolean headerOnFirstRow) throws Exception {
        final List<String[]> data = this.readAll();
        return this.getMap(data, headerOnFirstRow);
    }

    /**
     * Closes the underlying reader.
     * <p>
     * If reading has started the buffered wrapper is closed; otherwise the
     * reader that was supplied (if any) is closed directly.
     *
     * @throws java.io.IOException if the close fails
     */
    public void close() throws IOException {
        if (null != __reader) {
            __reader.close();
        } else if (null != _reader) {
            // nothing was read yet, so no wrapper exists: close the source itself
            _reader.close();
        }
    }

    // ------------------------------------------------------------------------
    //                      p r i v a t e
    // ------------------------------------------------------------------------
    /**
     * Lazily wraps the configured reader in a BufferedReader.
     *
     * @return the buffered wrapper, created on first use
     */
    private BufferedReader getBReader() {
        if (null == __reader) {
            __reader = new BufferedReader(_reader);
        }
        return __reader;
    }

    /**
     * Reads the next line from the buffer and converts to a string array.
     *
     * @return a string array with each separated element as a separate
     * entry, or null when the end of the input has been reached.
     * @throws java.io.IOException if bad things happen during the read
     */
    private String[] readNext() throws IOException {
        final String nextLine = this.getNextLine();
        return _hasNext ? this.parseLine(nextLine) : null;
    }

    /**
     * Reads the next line from the file, skipping the configured number of
     * leading lines on the first call.
     *
     * @return the next line from the file without trailing newline, or null
     * at end of input
     * @throws java.io.IOException if bad things happen during the read
     */
    private String getNextLine() throws IOException {
        if (!_linesSkipped) {
            for (int i = 0; i < _skipLines; i++) {
                this.getBReader().readLine();
            }
            _linesSkipped = true;
        }
        final String nextLine = this.getBReader().readLine();
        if (nextLine == null) {
            _hasNext = false;
        }
        return _hasNext ? nextLine : null;
    }

    /**
     * Parses an incoming String and returns an array of elements.
     * <p>
     * While a quoted section is still open at the end of a line, the next
     * line is fetched and appended to the current token after a "\n".
     *
     * @param nextLine the string to parse
     * @return the tokenized list of elements, or null if nextLine is null
     * @throws java.io.IOException if bad things happen during the read
     */
    private String[] parseLine(String nextLine) throws IOException {
        if (nextLine == null) {
            return null;
        }
        final List<String> tokensOnThisLine = new LinkedList<String>();
        // local buffer only, so the unsynchronized builder is sufficient
        StringBuilder sb = new StringBuilder();
        boolean inQuotes = false;
        do {
            if (inQuotes) {
                // continuing a quoted section, reappend newline
                sb.append("\n");
                nextLine = getNextLine();
                if (nextLine == null) {
                    // input ended inside a quoted section: keep what we have
                    break;
                }
            }
            for (int i = 0; i < nextLine.length(); i++) {
                final char c = nextLine.charAt(i);
                if (c == _quotechar) {
                    // this gets complex... the quote may end a quoted block, or escape another quote.
                    // do a 1-char lookahead:
                    if (inQuotes // we are in quotes, therefore there can be escaped quotes in here.
                            && nextLine.length() > (i + 1) // there is indeed another character to check.
                            && nextLine.charAt(i + 1) == _quotechar) { // ..and that char. is a quote also.
                        // we have two quote chars in a row == one quote char, so consume them both and
                        // put one on the token. we do *not* exit the quoted text.
                        sb.append(nextLine.charAt(i + 1));
                        i++;
                    } else {
                        inQuotes = !inQuotes;
                        // the tricky case of an embedded quote in the middle: a,bc"d"ef,g
                        if (i > 2 // not on the beginning of the line
                                && nextLine.charAt(i - 1) != _separator // not at the beginning of an escape sequence
                                && nextLine.length() > (i + 1)
                                && nextLine.charAt(i + 1) != _separator // not at the end of an escape sequence
                                ) {
                            sb.append(c);
                        }
                    }
                } else if (c == _separator && !inQuotes) {
                    tokensOnThisLine.add(sb.toString());
                    sb = new StringBuilder(); // start work on next token
                } else {
                    sb.append(c);
                }
            }
        } while (inQuotes);
        tokensOnThisLine.add(sb.toString());
        return tokensOnThisLine.toArray(new String[0]);
    }

    /**
     * Converts tokenized rows into ordered maps keyed by column name.
     * <p>
     * Column names come from the first row when headerOnFirstRow is true (that
     * row is removed from the given list); otherwise they are the 1-based
     * column positions of the first row. A column that lies beyond the known
     * names is keyed by its 1-based position. Rows shorter than the header
     * simply produce fewer entries.
     *
     * @param rows the tokenized rows; may be empty
     * @param headerOnFirstRow whether the first row holds the column names
     * @return one map per data row, empty when rows is null or empty
     */
    private List<Map<String, String>> getMap(final List<String[]> rows,
            boolean headerOnFirstRow) {
        final List<Map<String, String>> result = new LinkedList<Map<String, String>>();
        if (null == rows || rows.isEmpty()) {
            // nothing to convert; avoids IndexOutOfBoundsException on empty input
            return result;
        }
        final String[] names;
        if (headerOnFirstRow) {
            names = rows.remove(0);
        } else {
            names = new String[rows.get(0).length];
            for (int i = 0; i < names.length; i++) {
                names[i] = String.valueOf(i + 1);
            }
        }
        for (final String[] cols : rows) {
            final Map<String, String> row = new LinkedHashMap<String, String>();
            for (int i = 0; i < cols.length; i++) {
                // rows may be wider than the header: fall back to the position
                final String name = i < names.length ? names[i] : String.valueOf(i + 1);
                row.put(name, cols[i]);
            }
            result.add(row);
        }
        return result;
    }
}