package ilarkesto.io;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
/**
 * Streaming reader for character-separated records.
 * <p>
 * Records are terminated by CR and/or LF, fields are divided by a configurable separator
 * (comma by default). In <em>quoted</em> mode every non-empty field must be enclosed in double
 * quotes and may span several lines; in unquoted mode a field ends at the next separator or line
 * terminator. In both modes a backslash introduces one of the control sequences
 * <code>\\ \b \t \n \f \r \" \'</code>.
 * <p>
 * The underlying reader is closed automatically once the end of input has been reached. Call
 * {@link #close()} to release it earlier. Instances are not designed for concurrent use.
 */
public final class CsvParser implements Closeable {

    /** Outcome of trying to consume an expected delimiter (separator or opening quote). */
    private enum Delimiter {
        FOUND, END_OF_LINE, END_OF_FILE
    }

    private final boolean quoted;

    /** Source of characters; {@code null} once the input is exhausted or the parser is closed. */
    private BufferedReader in;

    private char separator = ',';

    /**
     * Creates a parser on top of the given reader.
     *
     * @param in source of CSV text; it is wrapped into a {@link BufferedReader}
     * @param quoted {@code true} if fields are enclosed in double quotes
     */
    public CsvParser(Reader in, boolean quoted) {
        this.in = new BufferedReader(in);
        this.quoted = quoted;
    }

    /**
     * Creates a parser reading the given file.
     *
     * @param file file to read
     * @param encoding name of the charset used to decode the file
     * @param quoted {@code true} if fields are enclosed in double quotes
     * @throws FileNotFoundException if the file can not be opened for reading
     * @throws UnsupportedEncodingException if the named charset is not supported
     */
    public CsvParser(File file, String encoding, boolean quoted) throws FileNotFoundException,
            UnsupportedEncodingException {
        this(openReader(file, encoding), quoted);
    }

    /** Opens the file and makes sure the stream is released again if the decoder can not be created. */
    private static Reader openReader(File file, String encoding) throws FileNotFoundException,
            UnsupportedEncodingException {
        FileInputStream stream = new FileInputStream(file);
        boolean opened = false;
        try {
            Reader reader = new InputStreamReader(stream, encoding);
            opened = true;
            return reader;
        } finally {
            if (!opened) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // the failure that brought us here is the one worth reporting
                }
            }
        }
    }

    /**
     * Changes the character that divides fields (default is a comma).
     *
     * @param separator new field separator
     */
    public void setSeparator(char separator) {
        this.separator = separator;
    }

    /**
     * Reads the next record.
     * <p>
     * All line terminator characters directly following the record are consumed as well. In quoted
     * mode a field that is missing (nothing between two separators, or nothing before the end of
     * the line / input) is reported as {@code null}.
     *
     * @return the fields of the next record, or {@code null} if the end of input has been reached
     * @throws ParseException if the input is malformed or reading fails
     */
    public List<String> nextRecord() {
        if (readNextChar() == -1) return null;
        resetLastChar();

        List<String> result = new ArrayList<String>();
        while (true) {
            if (quoted) {
                // a separator where a quote should be means: the field is absent
                if (isSeparator(readNextChar())) {
                    result.add(null);
                    continue;
                }
                resetLastChar();

                Delimiter opening = parseOpeningQuote();
                if (opening == Delimiter.END_OF_FILE) {
                    result.add(null);
                    break;
                }
                if (opening == Delimiter.END_OF_LINE) {
                    result.add(null);
                    parseNl();
                    break;
                }
            }

            result.add(parseField());

            Delimiter next = parseSeparator();
            if (next == Delimiter.END_OF_LINE) {
                parseNl();
                break;
            }
            if (next == Delimiter.END_OF_FILE) break;
        }
        return result;
    }

    /** Skips one line of input. */
    public void skipLine() {
        skipLines(1);
    }

    /**
     * Skips the given number of lines. Does nothing once the end of input has been reached or the
     * parser has been closed.
     *
     * @param count number of lines to skip
     * @throws ParseException if reading fails
     */
    public void skipLines(int count) {
        for (int i = 0; i < count; i++) {
            if (in == null) return;
            String line;
            try {
                line = in.readLine();
            } catch (IOException ex) {
                throw new ParseException("Skipping Line failed", ex);
            }
            if (line == null) {
                // end of input: release the reader just like readNextChar() does
                close();
                return;
            }
        }
    }

    /**
     * Releases the underlying reader. Calling this method more than once has no effect; after
     * closing, {@link #nextRecord()} returns {@code null}.
     *
     * @throws ParseException if closing the reader fails
     */
    public void close() {
        if (in == null) return;
        BufferedReader reader = in;
        in = null;
        try {
            reader.close();
        } catch (IOException ex) {
            throw new ParseException("Closing failed", ex);
        }
    }

    private boolean isSeparator(int c) {
        return c == separator;
    }

    /** CR, LF and end of input all terminate a line. */
    private boolean isEOL(int c) {
        return c == 13 || c == 10 || c == -1;
    }

    /**
     * Reads the content of one field. In quoted mode the opening quote must already be consumed
     * and the closing quote is consumed here; in unquoted mode the terminating separator or line
     * terminator is left in the input.
     */
    private String parseField() {
        StringBuilder sb = new StringBuilder();
        while (true) {
            int c = readNextChar();
            if (quoted) {
                if (c == '"') return sb.toString();
                if (c == -1) throw new ParseException("Unexpected EOF in field");
                // CR / LF inside quotes belong to the field
            } else if (isEOL(c) || isSeparator(c)) {
                resetLastChar();
                return sb.toString();
            }

            if (c == '\\') {
                appendControlSequence(sb);
            } else {
                sb.append((char) c);
            }
        }
    }

    /** Reads the character following a backslash and appends the character it stands for. */
    private void appendControlSequence(StringBuilder sb) {
        int seq = readNextChar();
        switch (seq) {
            case '\\':
                sb.append('\\');
                break;
            case 'b':
                sb.append('\b');
                break;
            case 't':
                sb.append('\t');
                break;
            case 'n':
                sb.append('\n');
                break;
            case 'f':
                sb.append('\f');
                break;
            case 'r':
                sb.append('\r');
                break;
            case '"':
                sb.append('"');
                break;
            case '\'':
                sb.append('\'');
                break;
            case -1:
                throw new ParseException("Unexpected EOF in control sequence");
            default:
                throw new ParseException("Unsupported control sequence '" + (char) seq + "' (" + seq + ")");
        }
    }

    /**
     * Consumes the separator expected after a field. A line terminator is left in the input and
     * reported as {@link Delimiter#END_OF_LINE}.
     */
    private Delimiter parseSeparator() {
        int c = readNextChar();
        if (c == -1) return Delimiter.END_OF_FILE;
        if (isEOL(c)) {
            resetLastChar();
            return Delimiter.END_OF_LINE;
        }
        if (isSeparator(c)) return Delimiter.FOUND;
        throw new ParseException("Field separator expected, but is: '" + (char) c + "' (" + c + ")");
    }

    /**
     * Consumes the quote expected at the start of a field. A line terminator is left in the input
     * and reported as {@link Delimiter#END_OF_LINE}.
     */
    private Delimiter parseOpeningQuote() {
        int c = readNextChar();
        if (c == -1) return Delimiter.END_OF_FILE;
        if (isEOL(c)) {
            resetLastChar();
            return Delimiter.END_OF_LINE;
        }
        if (c == '"') return Delimiter.FOUND;
        throw new ParseException("Quote '\"' expected, but is: '" + (char) c + "' (" + c + ")");
    }

    /** Consumes all consecutive line terminator characters. */
    private void parseNl() {
        while (true) {
            int c = readNextChar();
            if (c == -1) return;
            if (!isEOL(c)) {
                resetLastChar();
                return;
            }
        }
    }

    /** Pushes the character returned by the last {@link #readNextChar()} back into the input. */
    private void resetLastChar() {
        if (in == null) return;
        try {
            in.reset();
        } catch (IOException ex) {
            throw new ParseException("Reset failed", ex);
        }
    }

    /**
     * Reads a single character, remembering the position before it so that
     * {@link #resetLastChar()} can undo the read. Closes the reader when the end of input is hit.
     *
     * @return the character read, or -1 at the end of input
     */
    private int readNextChar() {
        if (in == null) return -1;
        int c;
        try {
            in.mark(1);
            c = in.read();
        } catch (IOException ex) {
            throw new ParseException("Reading failed", ex);
        }
        if (c == -1) close();
        return c;
    }

    /** Signals malformed input or a failure of the underlying reader. */
    public static class ParseException extends RuntimeException {

        private static final long serialVersionUID = 1L;

        public ParseException(String message, Throwable cause) {
            super(message, cause);
        }

        public ParseException(String message) {
            super(message);
        }
    }
}