package org.agnitas.util;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;

/**
 * Reads separated-value data from an {@link InputStream}, one data line per call or all at once.
 *
 * <p>Values may be enclosed in a configurable quote character. Inside a quoted value the separator
 * and (if allowed) line breaks are treated as data, and a doubled quote character stands for a
 * single quote character. Lines that yield no values at all (e.g. blank lines) are skipped.</p>
 *
 * <p>The number of values of the first returned line defines the expected number of values for all
 * following lines, see {@link #readNextCsvLine()}.</p>
 *
 * <p>Instances hold mutable parsing state and perform no synchronization.</p>
 */
public class CsvReader implements Closeable {
	/** Name of the charset used by the constructors that take no encoding. */
	public static final String DEFAULT_ENCODING = "UTF-8";

	/** Separator used by the constructors that take no separator. */
	public static final char DEFAULT_SEPARATOR = ',';

	/** Quote character used by the constructors that take no quote character. */
	public static final char DEFAULT_STRING_QUOTE = '"';

	private final char separator;
	/** Only meaningful when {@link #useStringQuote} is true, otherwise {@code '\0'}. */
	private final char stringQuote;
	private final boolean useStringQuote;
	/** Set to null by {@link #close()}, which is how a closed reader is recognized. */
	private InputStream inputStream;
	private final Charset encoding;

	private boolean lineBreakInDataAllowed = true;
	private boolean escapedStringQuoteInDataAllowed = true;

	/** True as soon as {@link #readNextCsvLine()} was called once; blocks the bulk operations. */
	private boolean singleReadStarted = false;
	/** Number of values per line as defined by previously returned lines, -1 while unknown. */
	private int numberOfColumns = -1;
	/** Created lazily on the first read. */
	private BufferedReader inputReader = null;
	/** Counts calls of {@link #readNextCsvLine()}, used as line number in error messages. */
	private int readLines = 0;
	private int readCharacters = 0;
	private boolean fillMissingTrailingColumnsWithNull = false;

	/**
	 * Creates a reader using the default encoding, separator and quote character.
	 *
	 * @param inputStream the data source, must not be null
	 */
	public CsvReader(InputStream inputStream) {
		this(inputStream, Charset.forName(DEFAULT_ENCODING), DEFAULT_SEPARATOR, DEFAULT_STRING_QUOTE);
	}

	/**
	 * Creates a reader using the default separator and quote character.
	 *
	 * @param inputStream the data source, must not be null
	 * @param encoding name of the charset used to decode the stream
	 */
	public CsvReader(InputStream inputStream, String encoding) {
		this(inputStream, Charset.forName(encoding), DEFAULT_SEPARATOR, DEFAULT_STRING_QUOTE);
	}

	/**
	 * Creates a reader using the default separator and quote character.
	 *
	 * @param inputStream the data source, must not be null
	 * @param encoding charset used to decode the stream, must not be null
	 */
	public CsvReader(InputStream inputStream, Charset encoding) {
		this(inputStream, encoding, DEFAULT_SEPARATOR, DEFAULT_STRING_QUOTE);
	}

	/**
	 * Creates a reader using the default encoding and quote character.
	 *
	 * @param inputStream the data source, must not be null
	 * @param separator character that separates values
	 */
	public CsvReader(InputStream inputStream, char separator) {
		this(inputStream, Charset.forName(DEFAULT_ENCODING), separator, DEFAULT_STRING_QUOTE);
	}

	/**
	 * Creates a reader using the default quote character.
	 *
	 * @param inputStream the data source, must not be null
	 * @param encoding name of the charset used to decode the stream
	 * @param separator character that separates values
	 */
	public CsvReader(InputStream inputStream, String encoding, char separator) {
		this(inputStream, Charset.forName(encoding), separator, DEFAULT_STRING_QUOTE);
	}

	/**
	 * Creates a reader using the default quote character.
	 *
	 * @param inputStream the data source, must not be null
	 * @param encoding charset used to decode the stream, must not be null
	 * @param separator character that separates values
	 */
	public CsvReader(InputStream inputStream, Charset encoding, char separator) {
		this(inputStream, encoding, separator, DEFAULT_STRING_QUOTE);
	}

	/**
	 * Creates a reader using the default encoding.
	 *
	 * @param inputStream the data source, must not be null
	 * @param separator character that separates values
	 * @param stringQuote quote character, or null to disable quote handling
	 */
	public CsvReader(InputStream inputStream, char separator, Character stringQuote) {
		this(inputStream, Charset.forName(DEFAULT_ENCODING), separator, stringQuote);
	}

	/**
	 * Creates a fully configured reader.
	 *
	 * @param inputStream the data source, must not be null
	 * @param encoding name of the charset used to decode the stream
	 * @param separator character that separates values
	 * @param stringQuote quote character, or null to disable quote handling
	 */
	public CsvReader(InputStream inputStream, String encoding, char separator, Character stringQuote) {
		this(inputStream, Charset.forName(encoding), separator, stringQuote);
	}

	/**
	 * Creates a fully configured reader. All other constructors delegate to this one.
	 *
	 * @param inputStream the data source, must not be null
	 * @param encoding charset used to decode the stream, must not be null
	 * @param separator character that separates values
	 * @param stringQuote quote character, or null to disable quote handling
	 * @throws IllegalArgumentException if encoding or inputStream is null, or if the
	 *         separator / quote character combination is rejected
	 */
	public CsvReader(InputStream inputStream, Charset encoding, char separator, Character stringQuote) {
		this.inputStream = inputStream;
		this.encoding = encoding;
		this.separator = separator;
		this.useStringQuote = stringQuote != null;
		this.stringQuote = this.useStringQuote ? stringQuote.charValue() : '\0';

		// NOTE(review): AgnUtils.anyCharsAreEqual presumably reports whether any two of the given
		// chars are equal, i.e. separator and quote must differ from each other and from line breaks.
		if (this.encoding == null) {
			throw new IllegalArgumentException("Encoding is null");
		} else if (this.inputStream == null) {
			throw new IllegalArgumentException("InputStream is null");
		} else if (AgnUtils.anyCharsAreEqual(this.separator, '\r', '\n')) {
			throw new IllegalArgumentException("Separator '" + this.separator + "' is invalid");
		} else if (useStringQuote && AgnUtils.anyCharsAreEqual(this.separator, this.stringQuote, '\r', '\n')) {
			throw new IllegalArgumentException("Stringquote '" + this.stringQuote + "' is invalid");
		}
	}

	/**
	 * @return whether lines with too few values are padded with null values, see
	 *         {@link #setFillMissingTrailingColumnsWithNull(boolean)}
	 */
	public boolean isFillMissingTrailingColumnsWithNull() {
		return fillMissingTrailingColumnsWithNull;
	}

	/**
	 * Configures whether lines with fewer values than expected are padded with null values instead
	 * of being rejected. A line that lacks exactly one value is always padded, regardless of this
	 * setting.
	 *
	 * @param fillMissingTrailingColumnsWithNull true to pad short lines with null values
	 */
	public void setFillMissingTrailingColumnsWithNull(boolean fillMissingTrailingColumnsWithNull) {
		this.fillMissingTrailingColumnsWithNull = fillMissingTrailingColumnsWithNull;
	}

	/**
	 * Reads the next data line and returns its values.
	 *
	 * <p>Lines without any value are skipped. An empty value at the very end of a line is not
	 * added by the parser itself; it shows up as a null value via the padding described at
	 * {@link #setFillMissingTrailingColumnsWithNull(boolean)} once the expected number of values
	 * is known.</p>
	 *
	 * <p>When the end of the stream is reached without any further values, the reader closes
	 * itself and returns null.</p>
	 *
	 * @return the values of the next data line, or null if there is no more data
	 * @throws IOException if reading fails or the stream ends inside a quoted value (in the
	 *         latter case the reader is closed before throwing)
	 * @throws CsvDataException if a line break occurs inside a quoted value although not allowed,
	 *         a quote character occurs in a value although not allowed, or the number of values
	 *         does not match the preceding lines
	 * @throws IllegalStateException if the reader is already closed
	 */
	public List<String> readNextCsvLine() throws IOException, CsvDataException {
		if (inputReader == null) {
			if (inputStream == null) {
				throw new IllegalStateException("CsvReader is already closed");
			}
			inputReader = new BufferedReader(new InputStreamReader(inputStream, encoding));
		}

		readLines++;
		singleReadStarted = true;

		List<String> returnList = new ArrayList<String>();
		StringBuilder nextValue = new StringBuilder();
		boolean insideString = false;
		int nextCharInt;

		while ((nextCharInt = inputReader.read()) != -1) {
			readCharacters++;
			char nextChar = (char) nextCharInt;
			boolean isLineBreak = nextChar == '\r' || nextChar == '\n';

			if (useStringQuote && nextChar == stringQuote) {
				// Quotes are kept in the raw value; parseValue strips and unescapes them later.
				insideString = !insideString;
				nextValue.append(nextChar);
			} else if (insideString) {
				if (isLineBreak && !lineBreakInDataAllowed) {
					throw new CsvDataException("Not allowed linebreak in data in line " + readLines, readLines);
				}
				nextValue.append(nextChar);
			} else if (isLineBreak) {
				if (finishLine(returnList, nextValue)) {
					return returnList;
				}
				// Nothing collected so far (blank line or second half of "\r\n"): keep reading.
			} else if (nextChar == separator) {
				returnList.add(parseValue(nextValue.toString()));
				nextValue.setLength(0);
			} else {
				nextValue.append(nextChar);
			}
		}

		// End of stream reached
		if (insideString) {
			close();
			throw new IOException("Unexpected EOF after quoted csv-value was started");
		}
		if (finishLine(returnList, nextValue)) {
			return returnList;
		}
		close();
		return null;
	}

	/**
	 * Completes the current line: adds a pending non-empty value, checks the number of values
	 * against the expected number and pads with null values where permitted.
	 *
	 * @param values the values collected for the current line, modified in place
	 * @param pendingValue raw characters of the value that was being read when the line ended
	 * @return true if the line contains values and may be returned, false if it is empty
	 * @throws CsvDataException if the number of values is inconsistent with preceding lines
	 */
	private boolean finishLine(List<String> values, StringBuilder pendingValue) throws CsvDataException {
		if (pendingValue.length() > 0) {
			values.add(parseValue(pendingValue.toString()));
		}
		if (values.isEmpty()) {
			return false;
		}

		int actualSize = values.size();
		if (numberOfColumns != -1 && numberOfColumns != actualSize) {
			boolean mayBePadded = numberOfColumns == actualSize + 1
					|| (numberOfColumns > actualSize && fillMissingTrailingColumnsWithNull);
			if (!mayBePadded) {
				throw new CsvDataException("Inconsistent number of values in line " + readLines + " (expected: " + numberOfColumns + " was: " + actualSize + ")", readLines);
			}
			while (values.size() < numberOfColumns) {
				values.add(null);
			}
		}
		numberOfColumns = values.size();
		return true;
	}

	/**
	 * Reads all data lines of the stream and closes the reader afterwards, also in case of errors.
	 *
	 * @return all data lines, each as list of its values
	 * @throws IOException see {@link #readNextCsvLine()}
	 * @throws CsvDataException see {@link #readNextCsvLine()}
	 * @throws IllegalStateException if {@link #readNextCsvLine()} was already called on this reader
	 */
	public List<List<String>> readAll() throws IOException, CsvDataException {
		if (singleReadStarted) {
			throw new IllegalStateException("Single readNextCsvLine was called before readAll");
		}

		try {
			List<List<String>> csvValues = new ArrayList<List<String>>();
			List<String> lineValues;
			while ((lineValues = readNextCsvLine()) != null) {
				csvValues.add(lineValues);
			}
			return csvValues;
		} finally {
			close();
		}
	}

	/**
	 * Converts the raw characters of one value into the resulting value using the configuration
	 * of this reader.
	 *
	 * @param rawValue raw characters of the value, including enclosing quotes if present
	 * @return the unquoted value with line breaks normalized to "\n"
	 * @throws CsvDataException if quote handling is active, quote characters in data are not
	 *         allowed and the resulting value contains the quote character
	 */
	private String parseValue(String rawValue) throws CsvDataException {
		String returnValue = useStringQuote ? parseValue(stringQuote, rawValue) : normalizeLineBreaks(rawValue);

		// Only meaningful when a quote character is configured at all; without this guard the
		// unset quote character '\0' would be searched for in the data.
		if (useStringQuote && !escapedStringQuoteInDataAllowed && returnValue.indexOf(stringQuote) >= 0) {
			throw new CsvDataException("Not allowed stringquote in data in line " + readLines, readLines);
		}
		return returnValue;
	}

	/**
	 * Closes the reader and the underlying stream. Errors while closing are ignored. Calling this
	 * method more than once has no further effect.
	 */
	@Override
	public void close() {
		IOUtils.closeQuietly(inputReader);
		inputReader = null;
		IOUtils.closeQuietly(inputStream);
		inputStream = null;
	}

	/**
	 * @return the number of calls of {@link #readNextCsvLine()} so far, which is the line number
	 *         used in error messages
	 */
	public int getReadLines() {
		return readLines;
	}

	/**
	 * @return the number of characters read from the stream so far
	 */
	public int getReadCharacters() {
		return readCharacters;
	}

	/**
	 * @return the number of characters read from the stream so far
	 * @deprecated misspelled name kept for existing callers, use {@link #getReadCharacters()}
	 */
	@Deprecated
	public int getReadChracters() {
		return getReadCharacters();
	}

	/**
	 * @return whether line breaks are accepted inside quoted values
	 */
	public boolean isLineBreakInDataAllowed() {
		return lineBreakInDataAllowed;
	}

	/**
	 * @param lineBreakInDataAllowed false to reject line breaks inside quoted values with a
	 *        {@link CsvDataException}
	 */
	public void setLineBreakInDataAllowed(boolean lineBreakInDataAllowed) {
		this.lineBreakInDataAllowed = lineBreakInDataAllowed;
	}

	/**
	 * @return whether values may contain the quote character after unquoting
	 */
	public boolean isEscapedStringQuoteInDataAllowed() {
		return escapedStringQuoteInDataAllowed;
	}

	/**
	 * @param escapedStringQuoteInDataAllowed false to reject values that still contain the quote
	 *        character after unquoting with a {@link CsvDataException}
	 */
	public void setEscapedStringQuoteInDataAllowed(boolean escapedStringQuoteInDataAllowed) {
		this.escapedStringQuoteInDataAllowed = escapedStringQuoteInDataAllowed;
	}

	/**
	 * Reads the stream to the end and counts all csv value lines, which can be less than the
	 * absolute line break count of the stream because of quoted line breaks and skipped empty
	 * lines. The result also contains the first line, which may consist of column headers.
	 * The reader is closed afterwards, also in case of errors.
	 *
	 * @return the number of data lines in the stream
	 * @throws IOException see {@link #readNextCsvLine()}
	 * @throws CsvDataException see {@link #readNextCsvLine()}
	 * @throws IllegalStateException if {@link #readNextCsvLine()} was already called on this reader
	 */
	public int getCsvLineCount() throws IOException, CsvDataException {
		if (singleReadStarted) {
			throw new IllegalStateException("Single readNextCsvLine was called before getCsvLineCount");
		}

		try {
			int csvLineCount = 0;
			while (readNextCsvLine() != null) {
				csvLineCount++;
			}
			return csvLineCount;
		} finally {
			close();
		}
	}

	/**
	 * Splits a single line of separated values. Line breaks are not treated specially, they are
	 * part of the value they occur in. As in {@link #readNextCsvLine()}, an empty value at the very
	 * end of the line is not added.
	 *
	 * @param separator character that separates values
	 * @param stringQuote quote character
	 * @param csvLine the line to split, may be null
	 * @return the values of the line, or null if the line is null or yields no values
	 * @throws IOException if the line ends inside a quoted value
	 */
	public static List<String> parseCsvLine(char separator, char stringQuote, String csvLine) throws IOException {
		if (csvLine == null) {
			// Treated like an empty line instead of failing with a NullPointerException
			return null;
		}

		List<String> returnList = new ArrayList<String>();
		StringBuilder nextValue = new StringBuilder();
		boolean insideString = false;

		for (int i = 0; i < csvLine.length(); i++) {
			char nextChar = csvLine.charAt(i);
			if (nextChar == stringQuote) {
				insideString = !insideString;
				nextValue.append(nextChar);
			} else if (!insideString && nextChar == separator) {
				returnList.add(parseValue(stringQuote, nextValue.toString()));
				nextValue.setLength(0);
			} else {
				nextValue.append(nextChar);
			}
		}

		if (insideString) {
			throw new IOException("Unexpected EOL after quoted csv-value was started");
		}
		if (nextValue.length() > 0) {
			returnList.add(parseValue(stringQuote, nextValue.toString()));
		}
		return returnList.isEmpty() ? null : returnList;
	}

	/**
	 * Strips enclosing quote characters from a raw value, replaces doubled quote characters
	 * within by single ones and normalizes line breaks to "\n".
	 *
	 * @param stringQuote quote character
	 * @param rawValue raw characters of the value, may be null or empty
	 * @return the resulting value; null or empty input is returned unchanged
	 */
	private static String parseValue(char stringQuote, String rawValue) {
		if (!StringUtils.isNotEmpty(rawValue)) {
			return rawValue;
		}

		String returnValue = rawValue;
		int lastIndex = returnValue.length() - 1;
		// Needs at least two characters: a lone quote character is not an enclosing pair.
		if (lastIndex >= 1 && returnValue.charAt(0) == stringQuote && returnValue.charAt(lastIndex) == stringQuote) {
			String stringQuoteString = Character.toString(stringQuote);
			returnValue = returnValue.substring(1, lastIndex);
			returnValue = returnValue.replace(stringQuoteString + stringQuoteString, stringQuoteString);
		}
		return normalizeLineBreaks(returnValue);
	}

	/**
	 * Replaces "\r\n" and single "\r" by "\n".
	 *
	 * @param value the text to normalize, may be null
	 * @return the normalized text, or null if value is null
	 */
	private static String normalizeLineBreaks(String value) {
		if (value == null) {
			return null;
		}
		return value.replace("\r\n", "\n").replace('\r', '\n');
	}
}