package org.sigmah.server.servlet.exporter.utils; /* * #%L * Sigmah * %% * Copyright (C) 2010 - 2016 URD * %% * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program. If not, see * <http://www.gnu.org/licenses/gpl-3.0.html>. * #L% */ import java.util.ArrayList; import java.util.List; public class CsvParser { private boolean hasNext = true; private final char separator; private final char quotechar; private final char escape; private String[] csvLines; private int lineCount; public static final int INITIAL_READ_SIZE = 64; /** * The default escape character to use if none is supplied to the constructor. */ public static final char DEFAULT_ESCAPE_CHARACTER = '\\'; public CsvParser() { this.separator = CsvBuilder.DEFAULT_SEPARATOR; this.quotechar = CsvBuilder.DEFAULT_QUOTE_CHARACTER; this.escape = DEFAULT_ESCAPE_CHARACTER; } /* * Parses CSV string returns list of arrays which represents each line of CSV */ public List<String[]> parseCsv(String csvString) { hasNext = true; csvLines = csvString.split(CsvBuilder.DEFAULT_LINE_END); lineCount = 0; List<String[]> allElements = new ArrayList<String[]>(); while (hasNext) { String[] nextLineAsTokens = readNext(); if (nextLineAsTokens != null) allElements.add(nextLineAsTokens); } return allElements; } private String[] readNext() { String nextLine = getNextLine(); return hasNext ? parseLine(nextLine) : null; } private String getNextLine() { String nextLine = null; if (lineCount <= csvLines.length - 1) { nextLine = csvLines[lineCount++]; } if (nextLine == null) { hasNext = false; } return hasNext ? nextLine : null; } /** * Parses an incoming String and returns an array of elements. * * @param nextLine * the string to parse * @return the comma-tokenized list of elements, or null if nextLine is null */ private String[] parseLine(String nextLine) { if (nextLine == null) { return null; } List<String> tokensOnThisLine = new ArrayList<String>(); StringBuilder sb = new StringBuilder(INITIAL_READ_SIZE); boolean inQuotes = false; do { if (inQuotes) { // continuing a quoted section, reappend newline sb.append("\n"); nextLine = getNextLine(); if (nextLine == null) break; } for (int i = 0; i < nextLine.length(); i++) { char c = nextLine.charAt(i); if (c == this.escape) { if (isEscapable(nextLine, inQuotes, i)) { sb.append(nextLine.charAt(i + 1)); i++; } else { i++; // ignore the escape } } else if (c == quotechar) { if (isEscapedQuote(nextLine, inQuotes, i)) { sb.append(nextLine.charAt(i + 1)); i++; } else { inQuotes = !inQuotes; // the tricky case of an embedded quote in the middle: a,bc"d"ef,g if (i > 2 // not on the beginning of the line && nextLine.charAt(i - 1) != this.separator // not at the beginning of an escape sequence && nextLine.length() > (i + 1) && nextLine.charAt(i + 1) != this.separator // not at the end of an escape sequence ) { sb.append(c); } } } else if (c == separator && !inQuotes) { tokensOnThisLine.add(sb.toString()); sb = new StringBuilder(INITIAL_READ_SIZE); // start work on next token } else { sb.append(c); } } } while (inQuotes); tokensOnThisLine.add(sb.toString()); return tokensOnThisLine.toArray(new String[0]); } /** * precondition: the current character is a quote or an escape * * @param nextLine * the current line * @param inQuotes * true if the current context is quoted * @param i * current index in line * @return true if the following character is a quote */ private boolean isEscapedQuote(String nextLine, boolean inQuotes, int i) { return inQuotes // we are in quotes, therefore there can be escaped quotes in here. && nextLine.length() > (i + 1) // there is indeed another character to check. && nextLine.charAt(i + 1) == quotechar; } /** * precondition: the current character is an escape * * @param nextLine * the current line * @param inQuotes * true if the current context is quoted * @param i * current index in line * @return true if the following character is a quote */ private boolean isEscapable(String nextLine, boolean inQuotes, int i) { return inQuotes // we are in quotes, therefore there can be escaped quotes in here. && nextLine.length() > (i + 1) // there is indeed another character to check. && (nextLine.charAt(i + 1) == quotechar || nextLine.charAt(i + 1) == this.escape); } }