/* * Copyright (c) 2009-2015 * IT-Consulting Stephan Schloepke (http://www.schloepke.de/) * klemm software consulting Mirko Klemm (http://www.klemm-scs.com/) * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package org.jbasics.csv; import org.jbasics.exception.DelegatedException; import java.io.Closeable; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.StringReader; import java.net.URL; import java.net.URLConnection; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.nio.charset.UnsupportedCharsetException; import java.util.ArrayList; import java.util.List; public class CSVParser { private final boolean parseWithHeaders; private final boolean skipEmptyLines; private final char separator; public CSVParser() { this(true); } public CSVParser(final boolean skipEmptyLines) { this(true, ',', skipEmptyLines); } public CSVParser(final boolean parseWithHeaders, final char separator, final boolean skipEmptyLines) { this.parseWithHeaders = parseWithHeaders; this.separator = separator; this.skipEmptyLines = skipEmptyLines; } public CSVParser(final char separator) { this(true, separator, true); } public CSVParser(final char separator, final boolean skipEmptyLines) { this(true, separator, skipEmptyLines); } public CSVTable parse(final URL location) throws IOException { final URLConnection connection = location.openConnection(); final String encoding = connection.getContentEncoding(); if (encoding != null) { try { final Charset charset = Charset.forName(encoding); return parse(connection.getInputStream(), charset); } catch (final UnsupportedCharsetException e) { // we should fall back to default } } return parse(connection.getInputStream()); } public CSVTable parse(final InputStream in, final Charset charset) throws IOException { return parse(new InputStreamReader(in, charset)); } public CSVTable parse(final InputStream in) throws IOException { return parse(new InputStreamReader(in)); } public CSVTable parse(final Readable reader) throws IOException { final List<CSVRecord> records = new ArrayList<CSVRecord>(); final List<String> fields = new ArrayList<String>(); final CharBuffer buf = CharBuffer.allocate(128); final StringBuffer fieldData = new StringBuffer(32); ParsingState state = ParsingState.NONE; boolean skipReadNext = false; char c = ' '; while (reader.read(buf) > 0) { buf.flip(); while (buf.hasRemaining()) { if (!skipReadNext) { c = buf.get(); } else { skipReadNext = false; } switch (state) { case QUOTED_END: if (c == '"') { fieldData.append(c); state = ParsingState.QUOTED; } else { state = ParsingState.NONE; skipReadNext = true; } break; case QUOTED: if (c == '"') { state = ParsingState.QUOTED_END; } else { fieldData.append(c); } break; case RECORD_END: state = ParsingState.NONE; if (this.skipEmptyLines) { if (fields.size() == 0 && fieldData.length() == 0) { break; } } fields.add(fieldData.toString()); fieldData.setLength(0); records.add(new CSVRecord(fields)); fields.clear(); if (c == '\n') { break; } case NONE: switch (c) { case '\n': case '\r': state = ParsingState.RECORD_END; break; case '"': state = ParsingState.QUOTED; break; default: if (c == this.separator) { fields.add(fieldData.toString()); fieldData.setLength(0); } else { fieldData.append(c); } } } } buf.clear(); } if (fieldData.length() > 0) { fields.add(fieldData.toString()); } if (!fields.isEmpty()) { records.add(new CSVRecord(fields)); } if (reader instanceof Closeable) { ((Closeable) reader).close(); } if (this.parseWithHeaders && !records.isEmpty()) { return new CSVTable(null, this.separator, records.get(0), records.subList(1, records.size()).toArray(new CSVRecord[records.size() - 1])); } else { return new CSVTable(null, this.separator, (CSVRecord) null, records.toArray(new CSVRecord[records.size()])); } } public CSVTable parse(final String input) { try { return parse(new StringReader(input)); } catch (final IOException e) { throw DelegatedException.delegate(e); } } public CSVTable parseWithRuntimeException(final URL location) { try { return parse(location); } catch(IOException e) { throw DelegatedException.delegate(e); } } public CSVTable parseWithRuntimeException(final InputStream in, final Charset charset) { try { return parse(in, charset); } catch(IOException e) { throw DelegatedException.delegate(e); } } public CSVTable parseWithRuntimeException(final InputStream in) { try { return parse(in); } catch(IOException e) { throw DelegatedException.delegate(e); } } public CSVTable parseWithRuntimeException(final Readable reader) { try { return parse(reader); } catch(IOException e) { throw DelegatedException.delegate(e); } } private enum ParsingState { NONE, QUOTED, QUOTED_END, RECORD_END } }