/**
* Copyright 2011-2017 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.utils.io.csv;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import com.asakusafw.utils.io.Source;
/**
* A simple implementation of CSV reader.
* @since 0.6.2
*/
public class CsvReader implements Source<List<String>> {
private final Reader reader;
private List<String> next;
/**
* Creates a new instance.
* @param reader the CSV source
*/
public CsvReader(Reader reader) {
this.reader = reader instanceof BufferedReader ? reader : new BufferedReader(reader);
}
@Override
public boolean next() throws IOException {
this.next = prepare();
return next != null;
}
private List<String> prepare() throws IOException {
List<String> line = new ArrayList<>();
StringBuilder buf = new StringBuilder();
State state = State.INIT;
LOOP: while (true) {
int c = reader.read();
if (c < 0) {
c = -1;
}
switch (c) {
case '"':
switch (state) {
case INIT:
state = State.ESCAPE_BODY;
break;
case BODY:
// invalid case
state = State.ESCAPE_BODY;
break;
case ESCAPE_BODY:
state = State.ESCAPE_QUOTE;
break;
case ESCAPE_QUOTE:
state = State.ESCAPE_BODY;
buf.append('"');
break;
default:
throw new AssertionError();
}
break;
case ',':
switch (state) {
case INIT:
case BODY:
case ESCAPE_QUOTE:
// end of field
state = State.INIT;
line.add(buf.toString());
buf.setLength(0);
break;
case ESCAPE_BODY:
buf.append(',');
break;
default:
throw new AssertionError();
}
break;
case '\r':
switch (state) {
case ESCAPE_BODY:
buf.append('\r');
break;
default:
// ignore CR
break;
}
break;
case '\n':
switch (state) {
case INIT:
case BODY:
case ESCAPE_QUOTE:
// end of record
line.add(buf.toString());
buf.setLength(0);
break LOOP;
case ESCAPE_BODY:
buf.append('\n');
break;
default:
throw new AssertionError();
}
break;
case -1:
switch (state) {
case INIT:
// end of record w/o contents
break LOOP;
case BODY:
case ESCAPE_QUOTE:
// end of record w/ contents
line.add(buf.toString());
buf.setLength(0);
break LOOP;
case ESCAPE_BODY:
// invalid state
line.add(buf.toString());
buf.setLength(0);
break LOOP;
default:
throw new AssertionError();
}
default:
switch (state) {
case INIT:
case BODY:
state = State.BODY;
buf.append((char) c);
break;
case ESCAPE_BODY:
state = State.ESCAPE_BODY;
buf.append((char) c);
break;
case ESCAPE_QUOTE:
// invalid state
state = State.BODY;
buf.append((char) c);
break;
default:
throw new AssertionError();
}
break;
}
}
if (line.isEmpty()) {
return null;
}
return line;
}
@Override
public List<String> get() throws IOException {
return next;
}
@Override
public void close() throws IOException {
reader.close();
}
private enum State {
INIT,
BODY,
ESCAPE_BODY,
ESCAPE_QUOTE,
}
}