/**
* Copyright (C) 2009-2013 FoundationDB, LLC
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.foundationdb.server.service.externaldata;
import com.foundationdb.ais.model.Column;
import com.foundationdb.ais.model.Table;
import com.foundationdb.qp.operator.QueryContext;
import com.foundationdb.qp.row.Row;
import com.foundationdb.server.api.dml.scan.NewRow;
import com.foundationdb.server.error.ExternalRowReaderException;
import com.foundationdb.server.types.common.types.TypesTranslator;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
/**
 * Reads delimiter-separated (CSV-style) input byte by byte and assembles each
 * input line into a {@link Row}, using the field and row helpers inherited from
 * {@link RowReader} ({@code read}, {@code unread}, {@code newRow},
 * {@code addToField}, {@code addField}, {@code finishRow}).
 *
 * <p>The delimiter, quote, escape, newline and return bytes all come from the
 * {@link CsvFormat} given to the constructor. Parsing is a small state machine;
 * see {@link #nextRow()}.
 */
public class CsvRowReader extends RowReader
{
    /** Special byte values, taken from the {@link CsvFormat}. */
    private final int delim, quote, escape, nl, cr;

    /**
     * Parser states:
     * <ul>
     * <li>{@code ROW_START} - nothing of the current row has been consumed yet;</li>
     * <li>{@code FIELD_START} - a delimiter was just consumed, next field not begun;</li>
     * <li>{@code IN_FIELD} - inside an unquoted field;</li>
     * <li>{@code IN_QUOTE} - inside a quoted field, after the opening quote;</li>
     * <li>{@code AFTER_QUOTE} - just past the closing quote of a quoted field.</li>
     * </ul>
     */
    private enum State { ROW_START, FIELD_START, IN_FIELD, IN_QUOTE, AFTER_QUOTE };

    /** Current parser state; reset to {@code ROW_START} by every {@link #nextRow()} call. */
    private State state;

    /**
     * Creates a reader over {@code inputStream}.
     *
     * @param table target table, passed through to {@link RowReader}
     * @param columns target columns, passed through to {@link RowReader}
     * @param inputStream source of the raw bytes
     * @param format supplies the encoding and null bytes (forwarded to the
     *        superclass) and the delimiter, quote, escape, newline and return
     *        bytes used by this parser
     * @param queryContext passed through to {@link RowReader}
     * @param typesTranslator passed through to {@link RowReader}
     */
    public CsvRowReader(Table table, List<Column> columns, InputStream inputStream,
                        CsvFormat format, QueryContext queryContext, TypesTranslator typesTranslator) {
        super(table, columns, inputStream, format.getEncoding(), format.getNullBytes(),
              queryContext, typesTranslator);
        this.delim = format.getDelimiterByte();
        this.quote = format.getQuoteByte();
        this.escape = format.getEscapeByte();
        this.nl = format.getNewline();
        this.cr = format.getReturn();
    }

    /**
     * Discards input up to and including the {@code nrows}-th newline byte, or
     * up to end of input if that comes first.
     *
     * <p>Only the newline byte is counted; quoting is not interpreted here, so a
     * newline inside a quoted field also counts as a line end.
     *
     * @param nrows number of lines to discard; zero or negative discards nothing
     * @throws IOException if reading the underlying input fails
     */
    public void skipRows(long nrows) throws IOException {
        // Without this guard a non-positive count would still consume one full
        // line, because the count is otherwise only examined after a newline.
        if (nrows <= 0) {
            return;
        }
        long remaining = nrows;
        while (true) {
            int b = read();
            if (b < 0) {
                break;
            }
            if (b == nl) {
                remaining--;
                if (remaining <= 0) {
                    break;
                }
            }
        }
    }

    /**
     * Parses and returns the next row.
     *
     * <p>Line-terminator bytes seen before any field content are skipped, so
     * empty lines do not produce rows. A row ends at a newline byte, a return
     * byte, or end of input. Inside a quoted field the delimiter and the line
     * terminators are ordinary data. When the escape byte equals the quote
     * byte, a doubled quote inside a quoted field stands for one literal quote;
     * otherwise the escape byte makes the byte after it literal.
     *
     * @return the row built by {@code finishRow()}, or {@code null} when the
     *         input is exhausted (including when only line terminators remain)
     * @throws IOException if reading the underlying input fails
     * @throws ExternalRowReaderException if a quote appears inside an unquoted
     *         field, the input ends inside a quoted field or right after an
     *         escape byte, or anything other than a delimiter or row end
     *         follows a closing quote
     */
    @Override
    public Row nextRow() throws IOException {
        {
            // Peek one byte so that a new row is not started at end of input.
            int b = read();
            if (b < 0) return null;
            unread(b);
        }
        newRow();
        state = State.ROW_START;
        while (true) {
            int b = read();
            switch (state) {
            case ROW_START:
                if (b < 0) {
                    // Only line terminators were left in the input.
                    return null;
                }
                else if ((b == cr) || (b == nl)) {
                    // Blank line, or the second half of a return/newline pair.
                    continue;
                }
                else if (b == delim) {
                    // Leading delimiter: the first field is empty.
                    addField(false);
                    state = State.FIELD_START;
                }
                else if (b == quote) {
                    state = State.IN_QUOTE;
                }
                else {
                    addToField(b);
                    state = State.IN_FIELD;
                }
                break;
            case FIELD_START:
                if (isRowEnd(b)) {
                    // Trailing delimiter: the last field is empty.
                    addField(false);
                    return finishRow();
                }
                else if (b == delim) {
                    // Two delimiters in a row: an empty field between them.
                    addField(false);
                }
                else if (b == quote) {
                    state = State.IN_QUOTE;
                }
                else {
                    addToField(b);
                    state = State.IN_FIELD;
                }
                break;
            case IN_FIELD:
                if (isRowEnd(b)) {
                    addField(false);
                    return finishRow();
                }
                else if (b == delim) {
                    addField(false);
                    state = State.FIELD_START;
                }
                else if (b == quote) {
                    throw new ExternalRowReaderException("QUOTE in the middle of a field");
                }
                else {
                    addToField(b);
                }
                break;
            case IN_QUOTE:
                if (b < 0) {
                    throw new ExternalRowReaderException("EOF inside QUOTE");
                }
                else if (b == quote) {
                    if (escape == quote) {
                        // Must be doubled; peek next character.
                        int next = read();
                        if (next == quote) {
                            addToField(next);
                            continue;
                        }
                        // Push back only a real byte. The end-of-input marker
                        // was never read from the input, and the next read()
                        // reports end of input again, which AFTER_QUOTE treats
                        // as the end of the row.
                        if (next >= 0) {
                            unread(next);
                        }
                    }
                    state = State.AFTER_QUOTE;
                }
                else if (b == escape) {
                    // Non-doubling escape.
                    int escaped = read();
                    if (escaped < 0) throw new ExternalRowReaderException("EOF after ESCAPE");
                    addToField(escaped);
                }
                else {
                    addToField(b);
                }
                break;
            case AFTER_QUOTE:
                // addField(true) here vs. addField(false) elsewhere: the flag
                // presumably tells the superclass the field was quoted
                // (NOTE(review): confirm against RowReader.addField).
                if (isRowEnd(b)) {
                    addField(true);
                    return finishRow();
                }
                else if (b == delim) {
                    addField(true);
                    state = State.FIELD_START;
                }
                else {
                    throw new ExternalRowReaderException("junk after quoted field");
                }
                break;
            }
        }
    }

    /** Returns whether {@code b} ends the current row: end of input, return byte or newline byte. */
    private boolean isRowEnd(int b) {
        return (b < 0) || (b == cr) || (b == nl);
    }
}