/**
* Copyright 2011-2017 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.runtime.io.text.csv;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.function.UnaryOperator;
import com.asakusafw.runtime.io.text.FieldWriter;
import com.asakusafw.runtime.io.text.LineSeparator;
import com.asakusafw.runtime.io.text.TextUtil;
import com.asakusafw.runtime.io.text.UnmappableOutput;
import com.asakusafw.runtime.io.text.UnmappableOutputException;
import com.asakusafw.runtime.io.text.driver.FieldOutput;
import com.asakusafw.runtime.io.text.driver.StandardFieldOutputOption;
/**
 * A {@link FieldWriter} for RFC4180 style CSV files.
 * <p>
 * Fields passed to {@link #putField(FieldOutput)} are accumulated in an internal line buffer;
 * nothing reaches the destination writer until {@link #putEndOfRecord()} is called.
 * Problems found while building a record (for example a separator inside a never-quoted field)
 * are collected and reported together as one {@link UnmappableOutputException} at the end of
 * that record.
 * </p>
 * @since 0.9.1
 */
public class CsvFieldWriter implements FieldWriter {

    private final Writer writer;

    private final char fieldSeparator;

    private final char quoteCharacter;

    private final String recordSeparatorSequence;

    private final boolean denyLineFeedInQuote;

    private final QuoteStyle defaultQuoteStyle;

    private final QuoteStyle headerQuoteStyle;

    private final UnaryOperator<CharSequence> transformer;

    // holds the current record (without its record separator) until putEndOfRecord()
    private final StringBuilder lineBuffer = new StringBuilder();

    // zero-based index of the field most recently put into the current record, or -1 if none
    private int currentFieldIndex = -1;

    // problems collected for the current record; reported and cleared by putEndOfRecord()
    private final List<UnmappableOutput> unmappables = new ArrayList<>();

    // lazily allocated scratch buffer used to copy StringBuilder contents to the writer
    private char[] writeBuffer;

    private State lastState = State.BEFORE_RECORD;

    /**
     * Creates a new instance.
     * @param writer the destination writer
     * @param lineSeparator the line separator kind
     * @param fieldSeparator the field separator character
     * @param quoteCharacter the quote character
     * @param allowLineFeed {@code true} to allow LF in field, otherwise {@code false}
     * @param defaultQuoteStyle the default quote style
     * @param headerQuoteStyle the quote style for headers
     * @param transformer the output transformer (nullable); when {@code null}, each line is
     *     written as is
     */
    public CsvFieldWriter(
            Writer writer,
            LineSeparator lineSeparator, char fieldSeparator, char quoteCharacter,
            boolean allowLineFeed,
            QuoteStyle defaultQuoteStyle, QuoteStyle headerQuoteStyle,
            UnaryOperator<CharSequence> transformer) {
        this.writer = writer;
        this.fieldSeparator = fieldSeparator;
        this.quoteCharacter = quoteCharacter;
        this.recordSeparatorSequence = lineSeparator.getSequence();
        this.denyLineFeedInQuote = !allowLineFeed;
        this.defaultQuoteStyle = defaultQuoteStyle;
        this.headerQuoteStyle = headerQuoteStyle;
        this.transformer = transformer == null ? UnaryOperator.identity() : transformer;
    }

    /**
     * Appends a field to the current record buffer.
     * A field separator is inserted first if the record already has a field. If the output has
     * no contents ({@code null}), nothing is appended for it and the problem is recorded so that
     * {@link #putEndOfRecord()} reports it.
     * @param output the field output
     */
    @Override
    public void putField(FieldOutput output) throws IOException {
        CharSequence contents = output.get();
        switch (lastState) {
        case BEFORE_RECORD:
            break;
        case END_OF_FIELD:
            putEndOfFieldBody();
            break;
        default:
            throw new AssertionError(lastState);
        }
        currentFieldIndex++;
        if (contents == null) {
            handleUnmap(UnmappableOutput.ErrorCode.UNDEFINED_NULL_SEQUENCE);
        } else {
            QuoteStyle style = computeQuoteStyle(output);
            switch (style) {
            case NEEDED:
                putFieldComputeQuote(contents);
                break;
            case ALWAYS:
                putFieldAlwaysQuote(contents);
                break;
            case NEVER:
                putFieldNeverQuote(contents);
                break;
            default:
                throw new AssertionError(style);
            }
        }
        lastState = State.END_OF_FIELD;
    }

    /**
     * Decides the quote style for a field from its options.
     * The first explicit {@link QuoteStyle} option wins immediately; otherwise a
     * {@link StandardFieldOutputOption#HEADER} option selects the header style, and with neither
     * the default style applies.
     */
    private QuoteStyle computeQuoteStyle(FieldOutput output) {
        Collection<? extends FieldOutput.Option> options = output.getOptions();
        QuoteStyle result = defaultQuoteStyle;
        for (FieldOutput.Option option : options) {
            if (option instanceof QuoteStyle) {
                return (QuoteStyle) option;
            }
            if (option == StandardFieldOutputOption.HEADER) {
                // keep scanning: a later explicit QuoteStyle still overrides this
                result = headerQuoteStyle;
            }
        }
        return result;
    }

    /** Appends the field, quoting it only if it contains a character that requires quoting. */
    private void putFieldComputeQuote(CharSequence contents) {
        if (isQuoteRequired(contents)) {
            putWithQuote(contents);
        } else {
            emit(contents);
        }
    }

    /** Appends the field surrounded by quote characters. */
    private void putFieldAlwaysQuote(CharSequence contents) {
        putWithQuote(contents);
    }

    /**
     * Appends the field without quoting.
     * Every character is emitted verbatim, but CR, LF, the field separator and the quote
     * character are each recorded as unmappable. A CR immediately followed by LF is recorded
     * only once.
     */
    private void putFieldNeverQuote(CharSequence contents) {
        boolean sawCr = false;
        for (int i = 0, n = contents.length(); i < n; i++) {
            char c = contents.charAt(i);
            if (c == '\r') {
                handleUnmap(UnmappableOutput.ErrorCode.EXTRA_RECORD_SEPARATOR);
                sawCr = true;
            } else {
                if (c == '\n') {
                    // LF right after CR is the tail of CRLF, which was already recorded
                    if (!sawCr) {
                        handleUnmap(UnmappableOutput.ErrorCode.EXTRA_RECORD_SEPARATOR);
                    }
                } else if (c == fieldSeparator) {
                    handleUnmap(UnmappableOutput.ErrorCode.EXTRA_FIELD_SEPARATOR);
                } else if (c == quoteCharacter) {
                    handleUnmap(UnmappableOutput.ErrorCode.RESTRICTED_SEQUENCE,
                            TextUtil.quote(String.valueOf(c)));
                }
                // any non-CR character (including ordinary ones) breaks a pending CRLF pair,
                // so that a later lone LF is still recorded
                sawCr = false;
            }
            emit(c);
        }
    }

    /**
     * Appends the field surrounded by quote characters, doubling each quote character inside.
     * If LF is not allowed in fields, each LF is recorded as unmappable but still emitted.
     */
    private void putWithQuote(CharSequence contents) {
        emit(quoteCharacter);
        for (int i = 0, n = contents.length(); i < n; i++) {
            char c = contents.charAt(i);
            if (c == '\n' && denyLineFeedInQuote) {
                handleUnmap(UnmappableOutput.ErrorCode.RESTRICTED_SEQUENCE,
                        TextUtil.quote(String.valueOf('\n')));
            } else if (c == quoteCharacter) {
                // escape by doubling: this extra one plus the emit below
                emit(c);
            }
            emit(c);
        }
        emit(quoteCharacter);
    }

    /**
     * Finishes the current record: passes the buffered line (without record separator) to the
     * transformer, writes the result followed by the record separator, and resets the state for
     * the next record. If the transformer returns {@code null}, nothing is written for this
     * record.
     * @throws IOException if writing to the destination fails; this includes
     *     {@link UnmappableOutputException}, raised after the record has been handled when any
     *     problem was recorded for it
     */
    @Override
    public void putEndOfRecord() throws IOException {
        CharSequence output = transformer.apply(lineBuffer);
        if (output != null) {
            write(output);
            // the line body is written; reuse the buffer for the record separator
            resetBuffer();
            switch (lastState) {
            case BEFORE_RECORD:
                // a record without any fields
                handleUnmap(UnmappableOutput.ErrorCode.EXTRA_EMPTY_FIELD);
                putEndOfRecordBody();
                break;
            case END_OF_FIELD:
                putEndOfRecordBody();
                break;
            default:
                throw new AssertionError(lastState);
            }
            write(lineBuffer);
        }
        resetBuffer();
        currentFieldIndex = -1;
        lastState = State.BEFORE_RECORD;
        if (!unmappables.isEmpty()) {
            // Raise UnmappableOutputException even if the output was transformed
            // NOTE(review): pass a snapshot because the list is cleared right after; whether
            // the exception copies its argument is not visible from this class
            UnmappableOutputException e = new UnmappableOutputException(new ArrayList<>(unmappables));
            unmappables.clear();
            throw e;
        }
    }

    private void putEndOfFieldBody() {
        emit(fieldSeparator);
    }

    private void putEndOfRecordBody() {
        emit(recordSeparatorSequence);
    }

    /** Returns whether the contents include CR, LF, the field separator or the quote character. */
    private boolean isQuoteRequired(CharSequence cs) {
        for (int i = 0, n = cs.length(); i < n; i++) {
            char c = cs.charAt(i);
            if (c == '\r' || c == '\n' || c == fieldSeparator || c == quoteCharacter) {
                return true;
            }
        }
        return false;
    }

    private void emit(char c) {
        lineBuffer.append(c);
    }

    private void emit(CharSequence string) {
        lineBuffer.append(string);
    }

    /**
     * Writes the character sequence to the destination writer.
     * A {@link StringBuilder} is copied through a reusable char array in chunks.
     */
    private void write(CharSequence output) throws IOException {
        // java.io.Writer#write() may call CharSequence.toString()
        if (output instanceof StringBuilder) {
            StringBuilder src = (StringBuilder) output;
            char[] cbuf = writeBuffer;
            if (cbuf == null) {
                cbuf = new char[4096];
                writeBuffer = cbuf;
            }
            for (int offset = 0, step = cbuf.length, n = src.length(); offset < n; offset += step) {
                int length = Math.min(n - offset, step);
                src.getChars(offset, offset + length, cbuf, 0);
                writer.write(cbuf, 0, length);
            }
        } else {
            writer.append(output);
        }
    }

    private void resetBuffer() {
        lineBuffer.setLength(0);
    }

    private void handleUnmap(UnmappableOutput.ErrorCode kind) {
        handleUnmap(kind, null);
    }

    /** Records a problem for the current field, to be reported at the end of the record. */
    private void handleUnmap(UnmappableOutput.ErrorCode kind, String sequence) {
        unmappables.add(new UnmappableOutput(kind, currentFieldIndex, sequence));
    }

    /**
     * Closes the destination writer.
     * Fields buffered for a record that was not finished by {@link #putEndOfRecord()} are not
     * written.
     */
    @Override
    public void close() throws IOException {
        writer.close();
    }

    private enum State {
        /** No field has been put into the current record yet. */
        BEFORE_RECORD,
        /** At least one field has been put into the current record. */
        END_OF_FIELD,
    }
}