/**
 * Copyright 2011-2017 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.runtime.io.text.tabular;

import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.function.UnaryOperator;

import com.asakusafw.runtime.io.text.FieldWriter;
import com.asakusafw.runtime.io.text.LineSeparator;
import com.asakusafw.runtime.io.text.TextUtil;
import com.asakusafw.runtime.io.text.UnmappableOutput;
import com.asakusafw.runtime.io.text.UnmappableOutputException;
import com.asakusafw.runtime.io.text.driver.FieldOutput;

/**
 * A {@link FieldWriter} for tabular style text contents.
 *
 * <p>
 * Fields are accumulated into an internal line buffer by {@link #putField(FieldOutput)}, and the
 * buffer is flushed to the destination writer by {@link #putEndOfRecord()}. Characters which cannot be
 * represented safely (e.g. a raw field separator inside a field) are still emitted, but are recorded
 * as {@link UnmappableOutput} entries and reported at the end of the record.
 * </p>
 * @since 0.9.1
 */
public class TabularFieldWriter implements FieldWriter {

    /** Marker of "no escape character"; negative so that it never equals any {@code char} value. */
    private static final int ABSENT = -2;

    private final Writer writer;

    /** The escape character, or {@link #ABSENT} if escape sequences are not defined. */
    private final int escapeCharacter;

    /** Looked up with a raw content character to obtain the character emitted after the escape. */
    private final CharMap escapeEncode;

    /** Looked up with a character following the escape to detect conflicting sequences. */
    private final CharMap escapeDecode;

    private final char fieldSeparator;

    private final String recordSeparatorSequence;

    /** The text emitted for {@code null} fields, or {@code null} if it is not defined. */
    private final String nullSequence;

    private final boolean escapeRecordSeparator;

    private final UnaryOperator<CharSequence> transformer;

    /** Holds the contents of the record being built. */
    private final StringBuilder lineBuffer = new StringBuilder();

    /** The index of the field written last in the current record ({@code -1} before the first field). */
    private int currentFieldIndex = -1;

    /** The unmappable outputs detected in the current record. */
    private final List<UnmappableOutput> unmappables = new ArrayList<>();

    /** Lazily allocated chunk buffer used by {@link #write(CharSequence)}. */
    private char[] writeBuffer;

    private State lastState = State.BEFORE_RECORD;

    /**
     * Creates a new instance.
     * @param writer the destination writer
     * @param lineSeparator the line separator kind
     * @param fieldSeparator the field separator character
     * @param escapeSequences the escape sequences definition (nullable)
     * @param transformer the output transformer (nullable)
     */
    public TabularFieldWriter(
            Writer writer,
            LineSeparator lineSeparator,
            char fieldSeparator,
            EscapeSequence escapeSequences,
            UnaryOperator<CharSequence> transformer) {
        this.writer = writer;
        EscapeSequence esc = escapeSequences;
        this.escapeCharacter = esc == null ? ABSENT : esc.getEscapeCharacter();
        this.escapeEncode = esc == null ? CharMap.EMPTY : CharMap.backward(esc);
        this.escapeDecode = esc == null ? CharMap.EMPTY : CharMap.forward(esc);
        this.recordSeparatorSequence = lineSeparator.getSequence();
        this.nullSequence = buildNullSequence(escapeCharacter, escapeEncode);
        this.fieldSeparator = fieldSeparator;
        this.escapeRecordSeparator = esc != null && esc.canEscapeLineSeparator();
        this.transformer = transformer == null ? UnaryOperator.identity() : transformer;
    }

    /**
     * Builds the two-character sequence (escape character + null key) which represents {@code null}.
     * @param escape the escape character, or {@link #ABSENT}
     * @param map the encoding map
     * @return the null sequence, or {@code null} if there is no escape character or no null key
     */
    private String buildNullSequence(int escape, CharMap map) {
        if (escape == ABSENT || map == null || map.getNullKey() == CharMap.ABSENT) {
            return null;
        }
        return new StringBuilder(2)
                .append((char) escape)
                .append((char) map.getNullKey())
                .toString();
    }

    /**
     * Appends a field to the record being built.
     *
     * <p>
     * If this is not the first field of the record, a field separator is emitted first.
     * A {@code null} content is emitted as the null sequence when one is defined; otherwise an
     * {@code UNDEFINED_NULL_SEQUENCE} entry is recorded and nothing is emitted for the field.
     * </p>
     * @param output the field output
     * @throws IOException declared by the interface; this implementation only appends to a buffer
     */
    @Override
    public void putField(FieldOutput output) throws IOException {
        CharSequence contents = output.get();

        // terminate the previous field (if any) before starting this one
        switch (lastState) {
        case BEFORE_RECORD:
            break;
        case END_OF_FIELD:
            putEndOfFieldBody();
            break;
        case END_OF_FIELD_WITH_ESCAPE:
            putEndOfFieldSawEscape();
            break;
        default:
            throw new AssertionError(lastState);
        }

        // increment before emitting so that unmappable entries refer to this field
        currentFieldIndex++;
        State state = State.BODY;
        if (contents == null) {
            state = putNull();
        } else {
            for (int i = 0, n = contents.length(); i < n; i++) {
                char c = contents.charAt(i);
                switch (state) {
                case BODY:
                    state = putCharBody(c);
                    break;
                case SAW_CR:
                    state = putCharSawCr(c);
                    break;
                case SAW_ESCAPE:
                    state = putCharSawEscape(c);
                    break;
                case SAW_ESCAPE_CR:
                    state = putCharSawEscapeCr(c);
                    break;
                default:
                    throw new AssertionError(state);
                }
            }
        }

        // flush a pending CR (it is held back until the next character is known) and
        // remember whether the field ended with a bare escape character
        switch (state) {
        case BODY:
            lastState = State.END_OF_FIELD;
            break;
        case SAW_ESCAPE:
            lastState = State.END_OF_FIELD_WITH_ESCAPE;
            break;
        case SAW_CR:
            consumeSawCr();
            lastState = State.END_OF_FIELD;
            break;
        case SAW_ESCAPE_CR:
            consumeSawEscapeCr();
            lastState = State.END_OF_FIELD;
            break;
        default:
            throw new AssertionError(state);
        }
    }

    /**
     * Finishes the current record and writes it to the destination writer.
     *
     * <p>
     * The buffered record body is passed to the transformer. If the transformer returns {@code null},
     * neither the body nor the record separator is written. Otherwise the transformed body is
     * written, followed by the record separator. In both cases the state is reset for the next record.
     * </p>
     * @throws IOException if writing to the destination writer failed
     * @throws UnmappableOutputException if any unmappable output was detected in this record
     */
    @Override
    public void putEndOfRecord() throws IOException {
        CharSequence output = transformer.apply(lineBuffer);
        if (output != null) {
            write(output);
            // reuse the line buffer for building the record separator
            resetBuffer();
            switch (lastState) {
            case BEFORE_RECORD:
                // no fields were put into this record
                handleUnmap(UnmappableOutput.ErrorCode.EXTRA_EMPTY_FIELD);
                putEndOfRecordBody();
                break;
            case END_OF_FIELD:
                putEndOfRecordBody();
                break;
            case END_OF_FIELD_WITH_ESCAPE:
                putEndOfRecordSawEscape();
                break;
            default:
                throw new AssertionError(lastState);
            }
            write(lineBuffer);
        }
        resetBuffer();
        currentFieldIndex = -1;
        lastState = State.BEFORE_RECORD;
        if (unmappables.isEmpty() == false) {
            // Raise UnmappableOutputException even if the output was transformed
            UnmappableOutputException e = new UnmappableOutputException(unmappables);
            unmappables.clear();
            throw e;
        }
    }

    /**
     * Emits a content character in the ordinary (no pending CR nor escape) state.
     * @param c the content character
     * @return the next state
     */
    private State putCharBody(char c) {
        int d = escapeEncode.get(c);
        if (d != CharMap.ABSENT) {
            // the character has an escape sequence: emit it in the escaped form
            emit(escapeCharacter);
            emit(d);
            return State.BODY;
        }
        if (c == '\r') {
            // hold back CR until we know whether LF follows
            return State.SAW_CR;
        } else if (c == '\n') {
            if (escapeRecordSeparator) {
                emit(escapeCharacter);
            } else {
                handleUnmap(UnmappableOutput.ErrorCode.EXTRA_RECORD_SEPARATOR);
            }
            emit(c);
            return State.BODY;
        } else if (c == fieldSeparator) {
            // a field separator without an escape sequence cannot be represented in a field
            handleUnmap(UnmappableOutput.ErrorCode.EXTRA_FIELD_SEPARATOR);
            emit(c);
            return State.BODY;
        } else if (c == escapeCharacter) {
            // a raw escape character: its effect depends on the next character
            emit(c);
            return State.SAW_ESCAPE;
        } else {
            emit(c);
            return State.BODY;
        }
    }

    /**
     * Emits a content character which follows a held-back CR.
     * @param c the content character
     * @return the next state
     */
    private State putCharSawCr(char c) {
        if (c == '\n') {
            consumeSawCr();
            emit('\n');
            return State.BODY;
        } else {
            consumeSawCr();
            return putCharBody(c);
        }
    }

    /**
     * Emits a content character which follows a raw escape character that was already emitted.
     * @param c the content character
     * @return the next state
     */
    private State putCharSawEscape(char c) {
        assert escapeEncode.get(escapeCharacter) == CharMap.ABSENT;
        int d = escapeEncode.get(c);
        if (d != CharMap.ABSENT) {
            emit(escapeCharacter);
            emit(d);
            return State.BODY;
        }
        if (c == '\r') {
            return State.SAW_ESCAPE_CR;
        } else if (c == '\n') {
            // NOTE(review): no extra escape is emitted here; presumably the raw escape character
            // emitted just before already escapes this line feed - confirm against the reader
            if (escapeRecordSeparator == false) {
                handleUnmap(UnmappableOutput.ErrorCode.EXTRA_RECORD_SEPARATOR);
            }
            emit(c);
            return State.BODY;
        } else if (c == fieldSeparator) {
            handleUnmap(UnmappableOutput.ErrorCode.EXTRA_FIELD_SEPARATOR);
            emit(c);
            return State.BODY;
        } else if (c == escapeCharacter) {
            assert escapeDecode.get(c) == CharMap.ABSENT;
            emit(c);
            return State.SAW_ESCAPE;
        } else {
            if (escapeDecode.get(c) != CharMap.ABSENT) {
                // The raw escape character followed by this character forms a defined escape sequence.
                // The escape character is held as int, so it must be cast to char here:
                // StringBuilder.append(int) would append its decimal value instead of the character.
                handleUnmap(
                        UnmappableOutput.ErrorCode.CONFLICT_SEQUENCE,
                        TextUtil.quote(new StringBuilder(2).append((char) escapeCharacter).append(c)));
            }
            emit(c);
            return State.BODY;
        }
    }

    /**
     * Emits a content character which follows a raw escape character and a held-back CR.
     * @param c the content character
     * @return the next state
     */
    private State putCharSawEscapeCr(char c) {
        assert escapeEncode.get(escapeCharacter) == CharMap.ABSENT;
        if (c == '\n') {
            consumeSawEscapeCr();
            emit('\n');
            return State.BODY;
        } else {
            consumeSawEscapeCr();
            return putCharBody(c);
        }
    }

    /**
     * Emits the held-back CR, with a preceding escape character if record separators can be escaped.
     * Otherwise the CR is recorded as an extra record separator.
     */
    private void consumeSawCr() {
        if (escapeRecordSeparator) {
            emit(escapeCharacter);
        } else {
            handleUnmap(UnmappableOutput.ErrorCode.EXTRA_RECORD_SEPARATOR);
        }
        emit('\r');
    }

    /**
     * Emits the held-back CR which follows a raw escape character. No extra escape character is
     * emitted; the CR is recorded as an extra record separator if record separators cannot be escaped.
     */
    private void consumeSawEscapeCr() {
        assert escapeEncode.get(escapeCharacter) == CharMap.ABSENT;
        assert escapeEncode.get('\r') == CharMap.ABSENT;
        if (escapeRecordSeparator == false) {
            handleUnmap(UnmappableOutput.ErrorCode.EXTRA_RECORD_SEPARATOR);
        }
        emit('\r');
    }

    /** Emits a field separator. */
    private void putEndOfFieldBody() {
        emit(fieldSeparator);
    }

    /**
     * Emits a field separator after a field which ended with a raw escape character.
     * If the field separator has an escape sequence defined, a {@code LOST_FIELD_SEPARATOR}
     * entry is recorded.
     */
    private void putEndOfFieldSawEscape() {
        assert escapeEncode.get(escapeCharacter) == CharMap.ABSENT;
        if (escapeEncode.get(fieldSeparator) != CharMap.ABSENT) {
            handleUnmap(UnmappableOutput.ErrorCode.LOST_FIELD_SEPARATOR);
        }
        putEndOfFieldBody();
    }

    /** Emits the record separator sequence. */
    private void putEndOfRecordBody() {
        emit(recordSeparatorSequence);
    }

    /**
     * Emits the record separator sequence after a field which ended with a raw escape character.
     * If record separators can be escaped, a {@code LOST_RECORD_SEPARATOR} entry is recorded.
     */
    private void putEndOfRecordSawEscape() {
        assert escapeEncode.get(escapeCharacter) == CharMap.ABSENT;
        if (escapeRecordSeparator) {
            handleUnmap(UnmappableOutput.ErrorCode.LOST_RECORD_SEPARATOR);
        }
        putEndOfRecordBody();
    }

    /**
     * Emits the null sequence, or records {@code UNDEFINED_NULL_SEQUENCE} if it is not defined.
     * @return the next state (always {@link State#BODY})
     */
    private State putNull() {
        if (nullSequence == null) {
            handleUnmap(UnmappableOutput.ErrorCode.UNDEFINED_NULL_SEQUENCE);
        } else {
            emit(nullSequence);
        }
        return State.BODY;
    }

    /**
     * Appends a character to the line buffer.
     * @param c the character as a non-negative int; {@code char} arguments resolve to this overload
     */
    private void emit(int c) {
        assert c >= 0;
        lineBuffer.append((char) c);
    }

    /**
     * Appends a string to the line buffer.
     * @param string the string
     */
    private void emit(String string) {
        lineBuffer.append(string);
    }

    /**
     * Writes the character sequence to the destination writer.
     * @param output the contents to write
     * @throws IOException if writing failed
     */
    private void write(CharSequence output) throws IOException {
        // java.io.Writer#write() may call CharSequence.toString()
        if (output instanceof StringBuilder) {
            // copy through a reusable fixed-size chunk buffer instead of materializing a String
            StringBuilder src = (StringBuilder) output;
            char[] cbuf = writeBuffer;
            if (cbuf == null) {
                cbuf = new char[4096];
                writeBuffer = cbuf;
            }
            for (int offset = 0, step = cbuf.length, n = output.length(); offset < n; offset += step) {
                int length = Math.min(n - offset, step);
                src.getChars(offset, offset + length, cbuf, 0);
                writer.write(cbuf, 0, length);
            }
        } else {
            writer.append(output);
        }
    }

    /** Clears the line buffer. */
    private void resetBuffer() {
        lineBuffer.setLength(0);
    }

    /**
     * Records an unmappable output for the current field without a related sequence.
     * @param kind the error kind
     */
    private void handleUnmap(UnmappableOutput.ErrorCode kind) {
        handleUnmap(kind, null);
    }

    /**
     * Records an unmappable output for the current field.
     * @param kind the error kind
     * @param sequence the related character sequence (nullable)
     */
    private void handleUnmap(UnmappableOutput.ErrorCode kind, String sequence) {
        unmappables.add(new UnmappableOutput(kind, currentFieldIndex, sequence));
    }

    /**
     * Closes the destination writer.
     * @throws IOException if closing the writer failed
     */
    @Override
    public void close() throws IOException {
        writer.close();
    }

    /** The writer states: the first five are per-character states, the rest are between fields. */
    private enum State {
        BEFORE_RECORD,
        BODY,
        SAW_CR,
        SAW_ESCAPE,
        SAW_ESCAPE_CR,
        END_OF_FIELD,
        END_OF_FIELD_WITH_ESCAPE,
    }
}