package org.embulk.standards; import com.google.common.base.Optional; import org.embulk.config.Config; import org.embulk.config.ConfigDefault; import org.embulk.spi.time.Timestamp; import org.embulk.spi.time.TimestampFormatter; import org.embulk.config.Task; import org.embulk.config.TaskSource; import org.embulk.config.ConfigSource; import org.embulk.spi.Column; import org.embulk.spi.Schema; import org.embulk.spi.ColumnVisitor; import org.embulk.spi.FormatterPlugin; import org.embulk.spi.Page; import org.embulk.spi.PageOutput; import org.embulk.spi.PageReader; import org.embulk.spi.FileOutput; import org.embulk.spi.util.LineEncoder; import org.embulk.spi.util.Timestamps; import org.embulk.spi.util.Newline; import org.msgpack.value.Value; import java.util.Map; public class CsvFormatterPlugin implements FormatterPlugin { public enum QuotePolicy { ALL("ALL"), MINIMAL("MINIMAL"), NONE("NONE"); private final String string; private QuotePolicy(String string) { this.string = string; } public String getString() { return string; } } public interface PluginTask extends Task, LineEncoder.EncoderTask, TimestampFormatter.Task { @Config("header_line") @ConfigDefault("true") boolean getHeaderLine(); @Config("delimiter") @ConfigDefault("\",\"") char getDelimiterChar(); @Config("quote") @ConfigDefault("\"\\\"\"") char getQuoteChar(); @Config("quote_policy") @ConfigDefault("\"MINIMAL\"") QuotePolicy getQuotePolicy(); @Config("escape") @ConfigDefault("null") Optional<Character> getEscapeChar(); @Config("null_string") @ConfigDefault("\"\"") String getNullString(); @Config("newline_in_field") @ConfigDefault("\"LF\"") Newline getNewlineInField(); @Config("column_options") @ConfigDefault("{}") Map<String, TimestampColumnOption> getColumnOptions(); } public interface TimestampColumnOption extends Task, TimestampFormatter.TimestampColumnOption { } @Override public void transaction(ConfigSource config, Schema schema, FormatterPlugin.Control control) { PluginTask task = config.loadConfig(PluginTask.class); // validate column_options for (String columnName : task.getColumnOptions().keySet()) { schema.lookupColumn(columnName); // throws SchemaConfigException } control.run(task.dump()); } @Override public PageOutput open(TaskSource taskSource, final Schema schema, FileOutput output) { final PluginTask task = taskSource.loadTask(PluginTask.class); final LineEncoder encoder = new LineEncoder(output, task); final TimestampFormatter[] timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions()); final char delimiter = task.getDelimiterChar(); final QuotePolicy quotePolicy = task.getQuotePolicy(); final char quote = task.getQuoteChar() != '\0' ? task.getQuoteChar() : '"'; final char escape = task.getEscapeChar().or(quotePolicy == QuotePolicy.NONE ? '\\' : quote); final String newlineInField = task.getNewlineInField().getString(); final String nullString = task.getNullString(); // create a file encoder.nextFile(); // write header if (task.getHeaderLine()) { writeHeader(schema, encoder, delimiter, quotePolicy, quote, escape, newlineInField, nullString); } return new PageOutput() { private final PageReader pageReader = new PageReader(schema); private final String delimiterString = String.valueOf(delimiter); public void add(Page page) { pageReader.setPage(page); while (pageReader.nextRecord()) { schema.visitColumns(new ColumnVisitor() { public void booleanColumn(Column column) { addDelimiter(column); if (!pageReader.isNull(column)) { addValue(Boolean.toString(pageReader.getBoolean(column))); } else { addNullString(); } } public void longColumn(Column column) { addDelimiter(column); if (!pageReader.isNull(column)) { addValue(Long.toString(pageReader.getLong(column))); } else { addNullString(); } } public void doubleColumn(Column column) { addDelimiter(column); if (!pageReader.isNull(column)) { addValue(Double.toString(pageReader.getDouble(column))); } else { addNullString(); } } public void stringColumn(Column column) { addDelimiter(column); if (!pageReader.isNull(column)) { addValue(pageReader.getString(column)); } else { addNullString(); } } public void timestampColumn(Column column) { addDelimiter(column); if (!pageReader.isNull(column)) { Timestamp value = pageReader.getTimestamp(column); addValue(timestampFormatters[column.getIndex()].format(value)); } else { addNullString(); } } public void jsonColumn(Column column) { addDelimiter(column); if (!pageReader.isNull(column)) { Value value = pageReader.getJson(column); addValue(value.toJson()); } else { addNullString(); } } private void addDelimiter(Column column) { if (column.getIndex() != 0) { encoder.addText(delimiterString); } } private void addValue(String v) { encoder.addText(setEscapeAndQuoteValue(v, delimiter, quotePolicy, quote, escape, newlineInField, nullString)); } private void addNullString() { encoder.addText(nullString); } }); encoder.addNewLine(); } } public void finish() { encoder.finish(); } public void close() { encoder.close(); } }; } private void writeHeader(Schema schema, LineEncoder encoder, char delimiter, QuotePolicy policy, char quote, char escape, String newline, String nullString) { String delimiterString = String.valueOf(delimiter); for (Column column : schema.getColumns()) { if (column.getIndex() != 0) { encoder.addText(delimiterString); } encoder.addText(setEscapeAndQuoteValue(column.getName(), delimiter, policy, quote, escape, newline, nullString)); } encoder.addNewLine(); } private String setEscapeAndQuoteValue(String v, char delimiter, QuotePolicy policy, char quote, char escape, String newline, String nullString) { StringBuilder escapedValue = new StringBuilder(); char previousChar = ' '; boolean isRequireQuote = (policy == QuotePolicy.ALL || policy == QuotePolicy.MINIMAL && v.equals(nullString)) ? true : false; for (int i = 0; i < v.length(); i++) { char c = v.charAt(i); if (policy != QuotePolicy.NONE && c == quote) { escapedValue.append(escape); escapedValue.append(c); isRequireQuote = true; } else if (c == '\r') { if (policy == QuotePolicy.NONE) { escapedValue.append(escape); } escapedValue.append(newline); isRequireQuote = true; } else if (c == '\n') { if (previousChar != '\r') { if (policy == QuotePolicy.NONE) { escapedValue.append(escape); } escapedValue.append(newline); isRequireQuote = true; } } else if (c == delimiter) { if (policy == QuotePolicy.NONE) { escapedValue.append(escape); } escapedValue.append(c); isRequireQuote = true; } else { escapedValue.append(c); } previousChar = c; } if (policy != QuotePolicy.NONE && isRequireQuote) { return setQuoteValue(escapedValue.toString(), quote); } else { return escapedValue.toString(); } } private String setQuoteValue(String v, char quote) { StringBuilder sb = new StringBuilder(); sb.append(quote); sb.append(v); sb.append(quote); return sb.toString(); } }