package org.embulk.standards; import com.google.common.annotations.VisibleForTesting; import org.embulk.config.Config; import org.embulk.config.ConfigDefault; import org.embulk.config.ConfigSource; import org.embulk.config.Task; import org.embulk.config.TaskSource; import org.embulk.spi.Column; import org.embulk.spi.DataException; import org.embulk.spi.Exec; import org.embulk.spi.FileInput; import org.embulk.spi.PageBuilder; import org.embulk.spi.PageOutput; import org.embulk.spi.ParserPlugin; import org.embulk.spi.Schema; import org.embulk.spi.json.JsonParseException; import org.embulk.spi.json.JsonParser; import org.embulk.spi.type.Types; import org.embulk.spi.util.FileInputInputStream; import org.msgpack.value.Value; import org.slf4j.Logger; import java.io.IOException; public class JsonParserPlugin implements ParserPlugin { public interface PluginTask extends Task { @Config("stop_on_invalid_record") @ConfigDefault("false") boolean getStopOnInvalidRecord(); } private final Logger log; public JsonParserPlugin() { this.log = Exec.getLogger(JsonParserPlugin.class); } @Override public void transaction(ConfigSource configSource, Control control) { PluginTask task = configSource.loadConfig(PluginTask.class); control.run(task.dump(), newSchema()); } @VisibleForTesting Schema newSchema() { return Schema.builder().add("record", Types.JSON).build(); // generate a schema } @Override public void run(TaskSource taskSource, Schema schema, FileInput input, PageOutput output) { PluginTask task = taskSource.loadTask(PluginTask.class); final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord(); final Column column = schema.getColumn(0); // record column try (PageBuilder pageBuilder = newPageBuilder(schema, output); FileInputInputStream in = new FileInputInputStream(input)) { while (in.nextFile()) { try (JsonParser.Stream stream = newJsonStream(in)) { Value value; while ((value = stream.next()) != null) { try { if (!value.isMapValue()) { throw new JsonRecordValidateException( String.format("A Json record must not represent map value but it's %s", value.getValueType().name())); } pageBuilder.setJson(column, value); pageBuilder.addRecord(); } catch (JsonRecordValidateException e) { if (stopOnInvalidRecord) { throw new DataException(String.format("Invalid record: %s", value.toJson()), e); } log.warn(String.format("Skipped record (%s): %s", e.getMessage(), value.toJson())); } } } catch (IOException | JsonParseException e) { throw new DataException(e); } } pageBuilder.finish(); } } private PageBuilder newPageBuilder(Schema schema, PageOutput output) { return new PageBuilder(Exec.getBufferAllocator(), schema, output); } private JsonParser.Stream newJsonStream(FileInputInputStream in) throws IOException { return new JsonParser().open(in); } static class JsonRecordValidateException extends DataException { JsonRecordValidateException(String message) { super(message); } } }