package org.embulk.exec; import java.util.List; import java.util.ArrayList; import javax.validation.constraints.NotNull; import com.google.inject.Inject; import com.google.inject.Injector; import com.google.common.base.Throwables; import org.embulk.config.Config; import org.embulk.config.ConfigDefault; import org.embulk.config.Task; import org.embulk.config.TaskSource; import org.embulk.config.ConfigSource; import org.embulk.config.TaskReport; import org.embulk.exec.SamplingParserPlugin.SampleBufferTask; import org.embulk.plugin.PluginType; import org.embulk.spi.Buffer; import org.embulk.spi.FileInputRunner; import org.embulk.spi.Schema; import org.embulk.spi.Page; import org.embulk.spi.PageOutput; import org.embulk.spi.PageReader; import org.embulk.spi.InputPlugin; import org.embulk.spi.FilterPlugin; import org.embulk.spi.Exec; import org.embulk.spi.ExecSession; import org.embulk.spi.ExecAction; import org.embulk.spi.util.Filters; import org.slf4j.Logger; public class PreviewExecutor { private final Injector injector; private final ConfigSource systemConfig; public interface PreviewTask extends Task { @Config("exec") @ConfigDefault("{}") public ConfigSource getExecConfig(); @Config("in") @NotNull public ConfigSource getInputConfig(); @Config("filters") @ConfigDefault("[]") public List<ConfigSource> getFilterConfigs(); // TODO take preview_sample_rows from exec: config @Config("preview_sample_rows") @ConfigDefault("15") public int getSampleRows(); public TaskSource getInputTask(); public void setInputTask(TaskSource taskSource); } public interface PreviewExecutorTask extends Task { @Config("preview_sample_buffer_bytes") @ConfigDefault("32768") // 32 * 1024 public int getSampleBufferBytes(); } @Inject public PreviewExecutor(Injector injector, @ForSystemConfig ConfigSource systemConfig) { this.injector = injector; this.systemConfig = systemConfig; } public PreviewResult preview(ExecSession exec, final ConfigSource config) { try { return Exec.doWith(exec.forPreview(), new ExecAction<PreviewResult>() { public PreviewResult run() { try (SetCurrentThreadName dontCare = new SetCurrentThreadName("preview")) { return doPreview(config); } } }); } catch (Exception ex) { throw Throwables.propagate(ex.getCause()); } } protected InputPlugin newInputPlugin(PreviewTask task) { return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type")); } protected List<FilterPlugin> newFilterPlugins(PreviewTask task) { return Filters.newFilterPluginsFromConfigSources(Exec.session(), task.getFilterConfigs()); } private PreviewResult doPreview(ConfigSource config) { PreviewTask task = config.loadConfig(PreviewTask.class); InputPlugin inputPlugin = newInputPlugin(task); List<FilterPlugin> filterPlugins = newFilterPlugins(task); if (inputPlugin instanceof FileInputRunner) { // file input runner Buffer sample = SamplingParserPlugin.runFileInputSampling((FileInputRunner)inputPlugin, config.getNested("in"), createSampleBufferConfigFromExecConfig(task.getExecConfig())); FileInputRunner previewRunner = new FileInputRunner(new BufferFileInputPlugin(sample)); return doPreview(task, previewRunner, filterPlugins); } else { return doPreview(task, inputPlugin, filterPlugins); } } private static ConfigSource createSampleBufferConfigFromExecConfig(ConfigSource execConfig) { final PreviewExecutorTask execTask = execConfig.loadConfig(PreviewExecutorTask.class); return Exec.newConfigSource().set("sample_buffer_bytes", execTask.getSampleBufferBytes()); } private PreviewResult doPreview(final PreviewTask task, final InputPlugin input, final List<FilterPlugin> filterPlugins) { try { input.transaction(task.getInputConfig(), new InputPlugin.Control() { public List<TaskReport> run(final TaskSource inputTask, Schema inputSchema, final int taskCount) { Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() { public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas) { Schema inputSchema = filterSchemas.get(0); Schema outputSchema = filterSchemas.get(filterSchemas.size() - 1); PageOutput out = new SamplingPageOutput(task.getSampleRows(), outputSchema); try { for (int taskIndex=0; taskIndex < taskCount; taskIndex++) { try { out = Filters.open(filterPlugins, filterTasks, filterSchemas, out); input.run(inputTask, inputSchema, taskIndex, out); } catch (NoSampleException ex) { if (taskIndex == taskCount - 1) { throw ex; } } } } finally { out.close(); } } }); // program never reaches here because SamplingPageOutput.finish throws an error. throw new NoSampleException("No input records to preview"); } }); throw new AssertionError("PreviewExecutor executor must throw PreviewedNoticeError"); } catch (PreviewedNoticeError previewed) { return previewed.getPreviewResult(); } } private static class SamplingPageOutput implements PageOutput { private final Logger log = Exec.getLogger(this.getClass()); private final int sampleRows; private final Schema schema; private List<Page> pages; private int recordCount; private PreviewResult res; public SamplingPageOutput(int sampleRows, Schema schema) { this.sampleRows = sampleRows; this.schema = schema; this.pages = new ArrayList<Page>(); this.res = null; } public int getRecordCount() { return recordCount; } @Override public void add(Page page) { pages.add(page); recordCount += PageReader.getRecordCount(page); if (recordCount >= sampleRows) { finish(); } } @Override public void finish() { if (res != null) { log.error("PreviewResult recreation will cause a bug. The plugin must call PageOutput#finish() only once."); } if (recordCount == 0) { throw new NoSampleException("No input records to preview"); } res = new PreviewResult(schema, pages); pages = null; throw new PreviewedNoticeError(res); } @Override public void close() { if (pages != null) { for (Page page : pages) { page.release(); } pages = null; } } } }