package org.embulk.exec;
import java.util.List;
import java.util.Set;
import java.util.ArrayList;
import java.util.concurrent.ExecutionException;
import com.google.common.collect.ImmutableList;
import com.google.common.base.Throwables;
import com.google.inject.Inject;
import com.google.inject.Binder;
import com.google.inject.multibindings.Multibinder;
import org.embulk.plugin.PluginType;
import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;
import org.embulk.config.ConfigDiff;
import org.embulk.config.Task;
import org.embulk.config.TaskSource;
import org.embulk.config.ConfigSource;
import org.embulk.config.TaskReport;
import org.embulk.spi.Schema;
import org.embulk.spi.Page;
import org.embulk.spi.Buffer;
import org.embulk.spi.InputPlugin;
import org.embulk.spi.FileInputPlugin;
import org.embulk.spi.ParserPlugin;
import org.embulk.spi.GuessPlugin;
import org.embulk.spi.Exec;
import org.embulk.spi.ExecAction;
import org.embulk.spi.ExecSession;
import org.embulk.spi.FileInput;
import org.embulk.spi.PageOutput;
import org.embulk.spi.TransactionalFileInput;
import org.embulk.spi.FileInputRunner;
import static org.embulk.spi.util.Inputs.each;
public class GuessExecutor
{
// Base list of guess plugins, built once by the constructor: the injected
// @ForGuess set followed by the "guess_plugins" entries of the system config.
// guessParserConfig(Buffer, ConfigSource, ConfigSource) copies it before
// applying the per-run additions and exclusions.
private final List<PluginType> defaultGuessPlugins;
/**
 * Options read from the system config by the constructor.
 */
private interface GuessExecutorSystemTask extends Task
{
    /** Guess plugin types listed under {@code guess_plugins}; an empty list when the key is absent. */
    @Config("guess_plugins")
    @ConfigDefault("[]")
    List<PluginType> getGuessPlugins();
}
/**
 * Options read from the {@code exec} section of a guess run.
 */
private interface GuessExecutorTask extends Task
{
    /** Guess plugin types to use in addition to the defaults; empty when the key is absent. */
    @Config("guess_plugins")
    @ConfigDefault("[]")
    List<PluginType> getGuessPlugins();

    /** Guess plugin types to drop from the combined list; empty when the key is absent. */
    @Config("exclude_guess_plugins")
    @ConfigDefault("[]")
    List<PluginType> getExcludeGuessPlugins();

    /** Value of {@code guess_sample_buffer_bytes}. */
    @Config("guess_sample_buffer_bytes")
    @ConfigDefault("32768") // 32 * 1024
    int getSampleBufferBytes();
}
/**
 * Contributes {@code type} to the {@link ForGuess}-annotated set binding of
 * {@link PluginType}, which the constructor receives as its default guess plugins.
 *
 * @param binder Guice binder the set contribution is registered on
 * @param type guess plugin type to add
 */
public static void registerDefaultGuessPluginTo(Binder binder, PluginType type)
{
    Multibinder.newSetBinder(binder, PluginType.class, ForGuess.class)
        .addBinding()
        .toInstance(type);
}
/**
 * Creates a new config whose {@code sample_buffer_bytes} entry carries the
 * {@code guess_sample_buffer_bytes} value of the given exec config.
 *
 * Used by FileInputRunner#guess(..)
 *
 * @param execConfig exec section to read the sample size from
 * @return a fresh config holding only {@code sample_buffer_bytes}
 */
public static ConfigSource createSampleBufferConfigFromExecConfig(ConfigSource execConfig)
{
    final GuessExecutorTask task = execConfig.loadConfig(GuessExecutorTask.class);
    final ConfigSource sampleConfig = Exec.newConfigSource();
    return sampleConfig.set("sample_buffer_bytes", task.getSampleBufferBytes());
}
/**
 * Builds the default guess plugin list: the injected {@link ForGuess} set first,
 * then the {@code guess_plugins} entries of the system config.
 *
 * @param systemConfig system-level config, loaded as {@link GuessExecutorSystemTask}
 * @param defaultGuessPlugins plugin types registered through the {@link ForGuess} set binding
 */
@Inject
public GuessExecutor(@ForSystemConfig ConfigSource systemConfig,
        @ForGuess Set<PluginType> defaultGuessPlugins)
{
    final GuessExecutorSystemTask task = systemConfig.loadConfig(GuessExecutorSystemTask.class);

    this.defaultGuessPlugins = ImmutableList.<PluginType>builder()
        .addAll(defaultGuessPlugins)
        .addAll(task.getGuessPlugins())
        .build();
}
/**
 * Runs {@code doGuess(config)} inside the given session via {@link Exec#doWith},
 * while a {@code SetCurrentThreadName("guess")} resource is open.
 *
 * @param exec session to run the guess in
 * @param config configuration to guess from
 * @return the guessed configuration diff
 */
public ConfigDiff guess(ExecSession exec, final ConfigSource config)
{
    final ExecAction<ConfigDiff> guessAction = new ExecAction<ConfigDiff>() {
        @Override
        public ConfigDiff run()
        {
            try (SetCurrentThreadName ignored = new SetCurrentThreadName("guess")) {
                return doGuess(config);
            }
        }
    };

    try {
        return Exec.doWith(exec, guessAction);
    } catch (ExecutionException ex) {
        // unwrap so callers see the failure raised by the action itself
        throw Throwables.propagate(ex.getCause());
    }
}
/**
 * Instantiates the input plugin named by the {@code type} entry of the input config.
 * Protected, so a subclass can supply a different plugin.
 *
 * @param inputConfig the {@code in} section of the configuration
 * @return a new input plugin instance
 */
protected InputPlugin newInputPlugin(ConfigSource inputConfig)
{
    final PluginType type = inputConfig.get(PluginType.class, "type");
    return Exec.newPlugin(InputPlugin.class, type);
}
/**
 * Asks the configured input plugin to guess its own configuration and wraps
 * the result under the {@code in} key.
 *
 * A plugin implementing {@code ConfigurableGuessInputPlugin} also receives the
 * {@code exec} section; any other plugin is called through
 * {@code InputPlugin.guess(ConfigSource)}.
 *
 * @param config full configuration; must contain an {@code in} section
 * @return a diff whose {@code in} entry holds what the input plugin guessed
 * @throws UnsupportedOperationException if the plugin does not implement
 *         {@code guess(ConfigSource)}; the underlying {@link AbstractMethodError}
 *         is attached as the cause
 */
private ConfigDiff doGuess(ConfigSource config)
{
    ConfigSource inputConfig = config.getNested("in");
    ConfigSource execConfig = config.getNestedOrGetEmpty("exec");

    InputPlugin input = newInputPlugin(inputConfig);

    ConfigDiff inputGuessed;
    if (input instanceof ConfigurableGuessInputPlugin) {
        inputGuessed = ((ConfigurableGuessInputPlugin) input).guess(execConfig, inputConfig);
    }
    else {
        try {
            inputGuessed = input.guess(inputConfig);
        } catch (AbstractMethodError ex) {
            // for backward compatibility with embulk v0.4 interface.
            // Pass the linkage error as the cause so its stack trace is not lost.
            throw new UnsupportedOperationException(input.getClass().getSimpleName()+".guess(ConfigSource) is not implemented. This input plugin does not support guessing.", ex);
        }
    }

    ConfigDiff wrapped = Exec.newConfigDiff();
    wrapped.getNestedOrSetEmpty("in").merge(inputGuessed);
    return wrapped;
}
/**
 * Guesses the parser configuration for a sample buffer.  The plugin list is
 * the defaults plus the exec config's {@code guess_plugins}, minus its
 * {@code exclude_guess_plugins}.
 *
 * Called by FileInputRunner.
 *
 * @param sample sample bytes to guess from
 * @param inputConfig the input configuration being guessed
 * @param execConfig the {@code exec} section, loaded as {@link GuessExecutorTask}
 * @return the guessed configuration diff
 */
public ConfigDiff guessParserConfig(Buffer sample, ConfigSource inputConfig, ConfigSource execConfig)
{
    final GuessExecutorTask task = execConfig.loadConfig(GuessExecutorTask.class);

    // start from a mutable copy so the shared default list is never modified
    final List<PluginType> effectivePlugins = new ArrayList<PluginType>(defaultGuessPlugins);
    effectivePlugins.addAll(task.getGuessPlugins());
    effectivePlugins.removeAll(task.getExcludeGuessPlugins());

    return guessParserConfig(sample, inputConfig, effectivePlugins);
}
/**
 * Repeatedly runs the sample through a {@link FileInputRunner} whose parser is
 * forced to {@code system_guess}, accumulating what each round guesses.
 *
 * A round ends when a {@link GuessedNoticeError} carrying the guessed config is
 * caught.  Guessing stops once a round adds nothing new, or after 10 rounds.
 *
 * @param sample sample bytes served through {@code BufferFileInputPlugin}
 * @param config the input configuration being guessed
 * @param guessPlugins guess plugin types handed to the guess parser
 * @return the accumulated guessed configuration
 */
private ConfigDiff guessParserConfig(Buffer sample,
        ConfigSource config, List<PluginType> guessPlugins)
{
    // repeat guessing up to 10 times
    final int maxRounds = 10;

    ConfigDiff lastGuessed = Exec.newConfigDiff();
    int round = 0;
    while (round < maxRounds) {
        // include last-guessed config to run guess input
        final ConfigSource originalConfig = config.deepCopy().merge(lastGuessed);
        final ConfigSource guessInputConfig = originalConfig.deepCopy();
        guessInputConfig.getNestedOrSetEmpty("parser")
            .set("type", "system_guess")  // override in.parser.type so that FileInputRunner.run uses GuessParserPlugin
            .set("guess_plugins", guessPlugins)
            .set("orig_config", originalConfig);

        // run FileInputPlugin
        final FileInputRunner runner = new FileInputRunner(new BufferFileInputPlugin(sample));
        final InputPlugin.Control control = new InputPlugin.Control() {
            @Override
            public List<TaskReport> run(TaskSource inputTaskSource, Schema schema, int taskCount)
            {
                if (taskCount == 0) {
                    throw new NoSampleException("No input files to guess");
                }

                // the guess parser is expected to throw before any page is emitted
                final PageOutput unexpectedPages = new PageOutput() {
                    @Override
                    public void add(Page page)
                    {
                        throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration");  // TODO exception class
                    }

                    @Override
                    public void finish() { }

                    @Override
                    public void close() { }
                };
                runner.run(inputTaskSource, null, 0, unexpectedPages);
                throw new AssertionError("Guess executor must throw GuessedNoticeError");
            }
        };

        ConfigDiff guessed;
        try {
            runner.transaction(guessInputConfig, control);
            throw new AssertionError("Guess executor must throw GuessedNoticeError");
        } catch (GuessedNoticeError notice) {
            // merge to the last-guessed config
            guessed = lastGuessed.deepCopy().merge(notice.getGuessedConfig());
        }

        if (lastGuessed.equals(guessed)) {
            // not changed
            return lastGuessed;
        }
        lastGuessed = guessed;
        round++;
    }

    return lastGuessed;
}
public static class GuessParserPlugin
implements ParserPlugin
{
/**
 * Parser-section options consumed by the guess parser; both keys are written
 * by {@code GuessExecutor.guessParserConfig}.
 */
private interface PluginTask extends Task
{
    /** Guess plugin types to instantiate and run, in order. */
    @Config("guess_plugins")
    List<PluginType> getGuessPluginTypes();

    /** The configuration the guess plugins are asked to improve. */
    @Config("orig_config")
    ConfigSource getOriginalConfig();
}
/**
 * Loads the parser config as a {@code PluginTask} and passes its dump to the
 * control with a {@code null} schema.
 */
@Override
public void transaction(ConfigSource config, ParserPlugin.Control control)
{
    final TaskSource taskSource = config.loadConfig(PluginTask.class).dump();
    control.run(taskSource, null);
}
/**
 * Reads a sample from the input, runs the configured guess plugins on it and
 * reports the outcome by throwing {@link GuessedNoticeError}.  This method
 * never returns normally.
 *
 * Plugins run in order; the loop stops at the first plugin whose accumulated
 * guess changes the original config.
 *
 * @throws GuessedNoticeError always, carrying the merged guessed config
 */
@Override
public void run(TaskSource taskSource, Schema schema,
        FileInput input, PageOutput pageOutput)
{
    final PluginTask task = taskSource.loadTask(PluginTask.class);
    final ConfigSource originalConfig = task.getOriginalConfig();

    // get sample buffer
    final Buffer sample = readSample(input, 32*1024);  // TODO get sample size from system config. See also SamplingParserPlugin().

    // load guess plugins
    final ImmutableList.Builder<GuessPlugin> loaded = ImmutableList.builder();
    for (PluginType guessType : task.getGuessPluginTypes()) {
        loaded.add(Exec.newPlugin(GuessPlugin.class, guessType));
    }
    final List<GuessPlugin> guessPlugins = loaded.build();

    // run guess plugins
    final ConfigSource mergedConfig = originalConfig.deepCopy();
    final ConfigDiff mergedGuessed = Exec.newConfigDiff();
    for (GuessPlugin guessPlugin : guessPlugins) {
        final ConfigDiff guessed = addAssumedDecoderConfigs(
                originalConfig, guessPlugin.guess(originalConfig, sample));
        mergedGuessed.merge(guessed);
        mergedConfig.merge(mergedGuessed);
        if (!mergedConfig.equals(originalConfig)) {
            // config updated
            break;
        }
    }

    // whether or not anything changed, the result travels back as an error
    throw new GuessedNoticeError(mergedGuessed);
}
/**
 * Reads up to {@code sampleSize} bytes from the input into a new buffer.
 *
 * @param fileInput input to sample
 * @param sampleSize capacity of the sample buffer in bytes
 * @return the sample buffer, holding at least one byte
 * @throws NoSampleException if nothing could be read
 */
private static Buffer readSample(FileInput fileInput, int sampleSize)
{
    final Buffer buffer = Buffer.allocate(sampleSize);
    try {
        SamplingParserPlugin.readSample(fileInput, buffer, 0, sampleSize);
    } catch (RuntimeException ignored) {
        // ignores exceptions because FileDecoderPlugin can throw exceptions
        // such as "Unexpected end of ZLIB input stream" if decoder plugin
        // is wrongly guessed.
    }

    if (buffer.limit() <= 0) {
        throw new NoSampleException("No input buffer to guess");
    }
    return buffer;
}
// Concrete list type used as the class token when reading the "decoders" entry.
private static class ConfigSourceList extends ArrayList<ConfigSource>
{
}
/**
 * Pads the guessed {@code decoders} list with one empty config per decoder
 * already present in the original config, placed before the guessed entries.
 * Presumably this keeps the guessed decoders positioned after the existing
 * ones when the diff is merged -- confirm against the merge semantics.
 *
 * @param originalConfig config whose existing {@code decoders} are counted
 * @param guessed diff returned by a guess plugin
 * @return {@code guessed} unchanged when it has no {@code decoders} entry,
 *         otherwise the result of replacing that entry with the padded list
 */
private static ConfigDiff addAssumedDecoderConfigs(ConfigSource originalConfig, ConfigDiff guessed)
{
    final List<ConfigSource> guessedDecoders = guessed.get(ConfigSourceList.class, "decoders", null);
    if (guessedDecoders == null) {
        return guessed;
    }

    final List<ConfigSource> assumedDecoders = originalConfig.get(ConfigSourceList.class, "decoders", new ConfigSourceList());
    final ImmutableList.Builder<ConfigSource> padded = ImmutableList.builder();
    final int assumedCount = assumedDecoders.size();
    for (int i = 0; i < assumedCount; i++) {
        // empty placeholder for an already-configured decoder
        padded.add(Exec.newConfigSource());
    }
    padded.addAll(guessedDecoders);

    return guessed.set("decoders", padded.build());
}
}
public static class GuessedNoticeError
extends Error
{
private final ConfigDiff guessedConfig;
public GuessedNoticeError(ConfigDiff guessedConfig)
{
this.guessedConfig = guessedConfig;
}
public ConfigDiff getGuessedConfig()
{
return guessedConfig;
}
}
}