package org.embulk.test;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.io.ByteStreams;
import com.google.inject.Binder;
import com.google.inject.Injector;
import com.google.inject.Module;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.embulk.EmbulkEmbed;
import org.embulk.config.ConfigDiff;
import org.embulk.config.ConfigLoader;
import org.embulk.config.ConfigSource;
import org.embulk.config.ModelManager;
import org.embulk.config.TaskReport;
import org.embulk.exec.PreviewResult;
import org.embulk.plugin.PluginClassLoader;
import org.embulk.spi.ColumnConfig;
import org.embulk.spi.FileOutputRunner;
import org.embulk.spi.Page;
import org.embulk.spi.Schema;
import org.embulk.spi.SchemaConfig;
import org.embulk.spi.TempFileException;
import org.embulk.spi.TempFileSpace;
import org.embulk.spi.type.Type;
import org.junit.rules.TestRule;
import org.junit.rules.TestWatcher;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
import static com.google.common.base.Preconditions.checkArgument;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.file.Files.newBufferedReader;
import static java.util.Locale.ENGLISH;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.file.Files.newBufferedReader;
import static org.embulk.plugin.InjectedPluginSource.registerPluginTo;
import static org.embulk.test.EmbulkTests.copyResource;
public class TestingEmbulk
implements TestRule
{
public static class Builder
{
private List<Module> modules = new ArrayList<>();
Builder()
{ }
public <T> Builder registerPlugin(final Class<T> iface, final String name, final Class<?> impl)
{
modules.add(new Module() {
public void configure(Binder binder)
{
registerPluginTo(binder, iface, name, impl);
}
});
return this;
}
public TestingEmbulk build()
{
return new TestingEmbulk(this);
}
}
/**
 * Creates a new {@link Builder} with no plugins registered.
 *
 * @return a fresh builder
 */
public static Builder builder()
{
return new Builder();
}
// Guice modules taken from the Builder; the constructor stores an immutable copy.
private final List<Module> modules;
// Embedded Embulk instance; assigned by reset() and set to null by destroy().
private EmbulkEmbed embed;
// Temporary file space; assigned by reset() and set to null by destroy().
private TempFileSpace tempFiles;
/**
 * Creates an instance from the given builder and immediately initializes it.
 *
 * <p>The builder's module list is copied, then {@link #reset()} is called so the
 * instance is usable without being applied as a JUnit rule first.
 *
 * @param builder source of the Guice modules to install
 */
TestingEmbulk(Builder builder)
{
this.modules = ImmutableList.copyOf(builder.modules);
reset();
}
/**
 * Discards the current state (via {@link #destroy()}) and sets up a new embedded
 * Embulk instance together with a new temporary file space.
 *
 * @throws TempFileException if the temporary directory cannot be created
 */
public void reset()
{
    destroy();

    this.embed = new EmbulkEmbed.Bootstrap()
            .addModules(modules)
            .overrideModules(TestingBulkLoader.override())
            .initializeCloseable();

    try {
        Path tempDir = Files.createTempDirectory("embulk-test-temp-");
        this.tempFiles = new TempFileSpace(tempDir.toFile());
    }
    catch (IOException ioe) {
        throw new TempFileException(ioe);
    }
}
/**
 * Releases the embedded Embulk instance and the temporary file space, if present.
 *
 * <p>Each resource is set to {@code null} once released, so calling this method
 * again is a no-op for that resource. The temporary files are cleaned up even when
 * destroying the embedded instance throws, so a failed shutdown does not leave
 * test files behind.
 */
public void destroy()
{
    try {
        if (embed != null) {
            embed.destroy();
            embed = null;
        }
    }
    finally {
        // Runs regardless of the outcome above so the temp directory is never leaked.
        if (tempFiles != null) {
            tempFiles.cleanup();
            tempFiles = null;
        }
    }
}
/**
 * Wraps the given statement so that this instance is reset before the test
 * starts and destroyed after it finishes.
 *
 * @param base the statement to wrap
 * @param description description of the test being run
 * @return the wrapped statement
 */
@Override
public Statement apply(Statement base, Description description)
{
    TestWatcher watcher = new EmbulkTestingEmbedWatcher();
    return watcher.apply(base, description);
}
/**
 * JUnit watcher that gives each test a fresh environment: the enclosing
 * {@code TestingEmbulk} is reset when a test starts and destroyed when it finishes.
 */
private class EmbulkTestingEmbedWatcher
        extends TestWatcher
{
    @Override
    protected void starting(Description description)
    {
        TestingEmbulk.this.reset();
    }

    @Override
    protected void finished(Description description)
    {
        TestingEmbulk.this.destroy();
    }
}
/**
 * Creates a temporary file in this instance's temporary file space.
 *
 * @param suffix passed through to {@code TempFileSpace.createTempFile}; callers in
 *     this class pass {@code "csv"}
 * @return path of the created file
 */
public Path createTempFile(String suffix)
{
return tempFiles.createTempFile(suffix).toPath();
}
/**
 * Returns the Guice injector of the embedded Embulk instance.
 *
 * @return the injector obtained from the current {@code EmbulkEmbed}
 */
public Injector injector()
{
return embed.getInjector();
}
/**
 * Asks the embedded Embulk instance for a new config loader.
 *
 * @return the {@link ConfigLoader} returned by {@code EmbulkEmbed.newConfigLoader()}
 */
public ConfigLoader configLoader()
{
return embed.newConfigLoader();
}
/**
 * Creates a new {@link ConfigSource} through {@link #configLoader()}.
 *
 * @return the config source returned by {@code ConfigLoader.newConfigSource()}
 */
public ConfigSource newConfig()
{
return configLoader().newConfigSource();
}
/**
 * Reads the named resource with {@code EmbulkTests.readResource} and parses its
 * content as YAML into a {@link ConfigSource}.
 *
 * @param name resource name, as understood by {@code EmbulkTests.readResource}
 * @return the parsed config
 */
public ConfigSource loadYamlResource(String name)
{
    ConfigLoader loader = configLoader();
    String yaml = EmbulkTests.readResource(name);
    return loader.fromYamlString(yaml);
}
// Column type names accepted by OutputBuilder when it parses the "name:type"
// entries on the first line of an input CSV file.
private static final List<String> SUPPORTED_TYPES = ImmutableList.of(
"boolean", "long", "double", "string", "timestamp", "json"
);
/**
 * Result of a run as exposed to tests.
 *
 * <p>The {@code run()} methods of the builders in this class cast the value
 * returned by {@code embed.run(...)} to this interface.
 */
// NOTE(review): the exact meaning of each accessor is defined by the implementing
// class, which is not visible in this file; confirm there before relying on details.
public static interface RunResult
{
ConfigDiff getConfigDiff();
List<Throwable> getIgnoredExceptions();
Schema getInputSchema();
Schema getOutputSchema();
List<TaskReport> getInputTaskReports();
List<TaskReport> getOutputTaskReports();
}
/**
 * Builds and executes guess, preview and run operations for an input plugin.
 *
 * <p>Configs passed to this builder are deep-copied, so later changes made by the
 * caller do not affect it. {@link #preview()} and {@link #run()} write records as
 * header-less CSV fragments (LF newlines) into a directory next to
 * {@code outputPath} and then concatenate those fragments into {@code outputPath}.
 */
public class InputBuilder
{
    private ConfigSource inConfig = null;
    private List<ConfigSource> filtersConfig = ImmutableList.of();
    private ConfigSource execConfig = newConfig();
    private Path outputPath = null;

    private InputBuilder()
    { }

    /**
     * Sets the {@code in:} config (required).
     *
     * @param inConfig input plugin config; a deep copy is stored
     * @return this builder
     */
    public InputBuilder in(ConfigSource inConfig)
    {
        checkNotNull(inConfig, "inConfig");
        this.inConfig = inConfig.deepCopy();
        return this;
    }

    /**
     * Sets the {@code filters:} configs. Defaults to an empty list.
     *
     * @param filtersConfig filter plugin configs; each element is deep-copied
     * @return this builder
     */
    public InputBuilder filters(List<ConfigSource> filtersConfig)
    {
        checkNotNull(filtersConfig, "filtersConfig");
        ImmutableList.Builder<ConfigSource> builder = ImmutableList.builder();
        for (ConfigSource filter : filtersConfig) {
            builder.add(filter.deepCopy());
        }
        this.filtersConfig = builder.build();
        return this;
    }

    /**
     * Sets the {@code exec:} config. Defaults to an empty config.
     *
     * @param execConfig executor config; a deep copy is stored
     * @return this builder
     */
    public InputBuilder exec(ConfigSource execConfig)
    {
        checkNotNull(execConfig, "execConfig");
        this.execConfig = execConfig.deepCopy();
        return this;
    }

    /**
     * Sets the file that {@link #preview()} and {@link #run()} write their CSV output to.
     *
     * @param outputPath destination file; its file name must end with {@code .csv}
     * @return this builder
     */
    public InputBuilder outputPath(Path outputPath)
    {
        checkNotNull(outputPath, "outputPath");
        this.outputPath = outputPath;
        return this;
    }

    /**
     * Runs guess with the configured exec, in and filters configs.
     *
     * @return the {@code in} section of the guessed {@link ConfigDiff}
     * @throws IllegalStateException if the in config has not been set
     */
    public ConfigDiff guess()
    {
        checkState(inConfig != null, "in config must be set");

        // config = {exec: execConfig, in: inConfig, filters: filtersConfig}
        ConfigSource config = newConfig()
                .set("exec", execConfig)
                .set("in", inConfig)
                .set("filters", filtersConfig);

        return embed.guess(config).getNested("in");
    }

    /**
     * Runs preview with the configured input, then writes the previewed records to
     * {@code outputPath} by running a second job whose input is the preview result.
     *
     * @return the {@link PreviewResult} returned by the preview execution
     * @throws IOException if the fragment directory or the output file cannot be written
     * @throws IllegalStateException if the in config or outputPath has not been set
     * @throws IllegalArgumentException if outputPath does not end with {@code .csv}
     */
    public PreviewResult preview()
            throws IOException
    {
        checkState(inConfig != null, "in config must be set");
        checkState(outputPath != null, "outputPath must be set");

        // Execute preview to get PreviewResult
        ConfigSource previewConfig = newConfig()
                .set("exec", execConfig.set("min_output_tasks", 1))
                .set("in", inConfig)
                .set("filters", filtersConfig);
        PreviewResult result = embed.preview(previewConfig);
        PreviewResultInputPlugin.setPreviewResult(result);

        Path dir = prepareFragmentsDir();

        // Execute run to write PreviewResult's Page objects to output files
        ConfigSource runConfig = newConfig()
                .set("in", newConfig().set("type", "preview_result"))
                .set("out", newFragmentsOutputConfig(dir));
        embed.run(runConfig);

        return buildPreviewResultWithOutput(result, dir, outputPath);
    }

    /**
     * Runs the configured input (and filters) and writes the records to {@code outputPath}.
     *
     * @return the result of the run
     * @throws IOException if the fragment directory or the output file cannot be written
     * @throws IllegalStateException if the in config or outputPath has not been set
     * @throws IllegalArgumentException if outputPath does not end with {@code .csv}
     */
    public RunResult run()
            throws IOException
    {
        checkState(inConfig != null, "in config must be set");
        checkState(outputPath != null, "outputPath must be set");

        Path dir = prepareFragmentsDir();

        // exec: config
        execConfig.set("min_output_tasks", 1);

        // combine exec:, in:, filters: and out:
        ConfigSource config = newConfig()
                .set("exec", execConfig)
                .set("in", inConfig)
                .set("filters", filtersConfig)
                .set("out", newFragmentsOutputConfig(dir));

        // embed.run returns TestingBulkLoader.TestingExecutionResult because
        // LoaderState.buildExecuteResultWithWarningException is overridden.
        RunResult result = (RunResult) embed.run(config);
        return buildRunResultWithOutput(result, dir, outputPath);
    }

    // Validates outputPath and creates the sibling directory (file name minus ".csv") for fragments.
    private Path prepareFragmentsDir()
            throws IOException
    {
        String fileName = outputPath.getFileName().toString();
        checkArgument(fileName.endsWith(".csv"), "outputPath must end with .csv");
        String dirName = fileName.substring(0, fileName.length() - 4);
        // resolveSibling also works when outputPath has no parent (e.g. a bare "out.csv"),
        // where getParent() would return null.
        Path dir = outputPath.resolveSibling(dirName);
        Files.createDirectories(dir);
        return dir;
    }

    // Builds the out: config that writes header-less, LF-terminated CSV fragments into dir.
    private ConfigSource newFragmentsOutputConfig(Path dir)
    {
        return newConfig()
                .set("type", "file")
                .set("path_prefix", dir.resolve("fragments_").toString())
                .set("file_ext", "csv")
                .set("formatter", newConfig()
                        .set("type", "csv")
                        .set("header_line", false)
                        .set("newline", "LF"));
    }
}
/**
 * Builds and executes guess and run operations for a parser plugin, using the
 * {@code file} input plugin to read {@code inputPath}.
 *
 * <p>Configs passed to this builder are deep-copied. {@link #run()} writes records
 * as header-less CSV fragments (LF newlines) into a directory next to
 * {@code outputPath} and then concatenates those fragments into {@code outputPath}.
 */
public class ParserBuilder
{
    private ConfigSource parserConfig = newConfig();
    private ConfigSource execConfig = newConfig();
    private Path inputPath = null;
    private Path outputPath = null;

    private ParserBuilder()
    { }

    /**
     * Sets the {@code parser:} config. Defaults to an empty config.
     *
     * @param parserConfig parser plugin config; a deep copy is stored
     * @return this builder
     */
    public ParserBuilder parser(ConfigSource parserConfig)
    {
        checkNotNull(parserConfig, "parserConfig");
        this.parserConfig = parserConfig.deepCopy();
        return this;
    }

    /**
     * Sets the {@code exec:} config. Defaults to an empty config.
     *
     * @param execConfig executor config; a deep copy is stored
     * @return this builder
     */
    public ParserBuilder exec(ConfigSource execConfig)
    {
        checkNotNull(execConfig, "execConfig");
        this.execConfig = execConfig.deepCopy();
        return this;
    }

    /**
     * Sets the file to parse (required).
     *
     * @param inputPath input file; its absolute path is used as {@code path_prefix}
     * @return this builder
     */
    public ParserBuilder inputPath(Path inputPath)
    {
        checkNotNull(inputPath, "inputPath");
        this.inputPath = inputPath;
        return this;
    }

    /**
     * Copies the named resource into a new temporary file and uses that file as input.
     *
     * @param resourceName resource to copy, as understood by {@code EmbulkTests.copyResource}
     * @return this builder
     * @throws IOException declared by {@code copyResource}; presumably raised when the copy fails
     */
    public ParserBuilder inputResource(String resourceName)
            throws IOException
    {
        checkNotNull(resourceName, "resourceName");
        Path path = createTempFile("csv");
        copyResource(resourceName, path);
        return inputPath(path);
    }

    /**
     * Sets the file that {@link #run()} writes its CSV output to.
     *
     * @param outputPath destination file; its file name must end with {@code .csv}
     * @return this builder
     */
    public ParserBuilder outputPath(Path outputPath)
    {
        checkNotNull(outputPath, "outputPath");
        this.outputPath = outputPath;
        return this;
    }

    /**
     * Runs guess against the input file with the configured parser seed config.
     *
     * @return the {@code in.parser} section of the guessed {@link ConfigDiff}
     * @throws IllegalStateException if inputPath has not been set
     */
    public ConfigDiff guess()
    {
        checkState(inputPath != null, "inputPath must be set");

        // config = {exec: execConfig, in: inConfig}
        ConfigSource config = newConfig()
                .set("exec", execConfig)
                .set("in", newFileInputConfig());

        // embed.guess calls GuessExecutor and returns ConfigDiff
        return embed.guess(config).getNested("in").getNested("parser");
    }

    /**
     * Parses the input file with the configured parser and writes the records to {@code outputPath}.
     *
     * @return the result of the run
     * @throws IOException if the fragment directory or the output file cannot be written
     * @throws IllegalStateException if inputPath or outputPath has not been set
     * @throws IllegalArgumentException if outputPath does not end with {@code .csv}
     */
    public RunResult run()
            throws IOException
    {
        checkState(parserConfig != null, "parser config must be set");
        checkState(inputPath != null, "inputPath must be set");
        checkState(outputPath != null, "outputPath must be set");

        String fileName = outputPath.getFileName().toString();
        checkArgument(fileName.endsWith(".csv"), "outputPath must end with .csv");
        // resolveSibling also works when outputPath has no parent (e.g. a bare "out.csv"),
        // where getParent() would return null.
        Path dir = outputPath.resolveSibling(fileName.substring(0, fileName.length() - 4));
        Files.createDirectories(dir);

        // in: config
        ConfigSource inConfig = newFileInputConfig();

        // exec: config
        execConfig.set("min_output_tasks", 1);

        // out: config
        ConfigSource outConfig = newConfig()
                .set("type", "file")
                .set("path_prefix", dir.resolve("fragments_").toString())
                .set("file_ext", "csv")
                .set("formatter", newConfig()
                        .set("type", "csv")
                        .set("header_line", false)
                        .set("newline", "LF"));

        // config = {exec: execConfig, in: inConfig, out: outConfig}
        ConfigSource config = newConfig()
                .set("exec", execConfig)
                .set("in", inConfig)
                .set("out", outConfig);

        // embed.run returns TestingBulkLoader.TestingExecutionResult because
        // LoaderState.buildExecuteResultWithWarningException is overridden.
        RunResult result = (RunResult) embed.run(config);
        return buildRunResultWithOutput(result, dir, outputPath);
    }

    // Builds the in: config that reads inputPath with the file input plugin and the configured parser.
    private ConfigSource newFileInputConfig()
    {
        return newConfig()
                .set("type", "file")
                .set("path_prefix", inputPath.toAbsolutePath().toString())
                .set("parser", parserConfig);
    }
}
/**
 * Builds and executes a run for an output plugin, feeding it records parsed from
 * a CSV input file.
 *
 * <p>The first line of the input file is read as a comma-separated list of
 * {@code name:type} entries, from which the parser's column list is built.
 * Configs passed to this builder are deep-copied, consistent with the other
 * builders in this class, so {@link #run()} never modifies the caller's objects.
 */
public class OutputBuilder
{
    private ConfigSource outConfig = null;
    private ConfigSource execConfig = newConfig();
    private Path inputPath;
    private SchemaConfig inputSchema;

    public OutputBuilder()
    { }

    /**
     * Sets the {@code out:} config (required).
     *
     * @param outConfig output plugin config; a deep copy is stored
     * @return this builder
     */
    public OutputBuilder out(ConfigSource outConfig)
    {
        checkNotNull(outConfig, "outConfig");
        this.outConfig = outConfig.deepCopy();
        return this;
    }

    /**
     * Sets the {@code exec:} config. Defaults to an empty config.
     *
     * @param execConfig executor config; a deep copy is stored because {@link #run()}
     *     sets {@code min_output_tasks} on it
     * @return this builder
     */
    public OutputBuilder exec(ConfigSource execConfig)
    {
        checkNotNull(execConfig, "execConfig");
        this.execConfig = execConfig.deepCopy();
        return this;
    }

    /**
     * Sets the CSV file to read records from (required).
     *
     * @param inputPath input file; its absolute path must end with {@code .csv}
     * @return this builder
     */
    public OutputBuilder inputPath(Path inputPath)
    {
        checkNotNull(inputPath, "inputPath");
        this.inputPath = inputPath;
        return this;
    }

    /**
     * Copies the named resource into a new temporary file and uses that file as input.
     *
     * @param resourceName resource to copy, as understood by {@code EmbulkTests.copyResource}
     * @return this builder
     * @throws IOException declared by {@code copyResource}; presumably raised when the copy fails
     */
    public OutputBuilder inputResource(String resourceName)
            throws IOException
    {
        checkNotNull(resourceName, "resourceName");
        Path path = createTempFile("csv");
        copyResource(resourceName, path);
        return inputPath(path);
    }

    /**
     * Stores an explicit input schema.
     *
     * @param inputSchema schema to store
     * @return this builder
     */
    // NOTE(review): the stored schema is not read anywhere in this class; run() always
    // derives the columns from the first line of the input file. Confirm whether that is intended.
    public OutputBuilder inputSchema(SchemaConfig inputSchema)
    {
        checkNotNull(inputSchema, "inputSchema");
        this.inputSchema = inputSchema;
        return this;
    }

    /**
     * Reads the input CSV file with the file input plugin and runs the configured output plugin on it.
     *
     * @return the result of the run
     * @throws IOException declared for callers; schema read failures are rethrown unchecked
     * @throws IllegalStateException if the out config or inputPath has not been set
     * @throws IllegalArgumentException if inputPath does not end with {@code .csv}, or its
     *     first line is missing or malformed
     */
    public RunResult run()
            throws IOException
    {
        checkState(outConfig != null, "out config must be set");
        checkState(inputPath != null, "inputPath must be set");

        String fileName = inputPath.toAbsolutePath().toString();
        checkArgument(fileName.endsWith(".csv"), "inputPath must end with .csv");

        // exec: config
        execConfig.set("min_output_tasks", 1);

        // in: config
        ConfigSource inConfig = newConfig()
                .set("type", "file")
                .set("path_prefix", fileName)
                .set("parser", newParserConfig());

        // config = {exec: execConfig, in: inConfig, out: outConfig}
        ConfigSource config = newConfig()
                .set("exec", execConfig)
                .set("in", inConfig)
                .set("out", outConfig);

        // embed.run returns TestingBulkLoader.TestingExecutionResult because
        // LoaderState.buildExecuteResultWithWarningException is overridden.
        return (RunResult) embed.run(config);
    }

    // Builds the csv parser config whose columns come from the input file's first line.
    private ConfigSource newParserConfig()
    {
        return newConfig()
                .set("charset", "UTF-8")
                .set("newline", "LF")
                .set("type", "csv")
                .set("delimiter", ",")
                .set("quote", "\"")
                .set("escape", "\"")
                .set("columns", newSchemaConfig());
    }

    // Reads the first line of inputPath and converts each "name:type" entry into a column.
    private SchemaConfig newSchemaConfig()
    {
        ImmutableList.Builder<ColumnConfig> schema = ImmutableList.builder();
        try (BufferedReader reader = newBufferedReader(inputPath, UTF_8)) {
            String header = reader.readLine();
            // readLine() returns null for an empty file; report that instead of failing with an NPE.
            checkArgument(header != null,
                    "input file must start with a line of name:type columns: %s", inputPath);
            for (String column : header.split(",")) {
                schema.add(newColumnConfig(column));
            }
            return new SchemaConfig(schema.build());
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
    }

    // Parses one "name:type" entry; rejects entries without a type or with an unsupported type.
    private ColumnConfig newColumnConfig(String column)
    {
        String[] tuple = column.split(":", 2);
        checkArgument(tuple.length == 2, "tuple must be a pair of column name and type");
        String type = tuple[1];
        if (!SUPPORTED_TYPES.contains(type)) {
            throw new IllegalArgumentException(String.format(ENGLISH,
                    "Unknown column type %s. Supported types are boolean, long, double, string, timestamp and json: %s",
                    type, column));
        }
        return new ColumnConfig(newConfig()
                .set("name", tuple[0])
                .set("type", type));
    }
}
/**
 * Concatenates the CSV fragments in {@code outputDir} into {@code outputPath}
 * and returns the given preview result unchanged.
 *
 * @param result preview result to hand back
 * @param outputDir directory holding the fragment files
 * @param outputPath file that receives the concatenated output
 * @return {@code result}
 * @throws IOException if the fragments cannot be copied
 */
private PreviewResult buildPreviewResultWithOutput(PreviewResult result, Path outputDir, Path outputPath)
throws IOException
{
copyToPath(outputDir, outputPath);
return result;
}
/**
 * Concatenates the CSV fragments in {@code outputDir} into {@code outputPath}
 * and returns the given run result unchanged.
 *
 * @param result run result to hand back
 * @param outputDir directory holding the fragment files
 * @param outputPath file that receives the concatenated output
 * @return {@code result}
 * @throws IOException if the fragments cannot be copied
 */
private RunResult buildRunResultWithOutput(RunResult result, Path outputDir, Path outputPath)
throws IOException
{
copyToPath(outputDir, outputPath);
return result;
}
/**
 * Concatenates every {@code fragments_*.csv} file found directly in
 * {@code outputDir} into {@code outputPath}, in sorted path order.
 *
 * <p>The output file is opened before the directory is listed, so it is created
 * (or overwritten) even when no fragment exists.
 *
 * @param outputDir directory holding the fragment files
 * @param outputPath file that receives the concatenated bytes
 * @throws IOException if listing, reading or writing fails
 */
private void copyToPath(Path outputDir, Path outputPath)
        throws IOException
{
    try (OutputStream merged = Files.newOutputStream(outputPath)) {
        List<Path> parts = new ArrayList<>();
        try (DirectoryStream<Path> listing = Files.newDirectoryStream(outputDir, "fragments_*.csv")) {
            for (Path part : listing) {
                parts.add(part);
            }
        }

        // Directory listing order is unspecified; sort so the output is deterministic.
        Collections.sort(parts);

        for (Path part : parts) {
            Files.copy(part, merged);
        }
    }
}
/**
 * Creates a builder for guessing, previewing or running an input plugin.
 *
 * @return a new {@link InputBuilder} bound to this instance
 */
public InputBuilder inputBuilder()
{
return new InputBuilder();
}
/**
 * Creates a builder for guessing or running a parser plugin.
 *
 * @return a new {@link ParserBuilder} bound to this instance
 */
public ParserBuilder parserBuilder()
{
return new ParserBuilder();
}
/**
 * Creates a builder for running an output plugin.
 *
 * @return a new {@link OutputBuilder} bound to this instance
 */
public OutputBuilder outputBuilder()
{
return new OutputBuilder();
}
/**
 * Parses {@code inputPath} with the given parser config and writes the records
 * to {@code outputPath} as CSV, using an empty exec config.
 *
 * @param parserConfig parser plugin config
 * @param inputPath file to parse
 * @param outputPath destination file; its name must end with {@code .csv}
 * @return the result of the run
 * @throws IOException if the output cannot be written
 */
public RunResult runParser(ConfigSource parserConfig, Path inputPath, Path outputPath)
        throws IOException
{
    ParserBuilder job = parserBuilder();
    job.parser(parserConfig);
    job.inputPath(inputPath);
    job.outputPath(outputPath);
    return job.run();
}
/**
 * Parses {@code inputPath} with the given parser and exec configs and writes the
 * records to {@code outputPath} as CSV.
 *
 * @param parserConfig parser plugin config
 * @param inputPath file to parse
 * @param outputPath destination file; its name must end with {@code .csv}
 * @param execConfig executor config
 * @return the result of the run
 * @throws IOException if the output cannot be written
 */
public RunResult runParser(ConfigSource parserConfig, Path inputPath, Path outputPath, ConfigSource execConfig)
        throws IOException
{
    ParserBuilder job = parserBuilder();
    job.parser(parserConfig);
    job.inputPath(inputPath);
    job.outputPath(outputPath);
    job.exec(execConfig);
    return job.run();
}
/**
 * Runs the given input config and writes the records to {@code outputPath} as
 * CSV, using an empty exec config and no filters.
 *
 * @param inConfig input plugin config
 * @param outputPath destination file; its name must end with {@code .csv}
 * @return the result of the run
 * @throws IOException if the output cannot be written
 */
public RunResult runInput(ConfigSource inConfig, Path outputPath)
        throws IOException
{
    InputBuilder job = inputBuilder();
    job.in(inConfig);
    job.outputPath(outputPath);
    return job.run();
}
/**
 * Runs the given input config with the given exec config and writes the records
 * to {@code outputPath} as CSV, with no filters.
 *
 * @param inConfig input plugin config
 * @param outputPath destination file; its name must end with {@code .csv}
 * @param execConfig executor config
 * @return the result of the run
 * @throws IOException if the output cannot be written
 */
public RunResult runInput(ConfigSource inConfig, Path outputPath, ConfigSource execConfig)
        throws IOException
{
    InputBuilder job = inputBuilder();
    job.exec(execConfig);
    job.in(inConfig);
    job.outputPath(outputPath);
    return job.run();
}
/**
 * Runs the given output config on records read from the CSV file at
 * {@code inputPath}, using an empty exec config.
 *
 * @param outConfig output plugin config
 * @param inputPath CSV file whose first line lists {@code name:type} columns;
 *     its absolute path must end with {@code .csv}
 * @return the result of the run
 * @throws IOException declared by {@code OutputBuilder.run()}
 */
public RunResult runOutput(ConfigSource outConfig, Path inputPath)
        throws IOException
{
    OutputBuilder job = outputBuilder();
    job.out(outConfig);
    job.inputPath(inputPath);
    return job.run();
}
/**
 * Runs the given output config, with the given exec config, on records read
 * from the CSV file at {@code inputPath}.
 *
 * @param outConfig output plugin config
 * @param inputPath CSV file whose first line lists {@code name:type} columns;
 *     its absolute path must end with {@code .csv}
 * @param execConfig executor config
 * @return the result of the run
 * @throws IOException declared by {@code OutputBuilder.run()}
 */
public RunResult runOutput(ConfigSource outConfig, Path inputPath, ConfigSource execConfig)
        throws IOException
{
    OutputBuilder job = outputBuilder();
    job.exec(execConfig);
    job.out(outConfig);
    job.inputPath(inputPath);
    return job.run();
}
/**
 * Runs guess for the given input seed config, using an empty exec config.
 *
 * @param inSeedConfig seed {@code in:} config
 * @return the {@code in} section of the guessed config diff
 */
public ConfigDiff guessInput(ConfigSource inSeedConfig)
{
    InputBuilder job = inputBuilder();
    job.in(inSeedConfig);
    return job.guess();
}
/**
 * Runs guess for the given input seed config with the given exec config.
 *
 * @param inSeedConfig seed {@code in:} config
 * @param execConfig executor config
 * @return the {@code in} section of the guessed config diff
 */
public ConfigDiff guessInput(ConfigSource inSeedConfig, ConfigSource execConfig)
{
    InputBuilder job = inputBuilder();
    job.exec(execConfig);
    job.in(inSeedConfig);
    return job.guess();
}
/**
 * Runs parser guess on the given file, starting from an empty parser config and
 * an empty exec config.
 *
 * @param inputPath file to guess from
 * @return the {@code in.parser} section of the guessed config diff
 */
public ConfigDiff guessParser(Path inputPath)
{
    ParserBuilder job = parserBuilder();
    job.inputPath(inputPath);
    return job.guess();
}
/**
 * Runs parser guess on the given file, starting from the given parser seed
 * config and an empty exec config.
 *
 * @param parserSeedConfig seed {@code parser:} config
 * @param inputPath file to guess from
 * @return the {@code in.parser} section of the guessed config diff
 */
public ConfigDiff guessParser(ConfigSource parserSeedConfig, Path inputPath)
{
    ParserBuilder job = parserBuilder();
    job.parser(parserSeedConfig);
    job.inputPath(inputPath);
    return job.guess();
}
/**
 * Runs parser guess on the given file, starting from the given parser seed
 * config and using the given exec config.
 *
 * @param parserSeedConfig seed {@code parser:} config
 * @param inputPath file to guess from
 * @param execConfig executor config
 * @return the {@code in.parser} section of the guessed config diff
 */
public ConfigDiff guessParser(ConfigSource parserSeedConfig, Path inputPath, ConfigSource execConfig)
{
    ParserBuilder job = parserBuilder();
    job.parser(parserSeedConfig);
    job.inputPath(inputPath);
    job.exec(execConfig);
    return job.guess();
}
// TODO add runFilter(ConfigSource filterConfig, Path inputPath, Path outputPath) where inputPath is a path to
// a CSV file whose column types can be naturally guessed using csv guess plugin.
}