package org.embulk.standards;
import java.util.List;
import java.util.ArrayList;
import java.util.Collections;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.Files;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.FileVisitResult;
import java.nio.file.attribute.BasicFileAttributes;
import com.google.common.collect.ImmutableList;
import com.google.common.base.Optional;
import org.embulk.config.Config;
import org.embulk.config.ConfigInject;
import org.embulk.config.ConfigDefault;
import org.embulk.config.Task;
import org.embulk.config.TaskSource;
import org.embulk.config.ConfigSource;
import org.embulk.config.ConfigDiff;
import org.embulk.config.TaskReport;
import org.embulk.spi.BufferAllocator;
import org.embulk.spi.Exec;
import org.embulk.spi.FileInputPlugin;
import org.embulk.spi.TransactionalFileInput;
import org.embulk.spi.util.InputStreamTransactionalFileInput;
import org.slf4j.Logger;
import java.nio.file.FileVisitOption;
import java.util.EnumSet;
import java.util.Set;
/**
 * File input plugin that reads files from the local file system.
 *
 * <p>{@link #transaction} lists the files selected by the {@code path_prefix} option, stores that list in
 * the task and runs one task per listed file. {@link #open} opens the file that belongs to a task index.
 * The {@link ConfigDiff} produced by {@link #resume} carries {@code last_path}; when it is given back as
 * configuration, {@link #listFiles} leaves out every path that compares less than or equal to it.
 */
public class LocalFileInputPlugin
        implements FileInputPlugin
{
    /**
     * Task definition: user-facing options plus the file list computed in {@link #transaction}.
     */
    public interface PluginTask
            extends Task
    {
        /** Directory, or directory plus file name prefix, that selects the files to read. */
        @Config("path_prefix")
        String getPathPrefix();

        /** Paths comparing less than or equal to this value (as strings) are not listed; absent by default. */
        @Config("last_path")
        @ConfigDefault("null")
        Optional<String> getLastPath();

        /** Whether the directory walk follows symbolic links; {@code false} by default. */
        @Config("follow_symlinks")
        @ConfigDefault("false")
        boolean getFollowSymlinks();

        /** Files selected by {@link LocalFileInputPlugin#listFiles}; set in {@code transaction}. */
        List<String> getFiles();

        void setFiles(List<String> files);

        @ConfigInject
        BufferAllocator getBufferAllocator();
    }

    private final Logger log = Exec.getLogger(getClass());

    // Normalized form of ".", used wherever a path has no parent component.
    private static final Path CURRENT_DIR = Paths.get(".").normalize();

    /**
     * Lists the input files, stores them in the task and delegates to {@link #resume} with one task
     * per file.
     *
     * @param config plugin configuration to load {@link PluginTask} from
     * @param control callback that runs the tasks
     * @return the configuration diff built by {@link #resume}
     */
    @Override
    public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
    {
        PluginTask task = config.loadConfig(PluginTask.class);

        // list files recursively
        List<String> files = listFiles(task);
        log.info("Loading files {}", files);
        task.setFiles(files);

        // the number of tasks is the same as the number of files
        int taskCount = task.getFiles().size();

        return resume(task.dump(), taskCount, control);
    }

    /**
     * Runs the tasks through {@code control} and builds the configuration diff for the next run.
     *
     * <p>{@code last_path} is set to the greatest listed file path in natural string order. When no file
     * was listed, the configured {@code last_path} is carried over if present, otherwise it is not set.
     *
     * @param taskSource dumped {@link PluginTask}
     * @param taskCount number of tasks to run
     * @param control callback that runs the tasks
     * @return the configuration diff holding {@code last_path}
     */
    @Override
    public ConfigDiff resume(TaskSource taskSource,
            int taskCount,
            FileInputPlugin.Control control)
    {
        PluginTask task = taskSource.loadTask(PluginTask.class);

        control.run(taskSource, taskCount);

        // build next config
        ConfigDiff configDiff = Exec.newConfigDiff();

        // last_path
        if (task.getFiles().isEmpty()) {
            // keep the last value
            if (task.getLastPath().isPresent()) {
                configDiff.set("last_path", task.getLastPath().get());
            }
        } else {
            // The greatest path is all that is needed, so the list is neither copied nor sorted.
            configDiff.set("last_path", Collections.max(task.getFiles()));
        }

        return configDiff;
    }

    /**
     * Does nothing; this plugin has nothing to clean up.
     */
    @Override
    public void cleanup(TaskSource taskSource,
            int taskCount,
            List<TaskReport> successTaskReports)
    { }

    /**
     * Walks the file tree selected by {@code path_prefix} and returns the paths of the files to load.
     *
     * <p>If the normalized {@code path_prefix} is a directory, everything below it is a candidate.
     * Otherwise its parent (or the current directory when it has none) is walked, and entries directly
     * inside that directory must have a name starting with the last component of {@code path_prefix}.
     * Entries in deeper directories are not filtered by name. Files whose path string compares less than
     * or equal to {@code last_path} are left out, and so are paths that resolve to a directory.
     *
     * @param task task providing {@code path_prefix}, {@code last_path} and {@code follow_symlinks}
     * @return the selected file paths, in the order the walk visited them
     * @throws RuntimeException if walking the tree fails or a visited path cannot be resolved
     */
    public List<String> listFiles(PluginTask task)
    {
        Path pathPrefix = Paths.get(task.getPathPrefix()).normalize();
        final Path directory;
        final String fileNamePrefix;
        if (Files.isDirectory(pathPrefix)) {
            directory = pathPrefix;
            fileNamePrefix = "";
        } else {
            fileNamePrefix = pathPrefix.getFileName().toString();
            Path d = pathPrefix.getParent();
            directory = (d == null ? CURRENT_DIR : d);
        }

        final ImmutableList.Builder<String> builder = ImmutableList.builder();
        final String lastPath = task.getLastPath().orNull();
        try {
            log.info("Listing local files at directory '{}' filtering filename by prefix '{}'", directory.equals(CURRENT_DIR) ? "." : directory.toString(), fileNamePrefix);

            int maxDepth = Integer.MAX_VALUE;
            Set<FileVisitOption> opts;
            if (task.getFollowSymlinks()) {
                opts = EnumSet.of(FileVisitOption.FOLLOW_LINKS);
            } else {
                opts = EnumSet.noneOf(FileVisitOption.class);
                log.info("\"follow_symlinks\" is set false. Note that symbolic links to directories are skipped.");
            }

            Files.walkFileTree(directory, opts, maxDepth, new SimpleFileVisitor<Path>() {
                @Override
                public FileVisitResult preVisitDirectory(Path path, BasicFileAttributes attrs)
                {
                    if (path.equals(directory)) {
                        return FileVisitResult.CONTINUE;
                    }
                    // Prune only when nothing below this directory can sort after last_path.
                    // A directory that is an ancestor of last_path must still be entered,
                    // because it may contain files that sort after last_path.
                    if (lastPath != null && isSubtreeBeforeLastPath(path.toString(), lastPath)) {
                        return FileVisitResult.SKIP_SUBTREE;
                    }
                    if (matchesTopLevelPrefix(path, directory, fileNamePrefix)) {
                        return FileVisitResult.CONTINUE;
                    }
                    return FileVisitResult.SKIP_SUBTREE;
                }

                @Override
                public FileVisitResult visitFile(Path path, BasicFileAttributes attrs)
                {
                    try {
                        // Avoid directories from listing.
                        // Directories are normally unvisited with |FileVisitor#visitFile|, but symbolic links to
                        // directories are visited like files unless |FOLLOW_LINKS| is set in |Files#walkFileTree|.
                        // Symbolic links to directories are explicitly skipped here by checking with |Path#toRealPath|.
                        if (Files.isDirectory(path.toRealPath())) {
                            return FileVisitResult.CONTINUE;
                        }
                    } catch (IOException ex) {
                        throw new RuntimeException("Can't resolve symbolic link", ex);
                    }

                    boolean alreadyLoaded = lastPath != null && path.toString().compareTo(lastPath) <= 0;
                    if (!alreadyLoaded && matchesTopLevelPrefix(path, directory, fileNamePrefix)) {
                        builder.add(path.toString());
                    }
                    return FileVisitResult.CONTINUE;
                }
            });
        } catch (IOException ex) {
            throw new RuntimeException(String.format("Failed get a list of local files at '%s'", directory), ex);
        }
        return builder.build();
    }

    /**
     * Reports whether a directory can be pruned because no path below it can sort after {@code lastPath}.
     *
     * <p>Only the leading part of {@code lastPath} that lines up with the directory path is compared.
     * When the directory path is smaller at a differing character inside that range, every longer path
     * starting with the directory path is smaller than {@code lastPath} as well. When the compared parts
     * are equal (for example the directory is an ancestor of {@code lastPath}) the result is
     * {@code false}, so the directory is entered and its files are filtered one by one.
     *
     * @param directoryPath string form of the directory about to be visited
     * @param lastPath the configured {@code last_path}, not null
     * @return {@code true} if the whole subtree is guaranteed to sort before {@code lastPath}
     */
    private static boolean isSubtreeBeforeLastPath(String directoryPath, String lastPath)
    {
        int comparedLength = Math.min(directoryPath.length(), lastPath.length());
        return directoryPath.compareTo(lastPath.substring(0, comparedLength)) < 0;
    }

    /**
     * Applies the file name prefix filter, which only concerns entries directly inside {@code directory}.
     *
     * @return {@code true} if {@code path} is not a direct child of {@code directory}, or if it is one
     *         and its file name starts with {@code fileNamePrefix}
     */
    private static boolean matchesTopLevelPrefix(Path path, Path directory, String fileNamePrefix)
    {
        if (!parentOrCurrentDir(path).equals(directory)) {
            return true;
        }
        return path.getFileName().toString().startsWith(fileNamePrefix);
    }

    /**
     * Returns the parent of {@code path}, or {@link #CURRENT_DIR} when the path has no parent component.
     */
    private static Path parentOrCurrentDir(Path path)
    {
        Path parent = path.getParent();
        return parent == null ? CURRENT_DIR : parent;
    }

    /**
     * Opens the file assigned to the given task index.
     *
     * @param taskSource dumped {@link PluginTask} holding the file list
     * @param taskIndex index into the task's file list
     * @return an input over that file whose {@code abort} does nothing and whose {@code commit} returns
     *         a new task report from {@link Exec#newTaskReport()}
     */
    @Override
    public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
    {
        final PluginTask task = taskSource.loadTask(PluginTask.class);

        final File file = new File(task.getFiles().get(taskIndex));

        return new InputStreamTransactionalFileInput(
                task.getBufferAllocator(),
                new InputStreamTransactionalFileInput.Opener() {
                    @Override
                    public InputStream open() throws IOException
                    {
                        return new FileInputStream(file);
                    }
                })
        {
            @Override
            public void abort()
            { }

            @Override
            public TaskReport commit()
            {
                return Exec.newTaskReport();
            }
        };
    }
}