package com.mozilla.grouperfish.batch.transforms;
import com.mozilla.grouperfish.base.Assert;
import com.mozilla.grouperfish.batch.scheduling.Helpers;
import com.mozilla.grouperfish.model.Fail;
import com.mozilla.grouperfish.model.Task;
import com.mozilla.grouperfish.services.api.FileSystem;
import com.mozilla.grouperfish.services.api.FileSystem.FsError;
/**
 * A transform can be implemented as a local executable that does not
 * know about hadoop or how to talk to HDFS, and instead uses a
 * (temporary) local work directory.
 *
 * Such transforms are made available through the LocalTransform
 * wrapper which will copy inputs from HDFS to the local file system,
 * and results back to HDFS.
 *
 * The actual executable will receive a local directory (as an absolute
 * path) instead of an HDFS uri.
 */
public class LocalTransform extends ExecutableTransform {

    /** URI scheme prefix that is removed to turn a local file URI into a plain path. */
    private static final String FILE_URI_PREFIX = "file://";

    private final FileSystem localFs;

    /** True when the data filesystem and the local filesystem are not equal. */
    private final boolean needsToCopy;

    /**
     * A local transform in a distributed environment:
     * Task input data is copied from the dfs to the local fs before
     * running, and results are copied back afterwards.
     *
     * If both filesystems are equal (per {@code dfs.equals(localFs)}),
     * no copying takes place.
     *
     * @param name The transform executable. It should take the location of the input data
     *             as its single argument.
     * @param dfs The distributed filesystem used by grouperfish (e.g. HDFS).
     * @param localFs The local filesystem where working directories for local processes can be created.
     */
    public LocalTransform(
            final String name,
            final FileSystem dfs,
            final FileSystem localFs) {
        super(name, dfs);
        Assert.nonNull(localFs);
        this.localFs = localFs;
        this.needsToCopy = !dfs.equals(localFs);
    }

    /**
     * Resolves the task directory on the local filesystem and returns it
     * without the leading {@code file://} scheme.
     *
     * The prefix is only removed when it is actually present. A location
     * that does not start with {@code file://} is returned unchanged rather
     * than being truncated by a fixed number of characters.
     *
     * @param task The task whose working directory should be located.
     * @return The local task directory location, without the {@code file://} prefix.
     * @throws FsError Declared for failures of the local filesystem lookup.
     */
    @Override
    protected String taskDirectoryUri(final Task task) throws FsError {
        final String location = localFs.uri(Helpers.taskDirectory(task));
        if (location.startsWith(FILE_URI_PREFIX)) {
            return location.substring(FILE_URI_PREFIX.length());
        }
        return location;
    }

    /**
     * Runs the transform. When the data filesystem differs from the local
     * filesystem, the task input and parameters are copied to the local
     * filesystem first, and the results are copied back after the run.
     *
     * Results are only copied back if the wrapped run returns normally.
     *
     * @param task The task to run.
     * @return The result produced by the superclass implementation of this method.
     * @throws Fail As a hard failure if copying in either direction throws;
     *              may also be propagated from the superclass implementation.
     * @throws InterruptedException Propagated from the superclass implementation.
     */
    @Override
    public TransformResult run(Task task) throws Fail, InterruptedException {
        if (needsToCopy) {
            try {
                Helpers.copy(Helpers.inputFilename(task), dataFs(), localFs);
                Helpers.copy(Helpers.parametersFilename(task), dataFs(), localFs);
            }
            catch (final Exception e) {
                throw Fail.hard(task, "Could not copy data to local fs.", e);
            }
        }

        final TransformResult result = super.run(task);

        if (needsToCopy) {
            try {
                Helpers.copy(Helpers.resultsFilename(task), localFs, dataFs());
            }
            catch (final Exception e) {
                throw Fail.hard(task, "Could not copy results back to distributed fs.", e);
            }
        }
        return result;
    }
}