package dk.statsbiblioteket.medieplatform.hadoop;
import dk.statsbiblioteket.medieplatform.autonomous.ConfigConstants;
import dk.statsbiblioteket.util.console.ProcessRunner;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Hadoop mapper that runs an external converter executable on each input file.
 *
 * Each input value is the path of a file to convert. For every record the mapper emits the
 * input key unchanged, paired with the absolute path of the converted file.
 *
 * NOTE(review): earlier documentation described the input as (line-number, original file path)
 * and the output as (original file path, converted file path). The code writes the input key
 * as-is, so the output key is only the original file path if the job's input format supplies
 * it as the key - confirm against the job configuration.
 */
public class ConvertMapper extends Mapper<Text, Text, Text, Text> {

    protected final static Logger log = Logger.getLogger(ConvertMapper.class);

    /** Configuration key: suffix appended to the input file name to form the output file name. */
    public static final String HADOOP_CONVERTER_OUTPUT_EXTENSION_PATH = "hadoop.converter.output.extension";
    /** Configuration key: parent folder in which the per-batch output folder is created. */
    public static final String HADOOP_CONVERTER_OUTPUT_PATH = "hadoop.converter.output.path";
    /** Configuration key: converter command (executable, optionally followed by fixed arguments). */
    public static final String HADOOP_CONVERTER_PATH = "hadoop.converter.executable.path";

    private String batchID;
    private String commandPath;
    private File batchFolder;
    private String resultExtention;

    /**
     * Reads the converter settings from the job configuration and creates the batch output folder.
     *
     * @param context the mapper context supplying the job configuration
     * @throws IOException if the batch output folder does not exist and cannot be created
     * @throws InterruptedException if the superclass setup is interrupted
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        batchID = context.getConfiguration().get(ConfigConstants.BATCH_ID);
        commandPath = context.getConfiguration().get(HADOOP_CONVERTER_PATH);
        String outputFolder = context.getConfiguration().get(HADOOP_CONVERTER_OUTPUT_PATH);
        // NOTE(review): if this key is unset the extension is null and getConvertedPath() will
        // append the literal text "null" to the file name - confirm the key is always configured.
        resultExtention = context.getConfiguration().get(HADOOP_CONVERTER_OUTPUT_EXTENSION_PATH);
        batchFolder = new File(outputFolder, batchID);
        // mkdirs() returns false both on failure and when the folder already exists, so only
        // treat it as an error when the folder is still missing afterwards.
        if (!batchFolder.mkdirs() && !batchFolder.isDirectory()) {
            throw new IOException("Unable to create output folder '" + batchFolder.getAbsolutePath() + "'");
        }
    }

    /**
     * @return the batch id read from the job configuration during setup
     */
    public String getBatchID() {
        return batchID;
    }

    /**
     * @return the converter command read from the job configuration during setup
     */
    public String getCommandPath() {
        return commandPath;
    }

    /**
     * @return the folder (output path + batch id) that converted files are written to
     */
    public File getBatchFolder() {
        return batchFolder;
    }

    /**
     * Runs the converter command on the given file and waits for it to finish.
     *
     * @param dataPath the path to the file to convert (documented originally as a jp2 file)
     *
     * @return the path to the converted file
     * @throws java.io.IOException if the converter exits with a non-zero return code
     */
    protected File convert(String dataPath) throws IOException {
        File resultPath = getConvertedPath(dataPath);
        String[] commandLine = makeCommandLine(dataPath, getCommandPath(), resultPath);
        ProcessRunner runner = new ProcessRunner(commandLine);
        log.debug("Running command '" + Arrays.toString(commandLine) + "'");

        // Hand the child process a copy of this JVM's environment.
        Map<String, String> myEnv = new HashMap<String, String>(System.getenv());
        runner.setEnviroment(myEnv);
        // Presumably lifts the cap on collected output so the failure message below is complete.
        runner.setOutputCollectionByteSize(Integer.MAX_VALUE);

        // This call is blocking.
        runner.run();

        int returnCode = runner.getReturnCode();
        if (returnCode != 0) {
            String message
                    = "failed to run, returncode:" + returnCode + ", stdOut:" + runner.getProcessOutputAsString()
                      + " stdErr:" + runner.getProcessErrorAsString();
            throw new IOException(message);
        }
        return resultPath;
    }

    /**
     * Builds the argument vector for the converter:
     * the configured command followed by {@code -i <input> -o <output>}.
     *
     * The command is split on runs of whitespace after trimming, so leading, trailing or repeated
     * spaces in the configured command do not produce empty arguments.
     *
     * @param dataPath    the path of the file to convert
     * @param commandPath the converter command, optionally followed by whitespace-separated arguments
     * @param resultFile  the file the converter should write to
     *
     * @return the complete command line, one element per argument
     */
    protected String[] makeCommandLine(String dataPath, String commandPath, File resultFile) {
        List<String> result = new ArrayList<String>(Arrays.asList(commandPath.trim().split("\\s+")));
        result.addAll(Arrays.asList("-i", dataPath, "-o", resultFile.getAbsolutePath()));
        return result.toArray(new String[result.size()]);
    }

    /**
     * Computes where the converted version of a file is placed: inside the batch folder, named
     * after the last path element of the input with the configured extension appended.
     *
     * @param dataPath the path of the file to convert
     *
     * @return the target file for the conversion result
     */
    protected File getConvertedPath(String dataPath) {
        return new File(getBatchFolder(), new File(dataPath + resultExtention).getName());
    }

    /**
     * Converts the file named by the value and emits the key with the converted file's absolute path.
     *
     * @param key     the input key, written unchanged as the output key
     * @param value   the path of the file to convert
     * @param context the mapper context the result is written to
     * @throws IOException if the conversion or the write fails; unexpected runtime failures are
     *                     wrapped in an IOException as well
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
        try {
            log.debug("Mapping for '" + key + "' and '" + value + "'");
            File converted = convert(value.toString());
            context.write(key, new Text(converted.getAbsolutePath()));
        } catch (IOException e) {
            // Log with the throwable so the stack trace is kept, and rethrow without re-wrapping.
            log.error("Failed to convert '" + value + "'", e);
            throw e;
        } catch (InterruptedException e) {
            // Propagate interruption as declared rather than hiding it inside an IOException.
            log.error("Interrupted while converting '" + value + "'", e);
            throw e;
        } catch (RuntimeException e) {
            log.error("Unexpected failure while converting '" + value + "'", e);
            throw new IOException(e);
        }
    }
}