package net.sourceforge.seqware.pipeline.workflowV2.engine.oozie.object; import io.seqware.pipeline.SqwKeys; import java.io.File; import java.util.ArrayList; import java.util.List; import net.sourceforge.seqware.common.util.configtools.ConfigTools; import net.sourceforge.seqware.pipeline.workflowV2.model.AbstractJob; import net.sourceforge.seqware.pipeline.workflowV2.model.SqwFile; import org.jdom.Element; public class OozieProvisionFileJob extends OozieJob { private String metadataOutputPrefix; private String outputDir; private final SqwFile file; public OozieProvisionFileJob(AbstractJob job, SqwFile file, String name, String oozie_working_dir, boolean useSge, File seqwareJar, String slotsSgeParamFormat, String maxMemorySgeParamFormat, StringTruncator truncator) { super(job, name, oozie_working_dir, useSge, seqwareJar, slotsSgeParamFormat, maxMemorySgeParamFormat, truncator); // oozie provision file jobs should only require 2GB, leaving a margin of safety String startMem = ConfigTools.getSettings().get(SqwKeys.SW_CONTROL_NODE_MEMORY.getSettingKey()); job.setMaxMemory(startMem == null ? "3000" : startMem); this.file = file; } @Override protected Element createSgeElement() { File runnerScript = emitRunnerScript(); File optionsFile = emitOptionsFile(); Element sge = new Element("sge", SGE_XMLNS); add(sge, "script", runnerScript.getAbsolutePath()); add(sge, "options-file", optionsFile.getAbsolutePath()); return sge; } @Override protected Element createJavaElement() { Element java = new Element("java", WF_XMLNS); add(java, "job-tracker", "${jobTracker}"); add(java, "name-node", "${nameNode}"); Element config = add(java, "configuration"); addProp(config, "mapred.job.queue.name", "${queueName}"); addProp(config, "oozie.launcher.mapred.job.map.memory.mb", jobObj.getMaxMemory()); addProp(config, "oozie.launcher.mapred.job.reduce.memory.mb", jobObj.getMaxMemory()); addProp(config, "oozie.launcher.mapreduce.map.memory.physical.mb", jobObj.getMaxMemory()); addProp(config, "oozie.launcher.mapreduce.reduce.memory.physical.mb", jobObj.getMaxMemory()); add(java, "main-class", "net.sourceforge.seqware.pipeline.runner.Runner"); String settings = String.format("-D%s='%s'", ConfigTools.SEQWARE_SETTINGS_PROPERTY, ConfigTools.getSettingsFilePath()); add(java, "java-opts", settings); for (String arg : runnerArgs()) { add(java, "arg", arg); } return java; } private File emitRunnerScript() { File localFile = file(scriptsDir, runnerFileName(this.getLongName()), true); ArrayList<String> args = generateRunnerLine(); writeScript(concat(" ", args), localFile); return localFile; } private List<String> runnerArgs() { List<String> args = runnerMetaDataArgs(); /* * So, despite the fact that ProvisionFiles knows the destination of the file, we still need the following since ProvisionFiles * reports just the filename as the destination, and then Runner prepends that file longName with the value of the following. * Madness. * * Based on code from pegasus.object.ProvisionFilesJob.buildCommandString() */ if (file.getOutputPath() == null) { args.add("--metadata-output-file-prefix"); args.add(this.metadataOutputPrefix + "/" + this.outputDir); } args.add("--module"); args.add("net.sourceforge.seqware.pipeline.modules.utilities.ProvisionFiles"); args.add("--"); if (file.isInput()) { args.add("--skip-record-file"); args.add("--input-file"); args.add(file.getSourcePath()); if (file.getOutputPath() != null) { args.add("--output-file"); args.add(file.getOutputPath()); } else { args.add("--output-dir"); args.add(outputDir); } } else { // output file args.add("--input-file-metadata"); args.add(String.format("%s::%s::%s/%s", jobObj.getAlgo(), file.getType(), oozie_working_dir, file.getSourcePath())); if (file.getOutputPath() != null) { args.add("--output-file"); args.add(file.getOutputPath()); } else { args.add("--output-dir"); args.add(metadataOutputPrefix + "/" + outputDir); } if (file.isSkipIfMissing()) { args.add("--skip-if-missing"); } } if (file.isForceCopy()) { args.add("--force-copy"); } if (!file.getAnnotations().isEmpty()) { File emitAnnotations = super.emitAnnotations(file.getAnnotations()); args.add("--annotation-file"); args.add(emitAnnotations.getAbsolutePath()); } return args; } public String getOutputDir() { return outputDir; } public void setOutputDir(String outputDir) { this.outputDir = outputDir; } public String getMetadataOutputPrefix() { return metadataOutputPrefix; } public void setMetadataOutputPrefix(String metadataOutputPrefix) { this.metadataOutputPrefix = metadataOutputPrefix; } public ArrayList<String> generateRunnerLine() { ArrayList<String> args = new ArrayList<>(); String pathToJRE = createPathToJava(); args.add(pathToJRE + "java"); args.add("-Xmx" + jobObj.getCommand().getMaxMemory()); args.add("-classpath"); args.add(seqwareJarPath); args.add("net.sourceforge.seqware.pipeline.runner.Runner"); args.addAll(runnerArgs()); return args; } }