package net.sourceforge.seqware.pipeline.workflowV2.engine.oozie.object;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import net.sourceforge.seqware.common.util.configtools.ConfigTools;
import net.sourceforge.seqware.pipeline.modules.GenericCommandRunner;
import net.sourceforge.seqware.pipeline.workflowV2.model.AbstractJob;
import net.sourceforge.seqware.pipeline.workflowV2.model.Command;
import org.apache.commons.io.FilenameUtils;
import org.jdom.Element;

/**
 * Oozie job that runs a bash command.
 *
 * <p>The command's arguments are written to a script file, and that script is
 * handed to the SeqWare {@code Runner} (using the {@code GenericCommandRunner}
 * module) either through an {@code sge} workflow element or through a
 * {@code java} workflow element.
 */
public class OozieBashJob extends OozieJob {

    /** Launcher memory properties that are all set to the job's max memory, in emission order. */
    private static final String[] LAUNCHER_MEMORY_PROPERTIES = {
        "oozie.launcher.mapred.job.map.memory.mb",
        "oozie.launcher.mapred.job.reduce.memory.mb",
        "oozie.launcher.mapreduce.map.memory.physical.mb",
        "oozie.launcher.mapreduce.reduce.memory.physical.mb"
    };

    /** Lazily emitted script holding the job's command; {@code null} until first requested. */
    private File jobScript = null;

    /**
     * Creates the job; every argument is forwarded unchanged to the {@link OozieJob} constructor.
     */
    public OozieBashJob(AbstractJob job, String name, String oozie_working_dir, boolean useSge, File seqwareJar,
            String threadsSgeParamFormat, String maxMemorySgeParamFormat, StringTruncator truncator) {
        super(job, name, oozie_working_dir, useSge, seqwareJar, threadsSgeParamFormat, maxMemorySgeParamFormat,
                truncator);
    }

    /**
     * Builds the {@code sge} element, pointing it at a freshly emitted runner
     * script and options file.
     *
     * @return the populated {@code sge} element
     */
    @Override
    protected Element createSgeElement() {
        // Emit both files before building the element, runner script first.
        File runner = emitRunnerScript();
        File options = emitOptionsFile();

        Element sgeElement = new Element("sge", SGE_XMLNS);
        add(sgeElement, "script", runner.getAbsolutePath());
        add(sgeElement, "options-file", options.getAbsolutePath());
        return sgeElement;
    }

    /**
     * Builds the {@code java} element that invokes the SeqWare {@code Runner}
     * with the arguments produced by {@link #runnerArgs(File)}.
     *
     * @return the populated {@code java} element
     */
    @Override
    protected Element createJavaElement() {
        Element javaElement = new Element("java", WF_XMLNS);
        add(javaElement, "job-tracker", "${jobTracker}");
        add(javaElement, "name-node", "${nameNode}");

        Element configuration = add(javaElement, "configuration");
        addProp(configuration, "mapred.job.queue.name", "${queueName}");
        for (String memoryProperty : LAUNCHER_MEMORY_PROPERTIES) {
            addProp(configuration, memoryProperty, jobObj.getMaxMemory());
        }

        add(javaElement, "main-class", "net.sourceforge.seqware.pipeline.runner.Runner");

        // Pass the settings file location on to the launched JVM as a system property.
        String javaOpts = String.format("-D%s='%s'", ConfigTools.SEQWARE_SETTINGS_PROPERTY,
                ConfigTools.getSettingsFilePath());
        add(javaElement, "java-opts", javaOpts);

        List<String> runnerArguments = runnerArgs(getJobScript());
        for (String runnerArgument : runnerArguments) {
            add(javaElement, "arg", runnerArgument);
        }
        return javaElement;
    }

    /**
     * Derives the script file name for a job.
     *
     * @param jobName the job name
     * @return {@code jobName} with {@code ".sh"} appended
     */
    public static String scriptFileName(String jobName) {
        return jobName + ".sh";
    }

    /**
     * Writes the command's arguments, joined with single spaces, to the job's
     * script file in the scripts directory.
     *
     * @return the script file
     */
    private File emitJobScript() {
        String scriptName = scriptFileName(this.getLongName());
        File script = file(scriptsDir, scriptName, true);
        writeScript(concat(" ", jobObj.getCommand().getArguments()), script);
        return script;
    }

    /**
     * Writes the full runner command line, joined with single spaces, to the
     * runner script file in the scripts directory.
     *
     * @return the runner script file
     */
    private File emitRunnerScript() {
        File runner = file(scriptsDir, runnerFileName(this.getLongName()), true);
        ArrayList<String> commandLine = generateRunnerLine();
        writeScript(concat(" ", commandLine), runner);
        return runner;
    }

    /**
     * Assembles the Runner arguments: the metadata arguments, the module
     * selection, and the GenericCommandRunner options for the given script.
     *
     * @param jobScript the script that GenericCommandRunner should execute
     * @return the argument list, in the order it must appear on the command line
     */
    private List<String> runnerArgs(File jobScript) {
        List<String> result = runnerMetaDataArgs();
        appendGcrOption(result, "--module", "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner");
        result.add("--");

        Command command = jobObj.getCommand();
        if (command.isGcrSkipIfMissing()) {
            result.add("--gcr-skip-if-missing");
        }
        if (command.isGcrSkipIfOutputExists()) {
            result.add("--gcr-skip-if-output-exists");
        }
        if (command.getGcrOutputFile() != null) {
            appendGcrOption(result, "--gcr-output-file", command.getGcrOutputFile());
        }
        if (command.getOutputLineCapacity() != null) {
            // if we later decide to separately set stderr and stdout, this will need to be changed
            appendGcrOption(result, "--" + GenericCommandRunner.GCR_STDERR_BUFFERSIZE,
                    String.valueOf(command.getOutputLineCapacity()));
            appendGcrOption(result, "--" + GenericCommandRunner.GCR_STDOUT_BUFFERSIZE,
                    String.valueOf(command.getOutputLineCapacity()));
        }

        appendGcrOption(result, "--gcr-algorithm", jobObj.getAlgo());
        appendGcrOption(result, "--gcr-command", jobScript.getAbsolutePath());

        // store permanent copy of full output
        appendGcrOption(result, "--gcr-permanent-storage-prefix",
                "generated-scripts/" + FilenameUtils.removeExtension(jobScript.getName()));

        if (!jobObj.getAnnotations().isEmpty()) {
            File annotationFile = super.emitAnnotations(jobObj.getAnnotations());
            appendGcrOption(result, "--gcr-annotation-file", annotationFile.getAbsolutePath());
        }
        return result;
    }

    /**
     * Appends an option name followed by its value to an argument list.
     *
     * @param target the list to append to
     * @param option the option name, including its leading dashes
     * @param value the option's value
     */
    private static void appendGcrOption(List<String> target, String option, String value) {
        target.add(option);
        target.add(value);
    }

    /**
     * Builds the complete command line that launches the SeqWare Runner in its
     * own JVM: java executable, heap limit, classpath, main class, then the
     * Runner arguments.
     *
     * @return the command line tokens, in order
     */
    public ArrayList<String> generateRunnerLine() {
        ArrayList<String> commandLine = new ArrayList<>();
        String javaHome = createPathToJava();
        commandLine.add(javaHome + "java");
        commandLine.add("-Xmx" + jobObj.getCommand().getMaxMemory());
        commandLine.add("-classpath");
        commandLine.add(seqwareJarPath);
        commandLine.add("net.sourceforge.seqware.pipeline.runner.Runner");
        commandLine.addAll(runnerArgs(getJobScript()));
        return commandLine;
    }

    /**
     * Returns the job script, emitting it on first access and reusing the same
     * file afterwards.
     *
     * @return the jobScript
     */
    public File getJobScript() {
        if (this.jobScript != null) {
            return this.jobScript;
        }
        this.jobScript = emitJobScript();
        return this.jobScript;
    }
}