package org.molgenis.compute.commandline;
import freemarker.template.Configuration;
import freemarker.template.Template;
import freemarker.template.TemplateException;
import org.apache.commons.io.FileUtils;
import org.molgenis.compute.design.ComputeParameter;
import org.molgenis.compute.design.ComputeProtocol;
import org.molgenis.compute.design.WorkflowElement;
import org.molgenis.compute.runtime.ComputeTask;
import org.molgenis.framework.ui.FreemarkerView;
import org.molgenis.util.Tuple;
import java.io.*;
import java.util.*;
//import nl.vu.psy.rite.exceptions.RiteException;
//import nl.vu.psy.rite.operations.Recipe;
//import nl.vu.psy.rite.operations.Step;
//import nl.vu.psy.rite.operations.implementations.bash.BashOperation;
//import nl.vu.psy.rite.persistence.mongo.MongoRecipeStore;
/**
 * Command-line generator that turns a MOLGENIS compute bundle (workflow +
 * protocols + worksheet + parameters) into a set of cluster job scripts plus a
 * submit.sh. Scripts are rendered with FreeMarker; PBS is the default
 * scheduler, BSUB (LSF) is supported as an alternative.
 */
public class ComputeCommandLine
{
    /** Scheduler identifiers; PBS is the default. */
    public static final String SCHEDULER_BSUB = "BSUB";
    public static final String SCHEDULER_PBS = "PBS";

    // Detected during job generation (from the worksheet's "scheduler" column)
    // and reused later when rendering the submit script.
    private String currentScheduler = "null";

    protected ComputeBundle computeBundle;
    protected File parametersfile, workflowfile, worksheetfile, protocoldir, workingdir;
    protected String outputdir, templatedir, backend;
    protected Hashtable<String, Object> userValues = new Hashtable<String, Object>();
    private List<ComputeTask> tasks = new ArrayList<ComputeTask>();
    private Worksheet worksheet;

    /**
     * Copies one named value from {@link #userValues} into the bundle as a
     * ComputeParameter so protocols/templates can reference it.
     *
     * @param name parameter name; must be present in userValues
     */
    private void addUserValueAsParameter(String name)
    {
        ComputeParameter parameter = new ComputeParameter();
        parameter.setName(name);
        parameter.setDefaultValue(userValues.get(name).toString());
        computeBundle.addComputeParameter(parameter);
    }

    /**
     * Builds one ComputeTask (script) per folded worksheet row per workflow
     * element, resolving inter-step dependencies and filling the protocol
     * templates.
     *
     * @param argsMap parsed command line arguments, added verbatim to the bundle
     * @throws Exception on template or bundle errors
     */
    private void generateJobs(LinkedHashMap<String, String> argsMap) throws Exception
    {
        computeBundle = new ComputeBundleFromDirectory(this);

        // Add our parsed command line parameters 'as is' to the bundle
        // ("mcdir" is skipped because it conflicts with "McDir" below).
        for (String p : argsMap.keySet())
        {
            if (!p.equals("mcdir"))
            {
                ComputeParameter cp = new ComputeParameter();
                cp.setName(p);
                cp.setDefaultValue(argsMap.get(p));
                computeBundle.addComputeParameter(cp);
            }
        }

        // Append the command line params to the list of ComputeParameters, so
        // we can use them in Protocols.
        String[] mcParameterNames = { "McDir", "McId", "McParameters", "McProtocols", "McTemplates",
                "McWorkflow", "McWorksheet", "McScripts" };
        for (String name : mcParameterNames)
        {
            addUserValueAsParameter(name);
        }

        this.worksheet = new Worksheet(computeBundle);
        List<ComputeProtocol> protocollist = computeBundle.getComputeProtocols();

        // Map of all workflow elements by name (needed to resolve dependencies).
        Map<String, WorkflowElement> wfeMap = new LinkedHashMap<String, WorkflowElement>();
        for (WorkflowElement wfe : computeBundle.getWorkflowElements())
        {
            wfeMap.put(wfe.getName(), wfe);
        }

        // Process workflow elements.
        System.out.println("Starting script generation for PBS clusters.");
        for (WorkflowElement wfe : computeBundle.getWorkflowElements())
        {
            print("Starting generation of workflow element: " + wfe.getName());

            // Get protocol and its template (with header/footer includes).
            ComputeProtocol protocol = findProtocol(wfe.getProtocol_Name(), protocollist);
            String scripttemplate = addHeaderFooter(protocol.getScriptTemplate(), protocol.getScriptInterpreter());

            // Fold and reduce worksheet; without explicit iteration targets we
            // fold per worksheet line.
            List<String> targets = protocol.getIterateOver_Name();
            if (targets.isEmpty())
            {
                targets.add("line_number");
            }
            List<Tuple> folded = Worksheet.foldWorksheet(this.worksheet.worksheet,
                    this.computeBundle.getComputeParameters(), targets);

            // Each element of the folded worksheet produces one script.
            // Null-safe scheduler detection: a missing "scheduler" column (or an
            // empty fold result) means the default scheduler, PBS.
            String schedulerName = folded.isEmpty() ? null : folded.get(0).getString("scheduler");
            boolean isBsub = SCHEDULER_BSUB.equalsIgnoreCase(schedulerName);
            if (isBsub)
            {
                currentScheduler = SCHEDULER_BSUB;
                // BSUB wants walltime as hh:mm instead of hh:mm:ss; strip the
                // last ":ss" part if present.
                String strWalltime = protocol.getWalltime();
                if (strWalltime != null)
                {
                    int lastColon = strWalltime.lastIndexOf(":");
                    if (lastColon > -1)
                    {
                        protocol.setWalltime(strWalltime.substring(0, lastColon));
                    }
                }
            }
            else
            {
                currentScheduler = SCHEDULER_PBS;
            }

            for (Tuple work : folded)
            {
                // Fill template with this unit of work and wrap it in a task.
                ComputeTask job = new ComputeTask();
                job.setName(this.createJobName(wfe, work));
                job.setInterpreter(protocol.getScriptInterpreter() == null
                        ? worksheet.getdefaultvalue("interpreter")
                        : protocol.getScriptInterpreter());

                // If walltime/cores/mem are not specified in the protocol, fall
                // back to the worksheet defaults.
                String walltime = (protocol.getWalltime() == null ? worksheet.getdefaultvalue("walltime")
                        : protocol.getWalltime());
                work.set("walltime", walltime);

                Integer cores = (protocol.getCores() == null
                        ? Integer.parseInt(worksheet.getdefaultvalue("cores"))
                        : protocol.getCores());
                work.set("cores", cores);

                String mem = (protocol.getMem() == null ? worksheet.getdefaultvalue("mem").toString()
                        : protocol.getMem().toString());
                if (isBsub)
                {
                    // For BSUB the memory is specified in KB (input is in GB).
                    work.set("mem", Integer.parseInt(mem) * 1024 * 1024 + "");
                }
                else
                {
                    // For PBS (default) a "gb" suffix is appended.
                    work.set("mem", mem + "gb");
                }

                // Set jobname; when a job starts/completes this ends up in a logfile.
                work.set("jobname", job.getName());

                // Resolve dependencies on previous workflow steps.
                for (String previousStep : wfe.getPreviousSteps_Name())
                {
                    WorkflowElement previousWfe = wfeMap.get(previousStep);
                    ComputeProtocol wfeProtocol = findProtocol(previousWfe.getProtocol_Name(),
                            computeBundle.getComputeProtocols());

                    // Determine how many jobs the previous step produced for
                    // this unit of work (length of the first list-valued target).
                    // NOTE(review): assumes all list-valued targets have equal
                    // length in the worksheet — confirm.
                    int size = 1;
                    for (String target : wfeProtocol.getIterateOver_Name())
                    {
                        if (work.getObject(target) instanceof List)
                        {
                            size = work.getList(target).size();
                            break;
                        }
                    }

                    // Calculate the dependency job names.
                    Set<String> dependencies = new LinkedHashSet<String>();
                    for (int i = 0; i < size; i++)
                    {
                        String jobName = previousWfe.getName();
                        for (String target : wfeProtocol.getIterateOver_Name())
                        {
                            // Clamp the index for scalar (or shorter) targets.
                            int i_fix = Math.min(work.getList(target).size() - 1, i);
                            jobName += "_" + work.getList(target).get(i_fix);
                        }
                        dependencies.add(stepnr(previousWfe.getName()) + jobName);
                    }
                    job.getPrevSteps_Name().addAll(dependencies);
                }

                // Render and attach the script.
                job.setComputeScript(filledtemplate(scripttemplate, work, job.getName()));
                this.tasks.add(job);
                print("Generated " + job.getName() + ", depending on " + job.getPrevSteps_Name());
            }
        }
    }

    /**
     * Wraps a protocol template in the shared Header.ftl / Footer.ftl includes.
     *
     * @param scripttemplate raw protocol template text
     * @param interpreter script interpreter name; currently unused but kept for
     *        interface compatibility (was used for R-specific macros)
     * @return template text with header and footer includes added
     */
    private String addHeaderFooter(String scripttemplate, String interpreter)
    {
        String ls = System.getProperty("line.separator");
        return "<#include \"Header.ftl\"/>" + scripttemplate + ls + "<#include \"Footer.ftl\"/>";
    }

    /**
     * Returns the zero-padded step-number prefix ("s00_", "s01_", ...) of the
     * named workflow element, or null when the element is not in the workflow.
     */
    private String stepnr(String wfeName)
    {
        List<WorkflowElement> workflow = computeBundle.getWorkflowElements();
        for (int i = 0; i < workflow.size(); i++)
        {
            if (wfeName.equalsIgnoreCase(workflow.get(i).getName()))
            {
                return ("s" + (i < 10 ? "0" : "") + i + "_");
            }
        }
        return null;
    }

    /**
     * Builds the unique job name for one workflow element and one folded
     * worksheet row: step prefix + element name + target values (or the line
     * number when the protocol iterates over nothing).
     */
    private String createJobName(WorkflowElement wfe, Tuple tuple)
    {
        String jobName = wfe.getName();
        ComputeProtocol wfeProtocol = findProtocol(wfe.getProtocol_Name(), computeBundle.getComputeProtocols());
        List<String> targets = wfeProtocol.getIterateOver_Name();
        if (targets.isEmpty())
        {
            // No targets: number jobs by worksheet line.
            jobName += "_" + tuple.getString("line_number");
        }
        else
        {
            for (String target : targets)
            {
                jobName += "_" + tuple.getString(target);
            }
        }
        return stepnr(wfe.getName()) + jobName;
    }

    /**
     * Renders a FreeMarker template against one unit of work.
     *
     * @param scripttemplate template text (already wrapped with header/footer)
     * @param work folded worksheet row; each field becomes a template variable
     * @param jobname used as the FreeMarker template name (for error messages)
     * @return the filled-in script text
     * @throws IOException if the protocol directory cannot be used for includes
     * @throws TemplateException if the template fails to process
     */
    public String filledtemplate(String scripttemplate, Tuple work, String jobname) throws IOException,
            TemplateException
    {
        Map<String, Object> parameters = new HashMap<String, Object>();
        parameters.put("freemarkerHelper", new FreemarkerHelper(this.computeBundle));
        parameters.put("parameters", work);
        parameters.put("workflowElements", this.computeBundle.getWorkflowElements());
        for (String field : work.getFields())
        {
            parameters.put(field, work.getObject(field));
        }

        Configuration cfg = new Configuration();
        // Includes (Header.ftl/Footer.ftl) are resolved from the protocol dir.
        cfg.setDirectoryForTemplateLoading(this.protocoldir);
        Template template = new Template(jobname, new StringReader(scripttemplate), cfg);
        StringWriter filledtemplate = new StringWriter();
        template.process(parameters, filledtemplate);
        return filledtemplate.toString();
    }

    /**
     * Finds a protocol by name (case-insensitive); returns null when absent.
     */
    private ComputeProtocol findProtocol(String protocol_name, List<ComputeProtocol> protocollist)
    {
        for (ComputeProtocol c : protocollist)
        {
            if (c.getName().equalsIgnoreCase(protocol_name)) return c;
        }
        return null;
    }

    /**
     * Entry point: parses arguments, generates all job scripts plus submit.sh
     * into the output directory, and copies the input files alongside them.
     * Exits with status 1 on any failure.
     */
    public static void main(String[] args)
    {
        // Parse command line arguments.
        LinkedHashMap<String, String> argsMap = ArgumentParser.parseParameters(args);

        ComputeCommandLine ccl = new ComputeCommandLine();
        ccl.workflowfile = new File(argsMap.get("workflow"));
        ccl.protocoldir = new File(argsMap.get("protocols"));
        ccl.parametersfile = new File(argsMap.get("parameters"));
        ccl.worksheetfile = new File(argsMap.get("worksheet"));
        ccl.outputdir = argsMap.get("outputdir");

        // Put all parameters 'as is' in the map...
        for (String p : argsMap.keySet())
            ccl.userValues.put(p, argsMap.get(p));

        // ...but also ensure backward compatibility with the Mc* names.
        ccl.userValues.put("McDir", argsMap.get("mcdir"));
        ccl.userValues.put("McId", argsMap.get("id"));
        ccl.userValues.put("McParameters", argsMap.get("parameters"));
        ccl.userValues.put("McProtocols", argsMap.get("protocols"));
        ccl.userValues.put("McTemplates", "N/A");
        ccl.userValues.put("McWorkflow", argsMap.get("workflow"));
        ccl.userValues.put("McWorksheet", argsMap.get("worksheet"));
        ccl.userValues.put("McScripts", argsMap.get("outputdir"));

        ccl.backend = "cluster";
        ccl.workingdir = new File(".");

        try
        {
            ccl.generateJobs(argsMap);
            ccl.copyWorksheetAndWorkflow();
            ccl.generateScripts();
        }
        catch (Exception e)
        {
            e.printStackTrace();
            System.exit(1);
        }
        print("Finished with generation!");
        System.exit(0);
    }

    /**
     * Copies the workflow, worksheet and parameters files into the output
     * directory (skipped when source and destination are the same file).
     * Uses File.getName()/getCanonicalPath() so the logic is platform
     * independent — the previous separator-splitting regex broke on Windows.
     */
    private void copyWorksheetAndWorkflow()
    {
        try
        {
            for (File f : Arrays.asList(this.workflowfile, this.worksheetfile, this.parametersfile))
            {
                File destination = new File(this.outputdir, f.getName());
                if (!destination.getCanonicalPath().equals(f.getCanonicalPath()))
                {
                    FileUtils.copyFile(f, destination);
                }
            }
        }
        catch (IOException e1)
        {
            e1.printStackTrace();
        }
    }

    /**
     * Returns the workflow file's base name with dots replaced by underscores
     * (qsub does not allow dots in job names).
     */
    private String getworkflowfilename()
    {
        return this.workflowfile.getName().replace('.', '_');
    }

    /** Convert all compute jobs into scripts + submit.sh */
    private void generateScripts()
    {
        new File(outputdir).mkdirs();

        // Render submit.sh from the Submit.sh.ftl template in the protocol dir.
        Map<String, Object> params = new HashMap<String, Object>();
        params.put("jobs", tasks);
        params.put("workflowfilename", this.getworkflowfilename());
        params.put("scheduler", currentScheduler);
        String result = new FreemarkerView(this.protocoldir + File.separator + "Submit.sh.ftl", params).render();
        try
        {
            FileUtils.write(new File(outputdir, "submit.sh"), result);

            // Produce a .sh file in outputdir for each job.
            for (ComputeTask job : this.tasks)
            {
                PrintWriter jobWriter = new PrintWriter(new File(outputdir, job.getName() + ".sh"));
                jobWriter.println(job.getComputeScript());
                jobWriter.close();
            }
        }
        catch (IOException e)
        {
            // FileNotFoundException is an IOException, so one catch suffices.
            e.printStackTrace();
        }
    }

    /** Prints a prefixed progress message to stdout. */
    private static void print(String string)
    {
        System.out.println(">> " + string);
    }
}