package org.molgenis.compute.workflowgenerator;
import org.molgenis.compute.ComputeJob;
import org.molgenis.compute.ComputeParameter;
import org.molgenis.compute.ComputeProtocol;
import org.molgenis.compute.monitor.*;
import org.molgenis.compute.pipelinemodel.*;
import org.molgenis.compute.scriptserver.MCF;
import org.molgenis.compute.monitor.ComputeAppPaths;
import org.molgenis.framework.db.Database;
import org.molgenis.framework.db.DatabaseException;
import org.molgenis.pheno.ObservedValue;
import org.molgenis.protocol.Workflow;
import org.molgenis.protocol.WorkflowElement;
import org.molgenis.protocol.WorkflowElementParameter;
import org.molgenis.util.HttpServletRequestTuple;
import org.molgenis.util.Tuple;
import javax.servlet.ServletContext;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
/**
* Created by IntelliJ IDEA. User: georgebyelas Date: 18/10/2011 Time: 10:43 To
* change this template use File | Settings | File Templates.
*/
public class WorkflowGeneratorDB
{
private static final String INTERPRETER_BASH = "bash";
private static final String INTERPRETER_R = "R";
private static final String INTERPRETER_JDL = "jdl";
public static final String ENV_CLUSTER = "cluster";
public static final String ENV_GRID = "grid";
private String env = null;
private boolean flagJustGenerate = false;
private static final String LOG = "log";// reserved word for logging feature
// type used in ComputeFeature
private static final String DATE_FORMAT_NOW = "yyyy-MM-dd-HH-mm-ss";
private SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT_NOW);
// format to run pipeline in compute
private Pipeline pipeline = null;
private Step currentStep = null;
private String strCurrentPipelineStep = "INITIAL";
private int pipelineElementNumber = 0;
private int stepNumber = 0;
private List<ComputeParameter> allComputeParameters = null;
// compute
private MCF mcf = null;
private DatabaseUpdater updater = null;
// map of all compute features/values
private Hashtable<String, String> weavingValues = null;
Hashtable<String, String> userValues = null;
// whole workflow application
private ComputeJob wholeWorkflowApp = null;
// some necessary values
private Workflow target = null;
private String applicationName = null;
private ParameterWeaver weaver = new ParameterWeaver();
private String remoteLocation = null;
private boolean isToWriteLocally = false;
private String localLocation = "/";
public void processSingleWorksheet(Database db, Tuple request, Hashtable<String, String> userValues,
Workflow workflow, String applicationName /* should be unique somehow */, String environment)
throws Exception
{
this.env = environment;
this.userValues = userValues;
this.target = workflow;
this.applicationName = applicationName;
if (!db.inTx()) db.beginTx();
if (mcf == null)
{
HttpServletRequestTuple req = (HttpServletRequestTuple) request;
ServletContext servletContext = req.getRequest().getSession().getServletContext();
mcf = (MCF) servletContext.getAttribute("MCF");
createDatabaseUpdater(mcf);
}
System.out.println(">>> generate apps");
// create new pipeline and set current step to null
pipeline = new Pipeline();
currentStep = null;
stepNumber = 0;
pipelineElementNumber = 0;
// application for the whole workflow
wholeWorkflowApp = new ComputeJob();
// get the chosen workflow
// Workflow workflow = db.query(Workflow.class).find().get(0);
wholeWorkflowApp.setProtocol(workflow);
wholeWorkflowApp.setInterpreter("WorkflowInterpreter");
// it would be nice to select compute features of only selected workflow
allComputeParameters = db.query(ComputeParameter.class).equals(ComputeParameter.WORKFLOW, workflow.getId())
.find();
// allComputeParameters = db.query(ComputeParameter.class).find();
System.out.println("we have so many features: " + allComputeParameters.size());
System.out.println("workflow" + workflow.getName());
// add few parameters
wholeWorkflowApp.setTime(now());
// set app name everywhere and add to database
wholeWorkflowApp.setName(applicationName);
pipeline.setId(applicationName);
weaver.setJobID(applicationName);
// db.beginTx();
db.add(wholeWorkflowApp);
// process workflow elements
List<WorkflowElement> workflowElements = db.query(WorkflowElement.class)
.equals(WorkflowElement.WORKFLOW, workflow.getId()).find();
for (int i = 0; i < workflowElements.size(); i++)
{
WorkflowElement workflowElement = workflowElements.get(i);
processWorkflowElement(db, request, workflowElement);
}
String logfile = weaver.getLogfilename();
pipeline.setPipelinelogpath(logfile);
db.commitTx();
executePipeline(db, pipeline);
}
private void createDatabaseUpdater(MCF mcf)
{
if (env.equalsIgnoreCase(ENV_CLUSTER))
{
if (mcf.getBasis().equalsIgnoreCase(MCF.GRID)) updater = new DatabaseUpdaterGridGain(mcf);
else if ((mcf.getBasis().equalsIgnoreCase(MCF.SSH))) updater = new DatabaseUpdaterSsh(mcf);
}
else if (env.equalsIgnoreCase(ENV_GRID))
{
updater = new DatabaseUpdaterGrid(mcf);
}
}
public Date now()
{
Calendar cal = Calendar.getInstance();
return cal.getTime();
}
public void executePipeline(Database db, Pipeline pipeline)
{
if (mcf != null && !flagJustGenerate)
{
if (env.equalsIgnoreCase(ENV_CLUSTER)) mcf.setClusterPipeline(pipeline);
else if (env.equalsIgnoreCase(ENV_GRID)) mcf.setGridPipeline(pipeline);
if (!updater.isStarted())
{
updater.setSettings(20, 20);
updater.setDatabase(db);
updater.start();
}
}
else
System.out.println(pipeline.toString());
}
private void processWorkflowElement(Database db, Tuple request, WorkflowElement workflowElement)
throws DatabaseException, ParseException, IOException
{
weavingValues = new Hashtable<String, String>();
weavingValues.putAll(userValues);
System.out.println(">>> workflow element: " + workflowElement.getName());
// create complex features, which will be processed after simple
// features
Vector<ComputeParameter> featuresToDerive = new Vector<ComputeParameter>();
// get protocol and template
ComputeProtocol protocol = db.findById(ComputeProtocol.class, workflowElement.getProtocol_Id());
// process compute features
for (ComputeParameter computeFeature : allComputeParameters)
{
if (computeFeature.getIsUser()) continue;
else if (computeFeature.getDefaultValue().contains("${"))
{
featuresToDerive.addElement(computeFeature);
}
else
{
weavingValues.put(computeFeature.getName(), computeFeature.getDefaultValue());
}
}
// process workflow element parameters
List<WorkflowElementParameter> workflowElementParameters = db.query(WorkflowElementParameter.class)
.equals(WorkflowElementParameter.WORKFLOWELEMENT, workflowElement.getId()).find();
for (WorkflowElementParameter par : workflowElementParameters)
{
ComputeParameter feature = findComputeFeature(par.getParameter_Name());
weavingValues.put(par.getParameter_Name(), feature.getDefaultValue());
}
generateComputeApplication(db, request, workflowElement, protocol, weavingValues, featuresToDerive);
}
private ComputeParameter findComputeFeature(String targetName)
{
for (ComputeParameter f : allComputeParameters)
{
if (f.getName().equalsIgnoreCase(targetName)) return f;
}
return null;
}
private void generateComputeApplication(Database db, Tuple request, WorkflowElement workflowElement,
ComputeProtocol protocol, Hashtable<String, String> weavingValues, Vector<ComputeParameter> featuresToDerive)
throws IOException, DatabaseException, ParseException
{
ComputeJob app = new ComputeJob();
app.setProtocol(protocol);
app.setWorkflowElement(workflowElement);
app.setTime(now());
String appName = applicationName + "_" + workflowElement.getName() + "_" + pipelineElementNumber;
app.setName(appName);
System.out.println("---application---> " + appName);
String protocolTemplate = protocol.getScriptTemplate();
// weave complex features
for (int i = 0; i < featuresToDerive.size(); i++)
{
ComputeParameter feature = featuresToDerive.elementAt(i);
String featureName = feature.getName();
String featureTemplate = feature.getDefaultValue();
String featureValue = weaver.weaveFreemarker(featureTemplate, weavingValues);
weavingValues.put(featureName, featureValue);
}
String result = weaver.weaveFreemarker(protocolTemplate, weavingValues);
app.setComputeScript(result);
app.setInterpreter(protocol.getInterpreter());
db.add(app);
List<ComputeJob> res = db.query(ComputeJob.class).equals(ComputeJob.NAME, app.getName()).find();
if (res.size() != 1) throw new DatabaseException("ERROR while inserting into db");
app = res.get(0);
Set entries = weavingValues.entrySet();
Iterator it = entries.iterator();
// this is used for database update with ComputeAppPaths
Vector<String> logpathfiles = new Vector<String>();
while (it.hasNext())
{
Map.Entry entry = (Map.Entry) it.next();
String name = (String) entry.getKey();
String value = (String) entry.getValue();
ObservedValue observedValue = new ObservedValue();
observedValue.setValue(value);
observedValue.setProtocolApplication(app);
observedValue.setTarget(target.getId());
ComputeParameter feature = findComputeFeature(name);
if (feature.getDataType().equalsIgnoreCase(LOG))
{
logpathfiles.addElement(value);
}
observedValue.setFeature(feature.getId());
System.out.println(feature.getName() + "->" + value);
db.add(observedValue);
}
pipelineElementNumber++;
// create compute pipeline
String scriptID = app.getName();
weaver.setScriptID(scriptID);
weaver.setDefaults();
if (protocol.getWalltime() != null)
{
weaver.setWalltime(protocol.getWalltime());
// quick fix for the cluster queue
if (protocol.getWalltime().equalsIgnoreCase("00:30:00"))
{
weaver.setClusterQueue("short");
}
else
weaver.setClusterQueue("nodes");
}
if (protocol.getCores() != null) weaver.setCores(protocol.getCores() + "");
if (protocol.getMem() != null) weaver.setMemoryReq(protocol.getMem() + "");
// at some point of time can be added for the verification
weaver.setVerificationCommand("\n");
weaver.setDatasetLocation(remoteLocation);
String scriptRemoteLocation = remoteLocation + "scripts/";
String logfile = weaver.getLogfilename();
Script pipelineScript = null;
if (protocol.getInterpreter().equalsIgnoreCase(INTERPRETER_BASH))
{
pipelineScript = makeShScript(scriptID, scriptRemoteLocation, result);
}
else if (protocol.getInterpreter().equalsIgnoreCase(INTERPRETER_R))
{
pipelineScript = makeRScript(scriptID, scriptRemoteLocation, result);
}
else if (protocol.getInterpreter().equalsIgnoreCase(INTERPRETER_JDL))
{
pipelineScript = makeJDLScript(scriptID, scriptRemoteLocation, result);
}
pipeline.setPipelinelogpath(logfile);
if (isToWriteLocally) weaver.writeToFile(localLocation + pipelineElementNumber + scriptID, new String(
pipelineScript.getScriptData()));
List<String> strPreviousWorkflowElements = workflowElement.getPreviousSteps_Name();
if (strPreviousWorkflowElements.size() == 0)// script does not depend on
// other scripts
{
if (currentStep == null) // it is a first script in the pipeline
{
Step step = new Step(workflowElement.getName());
step.setNumber(stepNumber);
stepNumber++;
currentStep = step;
pipeline.addStep(step);
}
currentStep.addScript(pipelineScript);
}
else
// scripts depends on previous scripts
{
String strPrevious = strPreviousWorkflowElements.get(0);
if (!strPrevious.equalsIgnoreCase(strCurrentPipelineStep))
{
// Step step = new Step("step_" + app.getName());
Step step = new Step(workflowElement.getName());
step.setNumber(stepNumber);
stepNumber++;
currentStep = step;
pipeline.addStep(step);
}
currentStep.addScript(pipelineScript);
strCurrentPipelineStep = strPrevious;
}
// here ComputeAppPaths generation
ComputeAppPaths appPaths = new ComputeAppPaths();
appPaths.setApplication(app);
appPaths.setErrpath(weaver.getErrfilename());
appPaths.setOutpath(weaver.getOutfilename());
appPaths.setExtralog(weaver.getExtralogfilename());
if (logpathfiles.size() > 0) for (int iii = 0; iii < logpathfiles.size(); iii++)
appPaths.addLogpath(logpathfiles.elementAt(iii));
updater.addComputeAppPath(appPaths);
}
// here the first trial of generation for the grid
// will be refactored later
private Script makeJDLScript(String scriptID, String scriptRemoteLocation, String result)
{
String gridHeader = weaver.makeGridHeader();
String downloadTop = weaver.makeGridDownload(weavingValues);
String uploadBottom = weaver.makeGridUpload(weavingValues);
// while testing - hardcoded
// some special fields should be specified for the jdl file
// error and output logs
weavingValues.put("error_log", "err_" + scriptID + ".log");
weavingValues.put("output_log", "out_" + scriptID + ".log");
// extra files to be download and upload - now empty
weavingValues.put("extra_inputs", "");
weavingValues.put("extra_outputs", "");
weavingValues.put("script_location", scriptRemoteLocation);
weavingValues.put("script_name", scriptID + ".sh");
//
String jdlfile = weaver.makeJDL(weavingValues);
System.out.println("name: " + scriptID);
System.out.println("remote location: " + scriptRemoteLocation);
System.out.println("command: " + result);
String script = gridHeader + "\n" + downloadTop + "\n" + result + "\n" + uploadBottom;
System.out.println("jdl-file: \n" + jdlfile);
System.out.println("-------\nscript: \n" + script);
Script scriptFile = new GridScript(scriptID, scriptRemoteLocation, script.getBytes());
FileToSaveRemotely jdlFile = new FileToSaveRemotely(scriptID + ".jdl", jdlfile.getBytes());
scriptFile.addFileToTransfer(jdlFile);
if (isToWriteLocally) weaver.writeToFile(localLocation + pipelineElementNumber + scriptID + ".jdl", new String(
jdlfile.getBytes()));
return scriptFile;
}
private Script makeRScript(String scriptID, String scriptRemoteLocation, String result)
{
weaver.setActualCommand("cd " + scriptRemoteLocation + "\n R CMD BATCH " + scriptRemoteLocation + "myscript.R");
String scriptFile = weaver.makeScript();
Script script = new ClusterScript(scriptID, scriptRemoteLocation, scriptFile.getBytes());
FileToSaveRemotely rScript = new FileToSaveRemotely("myscript.R", result.getBytes());
script.addFileToTransfer(rScript);
System.out.println(script.toString());
return script;
}
private Script makeShScript(String scriptID, String scriptRemoteLocation, String result)
{
weaver.setActualCommand(result);
String scriptFile = weaver.makeScript();
return new ClusterScript(scriptID, scriptRemoteLocation, scriptFile.getBytes());
}
// root remote location should be set
public void setRemoteLocation(String remoteLocation)
{
this.remoteLocation = remoteLocation;
}
public void setToWriteLocally(boolean toWriteLocally)
{
isToWriteLocally = toWriteLocally;
}
public void setLocalLocation(String localLocation)
{
this.localLocation = localLocation;
}
public Pipeline getCurrectPipeline()
{
return pipeline;
}
public void setFlagJustGenerate(boolean b)
{
flagJustGenerate = b;
}
public String getFormattedTime()
{
// SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT_NOW);
return sdf.format(now());
}
}