package org.molgenis.generator;
import freemarker.template.Configuration;
import freemarker.template.Template;
import freemarker.template.TemplateException;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.molgenis.compute.ComputeJob;
import org.molgenis.compute.ComputeParameter;
import org.molgenis.compute.ComputeProtocol;
import org.molgenis.compute.commandline.Worksheet;
import org.molgenis.pheno.ObservationTarget;
import org.molgenis.protocol.Workflow;
import org.molgenis.protocol.WorkflowElement;
import org.molgenis.util.Pair;
import org.molgenis.util.Tuple;
import java.io.*;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: georgebyelas
* Date: 05/04/2012
* Time: 09:46
* To change this template use File | Settings | File Templates.
*/
//obsolete class with fold/reduce trials
public class GenericJobGenerator implements JobGenerator
{
//log4j logger; the generate* methods write the generated job listings to it at DEBUG level
private static Logger logger = Logger.getLogger(GenericJobGenerator.class);
//helpers used by the folding-based generate* methods
private FoldingMaker foldingMaker = new FoldingMaker();
private FoldingParser foldingParser = new FoldingParser();
//template sources
//NOTE(review): presumably loaded from the template files named below - the loading code is not in this section
private String templateGridDownload;
private String templateGridDownloadExe;
private String templateGridUpload;
private String templateGridJDL;
private String templateGridAfterExecution;
private String templateGridUploadLog;
private String templateClusterSubmission;
private String templateClusterHeader;
private String templateClusterFooter;
//template filenames
private String fileTemplateGridDownload = "templ-download-grid.ftl";
private String fileTemplateGridDownloadExe = "templ-exe-grid.ftl";
private String fileTemplateGridUpload = "templ-upload-grid.ftl";
private String fileTemplateGridUploadLog = "templ-upload-grid-log.ftl";
private String fileTemplateGridJDL = "templ-jdl-grid.ftl";
private String fileTemplateGridAfterExecution = "templ-after-exe.ftl";
private String fileTemplateClusterHeader = "templ-pbs-header.ftl";
private String fileTemplateClusterFooter = "templ-pbs-footer.ftl";
private String fileTemplateClusterSubmission = "templ-submit.ftl";
//used for grid generation
//job name -> transfer container; re-created by the generate* methods when the backend is GRID
private Hashtable<String, GridTransferContainer> pairJobTransfers = null;
//used for cluster generation - submit script
//two-way mapping between workflow elements and their jobs; re-created when the backend is CLUSTER
private Hashtable<WorkflowElement, ComputeJob> pairWEtoCJ = null;
private Hashtable<ComputeJob, WorkflowElement> pairCJtoWE = null;
//reset to "" by the generate* methods when the backend is CLUSTER
private String submitScript = null;
//generator settings; the generate* methods read JobGenerator.GENERATION_ID from it to build job names
//NOTE(review): not assigned anywhere in this section - must be set before any generate* method is called
private Hashtable<String, String> config;
/**
 * Generates one compute job for every (worksheet record, workflow element) pair, without folding.
 * <p>
 * Workflow parameters with a plain default value are copied into the weaving table once.
 * Parameters whose default value is itself a template (it contains "${") are woven again
 * for every worksheet record, so that each record gets values derived from its own fields
 * instead of inheriting the values woven for the first record.
 * <p>
 * Side effects: for the GRID backend {@code pairJobTransfers} is re-created and filled, for
 * the CLUSTER backend {@code pairWEtoCJ}, {@code pairCJtoWE} and {@code submitScript} are reset
 * and the two maps are filled. Progress is printed to standard output.
 *
 * @param workflow  workflow that supplies the parameters and the workflow elements
 * @param worksheet records to generate jobs for
 * @param backend   compared case-insensitively with JobGenerator.GRID and JobGenerator.CLUSTER
 * @return the generated jobs in generation order
 */
public Vector<ComputeJob> generateComputeJobsWorksheet(Workflow workflow, List<Tuple> worksheet, String backend)
{
    Vector<ComputeJob> computeJobs = new Vector<ComputeJob>();

    if (backend.equalsIgnoreCase(JobGenerator.GRID))
    {
        pairJobTransfers = new Hashtable<String, GridTransferContainer>();
    }

    if (backend.equalsIgnoreCase(JobGenerator.CLUSTER))
    {
        pairWEtoCJ = new Hashtable<WorkflowElement, ComputeJob>();
        pairCJtoWE = new Hashtable<ComputeJob, WorkflowElement>();
        submitScript = "";
    }

    //Hashtable does not allow null keys or values, so a missing default is stored as ""
    Hashtable<String, String> values = new Hashtable<String, String>();

    //parameters whose default value is a template
    Vector<ComputeParameter> complexParameters = new Vector<ComputeParameter>();

    //fill hashtable with workflow global parameters only once
    for (ComputeParameter parameter : workflow.getWorkflowComputeParameterCollection())
    {
        String defaultValue = parameter.getDefaultValue();
        if (defaultValue == null)
        {
            values.put(parameter.getName(), "");
        }
        else if (defaultValue.contains("${"))
        {
            complexParameters.addElement(parameter);
        }
        else
        {
            values.put(parameter.getName(), defaultValue);
        }
    }

    //counter for the temporary "ngs_demo" ids; it must live outside the record loop,
    //otherwise every record gets the same id and the job names collide
    int ngs_id = 1;

    //produce jobs for every worksheet record
    for (int i = 0; i < worksheet.size(); i++)
    {
        //forget the template-derived values of the previous record, so that every
        //record is woven from the same starting point as the first one
        for (ComputeParameter complexParameter : complexParameters)
        {
            values.remove(complexParameter.getName());
        }

        //add parameters from worksheet to values
        Tuple tuple = worksheet.get(i);
        String id = "id";
        for (String name : tuple.getFields())
        {
            String value = tuple.getString(name);
            //to avoid empty worksheet fields: the first empty field ends the record
            //NOTE(review): fields after it keep the value of the previous record - confirm this is intended
            if (value == null)
            {
                break;
            }
            values.put(name, value);
            id += "_" + value;
        }

        //temporary until folding is implemented
        if (workflow.getName().equalsIgnoreCase("ngs_demo"))
        {
            id = "id" + ngs_id;
            ngs_id++;
        }

        //weave complex parameters; a parameter may depend on another complex parameter,
        //so repeat until everything is resolved or 10 passes are done
        Vector<ComputeParameter> pending = new Vector<ComputeParameter>(complexParameters);
        int count = 0;
        while ((pending.size() > 0) && (count < 10))
        {
            Vector<ComputeParameter> toRemove = new Vector<ComputeParameter>();
            for (ComputeParameter computeParameter : pending)
            {
                String complexValue = weaveFreemarker(computeParameter.getDefaultValue(), values);
                if (complexValue.contains("${"))
                {
                    //still unresolved, try again in the next pass
                    System.out.println(computeParameter.getName() + " -> " + complexValue);
                }
                else
                {
                    values.put(computeParameter.getName(), complexValue);
                    toRemove.add(computeParameter);
                }
            }
            pending.removeAll(toRemove);
            System.out.println("loop " + count + " removed " + toRemove.size());
            count++;
        }

        //check correctness: dump the weaving table
        int number = 0;
        for (Map.Entry<String, String> entry : values.entrySet())
        {
            System.out.println(number + "\t" + entry.getKey() + " -> " + entry.getValue());
            number++;
        }

        //read all workflow elements
        for (WorkflowElement el : workflow.getWorkflowWorkflowElementCollection())
        {
            ComputeProtocol protocol = (ComputeProtocol) el.getProtocol();
            String template = protocol.getScriptTemplate();
            String jobListing = weaveFreemarker(template, values);

            ComputeJob job = new ComputeJob();
            String jobName = config.get(JobGenerator.GENERATION_ID) + "_" +
                    workflow.getName() + "_" +
                    el.getName() + "_" + id;
            job.setName(jobName);
            job.setProtocol(protocol);
            job.setComputeScript(jobListing);
            computeJobs.add(job);

            //fill containers for grid jobs to ensure correct data transfer
            // and for cluster to generate submit script
            if (backend.equalsIgnoreCase(JobGenerator.GRID))
            {
                GridTransferContainer container = fillContainerStr(protocol, values);
                pairJobTransfers.put(job.getName(), container);
            }
            else if (backend.equalsIgnoreCase(JobGenerator.CLUSTER))
            {
                pairWEtoCJ.put(el, job);
                pairCJtoWE.put(job, el);
            }

            logger.log(Level.DEBUG, "----------------------------------------------------------------------");
            logger.log(Level.DEBUG, el.getName());
            logger.log(Level.DEBUG, jobListing);
            logger.log(Level.DEBUG, "----------------------------------------------------------------------");
        }
    }
    return computeJobs;
}
/**
 * Collects the grid transfer data of a protocol from a table of woven string values.
 * For each input, output, exe and log parameter of the protocol the value stored under
 * the parameter name is registered in the container (null when the table has no such key).
 *
 * @param protocol protocol whose inputs, outputs, exes and logs are looked up
 * @param values   woven values keyed by parameter name
 * @return a new container holding the name/value pairs of all four categories
 */
private GridTransferContainer fillContainerStr(ComputeProtocol protocol, Hashtable<String, String> values)
{
    final GridTransferContainer transfers = new GridTransferContainer();

    for (ComputeParameter parameter : protocol.getInputs())
    {
        final String key = parameter.getName();
        transfers.addInput(key, values.get(key));
    }

    for (ComputeParameter parameter : protocol.getOutputs())
    {
        final String key = parameter.getName();
        transfers.addOutput(key, values.get(key));
    }

    for (ComputeParameter parameter : protocol.getExes())
    {
        final String key = parameter.getName();
        transfers.addExe(key, values.get(key));
    }

    for (ComputeParameter parameter : protocol.getLogs())
    {
        final String key = parameter.getName();
        transfers.addLog(key, values.get(key));
    }

    return transfers;
}
/**
 * Generates compute jobs from a worksheet that is folded separately for every workflow element.
 * <p>
 * For each workflow element the worksheet is folded with {@code Worksheet.foldWorksheet} on the
 * names returned by the protocol's {@code getIterateOver_Name()} ("line_number" when that list
 * is empty), and one job is created per folded record. Parameters whose default value is a
 * template (it contains "${") are woven again for every folded record, so no record inherits
 * values that were derived from another record.
 * <p>
 * Side effects: for the GRID backend {@code pairJobTransfers} is re-created and filled, for
 * the CLUSTER backend {@code pairWEtoCJ}, {@code pairCJtoWE} and {@code submitScript} are reset
 * and the two maps are filled. Progress is printed to standard output.
 *
 * @param workflow  workflow that supplies the parameters and the workflow elements
 * @param worksheet unfolded worksheet records
 * @param backend   compared case-insensitively with JobGenerator.GRID and JobGenerator.CLUSTER
 * @return the generated jobs in generation order
 */
public Vector<ComputeJob> generateComputeJobsWorksheetWithFolding(Workflow workflow, List<Tuple> worksheet, String backend)
{
    Vector<ComputeJob> computeJobs = new Vector<ComputeJob>();

    if (backend.equalsIgnoreCase(JobGenerator.GRID))
    {
        pairJobTransfers = new Hashtable<String, GridTransferContainer>();
    }

    if (backend.equalsIgnoreCase(JobGenerator.CLUSTER))
    {
        pairWEtoCJ = new Hashtable<WorkflowElement, ComputeJob>();
        pairCJtoWE = new Hashtable<ComputeJob, WorkflowElement>();
        submitScript = "";
    }

    //Hashtable does not allow null keys or values, so a missing value is stored as ""
    Hashtable<String, String> values = new Hashtable<String, String>();

    Collection<ComputeParameter> parameters = workflow.getWorkflowComputeParameterCollection();
    Collection<WorkflowElement> workflowElements = workflow.getWorkflowWorkflowElementCollection();

    for (WorkflowElement el : workflowElements)
    {
        ComputeProtocol protocol = (ComputeProtocol) el.getProtocol();
        String template = protocol.getScriptTemplate();

        //work on a copy: adding the default target must not change the list owned by the protocol
        List<String> targets = new ArrayList<String>(protocol.getIterateOver_Name());
        if (targets.isEmpty())
        {
            targets.add("line_number");
        }

        List<Tuple> folded = Worksheet.foldWorksheet(worksheet, (List<ComputeParameter>) parameters, targets);

        //parameters with templates; collected afresh for every element so that the
        //list never holds the same parameter twice
        Vector<ComputeParameter> complexParameters = new Vector<ComputeParameter>();
        for (ComputeParameter parameter : parameters)
        {
            String defaultValue = parameter.getDefaultValue();
            if (defaultValue == null)
            {
                values.put(parameter.getName(), "");
            }
            else if (defaultValue.contains("${"))
            {
                complexParameters.addElement(parameter);
            }
            else
            {
                values.put(parameter.getName(), defaultValue);
            }
        }

        int ngs_count = 1;

        //produce jobs for every folded worksheet record
        for (int i = 0; i < folded.size(); i++)
        {
            //forget the template-derived values of the previous record, so that every
            //record is woven from the same starting point
            for (ComputeParameter complexParameter : complexParameters)
            {
                values.remove(complexParameter.getName());
            }

            //add parameters from worksheet to values
            Tuple tuple = folded.get(i);
            String id = "id";
            for (String name : tuple.getFields())
            {
                String value = tuple.getString(name);
                //an empty worksheet field is stored as "" because Hashtable rejects null
                if (value == null)
                {
                    values.put(name, "");
                }
                else
                {
                    values.put(name, value);
                }
                //a null field contributes the text "null" to the id
                id += "_" + value;
            }

            //temporary until folding is implemented
            if (workflow.getName().equalsIgnoreCase("ngs_demo"))
            {
                id = "id_" + ngs_count;
                ngs_count++;
            }

            //weave complex parameters; a parameter may depend on another complex parameter,
            //so repeat until everything is resolved or 10 passes are done
            Vector<ComputeParameter> pending = new Vector<ComputeParameter>(complexParameters);
            int count = 0;
            while ((pending.size() > 0) && (count < 10))
            {
                Vector<ComputeParameter> toRemove = new Vector<ComputeParameter>();
                for (ComputeParameter computeParameter : pending)
                {
                    String complexValue = weaveFreemarker(computeParameter.getDefaultValue(), values);
                    if (complexValue.contains("${"))
                    {
                        //still unresolved, try again in the next pass
                        System.out.println(computeParameter.getName() + " -> " + complexValue);
                    }
                    else
                    {
                        values.put(computeParameter.getName(), complexValue);
                        toRemove.add(computeParameter);
                    }
                }
                pending.removeAll(toRemove);
                System.out.println("loop " + count + " removed " + toRemove.size());
                count++;
            }

            //check correctness: dump the weaving table
            int number = 0;
            for (Map.Entry<String, String> entry : values.entrySet())
            {
                System.out.println(number + "\t" + entry.getKey() + " -> " + entry.getValue());
                number++;
            }

            String jobListing = weaveFreemarker(template, values);

            ComputeJob job = new ComputeJob();
            String jobName = config.get(JobGenerator.GENERATION_ID) + "_" +
                    workflow.getName() + "_" +
                    el.getName() + "_" + id;
            job.setName(jobName);
            job.setProtocol(protocol);
            job.setComputeScript(jobListing);
            computeJobs.add(job);

            //fill containers for grid jobs to ensure correct data transfer
            // and for cluster to generate submit script
            if (backend.equalsIgnoreCase(JobGenerator.GRID))
            {
                GridTransferContainer container = fillContainerStr(protocol, values);
                pairJobTransfers.put(job.getName(), container);
            }
            else if (backend.equalsIgnoreCase(JobGenerator.CLUSTER))
            {
                pairWEtoCJ.put(el, job);
                pairCJtoWE.put(job, el);
            }

            logger.log(Level.DEBUG, "----------------------------------------------------------------------");
            logger.log(Level.DEBUG, el.getName());
            logger.log(Level.DEBUG, jobListing);
            logger.log(Level.DEBUG, "----------------------------------------------------------------------");
        }
    }
    return computeJobs;
}
/**
 * Trial generator that folds the worksheet per workflow element with the FoldingMaker helper.
 * <p>
 * The tuples are converted to a table of hashtables and passed through
 * foldingMaker.removeUnused. For every workflow element the script template is searched for
 * targets (foldingMaker.findTargets): when a list is returned the worksheet is folded on it,
 * otherwise the unfolded worksheet is used. One job is created per line of that table.
 * <p>
 * Side effects: for the GRID backend pairJobTransfers is re-created and filled, for the
 * CLUSTER backend pairWEtoCJ, pairCJtoWE and submitScript are reset and the two maps are
 * filled. Progress is printed to standard output.
 *
 * @param workflow workflow that supplies the parameters and the workflow elements
 * @param f        worksheet tuples
 * @param backend  compared case-insensitively with JobGenerator.GRID and JobGenerator.CLUSTER
 * @return the generated jobs in generation order
 */
public Vector<ComputeJob> generateComputeJobsWorksheetWithFoldingNew(Workflow workflow, List<Tuple> f, String backend)
{
//create the table with targets, which is equal to worksheet if there are no targets
List<Hashtable> table = null;
//remove unused parameters from the worksheet
List<Hashtable> worksheet = foldingMaker.transformToTable(f);
foldingMaker.setWorkflow(workflow);
worksheet = foldingMaker.removeUnused(worksheet, (List<ComputeParameter>) workflow.getWorkflowComputeParameterCollection());
//some supplementary hashtables
Vector<ComputeJob> computeJobs = new Vector<ComputeJob>();
if (backend.equalsIgnoreCase(JobGenerator.GRID))
pairJobTransfers = new Hashtable<String, GridTransferContainer>();
if (backend.equalsIgnoreCase(JobGenerator.CLUSTER))
{
pairWEtoCJ = new Hashtable<WorkflowElement, ComputeJob>();
pairCJtoWE = new Hashtable<ComputeJob, WorkflowElement>();
submitScript = "";
}
Collection<ComputeParameter> parameters = workflow.getWorkflowComputeParameterCollection();
Collection<WorkflowElement> workflowElements = workflow.getWorkflowWorkflowElementCollection();
Iterator<WorkflowElement> itr = workflowElements.iterator();
while (itr.hasNext())
{
//weaving table of this element; Hashtable does not allow null keys or values
Hashtable<String, Object> values = new Hashtable<String, Object>();
//parameters which are templates
Vector<ComputeParameter> complexParameters = new Vector<ComputeParameter>();
WorkflowElement el = itr.next();
ComputeProtocol protocol = (ComputeProtocol) el.getProtocol();
String template = protocol.getScriptTemplate();
//check if we have any targets
List<ComputeParameter> targets = foldingMaker.findTargets(protocol.getScriptTemplate());
if (targets != null)
{
table = foldingMaker.fold(targets, worksheet);
}
else
table = worksheet;
//split the workflow parameters: plain defaults go straight into values (a missing
//default as ""), defaults containing "${" are templates and are woven below
Iterator<ComputeParameter> itParameter = parameters.iterator();
while (itParameter.hasNext())
{
ComputeParameter parameter = itParameter.next();
if (parameter.getDefaultValue() != null)
{
if (parameter.getDefaultValue().contains("${"))
{
complexParameters.addElement(parameter);
}
else
{
values.put(parameter.getName(), parameter.getDefaultValue());
}
}
else
values.put(parameter.getName(), "");
}
//weave complex parameters without folding
//because folding related to protocols
//NOTE(review): a parameter resolved for one worksheet line is removed from complexParameters,
//so it is not woven again for later lines; those lines only copy their fields into values
for (int i = 0; i < worksheet.size(); i++)
{
Hashtable<String, Object> line = worksheet.get(i);
Enumeration ekeys = line.keys();
while (ekeys.hasMoreElements())
{
String ekey = (String) ekeys.nextElement();
Object eValues = line.get(ekey);
values.put(ekey, eValues);
}
//repeat until every template is resolved, at most 10 passes
int count = 0;
while ((complexParameters.size() > 0) && (count < 10))
{
Vector<ComputeParameter> toRemove = new Vector<ComputeParameter>();
for (ComputeParameter computeParameter : complexParameters)
{
String complexValue = foldingMaker.weaveFreemarker(computeParameter.getDefaultValue(), values);
if (complexValue.contains("${"))
{
//still unresolved, try again in the next pass
System.out.println(computeParameter.getName() + " -> " + complexValue);
}
else
{
values.put(computeParameter.getName(), complexValue);
toRemove.add(computeParameter);
}
}
complexParameters.removeAll(toRemove);
System.out.println("loop " + count + " removed " + toRemove.size());
count++;
}
}
//now we start to use the folded worksheet: one job per table line
for (int i = 0; i < table.size(); i++)
{
String id = "id";
Hashtable<String, Object> line = table.get(i);
//the fields of the line overwrite the same keys in values and are appended to the job id
Enumeration ekeys = line.keys();
while (ekeys.hasMoreElements())
{
String ekey = (String) ekeys.nextElement();
Object eValues = line.get(ekey);
values.put(ekey, eValues);
id += "_" + eValues.toString();
}
//temporary until folding is implemented
if (workflow.getName().equalsIgnoreCase("ngs_demo"))
{
//id = "id_" + System.currentTimeMillis();
id = "id_" + i;
}
String jobListing = foldingMaker.weaveFreemarker(template, values);
ComputeJob job = new ComputeJob();
String jobName = config.get(JobGenerator.GENERATION_ID) + "_" +
workflow.getName() + "_" +
el.getName() + "_" + id;
job.setName(jobName);
job.setProtocol(protocol);
job.setComputeScript(jobListing);
computeJobs.add(job);
//fill containers for grid jobs to ensure correct data transfer
// and for cluster to generate submit script
if (backend.equalsIgnoreCase(JobGenerator.GRID))
{
//NOTE(review): fillContainer casts every looked-up value to String, while the lines
//copied into values above may hold other objects - confirm for folded tables
GridTransferContainer container = fillContainer(protocol, values);
pairJobTransfers.put(job.getName(), container);
}
else if (backend.equalsIgnoreCase(JobGenerator.CLUSTER))
{
pairWEtoCJ.put(el, job);
pairCJtoWE.put(job, el);
}
logger.log(Level.DEBUG, "----------------------------------------------------------------------");
logger.log(Level.DEBUG, el.getName());
logger.log(Level.DEBUG, jobListing);
logger.log(Level.DEBUG, "----------------------------------------------------------------------");
}
}
return computeJobs;
}
/**
 * Trial generator for folded worksheets that weaves template parameters "by hand" with the
 * FoldingParser helper.
 * <p>
 * For every workflow element the worksheet is folded on the targets found in the script
 * template (or used unfolded when findTargets returns null) and one job is created per line
 * of that table. Template parameters are first processed with processDependentParameter;
 * whatever is not yet simple is woven iteratively (at most 10 passes) with
 * processUnweavedCollection / processUnweavedLine.
 * <p>
 * Side effects: for the GRID backend pairJobTransfers is re-created and filled, for the
 * CLUSTER backend pairWEtoCJ, pairCJtoWE and submitScript are reset and the two maps are
 * filled. Progress is printed to standard output.
 *
 * @param workflow workflow that supplies the parameters and the workflow elements
 * @param f        worksheet tuples
 * @param backend  compared case-insensitively with JobGenerator.GRID and JobGenerator.CLUSTER
 * @return the generated jobs in generation order
 */
public Vector<ComputeJob> generateComputeJobsFoldedWorksheet(Workflow workflow, List<Tuple> f, String backend)
{
//create the table with targets, which is equal to worksheet if there are no targets
List<Hashtable> table = null;
//remove unused parameters from the worksheet
List<Hashtable> worksheet = foldingMaker.transformToTable(f);
foldingMaker.setWorkflow(workflow);
worksheet = foldingMaker.removeUnused(worksheet, (List<ComputeParameter>) workflow.getWorkflowComputeParameterCollection());
//some supplementary hashtables
Vector<ComputeJob> computeJobs = new Vector<ComputeJob>();
if (backend.equalsIgnoreCase(JobGenerator.GRID))
pairJobTransfers = new Hashtable<String, GridTransferContainer>();
if (backend.equalsIgnoreCase(JobGenerator.CLUSTER))
{
pairWEtoCJ = new Hashtable<WorkflowElement, ComputeJob>();
pairCJtoWE = new Hashtable<ComputeJob, WorkflowElement>();
submitScript = "";
}
Collection<ComputeParameter> parameters = workflow.getWorkflowComputeParameterCollection();
Collection<WorkflowElement> workflowElements = workflow.getWorkflowWorkflowElementCollection();
for (WorkflowElement el : workflowElements)
{
//NOTE(review): the four collections below are created once per element but filled inside
//the per-line loop, so their content carries over from one table line to the next
//weaving table; Hashtable does not allow null keys or values
Hashtable<String, Object> values = new Hashtable<String, Object>();
//parameters which are templates and do directly depend on worksheet values
Vector<ComputeParameter> complexParametersDepend = new Vector<ComputeParameter>();
//parameters which are templates but do not directly depend on worksheet values
Vector<ComputeParameter> complexParametersIndepend = new Vector<ComputeParameter>();
//hashtable with simple values, we need it for initial weaving
Hashtable<String, String> simpleValues = new Hashtable<String, String>();
ComputeProtocol protocol = (ComputeProtocol) el.getProtocol();
String template = protocol.getScriptTemplate();
//check if we have any targets
List<ComputeParameter> targets = foldingMaker.findTargets(protocol.getScriptTemplate());
if (targets != null)
{
table = foldingMaker.fold(targets, worksheet);
}
else
table = worksheet;
//now we start to use the folded worksheet
//all parameters that depend on the folded worksheet become lists as well
for (int i = 0; i < table.size(); i++)
{
String id = "id";
Hashtable<String, Object> line = table.get(i);
//copy the fields of the line into values and append them to the job id
Enumeration ekeys = line.keys();
while (ekeys.hasMoreElements())
{
String ekey = (String) ekeys.nextElement();
Object eValues = line.get(ekey);
values.put(ekey, eValues);
id += "_" + eValues.toString();
}
//split the workflow parameters: plain defaults are stored in values and simpleValues,
//templates (defaults containing "${") are sorted by whether they directly depend on the
//worksheet; parameters without a default value are skipped
for (ComputeParameter parameter : parameters)
{
if (parameter.getDefaultValue() != null)
{
if (parameter.getDefaultValue().contains("${"))
{
if (foldingParser.isDirectlyDependOnWorksheet(parameter, table))
{
complexParametersDepend.addElement(parameter);
}
else
{
complexParametersIndepend.addElement(parameter);
}
}
else
{
values.put(parameter.getName(), parameter.getDefaultValue());
simpleValues.put(parameter.getName(), parameter.getDefaultValue());
}
}
}
//results that are not simple yet, keyed by parameter name
Hashtable<String, Object> unweavedValues = new Hashtable<String, Object>();
System.out.println("values before " + values.size());
//shared by the two loops below
Vector<ComputeParameter> toRemove = new Vector<ComputeParameter>();
//process dependent parameters
for (ComputeParameter par : complexParametersDepend)
{
Pair<String, Object> value = processDependentParameter(par, line, simpleValues);
if (foldingParser.isValueSimple(value))
{
values.put(par.getName(), value.getB());
toRemove.add(par);
}
else
{
unweavedValues.put(par.getName(), value.getB());
}
}
complexParametersDepend.removeAll(toRemove);
System.out.println("values after " + values.size() + " complex dependencies: " + complexParametersDepend.size());
//process independent parameters (the same helper is used as for the dependent ones)
for (ComputeParameter par : complexParametersIndepend)
{
Pair<String, Object> value = processDependentParameter(par, line, simpleValues);
if (foldingParser.isValueSimple(value))
{
values.put(par.getName(), value.getB());
toRemove.add(par);
}
else
{
unweavedValues.put(par.getName(), value.getB());
}
}
complexParametersIndepend.removeAll(toRemove);
System.out.println("values after " + values.size() + " complex dependencies: " + complexParametersIndepend.size());
//iteratively weave whatever is still unresolved, at most 10 passes
Vector<String> vecToRemove = new Vector<String>();
int weavingCount = 0;
System.out.println("loop " + weavingCount + " -> " + unweavedValues.size());
while (unweavedValues.size() > 0 && weavingCount < 10)
{
Enumeration unkeys = unweavedValues.keys();
while (unkeys.hasMoreElements())
{
String unkey = (String) unkeys.nextElement();
Object eValue = unweavedValues.get(unkey);
Pair<String, Object> value = null;
if (eValue instanceof Collection<?>)
{
//System.out.println("++++++++++++++++++++++");
//a folded parameter: weave every element of the list
List<String> unweavedLines = (List<String>) eValue;
value = processUnweavedCollection(unkey, unweavedLines, values);
}
else
{
//System.out.println("------------");
String unweavedLine = (String) eValue;
// it should not happen, but still to test
//if parameter is still not weaved, it should contain a list
//still, we specify 0 as a line number
value = processUnweavedLine(unkey, unweavedLine, values, 0);
}
if (foldingParser.isValueSimple(value))
{
values.put(value.getA(), value.getB());
vecToRemove.add(unkey);
}
else
//keep the partially woven result for the next pass
unweavedValues.put(value.getA(), value.getB());
}
//resolved keys are removed only after the enumeration has finished
for (String str : vecToRemove)
unweavedValues.remove(str);
weavingCount++;
System.out.println("loop " + weavingCount + " -> " + unweavedValues.size());
}
String jobListing = foldingMaker.weaveFreemarker(template, values);
ComputeJob job = new ComputeJob();
String jobName = config.get(JobGenerator.GENERATION_ID) + "_" +
workflow.getName() + "_" +
el.getName() + "_" + id;
job.setName(jobName);
job.setProtocol(protocol);
job.setComputeScript(jobListing);
computeJobs.add(job);
//fill containers for grid jobs to ensure correct data transfer
// and for cluster to generate submit script
if (backend.equalsIgnoreCase(JobGenerator.GRID))
{
//NOTE(review): fillContainer casts every looked-up value to String, while values may
//hold lists here - confirm for folded tables
GridTransferContainer container = fillContainer(protocol, values);
pairJobTransfers.put(job.getName(), container);
}
else if (backend.equalsIgnoreCase(JobGenerator.CLUSTER))
{
pairWEtoCJ.put(el, job);
pairCJtoWE.put(job, el);
}
logger.log(Level.DEBUG, "----------------------------------------------------------------------");
logger.log(Level.DEBUG, el.getName());
logger.log(Level.DEBUG, jobListing);
logger.log(Level.DEBUG, "----------------------------------------------------------------------");
}
}
return computeJobs;
}
/**
 * Generates jobs from a folded worksheet, building all weaving state afresh for every table line.
 * <p>
 * For every workflow element the worksheet is folded on the targets found in the script
 * template (or used unfolded when findTargets returns null), the table is handed to
 * foldingParser.evaluateTable and one job is created per table line. When targets exist,
 * only the target fields of a line are copied into the weaving table and used for the job
 * id; otherwise the whole line is used. Template parameters are first processed with
 * processDependentParameter; whatever is not yet simple is woven iteratively (at most 10
 * passes) with processUnweavedCollection / processUnweavedLine.
 * <p>
 * The job id is "id" followed by "_" + value (blanks replaced by "_") for every contributing
 * field, in the enumeration order of the line's hashtable, so that all contributing fields -
 * not just the last one enumerated - distinguish the job names.
 * <p>
 * Side effects: for the GRID backend pairJobTransfers is re-created and filled, for the
 * CLUSTER backend pairWEtoCJ, pairCJtoWE and submitScript are reset and the two maps are
 * filled. Progress is printed to standard output.
 *
 * @param workflow workflow that supplies the parameters and the workflow elements
 * @param f        worksheet tuples
 * @param backend  compared case-insensitively with JobGenerator.GRID and JobGenerator.CLUSTER
 * @return the generated jobs in generation order
 */
public Vector<ComputeJob> generateComputeJobsFoldedWorksheetReduce(Workflow workflow, List<Tuple> f, String backend)
{
    //the table with targets, which is equal to worksheet if there are no targets
    List<Hashtable> table = null;

    //remove unused parameters from the worksheet
    List<Hashtable> worksheet = foldingMaker.transformToTable(f);
    foldingMaker.setWorkflow(workflow);
    worksheet = foldingMaker.removeUnused(worksheet, (List<ComputeParameter>) workflow.getWorkflowComputeParameterCollection());

    //some supplementary hashtables
    Vector<ComputeJob> computeJobs = new Vector<ComputeJob>();

    if (backend.equalsIgnoreCase(JobGenerator.GRID))
    {
        pairJobTransfers = new Hashtable<String, GridTransferContainer>();
    }

    if (backend.equalsIgnoreCase(JobGenerator.CLUSTER))
    {
        pairWEtoCJ = new Hashtable<WorkflowElement, ComputeJob>();
        pairCJtoWE = new Hashtable<ComputeJob, WorkflowElement>();
        submitScript = "";
    }

    Collection<ComputeParameter> parameters = workflow.getWorkflowComputeParameterCollection();
    Collection<WorkflowElement> workflowElements = workflow.getWorkflowWorkflowElementCollection();

    for (WorkflowElement el : workflowElements)
    {
        ComputeProtocol protocol = (ComputeProtocol) el.getProtocol();
        String template = protocol.getScriptTemplate();

        //check if we have any targets
        List<ComputeParameter> targets = foldingMaker.findTargets(protocol.getScriptTemplate());
        if (targets != null)
        {
            table = foldingMaker.fold(targets, worksheet);
        }
        else
        {
            table = worksheet;
        }

        //let the parser inspect the folded table and hand it the full parameter list
        //before any line is processed
        foldingParser.evaluateTable(table);
        foldingParser.setParametersList(parameters);

        //now we start to use the folded worksheet
        //all parameters that depend on the folded worksheet become lists as well
        for (int i = 0; i < table.size(); i++)
        {
            //weaving table of this line; Hashtable does not allow null keys or values
            Hashtable<String, Object> values = new Hashtable<String, Object>();
            //parameters which are templates and do directly depend on worksheet values
            Vector<ComputeParameter> complexParametersDepend = new Vector<ComputeParameter>();
            //parameters which are templates but do not directly depend on worksheet values
            Vector<ComputeParameter> complexParametersIndepend = new Vector<ComputeParameter>();
            //hashtable with simple values, we need it for initial weaving
            Hashtable<String, String> simpleValues = new Hashtable<String, String>();

            //job naming
            String id = "id";
            Hashtable<String, Object> line = table.get(i);
            Enumeration ekeys = line.keys();
            while (ekeys.hasMoreElements())
            {
                String ekey = (String) ekeys.nextElement();
                //with targets only the target fields are used, otherwise the whole line
                if (targets != null && !isTarget(ekey, targets))
                {
                    continue;
                }
                Object eValues = line.get(ekey);
                values.put(ekey, eValues);
                String vvv = eValues.toString();
                vvv = vvv.replaceAll(" ", "_");
                //append, do not overwrite: every contributing field must show up in the id
                id += "_" + vvv;
            }

            //split the workflow parameters: plain defaults are stored in values and simpleValues,
            //templates (defaults containing "${") are sorted by whether they directly depend on
            //the worksheet; parameters without a default value are skipped
            for (ComputeParameter parameter : parameters)
            {
                if (parameter.getDefaultValue() != null)
                {
                    if (parameter.getDefaultValue().contains("${"))
                    {
                        if (foldingParser.isDirectlyDependOnWorksheet(parameter, table))
                        {
                            complexParametersDepend.addElement(parameter);
                        }
                        else
                        {
                            complexParametersIndepend.addElement(parameter);
                        }
                    }
                    else
                    {
                        values.put(parameter.getName(), parameter.getDefaultValue());
                        simpleValues.put(parameter.getName(), parameter.getDefaultValue());
                    }
                }
            }

            //results that are not simple yet, keyed by parameter name
            Hashtable<String, Object> unweavedValues = new Hashtable<String, Object>();
            //shared by the two loops below
            Vector<ComputeParameter> toRemove = new Vector<ComputeParameter>();

            //process dependent parameters
            for (ComputeParameter par : complexParametersDepend)
            {
                Pair<String, Object> value = processDependentParameter(par, line, simpleValues);
                if (foldingParser.isValueSimple(value))
                {
                    values.put(par.getName(), value.getB());
                    toRemove.add(par);
                }
                else
                {
                    unweavedValues.put(par.getName(), value.getB());
                }
            }
            complexParametersDepend.removeAll(toRemove);

            //process independent parameters (the same helper is used as for the dependent ones)
            for (ComputeParameter par : complexParametersIndepend)
            {
                Pair<String, Object> value = processDependentParameter(par, line, simpleValues);
                if (foldingParser.isValueSimple(value))
                {
                    values.put(par.getName(), value.getB());
                    toRemove.add(par);
                }
                else
                {
                    unweavedValues.put(par.getName(), value.getB());
                }
            }
            complexParametersIndepend.removeAll(toRemove);

            //iteratively weave whatever is still unresolved, at most 10 passes
            Vector<String> vecToRemove = new Vector<String>();
            int weavingCount = 0;
            System.out.println("loop " + weavingCount + " -> " + unweavedValues.size());
            while (unweavedValues.size() > 0 && weavingCount < 10)
            {
                Enumeration unkeys = unweavedValues.keys();
                while (unkeys.hasMoreElements())
                {
                    String unkey = (String) unkeys.nextElement();
                    Object eValue = unweavedValues.get(unkey);
                    Pair<String, Object> value = null;
                    if (eValue instanceof Collection<?>)
                    {
                        //a folded parameter: weave every element of the list
                        List<String> unweavedLines = (List<String>) eValue;
                        value = processUnweavedCollection(unkey, unweavedLines, values);
                    }
                    else
                    {
                        // it should not happen, but still to test
                        //if parameter is still not weaved, it should contain a list
                        //still, we specify 0 as a line number
                        String unweavedLine = (String) eValue;
                        value = processUnweavedLine(unkey, unweavedLine, values, 0);
                    }

                    if (foldingParser.isValueSimple(value))
                    {
                        values.put(value.getA(), value.getB());
                        vecToRemove.add(unkey);
                    }
                    else
                    {
                        //keep the partially woven result for the next pass
                        unweavedValues.put(value.getA(), value.getB());
                    }
                }
                //resolved keys are removed only after the enumeration has finished
                for (String str : vecToRemove)
                {
                    unweavedValues.remove(str);
                }
                weavingCount++;
                System.out.println("loop " + weavingCount + " -> " + unweavedValues.size());
            }

            //try reduce here
            //values = reduce(values);
            String jobListing = foldingMaker.weaveFreemarker(template, values);

            ComputeJob job = new ComputeJob();
            String jobName = config.get(JobGenerator.GENERATION_ID) + "_" +
                    workflow.getName() + "_" +
                    el.getName() + "_" + id;
            job.setName(jobName);
            job.setProtocol(protocol);
            job.setComputeScript(jobListing);
            computeJobs.add(job);

            //fill containers for grid jobs to ensure correct data transfer
            // and for cluster to generate submit script
            if (backend.equalsIgnoreCase(JobGenerator.GRID))
            {
                //NOTE(review): fillContainer casts every looked-up value to String, while values
                //may hold lists here - confirm for folded tables
                GridTransferContainer container = fillContainer(protocol, values);
                pairJobTransfers.put(job.getName(), container);
            }
            else if (backend.equalsIgnoreCase(JobGenerator.CLUSTER))
            {
                pairWEtoCJ.put(el, job);
                pairCJtoWE.put(job, el);
            }

            logger.log(Level.DEBUG, "----------------------------------------------------------------------");
            logger.log(Level.DEBUG, el.getName());
            logger.log(Level.DEBUG, jobListing);
            logger.log(Level.DEBUG, "----------------------------------------------------------------------");
        }
    }
    return computeJobs;
}
/**
 * Tells whether a worksheet key names one of the given targets.
 *
 * @param ekey    key to look for
 * @param targets target parameters whose names are compared case-insensitively with the key
 * @return true as soon as a target with a matching name is found, false otherwise
 */
private boolean isTarget(String ekey, List<ComputeParameter> targets)
{
    Iterator<ComputeParameter> candidates = targets.iterator();
    while (candidates.hasNext())
    {
        if (candidates.next().getName().equalsIgnoreCase(ekey))
        {
            return true;
        }
    }
    return false;
}
/**
 * Weaves a single unresolved template line against the values of one folded position.
 *
 * @param unkey        name of the parameter the line belongs to; becomes the A part of the result
 * @param unweavedLine template text that still has to be woven
 * @param values       weaving table; list values contribute their element at index i
 * @param i            index used to pick elements from list values
 * @return pair of the parameter name and the text returned by foldingParser.doByHand
 */
private Pair<String, Object> processUnweavedLine(String unkey, String unweavedLine, Hashtable<String, Object> values, int i)
{
    Pair<String, Object> result = new Pair<String, Object>();
    String woven = foldingParser.doByHand(unweavedLine, prepareSimpleValues(values, i));
    result.setA(unkey);
    result.setB(woven);
    return result;
}
/**
 * Flattens a weaving table to plain strings for one folded position.
 * A value that is a collection is replaced by its element at index i, any other value is
 * taken over as a string.
 *
 * @param values weaving table holding strings and lists of strings
 * @param i      index of the element to take from list values
 * @return a new table with the same keys and string-only values
 */
private Hashtable<String, String> prepareSimpleValues(Hashtable<String, Object> values, int i)
{
    Hashtable<String, String> flattened = new Hashtable<String, String>();
    for (Enumeration<String> names = values.keys(); names.hasMoreElements();)
    {
        String name = names.nextElement();
        Object stored = values.get(name);
        String single = (stored instanceof Collection<?>)
                ? ((List<String>) stored).get(i)
                : (String) stored;
        flattened.put(name, single);
    }
    return flattened;
}
/**
 * Weaves every line of a folded (list-valued) parameter.
 * Line number n of the list is woven with processUnweavedLine using n as the index.
 *
 * @param unkey         name of the parameter; becomes the A part of the result
 * @param unweavedLines template lines that still have to be woven
 * @param values        weaving table passed on to processUnweavedLine
 * @return pair of the parameter name and the list of woven lines, in input order
 */
private Pair<String, Object> processUnweavedCollection(String unkey, List<String> unweavedLines, Hashtable<String, Object> values)
{
    Pair<String, Object> result = new Pair<String, Object>();
    List<String> wovenLines = new ArrayList<String>();
    int position = 0;
    for (String unweaved : unweavedLines)
    {
        Pair<String, Object> woven = processUnweavedLine(unkey, unweaved, values, position);
        wovenLines.add((String) woven.getB());
        position++;
    }
    result.setA(unkey);
    result.setB(wovenLines);
    return result;
}
/**
 * Resolves a parameter whose default value is a template.
 * This is also where it is decided whether the parameter is foldered: when the
 * line holds more than one foldered entry and the folding parser reports the
 * template as a list, the template is parsed once per index and the B side of
 * the result is a list of strings; otherwise it is parsed once and the B side
 * is a single string.
 *
 * @param par          parameter whose name becomes the A side and whose default value is the template
 * @param line         values of the current line, passed to the folding parser
 * @param simpleValues plain string values, passed to the folding parser
 * @return pair of (parameter name, resolved string or list of resolved strings)
 */
private Pair<String, Object> processDependentParameter
        (ComputeParameter par, Hashtable<String, Object> line, Hashtable<String, String> simpleValues)
{
    Pair<String, Object> resolved = new Pair<String, Object>();
    resolved.setA(par.getName());

    int foldedSize = foldingParser.getFolderedLineSize(line);
    String template = par.getDefaultValue();

    //the parser keeps its list flag between calls, so reset it before probing this template
    foldingParser.setNotList();
    foldingParser.checkIsList(template);
    boolean templateIsList = foldingParser.getIsList();

    if (foldedSize <= 1 || !templateIsList)
    {
        resolved.setB(foldingParser.parseTemplateOneLineByHand(template, line, simpleValues));
        return resolved;
    }

    List<String> perIndex = new ArrayList<String>();
    int index = 0;
    while (index < foldedSize)
    {
        perIndex.add(foldingParser.parseTemplateLineByHand(template, line, index, simpleValues));
        index++;
    }
    resolved.setB(perIndex);
    return resolved;
}
/**
 * Collects the values of a protocol's inputs, outputs, executables and logs
 * into a new transfer container. Each value is looked up in values under the
 * parameter's name and cast to String, so a non-String value raises a
 * ClassCastException.
 *
 * @param protocol protocol whose parameter lists are read
 * @param values   values keyed by parameter name
 * @return container filled in the order inputs, outputs, exes, logs
 */
private GridTransferContainer fillContainer(ComputeProtocol protocol, Hashtable<String, Object> values)
{
    GridTransferContainer transfers = new GridTransferContainer();

    for (ComputeParameter in : protocol.getInputs())
    {
        String key = in.getName();
        transfers.addInput(key, (String) values.get(key));
    }

    for (ComputeParameter out : protocol.getOutputs())
    {
        String key = out.getName();
        transfers.addOutput(key, (String) values.get(key));
    }

    for (ComputeParameter executable : protocol.getExes())
    {
        String key = executable.getName();
        transfers.addExe(key, (String) values.get(key));
    }

    for (ComputeParameter logFile : protocol.getLogs())
    {
        String key = logFile.getName();
        transfers.addLog(key, (String) values.get(key));
    }

    return transfers;
}
/**
 * Not implemented by this generator: all arguments are ignored.
 *
 * @return always null
 */
public Vector<ComputeJob> generateComputeJobsDB(Workflow workflow, List<ObservationTarget> worksheet, String backend)
{
return null;
}
/**
 * Turns compute jobs into files for the selected back-end.
 * The templates of that back-end are read from the configured template
 * directory, then one set of files is generated per job. For the cluster
 * back-end the submit script accumulated in submitScript while generating the
 * jobs is written to "submit_&lt;generation id&gt;.sh" in the output directory.
 * A back-end that is neither grid nor cluster produces nothing.
 *
 * @param computeJobs jobs to generate files for
 * @param backend     back-end name, compared case-insensitively with JobGenerator.GRID and JobGenerator.CLUSTER
 * @param config      configuration holding template directory, output directory and generation id
 * @return always true
 */
public boolean generateActualJobs(Vector<ComputeJob> computeJobs, String backend, Hashtable<String, String> config)
{
    String templatesDir = config.get(JobGenerator.TEMPLATE_DIR);
    boolean forGrid = backend.equalsIgnoreCase(JobGenerator.GRID);
    boolean forCluster = backend.equalsIgnoreCase(JobGenerator.CLUSTER);

    //read templates
    if (forGrid)
    {
        readTemplatesGrid(templatesDir);
    }
    else if (forCluster)
    {
        readTemplatesCluster(templatesDir);
    }

    //generate files for selected back-end
    Iterator<ComputeJob> pending = computeJobs.iterator();
    while (pending.hasNext())
    {
        ComputeJob job = pending.next();
        if (forGrid)
        {
            generateActualJobGrid(job, config);
        }
        else if (forCluster)
        {
            generateActualJobCluster(job, config);
        }
    }

    //write cluster submit script
    if (forCluster)
    {
        String submitFile = config.get(JobGenerator.OUTPUT_DIR) + System.getProperty("file.separator")
                + "submit_" + config.get(JobGenerator.GENERATION_ID) + ".sh";
        writeToFile(submitFile, submitScript);
    }
    return true;
}
/**
 * Not implemented by this generator: all arguments are ignored and nothing is generated.
 *
 * @return always false
 */
public boolean generateActualJobsWithMacros(Vector<ComputeJob> computeJobs, String backend, Hashtable<String, String> config)
{
return false;
}
/**
 * Generates the cluster script of one job and appends its submission lines
 * to submitScript.
 * The script is the woven cluster header, the job's compute script and the
 * woven cluster footer, written to "&lt;job name&gt;.sh" in the output directory
 * (which is created if needed). The job's dependencies are the names of the
 * jobs belonging to the previous steps of its workflow element, each prefixed
 * with ":" and appended to JobGenerator.DEPENDENCY_HEAD; a job without previous
 * steps gets an empty dependency string.
 *
 * @param computeJob job to generate; must have been registered in pairCJtoWE
 * @param config     configuration holding the output directory
 */
private void generateActualJobCluster(ComputeJob computeJob, Hashtable<String, String> config)
{
    String jobName = computeJob.getName();
    System.out.println("name: " + jobName);

    //create values hashtable to fill templates
    ComputeProtocol protocol = (ComputeProtocol) computeJob.getProtocol();
    Hashtable<String, String> values = new Hashtable<String, String>();
    values.put(JobGenerator.JOB_ID, jobName);
    values.put(ModelLoader.FLAG_CLUSTER_QUEUE, protocol.getClusterQueue());
    values.put(ModelLoader.FLAG_CORES, protocol.getCores().toString());
    values.put(ModelLoader.FLAG_NODES, protocol.getNodes().toString());
    values.put(ModelLoader.FLAG_MEMORY, protocol.getMem());
    values.put(ModelLoader.FLAG_WALLTIME, protocol.getWalltime());

    //create actual cluster job: header, then the job's own script, then footer
    String actualJob = weaveFreemarker(templateClusterHeader, values)
            + computeJob.getComputeScript()
            + weaveFreemarker(templateClusterFooter, values);

    //write script
    String outputDir = config.get(JobGenerator.OUTPUT_DIR);
    new File(outputDir).mkdirs();
    writeToFile(outputDir + System.getProperty("file.separator") + jobName + ".sh", actualJob);

    //create job submission part
    WorkflowElement element = pairCJtoWE.get(computeJob);
    if (element.getPreviousSteps().size() == 0)
    {
        values.put(JobGenerator.JOB_DEPENDENCIES, "");
    }
    else
    {
        StringBuilder dependency = new StringBuilder().append(JobGenerator.DEPENDENCY_HEAD);
        for (WorkflowElement previous : element.getPreviousSteps())
        {
            ComputeJob previousJob = pairWEtoCJ.get(previous);
            dependency.append(":").append(previousJob.getName());
        }
        values.put(JobGenerator.JOB_DEPENDENCIES, dependency.toString());
    }

    submitScript += weaveFreemarker(templateClusterSubmission, values);
}
/**
 * Generates the grid files of one job in the configured output directory:
 * "&lt;job name&gt;.jdl", woven from the JDL template, and "&lt;job name&gt;.sh".
 * The shell script consists of, in this order: one download section per input,
 * one download section per executable, the job's compute script, one upload
 * section per output and finally the upload section of the log.
 * Inside the script every full transfer name is replaced by its bare file name
 * (the part after the last "/").
 *
 * The replacement is literal. Transfer names are file paths, so they must not
 * be interpreted as regular expressions: "." or "+" in a path would match
 * unintended text, and "$" or "\" in a file name would corrupt the replacement
 * or throw.
 *
 * The log name is the first element returned by the container's log table; a
 * container without logs makes nextElement() throw NoSuchElementException.
 *
 * @param computeJob job to generate; its transfers must have been registered in pairJobTransfers under the job name
 * @param config     configuration holding the output directory and the back-end directory
 */
private void generateActualJobGrid(ComputeJob computeJob, Hashtable<String, String> config)
{
    String separator = System.getProperty("file.separator");

    //create values hashtable to fill templates
    Hashtable<String, String> values = new Hashtable<String, String>();
    values.put("script_name", computeJob.getName());
    values.put("error_log", "err_" + computeJob.getName() + ".log");
    values.put("output_log", "out_" + computeJob.getName() + ".log");
    values.put("script_location", config.get(JobGenerator.BACK_END_DIR));

    //create jdl
    String jdlListing = weaveFreemarker(templateGridJDL, values);

    //write jdl
    (new File(config.get(JobGenerator.OUTPUT_DIR))).mkdirs();
    writeToFile(config.get(JobGenerator.OUTPUT_DIR) + separator + computeJob.getName() + ".jdl",
            jdlListing);

    //create shell
    String shellListing = "";
    String initialScript = computeJob.getComputeScript();
    GridTransferContainer container = pairJobTransfers.get(computeJob.getName());

    //get log filename
    Hashtable<String, String> logs = container.getLogs();
    String logName = logs.elements().nextElement();
    String justLogName = giveJustName(logName);

    //generate downloading section (transfer inputs and executable)
    //and change job listing to execute in the grid
    Hashtable<String, String> inputs = container.getInputs();
    for (String actualName : inputs.values())
    {
        String justName = giveJustName(actualName);
        Hashtable<String, String> local = new Hashtable<String, String>();
        local.put(JobGenerator.LFN_NAME, actualName);
        local.put(JobGenerator.INPUT, justName);
        local.put(JobGenerator.LOG, justLogName);
        String inputListing = weaveFreemarker(templateGridDownload, local);

        //literal replacement: the path is text, not a pattern
        initialScript = initialScript.replace(actualName, justName);
        shellListing += inputListing;
    }

    Hashtable<String, String> exes = container.getExes();
    for (String actualName : exes.values())
    {
        String justName = giveJustName(actualName);
        Hashtable<String, String> local = new Hashtable<String, String>();
        local.put(JobGenerator.LFN_NAME, actualName);
        local.put(JobGenerator.INPUT, justName);
        local.put(JobGenerator.LOG, justLogName);
        String inputListing = weaveFreemarker(templateGridDownloadExe, local);

        //trace output kept as it was; NOTE(review): consider routing it through the class logger
        System.out.println("-----------");
        System.out.println(initialScript);
        System.out.println("act " + actualName);
        System.out.println("just " + justName);

        initialScript = initialScript.replace(actualName, justName);
        shellListing += inputListing;
    }

    shellListing += initialScript;

    //generate uploading section
    //and change job listing to execute in the grid
    Hashtable<String, String> outputs = container.getOutputs();
    for (String actualName : outputs.values())
    {
        String justName = giveJustName(actualName);
        Hashtable<String, String> local = new Hashtable<String, String>();
        local.put(JobGenerator.LFN_NAME, actualName);
        local.put(JobGenerator.OUTPUT, justName);
        local.put(JobGenerator.LOG, justLogName);
        String outputListing = weaveFreemarker(templateGridUpload, local);

        //applies to everything generated so far, download sections included
        shellListing = shellListing.replace(actualName, justName);
        shellListing += outputListing;
    }

    //add upload log
    Hashtable<String, String> logTransfer = new Hashtable<String, String>();
    logTransfer.put(JobGenerator.LFN_NAME, logName);
    logTransfer.put(JobGenerator.OUTPUT, justLogName);
    logTransfer.put(JobGenerator.LOG, justLogName);
    shellListing += weaveFreemarker(templateGridUploadLog, logTransfer);

    //write shell
    writeToFile(config.get(JobGenerator.OUTPUT_DIR) + separator + computeJob.getName() + ".sh",
            shellListing);
}
/**
 * Strips the directory part from a "/"-separated name.
 *
 * @param actualName name, possibly containing "/" separators
 * @return the text after the last "/", or the whole name when it contains none
 */
private String giveJustName(String actualName)
{
    return actualName.substring(actualName.lastIndexOf('/') + 1);
}
/**
 * Loads the cluster templates (header, footer, submission) from a directory
 * into the corresponding template fields.
 * An IOException while reading is printed as a stack trace and stops the
 * loading; templates not read by then keep their previous value.
 *
 * @param templatesDir directory containing the template files
 */
private void readTemplatesCluster(String templatesDir)
{
    String prefix = templatesDir + System.getProperty("file.separator");
    try
    {
        templateClusterHeader = getFileAsString(prefix + fileTemplateClusterHeader);
        templateClusterFooter = getFileAsString(prefix + fileTemplateClusterFooter);
        templateClusterSubmission = getFileAsString(prefix + fileTemplateClusterSubmission);
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
}
/**
 * Loads the grid templates (download, executable download, upload, log upload,
 * JDL, after-execution) from a directory into the corresponding template fields.
 * An IOException while reading is printed as a stack trace and stops the
 * loading; templates not read by then keep their previous value.
 *
 * @param templatesDir directory containing the template files
 */
private void readTemplatesGrid(String templatesDir)
{
    String prefix = templatesDir + System.getProperty("file.separator");
    try
    {
        templateGridDownload = getFileAsString(prefix + fileTemplateGridDownload);
        templateGridDownloadExe = getFileAsString(prefix + fileTemplateGridDownloadExe);
        templateGridUpload = getFileAsString(prefix + fileTemplateGridUpload);
        templateGridUploadLog = getFileAsString(prefix + fileTemplateGridUploadLog);
        templateGridJDL = getFileAsString(prefix + fileTemplateGridJDL);
        templateGridAfterExecution = getFileAsString(prefix + fileTemplateGridAfterExecution);
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
}
/**
 * Stores the given configuration table in this generator's config field.
 *
 * @param config configuration to keep; stored by reference, not copied
 */
public void setConfig(Hashtable<String, String> config)
{
this.config = config;
}
/**
 * Does nothing: the worksheet passed in is ignored by this generator.
 *
 * @param worksheet ignored
 */
public void setWorksheet(List<Tuple> worksheet)
{
//no-op: empty body, the worksheet is not stored
}
/**
 * Processes a FreeMarker template given as a string with the given values.
 * A TemplateException or IOException raised while building or processing the
 * template is swallowed without any report; the method then returns whatever
 * the output writer holds at that point.
 *
 * @param strTemplate template source
 * @param values      data model handed to the template
 * @return the content of the output writer after processing
 */
public String weaveFreemarker(String strTemplate, Hashtable<String, String> values)
{
    StringWriter woven = new StringWriter();
    Configuration configuration = new Configuration();
    try
    {
        Template template = new Template("name", new StringReader(strTemplate), configuration);
        template.process(values, woven);
    }
    catch (TemplateException ignored)
    {
        //not reported; the content written so far is returned
    }
    catch (IOException ignored)
    {
        //not reported; the content written so far is returned
    }
    return woven.toString();
}
/**
 * Reads a whole file into a string.
 * A missing file is logged as an error and terminates the JVM with exit code 1.
 *
 * The content is read with readFully, because a single read(byte[]) call may
 * return before the buffer is filled, and the stream is closed in a finally
 * block so it is released even when reading fails.
 *
 * The bytes are decoded with the platform default charset.
 * NOTE(review): confirm whether the templates should be read with a fixed charset.
 *
 * @param filename path of the file to read
 * @return the file content
 * @throws IOException if the file cannot be opened or read completely
 */
private final String getFileAsString(String filename) throws IOException
{
    File file = new File(filename);
    if (!file.exists())
    {
        logger.log(Level.ERROR, "template file " + filename + " does not exist");
        System.exit(1);
    }

    final byte[] bytes = new byte[(int) file.length()];
    final DataInputStream in = new DataInputStream(
            new BufferedInputStream(new FileInputStream(file)));
    try
    {
        //blocks until the buffer is full; throws EOFException if the file is shorter than expected
        in.readFully(bytes);
    }
    finally
    {
        in.close();
    }
    return new String(bytes);
}
/**
 * Writes a string to a file, replacing any previous content.
 * The method never throws an IOException: a failure to open, write or close
 * the file is reported on System.err and the method returns normally.
 * The writer is closed in a finally block so it is released even when the
 * write fails.
 *
 * @param outfilename path of the file to write
 * @param script      content to write
 */
public void writeToFile(String outfilename, String script)
{
    BufferedWriter out = null;
    try
    {
        out = new BufferedWriter(new FileWriter(outfilename));
        out.write(script);
    }
    catch (IOException e)
    {
        System.err.println("could not write file " + outfilename + ": " + e);
    }
    finally
    {
        if (out != null)
        {
            try
            {
                //close() also flushes, so a failure here means content may be missing
                out.close();
            }
            catch (IOException e)
            {
                System.err.println("could not close file " + outfilename + ": " + e);
            }
        }
    }
}
/**
 * Not implemented: the argument is ignored.
 *
 * @param values ignored
 * @return always null
 */
private Hashtable<String, Object> reduce(Hashtable<String, Object> values)
{
return null; //placeholder body, nothing is computed
}
}