package org.molgenis.generator; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.molgenis.compute.ComputeParameter; import org.molgenis.compute.ComputeProtocol; import org.molgenis.compute.commandline.WorksheetHelper; import org.molgenis.protocol.Workflow; import org.molgenis.protocol.WorkflowElement; import org.molgenis.util.CsvFileReader; import org.molgenis.util.CsvReader; import org.molgenis.util.Entity; import org.molgenis.util.Tuple; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.util.*; /** * Created by IntelliJ IDEA. * User: georgebyelas * Date: 05/04/2012 * Time: 10:18 * To change this template use File | Settings | File Templates. */ public class ModelLoader { public static final String FLAG_MOLGENIS = "#MOLGENIS"; public static final String FLAG_INPUTS = "#INPUTS"; public static final String FLAG_OUTPUTS = "#OUTPUTS"; public static final String FLAG_EXES = "#EXES"; public static final String FLAG_LOG = "#LOGS"; public static final String FLAG_TARGETS = "#TARGETS"; //also used for cluster scripts generation public static final String FLAG_CLUSTER_QUEUE = "clusterQueue"; public static final String FLAG_CORES = "cores"; public static final String FLAG_NODES = "nodes"; public static final String FLAG_WALLTIME = "walltime"; public static final String FLAG_MEMORY = "mem"; public static final String FLAG_INTERPRETER = "interpreter"; private static final String INTERPRETER_BASH = "bash"; private static final String INTERPRETER_R = "R"; private static Logger logger = Logger.getLogger(ModelLoader.class); private Workflow workflow = null; public Workflow loadWorkflowFromFiles(File fileWorkflow, File dirProtocol, File fileParameters, File fileEnvironment) throws Exception { workflow = new Workflow(); String name = fileWorkflow.getName(); //removing file extension name = name.substring(0, name.lastIndexOf(".")); workflow.setName(name); //read workflow elements List<WorkflowElement> workflowElements = readEntitiesFromFile(fileWorkflow, WorkflowElement.class); workflow.setWorkflowWorkflowElementCollection(workflowElements); //read workflow environment parameters List<ComputeParameter> workflowParameters1 = readEntitiesFromFile(fileEnvironment, ComputeParameter.class); //read workflow parameters List<ComputeParameter> workflowParameters2 = readEntitiesFromFile(fileParameters, ComputeParameter.class); //add parameters and environment parameters workflowParameters1.addAll(workflowParameters2); workflow.setWorkflowComputeParameterCollection(workflowParameters1); //set protocols and workflow_name to elements Iterator<WorkflowElement> iterator = workflow.getWorkflowWorkflowElementCollection().iterator(); while (iterator.hasNext()) { WorkflowElement workflowElement = iterator.next(); workflowElement.setWorkflow_Name(workflow.getName()); String strComputeProtocol = workflowElement.getProtocol_Name(); System.out.print("protocol " + strComputeProtocol); //loading protocol String strProtocol = dirProtocol.getAbsolutePath() + System.getProperty("file.separator") + strComputeProtocol + ".ftl"; File fileProtocol = new File(strProtocol); isExist(fileProtocol); String protocol = readFileAsString(fileProtocol); //create ComputeProtocol parsing file ComputeProtocol computeProtocol = parseComputeProtocolFromString(strComputeProtocol, protocol); workflowElement.setProtocol(computeProtocol); //set predecessors for workflow elements workflowElement.setPreviousSteps(createListPreviousSteps(workflowElement, workflowElements)); } return workflow; } private List<WorkflowElement> createListPreviousSteps(WorkflowElement workflowElement, List<WorkflowElement> workflowElements) { List<WorkflowElement> previous = new ArrayList<WorkflowElement>(); List<String> names = workflowElement.getPreviousSteps_Name(); for (int i = 0; i < names.size(); i++) { WorkflowElement el = findWorkflowElement(names.get(i), workflowElements); previous.add(el); } return previous; } private WorkflowElement findWorkflowElement(String s, List<WorkflowElement> workflowElements) { Iterator<WorkflowElement> itr = workflowElements.iterator(); while (itr.hasNext()) { WorkflowElement el = itr.next(); String name = el.getName(); if (s.equalsIgnoreCase(name)) return el; } logger.log(Level.ERROR, "workflow element " + s + " does not exist"); System.exit(1); return null; } private ComputeProtocol parseComputeProtocolFromString(String protocolName, String protocolListing) { ComputeProtocol protocol = new ComputeProtocol(); protocol.setName(protocolName); protocol.setScriptTemplate(protocolListing); String strMolgenisHeader = protocolListing.substring(protocolListing.indexOf(FLAG_MOLGENIS), protocolListing.indexOf("\n", protocolListing.indexOf(FLAG_MOLGENIS))); String str = null; //set walltime if (strMolgenisHeader.indexOf(FLAG_WALLTIME) > -1) { str = getValueFromMolgenisHeader(strMolgenisHeader, FLAG_WALLTIME); protocol.setWalltime(str); } //set # nodes if (strMolgenisHeader.indexOf(FLAG_NODES) > -1) { str = getValueFromMolgenisHeader(strMolgenisHeader, FLAG_NODES); protocol.setNodes(Integer.parseInt(str)); } //set # cores if (strMolgenisHeader.indexOf(FLAG_CORES) > -1) { str = getValueFromMolgenisHeader(strMolgenisHeader, FLAG_CORES); protocol.setCores(Integer.parseInt(str)); } //set interpreter if (strMolgenisHeader.indexOf(FLAG_INTERPRETER) > -1) { str = getValueFromMolgenisHeader(strMolgenisHeader, FLAG_INTERPRETER); protocol.setInterpreter(str); } //set cluster queue if (strMolgenisHeader.indexOf(FLAG_CLUSTER_QUEUE) > -1) { str = getValueFromMolgenisHeader(strMolgenisHeader, FLAG_CLUSTER_QUEUE); protocol.setClusterQueue(str); } //set memory if (strMolgenisHeader.indexOf(FLAG_MEMORY) > -1) { str = getValueFromMolgenisHeader(strMolgenisHeader, FLAG_MEMORY); protocol.setMem(str); } List<ComputeParameter> list = null; //set targets if (protocolListing.indexOf(FLAG_TARGETS) > -1) { str = protocolListing.substring(protocolListing.indexOf(FLAG_TARGETS), protocolListing.indexOf("\n", protocolListing.indexOf(FLAG_TARGETS))); list = getParametersFromHeader(str); protocol.setIterateOver(list); } //set inputs if (protocolListing.indexOf(FLAG_INPUTS) > -1) { str = protocolListing.substring(protocolListing.indexOf(FLAG_INPUTS), protocolListing.indexOf("\n", protocolListing.indexOf(FLAG_INPUTS))); list = getParametersFromHeader(str); protocol.setInputs(list); } //set outputs if (protocolListing.indexOf(FLAG_OUTPUTS) > -1) { str = protocolListing.substring(protocolListing.indexOf(FLAG_OUTPUTS), protocolListing.indexOf("\n", protocolListing.indexOf(FLAG_OUTPUTS))); list = getParametersFromHeader(str); protocol.setOutputs(list); } //set exes if (protocolListing.indexOf(FLAG_EXES) > -1) { str = protocolListing.substring(protocolListing.indexOf(FLAG_EXES), protocolListing.indexOf("\n", protocolListing.indexOf(FLAG_EXES))); list = getParametersFromHeader(str); protocol.setExes(list); } //set logs if (protocolListing.indexOf(FLAG_LOG) > -1) { str = protocolListing.substring(protocolListing.indexOf(FLAG_LOG), protocolListing.indexOf("\n", protocolListing.indexOf(FLAG_LOG))); list = getParametersFromHeader(str); protocol.setLogs(list); } System.out.println(" ... parsed"); return protocol; } public List<ComputeParameter> getParametersFromHeader(String str) { List<ComputeParameter> list = new ArrayList<ComputeParameter>(); Vector<String> names = findNames(str); for (int i = 0; i < names.size(); i++) { Vector<ComputeParameter> pars = findParameter(names.elementAt(i)); list.addAll(pars); } return list; } private Vector<ComputeParameter> findParameter(String s) { //here we manage wildcards in the parameter names Vector<ComputeParameter> pars = new Vector<ComputeParameter>(); Collection<ComputeParameter> parameters = workflow.getWorkflowComputeParameterCollection(); Iterator<ComputeParameter> itr = parameters.iterator(); while (itr.hasNext()) { ComputeParameter par = itr.next(); String name = par.getName(); if (s.contains(".*")) { String prefix = s.substring(0, s.lastIndexOf(".")); if (name.contains(prefix)) { int prefixIndex = name.indexOf(prefix); if (prefixIndex == 0) pars.add(par); } } else { if (s.equalsIgnoreCase(name)) { pars.add(par); return pars; } } } if (pars.size() > 0) return pars; logger.log(Level.ERROR, "parameter " + s + " does not exist"); System.exit(1); return null; } //here, we trim first to remove end string white spaces public Vector<String> findNames(String list) { list = list.trim(); Vector<String> names = new Vector<String>(); int posEmpty = list.indexOf(" ") + 1; if (posEmpty == 0) return names; list = list.substring(posEmpty); while (list.indexOf(",") > -1) { int posComa = list.indexOf(","); String name = list.substring(0, posComa).trim(); if (name != "") names.addElement(name); list = list.substring(posComa + 1); } names.add(list); return names; } private String getValueFromMolgenisHeader(String str, String flag) { String value; int index = str.indexOf("=", str.indexOf(flag)) + 1; int indexSpace = str.indexOf(" ", index); if (indexSpace > -1) value = str.substring(index, indexSpace); else value = str.substring(index); return value; } public List<Tuple> loadWorksheetFromFile(File fileWorksheet) { List<Tuple> worksheet = null; try { worksheet = new WorksheetHelper().readTuplesFromFile(fileWorksheet); } catch (Exception e) { e.printStackTrace(); } return worksheet; } private <E extends Entity> List<E> readEntitiesFromFile(File file, final Class<E> klazz) throws Exception { final List<E> result = new ArrayList<E>(); // check if file exists if (!file.exists()) { logger.warn("file '" + file.getName() + "' is missing"); return result; } // read the file CsvReader reader = new CsvFileReader(file); for (Tuple tuple : reader) { E entity = klazz.newInstance(); entity.set(tuple); result.add(entity); } return result; } private String readFileAsString(File file) throws java.io.IOException { StringBuffer fileData = new StringBuffer(1000); BufferedReader reader = new BufferedReader(new FileReader(file)); char[] buf = new char[1024]; int numRead = 0; while ((numRead = reader.read(buf)) != -1) { String readData = String.valueOf(buf, 0, numRead); fileData.append(readData); buf = new char[1024]; } reader.close(); return fileData.toString(); } private void isExist(File file) { if (!file.exists()) { logger.log(Level.ERROR, "protocol " + file.getName() + " does not exist"); System.exit(1); } } public Vector<String> findFlagValues(String script, String flag) { if (script.indexOf(flag) > -1) { String str = script.substring(script.indexOf(flag), script.indexOf("\n", script.indexOf(flag))); Vector<String> names = findNames(str); return names; } return null; } }