package net.sourceforge.seqware.pipeline.workflowV2;

import io.seqware.pipeline.SqwKeys;
import java.io.File;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.logging.Level;
import java.util.logging.Logger;
import net.sourceforge.seqware.common.metadata.Metadata;
import net.sourceforge.seqware.common.model.WorkflowRun;
import net.sourceforge.seqware.common.module.ReturnValue;
import net.sourceforge.seqware.common.util.Log;
import net.sourceforge.seqware.common.util.Rethrow;
import net.sourceforge.seqware.common.util.maptools.MapTools;
import net.sourceforge.seqware.common.util.maptools.ReservedIniKeys;
import net.sourceforge.seqware.pipeline.bundle.Bundle;

/**
 * A utility class for creating the AbstractWorkflowDataModel. It reads the bundle's metadata.xml file and loads either a Java-based or an
 * XML-based object model.
 *
 * @author yliang
 */
public class WorkflowDataModelFactory {

    private final Map<String, String> config;
    private final Metadata metadata;

    /**
     * Constructs the factory with only the parameters that should not change from workflow to workflow.
     *
     * @param config
     *            config generated from the .seqware/settings file
     * @param metadata
     *            the metadata access layer
     */
    public WorkflowDataModelFactory(Map<String, String> config, Metadata metadata) {
        this.config = config;
        this.metadata = metadata;
    }

    /**
     * Loads metadata.xml and the workflow class.
     *
     * This method still needs work because it requires a lot of parameters which vary from workflow run to workflow run.
     *
     * @param bundlePath
     * @param workflowAccession
     * @param workflowRunAccession
     * @param workflowEngine
     * @return the populated data model, or null if the bundle cannot be located or its metadata cannot be parsed
     */
    public synchronized AbstractWorkflowDataModel getWorkflowDataModel(String bundlePath, int workflowAccession, int workflowRunAccession,
            String workflowEngine) {
        File bundle = new File(bundlePath);
        // change to absolute path
        bundlePath = bundle.getAbsolutePath();
        Map<String, String> metaInfo = metadata.get_workflow_info(workflowAccession);
        Log.info("Bundle Path: " + bundlePath);
        if (!bundle.exists()) {
            // first try to see if we can get it from its permanent location instead
            if (metaInfo.get("permanent_bundle_location") != null) {
                bundle = new File(getAndProvisionBundle(metaInfo.get("permanent_bundle_location")));
            }
            // if we still can't get the bundle then error out
            if (!bundle.exists()) {
                Log.error("ERROR: Bundle is null or doesn't exist! The bundle must be either a zip file or a directory structure.");
                return null;
            }
        }
        metaInfo = WorkflowV2Utility.parseMetaInfo(bundle);
        if (metaInfo == null) {
            Log.error("ERROR: Bundle structure is incorrect, unable to parse metadata.");
            return null;
        }
        Log.info("bundle for workflowdatamodel found");
        // does an FTL template exist?
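        // For reference (illustrative, not exhaustive): the parsed metaInfo map carries entries from metadata.xml
        // such as "name", "workflow_version" and "workflow_class", plus "workflow_template" for FreeMarker bundles;
        // the template check below decides whether a Java object model or an FTL model should be loaded.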
        boolean workflowJava = true;
        if (metaInfo.get("workflow_template") != null && !metaInfo.get("workflow_template").isEmpty()) {
            workflowJava = false;
        }
        // Java object or FTL
        AbstractWorkflowDataModel dataModel = null;
        Class<?> clazz = null;
        if (workflowJava) {
            // String clazzPath = metaInfo.get("classes");
            // Log.stdout("looking for classes at " + clazzPath);
            // Log.info("CLASSPATH: " + clazzPath);
            // get the user-defined class
            String classpath = metaInfo.get("workflow_class");
            Log.debug("Attempting to instantiate " + classpath);
            WorkflowClassFinder finder = new WorkflowClassFinder();
            clazz = finder.findFirstWorkflowClass(classpath);
            if (null != clazz) {
                Log.debug("using java object");
                try {
                    Object object = clazz.newInstance();
                    dataModel = (AbstractWorkflowDataModel) object;
                } catch (InstantiationException | IllegalAccessException | SecurityException | IllegalArgumentException ex) {
                    Log.error(ex, ex);
                    throw Rethrow.rethrow(ex);
                }
            } else {
                Log.stdout("failed looking for classes at " + classpath);
                throw new RuntimeException("Unable to construct workflow class");
            }
        } else {
            throw new RuntimeException("Non-Java workflows not currently supported");
        }
        if (dataModel == null) {
            throw new RuntimeException("Unable to construct datamodel");
        }
        Log.info("datamodel generated");
        // load metadata.xml
        dataModel.setTags(metaInfo);
        // set name and version in the workflow
        dataModel.setName(metaInfo.get("name"));
        dataModel.setVersion(metaInfo.get("workflow_version"));
        dataModel.setBundle_version(metaInfo.get("bundle_version"));
        dataModel.setSeqware_version(metaInfo.get("seqware_version"));
        dataModel.setWorkflow_directory_name(metaInfo.get("workflow_directory_name"));
        dataModel.setWorkflowBundleDir(bundlePath);
        dataModel.setWorkflowBasedir(metaInfo.get("basedir"));
        // set memory, network and compute in the environment
        dataModel.getEnv().setCompute(metaInfo.get("compute"));
        dataModel.getEnv().setNetwork(metaInfo.get("network"));
        dataModel.getEnv().setMemory(metaInfo.get("memory"));
        Log.info("loading ini files");
        // load ini config
        WorkflowRun workflowRun = metadata.getWorkflowRun(workflowRunAccession);
        Map<String, String> iniString2Map = MapTools.iniString2Map(workflowRun.getIniFile());
        dataModel.setConfigs(iniString2Map);
        // 0.13.6.5: the Java workflow launcher was not originally designed to schedule, hence it is not properly getting
        // parent accessions from saved ini files (as opposed to on the command line)
        ArrayList<String> parseParentAccessions = parseParentAccessions(dataModel.getConfigs());
        dataModel.setParentAccessions(parseParentAccessions);
        // merge command-line options with configs; if present, command-line options override the parent accessions set above
        this.mergeCmdOptions(dataModel, workflowAccession, workflowRunAccession, workflowEngine);
        // TODO: merge version and name?
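        // For illustration only (hypothetical keys and values): the workflow run's ini retrieved above might look like
        //   output_prefix=./
        //   output_dir=seqware-results
        //   parent_accessions=102,103
        // MapTools.iniString2Map flattens it into the key/value configs consumed by parseParentAccessions and
        // mergeCmdOptions.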
        // set random, date, wait
        // magic variables, always set
        Date date = new Date();
        dataModel.setDate(date.toString());
        // set random
        Random rand = new Random(System.currentTimeMillis());
        int randInt = rand.nextInt(100000000);
        dataModel.setRandom("" + randInt);
        // copy some properties from .settings to configs
        dataModel.getEnv().setOOZIE_URL(config.get(SqwKeys.OOZIE_URL.getSettingKey()));
        dataModel.getEnv().setOOZIE_APP_ROOT(config.get(SqwKeys.OOZIE_APP_ROOT.getSettingKey()));
        dataModel.getEnv().setOOZIE_JOBTRACKER(config.get(SqwKeys.OOZIE_JOBTRACKER.getSettingKey()));
        dataModel.getEnv().setOOZIE_NAMENODE(config.get(SqwKeys.OOZIE_NAMENODE.getSettingKey()));
        dataModel.getEnv().setOOZIE_QUEUENAME(config.get(SqwKeys.OOZIE_QUEUENAME.getSettingKey()));
        dataModel.getEnv().setMapred_job_tracker(config.get(SqwKeys.OOZIE_JOBTRACKER.getSettingKey()));
        dataModel.getEnv().setFs_default_name(config.get("FS.DEFAULT.NAME"));
        dataModel.getEnv().setFs_defaultFS(config.get(SqwKeys.OOZIE_NAMENODE.getSettingKey()));
        dataModel.getEnv().setFs_hdfs_impl(config.get(SqwKeys.FS_HDFS_IMPL.getSettingKey()));
        dataModel.getEnv().setOOZIE_WORK_DIR(config.get(SqwKeys.OOZIE_WORK_DIR.getSettingKey()));
        // set workflow-run-accession
        // in 1.1 we're going to make metadata writeback of at least workflow runs mandatory
        dataModel.setWorkflow_run_accession(String.valueOf(workflowRunAccession));
        dataModel.setWorkflow_accession(String.valueOf(workflowAccession));
        // drive the Java object model through its lifecycle methods via reflection
        if (workflowJava) {
            try {
                Method m = clazz.getMethod("setupDirectory");
                m.invoke(dataModel);
                m = clazz.getMethod("setupFiles");
                m.invoke(dataModel);
                // handle the provisionedPath
                // this.setupProvisionedPath(dataModel.getFiles());
                m = clazz.getMethod("setupWorkflow");
                m.invoke(dataModel);
                m = clazz.getMethod("setupEnvironment");
                m.invoke(dataModel);
                m = clazz.getMethod("buildWorkflow");
                m.invoke(dataModel);
                m = clazz.getMethod("wrapup");
                m.invoke(dataModel);
            } catch (NullPointerException | SecurityException | NoSuchMethodException | IllegalArgumentException
                    | IllegalAccessException | InvocationTargetException e) {
                Log.error(e.getClass().getSimpleName(), e);
                throw Rethrow.rethrow(e);
            }
        } else {
            throw new RuntimeException("No other workflow engine is currently supported.");
        }
        AbstractWorkflowDataModel.prepare(dataModel);
        Log.info("returning datamodel");
        return dataModel;
    }

    /**
     * I'm copying this from BasicWorkflow since I don't know if the package net.sourceforge.seqware.pipeline.workflow will be removed or
     * if all of workflowV2 will be merged.
     *
     * This code will either copy or download from S3, unzip, and return the unzip location.
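     *
     * For example (hypothetical paths, not part of the original doc): a permanent location of
     * s3://my-bucket/bundles/Workflow_Bundle_MyWf_1.0.zip is fetched from S3 and unzipped, whereas
     * /archive/bundles/Workflow_Bundle_MyWf_1.0.zip is unzipped from the local filesystem.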
     *
     * It's used when the local workflow bundle dir is null or doesn't exist, which is a sign that the workflow bundle should be retrieved
     * from the permanent location.
     *
     * @param permLoc
     * @return the directory the bundle was unpacked to, or null
     */
    private String getAndProvisionBundle(String permLoc) {
        String result = null;
        Bundle bundle = new Bundle(metadata, config);
        ReturnValue ret;
        if (permLoc.startsWith("s3://")) {
            ret = bundle.unpackageBundleFromS3(permLoc);
        } else {
            ret = bundle.unpackageBundle(new File(permLoc));
        }
        if (ret != null) {
            return ret.getAttribute("outputDir");
        }
        return result;
    }

    // FIXME: should iterate over all options automatically
    /**
     * This method is badly named now. There are no command-line options if we always schedule. Instead we only need to retain the process
     * of getting information from the DB into the workflow data model so that we can use it.
     *
     * @param model
     * @param workflowAccession
     * @param workflowRunAccession
     * @param workflowEngine
     */
    private void mergeCmdOptions(AbstractWorkflowDataModel model, int workflowAccession, int workflowRunAccession, String workflowEngine) {
        // merge parent accessions
        model.setWorkflow_run_accession(String.valueOf(workflowRunAccession));
        model.setWorkflow_accession(String.valueOf(workflowAccession));
        // metadata writeback
        if (model.hasPropertyAndNotNull(ReservedIniKeys.METADATA.getKey())) {
            try {
                // TODO: fix this magic name
                boolean metadataWriteBack = model.getProperty(ReservedIniKeys.METADATA.getKey()).equals("metadata");
                Log.info("Launching with metadataWriteback = " + metadataWriteBack + " since property was "
                        + model.getProperty(ReservedIniKeys.METADATA.getKey()));
                model.setMetadataWriteBack(metadataWriteBack);
            } catch (Exception ex) {
                Logger.getLogger(WorkflowDataModelFactory.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        // metadata-output-file-prefix
        if (model.hasPropertyAndNotNull(ReservedIniKeys.OUTPUT_PREFIX.getKey())) {
            try {
                model.setMetadata_output_file_prefix(model.getProperty(ReservedIniKeys.OUTPUT_PREFIX.getKey()));
            } catch (Exception ex) {
                Logger.getLogger(WorkflowDataModelFactory.class.getName()).log(Level.SEVERE, null, ex);
            }
        } else {
            Log.error("You need to specify the output prefix for your workflow using either an override parameter at schedule-time or in your workflow INI file as "
                    + ReservedIniKeys.OUTPUT_PREFIX.getKey());
        }
        // metadata-output-dir
        if (model.hasPropertyAndNotNull(ReservedIniKeys.OUTPUT_DIR.getKey())) {
            try {
                model.setMetadata_output_dir(model.getProperty(ReservedIniKeys.OUTPUT_DIR.getKey()));
            } catch (Exception ex) {
                Logger.getLogger(WorkflowDataModelFactory.class.getName()).log(Level.SEVERE, null, ex);
            }
        } else {
            Log.error("You need to specify the output dir for your workflow using either an override parameter at schedule-time or in your workflow INI file as output_dir!");
        }
        // workflow_engine
        if (workflowEngine != null) {
            model.setWorkflow_engine(workflowEngine);
        }
    }

    /**
     * Reads a map and tries to find the parent accessions; the result is de-duplicated.
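     *
     * For example (hypothetical values): an ini containing parent_accession=102, parent-accessions=102 and parent_accessions=0 yields
     * ["102"]; duplicates collapse and the rogue zero (see below) is dropped. Note that comma-separated value strings are kept whole,
     * not split.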
     *
     * @param map
     * @return the de-duplicated parent accessions
     */
    private static ArrayList<String> parseParentAccessions(Map<String, String> map) {
        ArrayList<String> results = new ArrayList<>();
        HashMap<String, String> resultsDeDup = new HashMap<>();
        for (String key : map.keySet()) {
            if (ReservedIniKeys.PARENT_ACCESSION.getKey().equals(key) || ReservedIniKeys.PARENT_UNDERSCORE_ACCESSIONS.getKey().equals(key)
                    || ReservedIniKeys.PARENT_DASH_ACCESSIONS.getKey().equals(key)) {
                resultsDeDup.put(map.get(key), "null");
            }
        }
        results.addAll(resultsDeDup.keySet());
        // for hotfix 0.13.6.3:
        // GATK reveals an issue where parent_accession is set up with a correct list of accessions while parent-accessions and
        // parent_accessions are set to 0. When the three are mushed together, the rogue zero is transferred to parent_accession and
        // crashes the workflow. I'm going to allow a single 0 in case (god forbid) some workflow relies upon this, but otherwise a 0
        // should not occur in a list of valid parent accessions.
        if (results.contains("0") && results.size() > 1) {
            results.remove("0");
        }
        return results;
    }
}
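// A minimal usage sketch, kept as a comment since this file defines only the factory. The helpers named here
// (ConfigTools.getSettings, MetadataFactory.get) and all accessions/paths are assumptions for illustration, not
// guaranteed API; substitute whatever configured Metadata implementation your deployment provides.
//
//   Map<String, String> settings = ConfigTools.getSettings();              // assumed settings loader
//   Metadata metadata = MetadataFactory.get(settings);                     // assumed metadata factory
//   WorkflowDataModelFactory factory = new WorkflowDataModelFactory(settings, metadata);
//   AbstractWorkflowDataModel model = factory.getWorkflowDataModel(
//           "/home/seqware/bundles/Workflow_Bundle_MyWf_1.0", 12345, 67890, "oozie");   // hypothetical values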