/*
* Copyright (C) 2012 SeqWare
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package net.sourceforge.seqware.pipeline.deciders;
import com.google.common.collect.ImmutableList;
import io.seqware.common.model.ProcessingStatus;
import io.seqware.common.model.WorkflowRunStatus;
import io.seqware.pipeline.plugins.WorkflowScheduler;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.MissingResourceException;
import java.util.PropertyResourceBundle;
import java.util.Random;
import java.util.ResourceBundle;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import joptsimple.NonOptionArgumentSpec;
import joptsimple.OptionSpecBuilder;
import net.sourceforge.seqware.common.hibernate.FindAllTheFiles;
import net.sourceforge.seqware.common.hibernate.FindAllTheFiles.Header;
import net.sourceforge.seqware.common.metadata.Metadata;
import net.sourceforge.seqware.common.model.FileProvenanceParam;
import net.sourceforge.seqware.common.model.WorkflowParam;
import net.sourceforge.seqware.common.model.WorkflowRun;
import net.sourceforge.seqware.common.module.FileMetadata;
import net.sourceforge.seqware.common.module.ReturnValue;
import net.sourceforge.seqware.common.util.Log;
import net.sourceforge.seqware.common.util.filetools.FileTools;
import net.sourceforge.seqware.common.util.filetools.FileTools.LocalhostPair;
import net.sourceforge.seqware.pipeline.decider.DeciderInterface;
import net.sourceforge.seqware.pipeline.plugin.Plugin;
import net.sourceforge.seqware.pipeline.plugin.PluginInterface;
import net.sourceforge.seqware.pipeline.plugins.fileprovenance.ProvenanceUtility;
import net.sourceforge.seqware.pipeline.runner.PluginRunner;
import net.sourceforge.seqware.pipeline.tools.SetOperations;
import org.apache.commons.lang3.StringUtils;
import org.openide.util.lookup.ServiceProvider;
/**
*
* @author mtaschuk
*/
@ServiceProvider(service = PluginInterface.class)
public class BasicDecider extends Plugin implements DeciderInterface {
// Grouping strategy for provenance rows; by default each file (FILE_SWA) is its own group.
private Header header = Header.FILE_SWA;
// Workflow accessions whose output files are eligible inputs for this decider.
private Set<String> parentWorkflowAccessions = new TreeSet<>();
// Workflow accessions of equivalent workflows (e.g. older versions); files already
// processed by these are skipped.
private Set<String> workflowAccessionsToCheck = new TreeSet<>();
// File meta-types to launch on; null means no meta-type filtering.
private List<String> metaTypes = null;
// When true, launch all matches regardless of previous runs (--ignore-previous-runs / --force-run-all).
private Boolean ignorePreviousRuns = null;
// Testing mode: print INI files to stdout instead of scheduling workflows.
private Boolean test = null;
// Accession of the workflow this decider schedules.
private String workflowAccession = null;
// Used to generate unique temp INI file name prefixes.
protected Random random = new Random(System.currentTimeMillis());
// Whether scheduled workflow runs should write metadata back (negated by --no-metadata).
private Boolean metadataWriteback = null;
// Per-group scratch state, rebuilt for each grouping inside launchWorkflows.
private Collection<String> parentAccessionsToRun;
private Collection<String> filesToRun;
private Collection<String> workflowParentAccessionsToRun;
private Collection<Integer> fileSWIDsToRun;
private Set<String> studyReporterOutput;
private ArrayList<String> iniFiles;
// When true (default), files whose attributes contain a "skip" key are excluded.
private Boolean skipStuff = null;
// launchMax caps the number of schedules per decider run; launched counts them.
private int launchMax = Integer.MAX_VALUE, launched = 0;
// Maximum number of previous failures before giving up on re-launching a group.
private int rerunMax = 5;
// Host to schedule onto; defaults to the local host.
private String host = null;
private final NonOptionArgumentSpec<String> nonOptionSpec;
private final OptionSpecBuilder ignorePreviousRunsSpec;
private final OptionSpecBuilder forceRunAllSpec;
/**
 * Registers all command-line options understood by the basic decider on the
 * inherited joptsimple parser. Subclasses typically add their own options on
 * top of these in their constructors.
 */
public BasicDecider() {
    super();
    parser.acceptsAll(Arrays.asList("wf-accession"), "The workflow accession of the workflow").withRequiredArg();
    // configure parameters used to parse provenance report
    ProvenanceUtility.configureFileProvenanceParams(parser);
    parser.acceptsAll(Arrays.asList("group-by"),
            "Optional: Group by one of the headings in FindAllTheFiles. Default: FILE_SWA. One of LANE_SWA or IUS_SWA.")
            .withRequiredArg();
    parser.acceptsAll(Arrays.asList("parent-wf-accessions"),
            "The workflow accessions of the parent workflows, comma-separated with no spaces. May also specify the meta-type.")
            .withRequiredArg();
    // NOTE(review): this description is identical to force-run-all's below — presumably copy-paste; confirm intended wording
    this.ignorePreviousRunsSpec = parser.acceptsAll(Arrays.asList("ignore-previous-runs"),
            "Forces the decider to run all matches regardless of whether they've been run before or not");
    parser.acceptsAll(Arrays.asList("meta-types"),
            "The comma-separated meta-type(s) of the files to run this workflow with. Alternatively, use parent-wf-accessions.")
            .withRequiredArg();
    parser.acceptsAll(
            Arrays.asList("check-wf-accessions"),
            "The comma-separated, no spaces, workflow accessions of the workflow that perform the same function (e.g. older versions). Any files that have been processed with these workflows will be skipped.")
            .withRequiredArg();
    this.forceRunAllSpec = parser.acceptsAll(Arrays.asList("force-run-all"),
            "Forces the decider to run all matches regardless of whether they've been run before or not");
    parser.acceptsAll(Arrays.asList("test"), "Testing mode. Prints the INI files to standard out and does not submit the workflow.");
    parser.acceptsAll(Arrays.asList("no-meta-db", "no-metadata"), "Optional: a flag that prevents metadata writeback (which is done "
            + "by default) by the Decider and that is subsequently "
            + "passed to the called workflow which can use it to determine if "
            + "they should write metadata at runtime on the cluster.");
    parser.acceptsAll(Arrays.asList("ignore-skip-flag"),
            "Ignores any 'skip' flags on lanes, IUSes, sequencer runs, samples, etc. Use caution.");
    parser.acceptsAll(Arrays.asList("launch-max"), "The maximum number of jobs to launch at once.").withRequiredArg()
            .defaultsTo("2147483647");
    parser.acceptsAll(Arrays.asList("rerun-max"), "The maximum number of times to re-launch a workflowrun if failed.")
            .withRequiredArg().defaultsTo("5");
    parser.acceptsAll(Arrays.asList("host", "ho"),
            "Used only in combination with --schedule to schedule onto a specific host. If not provided, the default is the local host")
            .withRequiredArg();
    // SEQWARE-1622 - check whether files exist
    parser.acceptsAll(Arrays.asList("check-file-exists", "cf"), "Optional: only launch on the file if the file exists");
    // everything after "--" is passed through to the scheduled workflow as INI overrides
    this.nonOptionSpec = parser.nonOptions(WorkflowScheduler.OVERRIDE_INI_DESC);
}
/**
 * {@inheritDoc}
 *
 * @return a short human-readable description of this plugin
 */
@Override
public String get_description() {
    return "The decider from which all other deciders came";
}
/**
 * Parses and validates command-line options and the optional "decider"
 * resource bundle, populating all decider state (workflow accession, filters,
 * launch/rerun limits, scheduling host, etc.).
 *
 * This method is intended to be called AFTER any implementing class's init
 * method.
 *
 * @return a ReturnValue whose exit status is INVALIDPARAMETERS when any
 *         required option is missing or malformed, SUCCESS otherwise
 */
@Override
public ReturnValue init() {
    ReturnValue ret = new ReturnValue();
    if (!ProvenanceUtility.checkForValidOptions(options)) {
        println("One of the various contraints or '--all' must be specified.");
        println(this.get_syntax());
        ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
    }
    // Optional defaults from a "decider" properties resource bundle; command-line
    // options parsed below take precedence over these.
    try {
        ResourceBundle rb = PropertyResourceBundle.getBundle("decider");
        String parents = rb.getString("parent-workflow-accessions");
        String checks = rb.getString("check-wf-accessions");
        String wfa = rb.getString("workflow-accession");
        if (wfa != null && !wfa.trim().isEmpty()) {
            this.setWorkflowAccession(wfa);
        }
        if (parents != null && !parents.trim().isEmpty()) {
            List<String> pas = Arrays.asList(parents.split(","));
            // use the parameterized constructor rather than a raw type
            this.setParentWorkflowAccessions(new TreeSet<>(pas));
        }
        if (checks != null && !checks.trim().isEmpty()) {
            List<String> cwa = Arrays.asList(checks.split(","));
            this.setWorkflowAccessionsToCheck(new TreeSet<>(cwa));
        }
    } catch (MissingResourceException e) {
        // the bundle is optional; absence is not an error
        Log.debug("No decider resource found: ", e);
    }
    // Group-by allows you to group processing events based on one characteristic.
    // Normally, this allows you to run on a group of samples (for example, all
    // of the IUS-level BAM files). The default is no grouping, so the workflow
    // will be run independently on every file it finds
    if (options.has("group-by")) {
        String headerString = (String) options.valueOf("group-by");
        try {
            header = Header.valueOf(headerString);
        } catch (IllegalArgumentException e) {
            Log.fatal("IllegalArgumentException when grouping", e);
            StringBuilder sb = new StringBuilder();
            sb.append("group-by attribute must be one of the following: \n");
            for (Header h : Header.values()) {
                sb.append("\t").append(h.name()).append("\n");
            }
            Log.stdout(sb.toString());
            ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
        }
    }
    if (options.has("wf-accession")) {
        workflowAccession = (String) options.valueOf("wf-accession");
    } else if (workflowAccession == null) {
        // may have been provided by the resource bundle or a subclass instead
        Log.error("Must specify the workflow-accession of the workflow to run");
        ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
    }
    // Check for filtering on the files. Either parent workflow accessions
    // or file meta-types, or both
    boolean hasFilter = false;
    if (options.has("parent-wf-accessions")) {
        String pas = (String) options.valueOf("parent-wf-accessions");
        for (String p : pas.split(",")) {
            parentWorkflowAccessions.add(p.trim());
            hasFilter = true;
        }
    }
    if (options.has("meta-types")) {
        String mt = (String) options.valueOf("meta-types");
        metaTypes = Arrays.asList(mt.split(","));
        hasFilter = true;
    }
    if (!hasFilter && parentWorkflowAccessions.isEmpty() && metaTypes == null) {
        Log.error("You must run a decider with parent-wf-accessions or meta-types (or both).");
        ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
    }
    // Repeat-checking options. if present, check-wf-accessions will check to
    // see if the samples have been run through a particular workflow before.
    // These workflows will not be launched again
    // Optionally you can force the decider to re-run all possibilities in
    // the database with force-run-all.
    if (options.has("check-wf-accessions")) {
        String pas = (String) options.valueOf("check-wf-accessions");
        Log.debug("Pas = " + pas);
        if (pas.contains(",")) {
            for (String p : pas.split(",")) {
                workflowAccessionsToCheck.add(p.trim());
            }
        } else {
            workflowAccessionsToCheck.add(pas.trim());
        }
        // Separate out this logic
        // workflowAccessionsToCheck.add(workflowAccession);
    }
    ignorePreviousRuns = options.has(this.ignorePreviousRunsSpec) || options.has(this.forceRunAllSpec);
    // test turns off all of the submission functions and just prints to stdout
    if (test == null) {
        test = options.has("test");
    }
    if (test) {
        // in test mode, emit the provenance report header once up front
        StringWriter writer = new StringWriter();
        try {
            FindAllTheFiles.printHeader(writer, true);
            Log.stdout(writer.toString());
        } catch (IOException ex) {
            Log.error(ex);
        }
    }
    if (skipStuff == null) {
        skipStuff = !options.has("ignore-skip-flag");
    }
    if (metadataWriteback == null) {
        metadataWriteback = !(options.has("no-metadata") || options.has("no-meta-db"));
    }
    LocalhostPair localhostPair = FileTools.getLocalhost(options);
    String localhost = localhostPair.hostname;
    if (host == null) {
        if (options.has("host") || options.has("ho")) {
            host = (String) options.valueOf("host");
        } else {
            host = localhost;
        }
    }
    if (localhostPair.returnValue.getExitStatus() != ReturnValue.SUCCESS && host == null) {
        Log.error("Could not determine localhost: Return value " + localhostPair.returnValue.getExitStatus());
        Log.error("Please supply it on the command line with --host");
        ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
    } else if (!host.equals(localhost)) {
        // scheduling onto a remote host is allowed but worth a warning
        Log.warn("The localhost and the scheduling host are not the same: " + localhost + " and " + host + ". Proceeding anyway.");
    }
    if (options.has("launch-max")) {
        try {
            launchMax = Integer.parseInt(options.valueOf("launch-max").toString());
        } catch (NumberFormatException e) {
            Log.error("The launch-max parameter must be an integer. Unparseable integer: " + options.valueOf("launch-max").toString());
            ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
        }
    }
    if (options.has("rerun-max")) {
        try {
            rerunMax = Integer.parseInt(options.valueOf("rerun-max").toString());
        } catch (NumberFormatException e) {
            Log.error("The rerun-max parameter must be an integer. Unparseable integer: " + options.valueOf("rerun-max").toString());
            ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
        }
    }
    if (workflowAccession == null || "".equals(workflowAccession)) {
        Log.error("The wf-accession must be defined.");
        ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
    }
    return ret;
}
/**
 * {@inheritDoc}
 *
 * @return a "feature not implemented" ReturnValue; deciders have no self-test
 */
@Override
public ReturnValue do_test() {
    return ReturnValue.featureNotImplemented();
}
/**
 * Main entry point: collects the relevant provenance rows, groups them by
 * the configured grouping header, and schedules (or test-prints) one
 * workflow run per group.
 *
 * @return the aggregate ReturnValue from launching the grouped workflows
 */
@Override
public ReturnValue do_run() {
    if (!metadata.checkClientServerMatchingVersion()) {
        Log.warn("Client version does not match webservice version");
    }
    // gather candidate files, bucket them by the grouping header, then launch
    List<ReturnValue> candidateFiles = createListOfRelevantFilePaths();
    Map<String, List<ReturnValue>> groupedFiles = separateFiles(candidateFiles, header.getTitle());
    return launchWorkflows(groupedFiles);
}
/**
 * For each grouping of files, decides whether a workflow run should be
 * scheduled (based on meta-type filters, previous runs, rerun limits and the
 * launch cap) and either schedules it via the WorkflowScheduler plugin or, in
 * test mode, prints what would have been done.
 *
 * @param mappedFiles groupings of provenance rows keyed by the group-by value;
 *                    may be null, in which case nothing is launched
 * @return QUEUED when the launch cap was reached, otherwise the result of the
 *         last summary (or an empty ReturnValue)
 */
private ReturnValue launchWorkflows(Map<String, List<ReturnValue>> mappedFiles) {
    ReturnValue ret = new ReturnValue();
    if (mappedFiles != null) {
        // process groups in a deterministic order (sorted by processing time)
        List<Entry<String, List<ReturnValue>>> entryList = new ArrayList<>();
        entryList.addAll(mappedFiles.entrySet());
        Collections.sort(entryList, new ReturnValueProcessingTimeComparator());
        for (Entry<String, List<ReturnValue>> entry : entryList) {
            Log.info("Considering key:" + entry.getKey());
            for (ReturnValue r : entry.getValue()) {
                Log.info("Group contains: " + r.getAttribute(FindAllTheFiles.FILE_SWA));
            }
            // reset per-group scratch state
            parentAccessionsToRun = new HashSet<>();
            filesToRun = new HashSet<>();
            workflowParentAccessionsToRun = new HashSet<>();
            fileSWIDsToRun = new HashSet<>();
            studyReporterOutput = new HashSet<>();
            // for each grouping (e.g. sample), iterate through the files
            List<ReturnValue> files = entry.getValue();
            Log.info("key:" + entry.getKey() + " consists of " + files.size() + " files");
            for (ReturnValue file : files) {
                String wfAcc = file.getAttribute(Header.WORKFLOW_SWA.getTitle());
                Log.debug(Header.WORKFLOW_SWA.getTitle() + ": WF accession is " + wfAcc);
                // if there is no parent accessions, or if the parent accession is correct
                // this makes an assumption that if the wfAcc is null then the parentWorkflowAccessions will be empty
                // and thus we are able to find files of a particular metatype with no wfAcc
                // check for each file if the metatype is correct (if it exists),
                // or just add it
                for (FileMetadata fm : file.getFiles()) {
                    if (metaTypes != null) {
                        if (metaTypes.contains(fm.getMetaType())) {
                            addFileToSets(file, fm, workflowParentAccessionsToRun, parentAccessionsToRun, filesToRun, fileSWIDsToRun);
                        }
                    } else {
                        addFileToSets(file, fm, workflowParentAccessionsToRun, parentAccessionsToRun, filesToRun, fileSWIDsToRun);
                    }
                }
            }// end iterate through files
            // only consider launching when the group produced files, parents and link targets
            if (!parentAccessionsToRun.isEmpty() && !filesToRun.isEmpty() && !workflowParentAccessionsToRun.isEmpty()) {
                final String parentAccessionString = commaSeparateMy(parentAccessionsToRun);
                final String fileString = commaSeparateMy(filesToRun);
                Log.debug("FileString: " + fileString);
                // SEQWARE-1773 short-circuit this with forceRunAll to ensure that sample fingerprinting workflow launches
                if (ignorePreviousRuns) {
                    Log.debug("Ignoring previous runs because --ignore-previous-runs was enabled");
                }
                boolean rerun = ignorePreviousRuns || rerunWorkflowRun(filesToRun, fileSWIDsToRun);
                // SEQWARE-1728 - move creation of ini to launches (and test launches) to conserve disk space
                iniFiles = new ArrayList<>();
                // subclasses may veto the launch here (e.g. missing files)
                ReturnValue newRet = this.doFinalCheck(fileString, parentAccessionString);
                if (newRet.getExitStatus() != ReturnValue.SUCCESS) {
                    Log.warn("Final check failed, aborting run. Return value was: " + newRet.getExitStatus());
                    rerun = false;
                }
                // if we're in testing mode or we don't want to rerun and we don't want to force the re-processing
                if (test || !rerun) {
                    // we need to simplify the logic and make it more readable here for testing
                    if (rerun) {
                        iniFiles.add(createIniFile(fileString, parentAccessionString));
                        for (String line : studyReporterOutput) {
                            Log.stdout(line);
                        }
                        Log.debug("NOT RUNNING (but would have ran). test=" + test + " or !rerun=" + !rerun);
                        reportLaunch();
                        // SEQWARE-1642 - output to stdout only whether a decider would launch
                        ret = do_summary();
                        launched++;
                    } else {
                        for (String line : studyReporterOutput) {
                            Log.debug(line);
                        }
                        Log.debug("NOT RUNNING (and would not have ran). test=" + test + " or !rerun=" + !rerun);
                    }
                } else if (launched < launchMax) {
                    iniFiles.add(createIniFile(fileString, parentAccessionString));
                    launched++;
                    // construct the INI and run it
                    for (String line : studyReporterOutput) {
                        Log.stdout(line);
                    }
                    Log.debug("Scheduling");
                    // construct the INI and run it
                    ArrayList<String> runArgs = constructCommand();
                    PluginRunner.main(runArgs.toArray(new String[runArgs.size()]));
                    Log.stdout("Scheduling.");
                    do_summary();
                }
                // separate this out so that it is reachable when in --test
                if (launched >= launchMax) {
                    Log.info("The maximum number of jobs has been scheduled"
                            + ". The next jobs will be launched when the decider runs again.");
                    ret.setExitStatus(ReturnValue.QUEUED);
                    // SEQWARE-1666 - short-circuit and exit when the maximum number of jobs have been launched
                    return ret;
                }
            } else {
                Log.debug("Cannot run: parentAccessions: " + parentAccessionsToRun.size() + " filesToRun: " + filesToRun.size()
                        + " workflowParentAccessions: " + workflowParentAccessionsToRun.size());
            }
        }
    } else {
        Log.stdout("There are no files");
    }
    return ret;
}
/**
 * Builds the argument list used to invoke the WorkflowScheduler plugin via
 * PluginRunner: workflow accession, INI files, input file SWIDs, metadata
 * flags, parent accessions, host, and any pass-through INI overrides.
 *
 * @return the complete PluginRunner argument list for scheduling this run
 */
protected ArrayList<String> constructCommand() {
    ArrayList<String> args = new ArrayList<>();
    args.add("--plugin");
    args.add("io.seqware.pipeline.plugins.WorkflowScheduler");
    args.add("--");
    args.add("--workflow-accession");
    args.add(workflowAccession);
    args.add("--ini-files");
    args.add(commaSeparateMy(iniFiles));
    args.add("--" + WorkflowScheduler.INPUT_FILES);
    // render the input file SWIDs as a single comma-separated value
    Collection<String> swidStrings = new ArrayList<>();
    for (Integer swid : fileSWIDsToRun) {
        swidStrings.add(String.valueOf(swid));
    }
    args.add(commaSeparateMy(swidStrings));
    if (!metadataWriteback) {
        args.add("--no-metadata");
    }
    args.add("--parent-accessions");
    args.add(commaSeparateMy(parentAccessionsToRun));
    args.add("--link-workflow-run-to-parents");
    args.add(commaSeparateMy(workflowParentAccessionsToRun));
    args.add("--host");
    args.add(host);
    args.add("--");
    // forward any user-supplied INI overrides verbatim
    args.addAll(options.valuesOf(nonOptionSpec));
    return args;
}
/**
 * Returns true only if there are more files to run than have been run on any workflow so far, or if the filesToRun have different
 * filepaths than those that have been run before. Additionally caps re-launches at rerunMax previous failures.
 *
 * @param filesToRun file paths for the candidate run
 * @param fileSWIDs SWIDs of the candidate input files
 * @return true when the workflow should be (re-)launched for this group
 */
protected boolean rerunWorkflowRun(final Collection<String> filesToRun, Collection<Integer> fileSWIDs) {
    List<Boolean> failures = new ArrayList<>();
    List<Integer> swidList = new ArrayList<>(fileSWIDs);
    List<WorkflowRun> previousRuns = produceAccessionListWithFileList(swidList);
    boolean rerun = processWorkflowRuns(filesToRun, failures, previousRuns);
    if (!rerun) {
        Log.debug("This workflow has failed to launch based on workflow runs found via direct search");
        return rerun;
    }
    // special case, when rerun max is 0, we still want to launch even if there are 0 failures
    if (failures.isEmpty() && this.rerunMax == 0) {
        return rerun;
    }
    // too many recorded failures: stop trying
    if (failures.size() >= this.rerunMax) {
        Log.debug("This workflow has failed " + rerunMax + " times: not running");
        rerun = false;
    }
    return rerun;
}
/**
 * Map a normal status to whether a workflow run completed, failed, or other (submitted, pending, etc.) (the states that we care about
 * for the decider)
 *
 * @param generateStatus the raw workflow run status
 * @return COMPLETED, FAILED, or OTHER for every remaining status
 */
protected PREVIOUS_RUN_STATUS determineStatus(WorkflowRunStatus generateStatus) {
    if (generateStatus == WorkflowRunStatus.completed) {
        return PREVIOUS_RUN_STATUS.COMPLETED;
    }
    if (generateStatus == WorkflowRunStatus.failed) {
        return PREVIOUS_RUN_STATUS.FAILED;
    }
    // submitted, pending, running, etc. all collapse to OTHER
    return PREVIOUS_RUN_STATUS.OTHER;
}
/**
 * Returns true if the filesToRun are totally contained by the files associated with the files in a given workflowRunAcc
 *
 * @param filesSWIDsHasRun SWIDs of the files a past run consumed
 * @param filesToRun
 *            the files to check to see if they are contained by the past run
 * @return true when every candidate path was already covered by the past run
 */
protected boolean isToRunContained(Set<Integer> filesSWIDsHasRun, Collection<String> filesToRun) {
    Set<String> pastPaths = determineFilePaths(filesSWIDsHasRun);
    Log.info("Files to run: " + StringUtils.join(filesToRun, ','));
    // use set operations to be more explicit about our cases
    Set<String> candidateSet = new HashSet<>(filesToRun);
    Set<String> pastSet = new HashSet<>(pastPaths);
    return SetOperations.isSuperset(pastSet, candidateSet);
}
/**
 * Tests if the files from the workflow run (filesHasRun) are the same as those found in the database (filesToRun). True if the
 * filesToRun has more files than the workflow run. True if the filesToRun and the workflow run have the same number of files but with
 * different filepaths. False if the filesToRun and the workflow run have the same number of files with the same file paths. False and
 * prints an error message if there are more files in the workflow run than in the filesToRun.
 *
 * @param filesSWIDsHasRun SWIDs of the files a past run consumed
 * @param filesToRun candidate file paths for the new run
 * @return the relationship between the past run's files and the candidate files
 */
protected FILE_STATUS compareWorkflowRunFiles(Set<Integer> filesSWIDsHasRun, Collection<String> filesToRun) {
    Set<String> pastPaths = determineFilePaths(filesSWIDsHasRun);
    Log.info("Files to run: " + StringUtils.join(filesToRun, ','));
    Log.info("Files has run: " + StringUtils.join(pastPaths, ','));
    // use set operations to be more explicit about our cases
    Set<String> candidateSet = new HashSet<>(filesToRun);
    Set<String> pastSet = new HashSet<>(pastPaths);
    // checks are ordered from strongest to weakest relationship
    if (candidateSet.equals(pastSet)) {
        return FILE_STATUS.SAME_FILES;
    }
    if (SetOperations.isSubset(pastSet, candidateSet)) {
        return FILE_STATUS.PAST_SUBSET_OR_INTERSECTION;
    }
    if (SetOperations.isSuperset(pastSet, candidateSet)) {
        return FILE_STATUS.PAST_SUPERSET;
    }
    if (!SetOperations.intersection(candidateSet, pastSet).isEmpty()) {
        return FILE_STATUS.PAST_SUBSET_OR_INTERSECTION;
    }
    return FILE_STATUS.DISJOINT_SETS;
}
/**
 * Adds one file to the per-group accumulators, provided it passes
 * checkFileDetails and (when skipStuff is enabled) carries no "skip"
 * attribute. Also records the processing accession and the IUS/lane SWID to
 * link the scheduled workflow run to.
 *
 * @param file the Processing-event row the file belongs to
 * @param fm the particular file being considered
 * @param workflowParentAccessionsToRun collects IUS/lane SWIDs to link the run to
 * @param parentAccessionsToRun collects processing SWIDs used as parent accessions
 * @param filesToRun collects file paths
 * @param fileToRunSWIDs collects file SWIDs
 */
private void addFileToSets(ReturnValue file, FileMetadata fm, Collection<String> workflowParentAccessionsToRun,
        Collection<String> parentAccessionsToRun, Collection<String> filesToRun, Collection<Integer> fileToRunSWIDs) {
    if (checkFileDetails(file, fm)) {
        if (skipStuff) {
            // any attribute key containing "skip" excludes the whole file
            for (String key : file.getAttributes().keySet()) {
                if (key.contains("skip")) {
                    Log.warn("File SWID:" + fm.getDescription() + " path " + fm.getFilePath() + " is skipped: " + key + ">"
                            + file.getAttribute(key));
                    return;
                }
            }
        }
        if (test) {
            printFileMetadata(file, fm);
        }
        filesToRun.add(fm.getFilePath());
        String fileSWID = file.getAttribute(Header.FILE_SWA.getTitle());
        fileToRunSWIDs.add(Integer.valueOf(fileSWID));
        parentAccessionsToRun.add(file.getAttribute(Header.PROCESSING_SWID.getTitle()));
        // prefer the IUS SWID; fall back to the lane SWID when absent
        String swid = file.getAttribute(Header.IUS_SWA.getTitle());
        if (swid == null || swid.trim().isEmpty()) {
            swid = file.getAttribute(Header.LANE_SWA.getTitle());
        }
        // seqware-2002 it is possible that both are null if the path goes through sample_processing
        if (swid == null || swid.trim().isEmpty()) {
            return;
        }
        workflowParentAccessionsToRun.add(swid);
    }
}
/**
 * Renders one provenance report line for the given file into the
 * studyReporterOutput buffer (used in test mode).
 *
 * @param file the Processing-event row the file belongs to
 * @param fm the file whose metadata line is rendered
 */
protected void printFileMetadata(ReturnValue file, FileMetadata fm) {
    final String studyName = (String) options.valueOf("study-name");
    StringWriter buffer = new StringWriter();
    try {
        FindAllTheFiles.print(buffer, file, studyName, true, fm);
        studyReporterOutput.add(buffer.getBuffer().toString().trim());
    } catch (IOException ex) {
        Log.error("Error printing file metadata", ex);
    }
}
/**
 * Joins the given strings with commas (no spaces).
 *
 * @param list the strings to join
 * @return the comma-joined string; empty when the collection is empty
 */
protected String commaSeparateMy(Collection<String> list) {
    return separateMy(list, ",");
}
/**
 * Joins the given strings with single spaces.
 *
 * @param list the strings to join
 * @return the space-joined string; empty when the collection is empty
 */
protected String spaceSeparateMy(Collection<String> list) {
    return separateMy(list, " ");
}
/**
 * Joins the collection's elements with the given delimiter. Note: the
 * delimiter is only emitted once the buffer is non-empty, so leading empty
 * strings do not produce a leading delimiter.
 *
 * @param list the strings to join
 * @param delimiter the separator to place between elements
 * @return the joined string
 */
private String separateMy(Collection<String> list, String delimiter) {
    StringBuilder joined = new StringBuilder();
    for (String item : list) {
        if (joined.length() > 0) {
            joined.append(delimiter);
        }
        joined.append(item);
    }
    return joined.toString();
}
/**
 * Writes a temporary INI file for one scheduled run: starts from the
 * workflow's declared default parameters, overlays the decider-specific
 * values from modifyIniFile, and writes key=value lines to a temp file.
 *
 * @param commaSeparatedFilePaths the input file paths for this run
 * @param commaSeparatedParentAccessions the parent accessions for this run
 * @return the absolute path of the INI file, or "" when the temp file could
 *         not be created
 */
private String createIniFile(String commaSeparatedFilePaths, String commaSeparatedParentAccessions) {
    String iniPath = "";
    // TreeMap keeps the INI keys sorted for reproducible output
    Map<String, String> iniFileMap = new TreeMap<>();
    SortedSet<WorkflowParam> wps = metadata.getWorkflowParams(workflowAccession);
    for (WorkflowParam param : wps) {
        iniFileMap.put(param.getKey(), param.getDefaultValue());
    }
    // decider-specific values override the workflow defaults
    Map<String, String> iniParameters = modifyIniFile(commaSeparatedFilePaths, commaSeparatedParentAccessions);
    iniFileMap.putAll(iniParameters);
    File file = null;
    try {
        file = File.createTempFile("" + random.nextInt(), ".ini");
        // try-with-resources guarantees the writer is closed even on error
        try (PrintWriter writer = new PrintWriter(new FileWriter(file), true)) {
            for (Entry<String, String> entry : iniFileMap.entrySet()) {
                writer.println(entry.getKey() + "=" + entry.getValue());
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(BasicDecider.class.getName()).log(Level.SEVERE, null, ex);
    }
    if (file != null) {
        iniPath = file.getAbsolutePath();
    }
    return iniPath;
}
/**
 * Performs any additional checks on the file before adding it to the list of files to incorporate. This method should be extended for
 * future deciders for custom behaviour. You can also pull any details out of the file metadata here.
 *
 * @param returnValue
 *            The ReturnValue representing the Processing event. May have one or more files. The attributes table contains the
 *            information from FindAllTheFiles.Header.
 * @param fm
 *            the particular file that will be added
 * @return true if the file can be added to the list, false otherwise
 */
protected boolean checkFileDetails(ReturnValue returnValue, FileMetadata fm) {
    // SEQWARE-1622: optionally require the file to exist on disk
    boolean mustExist = this.options.has("check-file-exists");
    if (mustExist && !new File(fm.getFilePath()).exists()) {
        Log.warn("File not found:" + fm.getFilePath());
        return false;
    }
    return true;
}
/**
 * Produces the decider-specific INI entries for one run; subclasses override
 * this to add or change workflow parameters. The base implementation only
 * sets input_files.
 *
 * @param commaSeparatedFilePaths the input file paths for this run
 * @param commaSeparatedParentAccessions the parent accessions for this run
 * @return a sorted map of INI key/value overrides
 */
protected Map<String, String> modifyIniFile(String commaSeparatedFilePaths, String commaSeparatedParentAccessions) {
    Map<String, String> ini = new TreeMap<>();
    ini.put("input_files", commaSeparatedFilePaths);
    return ini;
}
/**
 * Extension hook: transforms a group-by attribute value before it is used as
 * a grouping key in separateFiles. The base implementation is the identity.
 *
 * @param attribute the raw group-by attribute value
 * @return the (possibly transformed) grouping key
 */
protected String handleGroupByAttribute(String attribute) {
    return attribute;
}
/**
 * Buckets the provenance rows by the value of the given group-by attribute
 * (run through handleGroupByAttribute). Rows lacking the attribute are
 * grouped together under a null key.
 *
 * @param vals the provenance rows to group
 * @param groupBy the attribute name (e.g. a sample SWID header title) to group on
 * @return a map from grouping key to the rows sharing that key
 */
public Map<String, List<ReturnValue>> separateFiles(List<ReturnValue> vals, String groupBy) {
    Map<String, List<ReturnValue>> map = new HashMap<>();
    // group files according to the designated header (e.g. sample SWID)
    for (ReturnValue r : vals) {
        String key = r.getAttributes().get(groupBy);
        if (key != null) {
            key = handleGroupByAttribute(key);
        }
        if (!map.containsKey(key)) {
            map.put(key, new ArrayList<ReturnValue>());
        }
        map.get(key).add(r);
    }
    return map;
}
/**
 * {@inheritDoc}
 *
 * @return a "feature not implemented" ReturnValue; deciders need no cleanup
 */
@Override
public ReturnValue clean_up() {
    return ReturnValue.featureNotImplemented();
}
/**
 * {@inheritDoc}
 *
 * Prints the summary command (from do_summary_command) to stdout.
 *
 * @return a fresh, successful ReturnValue
 */
@Override
public ReturnValue do_summary() {
    String command = do_summary_command();
    Log.stdout(command);
    return new ReturnValue();
}
/** @return whether all matches are launched regardless of previous runs */
public Boolean getForceRunAll() {
    return ignorePreviousRuns;
}
/** @param forceRunAll when true, launch all matches regardless of previous runs */
public void setForceRunAll(Boolean forceRunAll) {
    this.ignorePreviousRuns = forceRunAll;
}
/**
 * use getGroupingStrategy
 *
 * @return the grouping header
 * @deprecated use {@link #getGroupingStrategy()}
 */
@Deprecated
public Header getHeader() {
    return header;
}
/** @return the header used to group provenance rows */
public Header getGroupingStrategy() {
    return this.header;
}
/**
 * use setGroupingStrategy
 *
 * @param header the grouping header
 * @deprecated use {@link #setGroupingStrategy(Header)}
 */
@Deprecated
public void setHeader(Header header) {
    this.header = header;
}
/** @param strategy the header used to group provenance rows */
public void setGroupingStrategy(Header strategy) {
    this.header = strategy;
}
/** @return the meta-type filter, or null when no meta-type filtering applies */
public List<String> getMetaType() {
    return metaTypes;
}
/** @param metaType the meta-type filter; null disables meta-type filtering */
public void setMetaType(List<String> metaType) {
    this.metaTypes = metaType;
}
/** @return whether scheduled runs write metadata back */
public Boolean getMetadataWriteback() {
    return metadataWriteback;
}
/** @param metadataWriteback whether scheduled runs write metadata back */
public void setMetadataWriteback(Boolean metadataWriteback) {
    this.metadataWriteback = metadataWriteback;
}
/** @return the parent workflow accessions whose outputs are eligible inputs */
public Set<String> getParentWorkflowAccessions() {
    return parentWorkflowAccessions;
}
/** @param parentWorkflowAccessions workflow accessions whose outputs are eligible inputs */
public void setParentWorkflowAccessions(Set<String> parentWorkflowAccessions) {
    this.parentWorkflowAccessions = parentWorkflowAccessions;
}
/** @return whether the decider is in test (print-only) mode */
public Boolean getTest() {
    return test;
}
/** @param test whether the decider is in test (print-only) mode */
public void setTest(Boolean test) {
    this.test = test;
}
/** @return the accession of the workflow this decider schedules */
public String getWorkflowAccession() {
    return workflowAccession;
}
/** @param workflowAccession the accession of the workflow to schedule */
public void setWorkflowAccession(String workflowAccession) {
    this.workflowAccession = workflowAccession;
}
/** @return accessions of equivalent workflows checked for previous processing */
public Set<String> getWorkflowAccessionsToCheck() {
    return workflowAccessionsToCheck;
}
/** @param workflowAccessions accessions of equivalent workflows to check for previous processing */
public void setWorkflowAccessionsToCheck(Set<String> workflowAccessions) {
    this.workflowAccessionsToCheck = workflowAccessions;
}
/**
 * allow to user to do the final check and decide to run or cancel the decider e.g. check if all files are present
 *
 * @param commaSeparatedFilePaths the input file paths for the candidate run
 * @param commaSeparatedParentAccessions the parent accessions for the candidate run
 * @return SUCCESS to proceed; any other exit status vetoes the launch
 */
protected ReturnValue doFinalCheck(String commaSeparatedFilePaths, String commaSeparatedParentAccessions) {
    // base implementation never vetoes a launch
    return new ReturnValue(ReturnValue.SUCCESS);
}
/**
 * Report an actual launch of a workflow for testing purpose
 *
 * @return false iff we don't actually want to launch
 */
protected boolean reportLaunch() {
    return true;
}
/**
 * Injects a Metadata implementation (e.g. a mock for testing).
 *
 * @param metaws the metadata service to use
 */
public void setMetaws(Metadata metaws) {
    metadata = metaws;
}
/**
 * We now use the guideline that we only count failures when they occur on the same number of files (with the same paths)
 *
 * @param fileStatus how the past run's files relate to the candidate files
 * @param previousStatus the outcome of the past run
 * @return true only for a FAILED run over exactly the same files
 */
protected static boolean isCountAsFail(FILE_STATUS fileStatus, PREVIOUS_RUN_STATUS previousStatus) {
    if (fileStatus != FILE_STATUS.SAME_FILES) {
        return false;
    }
    return previousStatus == PREVIOUS_RUN_STATUS.FAILED;
}
/**
 * See https://wiki.oicr.on.ca/display/SEQWARE/BasicDecider+logic
 *
 * @param fileStatus how the past run's files relate to the candidate files
 * @param previousStatus the outcome of the past run
 * @return whether the candidate run should still be launched despite this past run
 */
protected static boolean isDoRerun(FILE_STATUS fileStatus, PREVIOUS_RUN_STATUS previousStatus) {
    Log.info("Considering match with " + fileStatus.name() + " status:" + previousStatus.name());
    // a failed run that consumed MORE files than we now have is suspicious
    if (fileStatus == FILE_STATUS.PAST_SUPERSET && previousStatus == PREVIOUS_RUN_STATUS.FAILED) {
        Log.stderr("****** Workflow run has more files in the past but failed. We will try to re-run, but you should investigate!!!! *******");
    }
    switch (fileStatus) {
        case SAME_FILES:
        case PAST_SUPERSET:
            // only re-run when the earlier matching run failed
            return previousStatus == PREVIOUS_RUN_STATUS.FAILED;
        default:
            // subsets, intersections and disjoint sets never block a re-run
            return true;
    }
}
/**
 * Queries the metadata service for workflow runs of this decider's workflow
 * (and the equivalent workflows being checked) that consumed any of the given
 * input files.
 *
 * @param fileSWIDs SWIDs of the candidate input files
 * @return the workflow runs associated with those files
 */
private List<WorkflowRun> produceAccessionListWithFileList(List<Integer> fileSWIDs) {
    // grab only the workflows in which we are interested
    List<Integer> relevantWorkflows = new ArrayList<>();
    relevantWorkflows.add(Integer.valueOf(this.workflowAccession));
    for (String accession : this.workflowAccessionsToCheck) {
        relevantWorkflows.add(Integer.valueOf(accession));
    }
    // find relevant workflow runs for this group of files
    List<WorkflowRun> associatedRuns = this.metadata.getWorkflowRunsAssociatedWithInputFiles(fileSWIDs, relevantWorkflows);
    Log.debug("Found " + associatedRuns.size() + " workflow runs via direct search");
    return associatedRuns;
}
/**
 * For a given set of file paths in filesToRun, examines all previous workflow runs associated with
 * those files: appends an entry to {@code failures} for every previous run of the target workflow
 * that counts as a failure, and decides whether the workflow should be re-run at all.
 *
 * @param filesToRun the file paths the candidate workflow run would consume
 * @param failures accumulator; {@code true} is appended for each previous run counted as a failure
 * @param previousWorkflowRuns all previous workflow runs associated with the input files
 * @return true if the workflow should be (re-)launched on filesToRun, false if a previous run blocks it
 */
private boolean processWorkflowRuns(Collection<String> filesToRun, List<Boolean> failures, List<WorkflowRun> previousWorkflowRuns) {
    int count = 0;
    boolean rerun = true;
    for (WorkflowRun previousWorkflowRun : previousWorkflowRuns) {
        count++;
        // only consider previous runs of the same workflow
        if (workflowAccession.equals(previousWorkflowRun.getWorkflowAccession().toString())) {
            FILE_STATUS fileStatus = compareWorkflowRunFiles(previousWorkflowRun.getInputFileAccessions(), filesToRun);
            Log.info("Workflow run " + previousWorkflowRun.getSwAccession() + " has a file status of " + fileStatus);
            PREVIOUS_RUN_STATUS previousStatus = determineStatus(previousWorkflowRun.getStatus());
            Log.info("Workflow run " + previousWorkflowRun.getSwAccession() + " has a status of " + previousStatus);
            boolean countAsFail = isCountAsFail(fileStatus, previousStatus);
            boolean doRerun = isDoRerun(fileStatus, previousStatus);
            if (countAsFail) {
                Log.info("Workflow run " + previousWorkflowRun.getSwAccession() + " counted as a failure with a file status of "
                        + fileStatus);
                // fixed: message previously printed the total twice ("count/size out of size")
                Log.info("The failing run was workflow_run " + count + " out of " + previousWorkflowRuns.size());
                failures.add(true);
            }
            if (!doRerun) {
                Log.info("Workflow run " + previousWorkflowRun.getSwAccession() + " blocking re-run with a status of: "
                        + previousStatus + " file status of: " + fileStatus);
                // fixed: message previously printed the total twice ("count/size out of size")
                Log.info("The blocking run was workflow_run " + count + " out of " + previousWorkflowRuns.size());
                rerun = false;
                break;
            }
        } else if (this.workflowAccessionsToCheck.contains(previousWorkflowRun.getWorkflowAccession().toString())) {
            // fixed: first placeholder previously printed the workflow accession instead of the run's SWID
            Log.debug("Workflow run " + previousWorkflowRun.getSwAccession() + " has a workflow "
                    + previousWorkflowRun.getWorkflowAccession() + " on the list of workflow accessions to check");
            // we will check whether all the files to run are contained within the previous run of the workflow, if so we will not
            // re-run
            FILE_STATUS fileStatus = compareWorkflowRunFiles(previousWorkflowRun.getInputFileAccessions(), filesToRun);
            Log.info("Workflow run " + previousWorkflowRun.getSwAccession() + " has a file status of " + fileStatus);
            if (this.isToRunContained(previousWorkflowRun.getInputFileAccessions(), filesToRun)) {
                // fixed typo: "contained the all of the files"
                Log.info("Previous workflow run contained all of the files that we want to run");
                rerun = false;
            }
        } else {
            Log.info("Workflow run " + previousWorkflowRun.getSwAccession() + " was neither a workflow to check nor a previous run of "
                    + workflowAccession + " , ignored");
        }
    }
    return rerun;
}
/**
 * Resolves each file SWID through the metadata service and collects the distinct file paths.
 *
 * @param fileSWIDs accessions of the files to resolve
 * @return the set of file paths for the given accessions
 */
private Set<String> determineFilePaths(Set<Integer> fileSWIDs) {
    final Set<String> paths = new HashSet<>();
    for (final Integer swid : fileSWIDs) {
        final net.sourceforge.seqware.common.model.File modelFile = metadata.getFile(swid);
        paths.add(modelFile.getFilePath());
    }
    return paths;
}
/**
 * Builds a one-line, human-readable command that reproduces this decider invocation
 * against the full SeqWare distribution jar.
 *
 * @return the summary command, surrounded by newlines
 */
private String do_summary_command() {
    // SEQWARE-1612 Change test command to actual jar name
    final String seqwareVersion = this.metadata.getClass().getPackage().getImplementationVersion();
    return "\njava -jar seqware-distribution-" + seqwareVersion + "-full.jar "
            + spaceSeparateMy(constructCommand())
            + "\n";
}
/**
 * Runs the file-provenance report restricted to completed workflow runs and successful
 * processing events (and, where configured, non-skipped entries and parent workflows),
 * then converts each report row into a ReturnValue.
 *
 * @return the report rows as ReturnValues, for backwards compatibility
 */
private List<ReturnValue> createListOfRelevantFilePaths() {
    final Map<FileProvenanceParam, List<String>> filter = ProvenanceUtility.convertOptionsToMap(options, metadata);
    if (skipStuff) {
        filter.put(FileProvenanceParam.skip, ImmutableList.of("false"));
    }
    filter.put(FileProvenanceParam.workflow_run_status, ImmutableList.of(WorkflowRunStatus.completed.toString()));
    filter.put(FileProvenanceParam.processing_status, ImmutableList.of(ProcessingStatus.success.toString()));
    if (!this.parentWorkflowAccessions.isEmpty()) {
        filter.put(FileProvenanceParam.workflow, ImmutableList.copyOf(this.parentWorkflowAccessions));
    }
    final List<Map<String, String>> fileProvenanceReport = metadata.fileProvenanceReport(filter);
    // convert to list of ReturnValues for backwards compatibility
    // consider memory use and GC here
    return convertFileProvenanceReport(fileProvenanceReport);
}
/**
 * Converts file-provenance report rows into ReturnValues. Each row's full attribute map is attached
 * to the ReturnValue, a nested FileMetadata is built from the file columns, and the per-entity
 * attribute columns (study, experiment, sample, ...) are exploded into FindAllTheFiles-style attributes.
 *
 * @param fileProvenanceReport rows from {@code Metadata.fileProvenanceReport}
 * @return one ReturnValue per report row
 */
private List<ReturnValue> convertFileProvenanceReport(List<Map<String, String>> fileProvenanceReport) {
    // presize: one ReturnValue per report row
    List<ReturnValue> list = new ArrayList<>(fileProvenanceReport.size());
    for (Map<String, String> map : fileProvenanceReport) {
        ReturnValue row = new ReturnValue();
        row.setAttributes(map);
        list.add(row);
        // mutate additional rows into a nested FileMetadata object
        FileMetadata fm = new FileMetadata();
        fm.setFilePath(map.get(Header.FILE_PATH.getTitle()));
        fm.setMetaType(map.get(Header.FILE_META_TYPE.getTitle()));
        fm.setDescription(map.get(Header.FILE_DESCRIPTION.getTitle()));
        fm.setMd5sum(map.get(Header.FILE_MD5SUM.getTitle()));
        // size column may be absent or empty; only parse when a value is present
        String sizeValue = map.get(Header.FILE_SIZE.getTitle());
        if (sizeValue != null && !sizeValue.isEmpty()) {
            fm.setSize(Long.valueOf(sizeValue));
        }
        // fixed: was a raw-typed "new ArrayList(...)" (unchecked warning); callers get a mutable list
        row.setFiles(new ArrayList<>(Collections.singletonList(fm)));
        handleAttributes(map, row, Header.STUDY_ATTRIBUTES, Header.STUDY_TAG_PREFIX);
        handleAttributes(map, row, Header.EXPERIMENT_ATTRIBUTES, Header.EXPERIMENT_TAG_PREFIX);
        handleAttributes(map, row, Header.PARENT_SAMPLE_ATTRIBUTES, Header.PARENT_SAMPLE_TAG_PREFIX);
        handleAttributes(map, row, Header.SAMPLE_ATTRIBUTES, Header.SAMPLE_TAG_PREFIX);
        handleAttributes(map, row, Header.IUS_ATTRIBUTES, Header.IUS_TAG_PREFIX);
        handleAttributes(map, row, Header.LANE_ATTRIBUTES, Header.LANE_TAG_PREFIX);
        handleAttributes(map, row, Header.SEQUENCER_RUN_ATTRIBUTES, Header.SEQUENCER_RUN_TAG_PREFIX);
        handleAttributes(map, row, Header.PROCESSING_ATTRIBUTES, Header.PROCESSING_TAG_PREFIX);
        handleAttributes(map, row, Header.FILE_ATTRIBUTES, Header.FILE_TAG_PREFIX);
    }
    return list;
}
/**
 * Explodes a semicolon-separated "key=value;key=value" attribute column into individual
 * FindAllTheFiles-style attributes on the ReturnValue, removing the column from the map.
 *
 * @param map the report row; the attribute column is removed as a side effect
 * @param row the ReturnValue to receive the exploded attributes
 * @param headerType the column holding the packed attributes
 * @param headerPrefix retained for interface compatibility; not currently used by this method
 */
private void handleAttributes(Map<String, String> map, ReturnValue row, Header headerType, Header headerPrefix) {
    // mutate attributes into expected format from FindAllTheFiles
    String attributes = map.remove(headerType.getTitle());
    // fixed: Map.remove returns null when the column is absent; the old code NPE'd on .isEmpty()
    if (attributes == null || attributes.isEmpty()) {
        return;
    }
    for (String attribute : attributes.split(";")) {
        String[] parts = attribute.split("=");
        String key = parts[0];
        // a bare "key" with no '=' yields a null value
        String value = parts.length > 1 ? parts[1] : null;
        FindAllTheFiles.addAttributeToReturnValue(row, key, value);
    }
}
/**
 * These file statuses reflect the discussion at https://wiki.oicr.on.ca/display/SEQWARE/BasicDecider+logic
 * They classify how a past workflow run's input files relate to the files currently under consideration;
 * see {@code isDoRerun} and {@code isCountAsFail} for how each status influences the re-run decision.
 */
protected enum FILE_STATUS {
/**
 * Two sets of files have no relationship
 */
DISJOINT_SETS,
/**
 * Two sets of files partially overlap i.e. intersection and subset (set of files in the past was smaller)
 */
PAST_SUBSET_OR_INTERSECTION,
/**
 * the same files are found at the same paths
 */
SAME_FILES,
/**
 * The set of files in the past was strictly larger than the current files under consideration
 */
PAST_SUPERSET
}
/**
 * We care about three types of status, an outright fail, other (pending, running, submitted, etc.), and completed.
 * Collapsing the many workflow-run states into these three simplifies the re-run logic in
 * {@code isDoRerun} and {@code isCountAsFail}.
 */
protected enum PREVIOUS_RUN_STATUS {
FAILED, OTHER, COMPLETED
}
private class ReturnValueProcessingTimeComparator implements Comparator<Entry<String, List<ReturnValue>>> {
@Override
public int compare(Entry<String, List<ReturnValue>> t0, Entry<String, List<ReturnValue>> t1) {
DateFormat formatter = new SimpleDateFormat();
Integer t0date = latestSWID(t0, formatter);
Integer t1date = latestSWID(t1, formatter);
return t1date.compareTo(t0date);
}
private Integer latestSWID(Entry<String, List<ReturnValue>> t0, DateFormat formatter) {
// grab the latest date in each group
Integer latestSWID = Integer.MIN_VALUE;
for (ReturnValue t0i : t0.getValue()) {
Integer currInt = Integer.valueOf(t0i.getAttribute(FindAllTheFiles.FILE_SWA));
if (currInt != null && currInt > latestSWID) {
latestSWID = currInt;
}
}
return latestSWID;
}
}
}