/**
* Copyright 2014 VU University Medical Center.
* Licensed under the Apache License version 2.0 (see http://www.apache.org/licenses/LICENSE-2.0.html).
*/
package nl.vumc.biomedbridges.galaxy;
import com.github.jmchilton.blend4j.galaxy.GalaxyInstance;
import com.github.jmchilton.blend4j.galaxy.HistoriesClient;
import com.github.jmchilton.blend4j.galaxy.ToolsClient;
import com.github.jmchilton.blend4j.galaxy.WorkflowsClient;
import com.github.jmchilton.blend4j.galaxy.beans.Dataset;
import com.github.jmchilton.blend4j.galaxy.beans.HistoryContents;
import com.github.jmchilton.blend4j.galaxy.beans.HistoryDetails;
import com.github.jmchilton.blend4j.galaxy.beans.WorkflowDetails;
import com.github.jmchilton.blend4j.galaxy.beans.WorkflowInputs;
import com.github.jmchilton.blend4j.galaxy.beans.WorkflowOutputs;
import com.github.jmchilton.blend4j.galaxy.beans.WorkflowStepDefinition;
import com.sun.jersey.api.client.ClientResponse;
import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import nl.vumc.biomedbridges.core.Constants;
import nl.vumc.biomedbridges.core.FileUtils;
import nl.vumc.biomedbridges.core.Workflow;
import nl.vumc.biomedbridges.core.WorkflowEngine;
import org.apache.http.HttpStatus;
import org.json.simple.parser.JSONParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.github.jmchilton.blend4j.galaxy.beans.WorkflowInputs.WorkflowInput;
/**
* The workflow engine implementation for Galaxy.
*
* todo [high priority]: improve error messages, for example when incorrect names for input files are used.
*
* todo: Galaxy workflows cannot specify which version of a tool should be used:
* galaxy-dist\lib\galaxy\workflow\modules.py (lines 313-316):
* """
* # See if we have access to a different version of the tool.
* # TO DO: If workflows are ever enhanced to use tool version
* # in addition to tool id, enhance the selection process here
* # to retrieve the correct version of the tool.
* """
*
* @author <a href="mailto:f.debruijn@vumc.nl">Freek de Bruijn</a>
*/
public class GalaxyWorkflowEngine implements WorkflowEngine {
/**
* Galaxy file type "tabular"; set on upload requests for every workflow except
* {@link Constants#WORKFLOW_REMOVE_TOP_AND_LEFT}.
*/
public static final String FILE_TYPE_TABULAR = "tabular";
/**
* Galaxy file type "txt"; set on upload requests for the {@link Constants#WORKFLOW_REMOVE_TOP_AND_LEFT} workflow.
*/
public static final String FILE_TYPE_TEXT = "txt";
/**
* Relative path of the local file to which the last workflow output is downloaded when the results are checked.
*/
protected static final String OUTPUT_FILE_PATH = Paths.get("tmp", "WorkflowRunner-runWorkflow.txt").toString();
/**
* The logger for this class.
*/
private static final Logger logger = LoggerFactory.getLogger(GalaxyWorkflowEngine.class);
/**
* The number of milliseconds in a second; used to convert the wait times (in seconds) for Thread.sleep.
*/
private static final int MILLISECONDS_PER_SECOND = 1000;
/**
* The default maximum number of times to wait for the upload to finish.
*/
private static final int UPLOAD_MAX_WAIT_COUNT = 28;
/**
* The default number of seconds to wait for the upload to finish (for each wait cycle).
*/
private static final int UPLOAD_WAIT_SECONDS = 6;
/**
* The default number of seconds to wait after the upload has finished.
*/
private static final int WAIT_AFTER_UPLOAD_SECONDS = 2;
/**
* The default maximum number of times to wait for the workflow to finish.
*/
private static final int RUN_WORKFLOW_MAX_WAIT_COUNT = 20;
/**
* The default number of seconds to wait for the workflow to finish (for each wait cycle).
*/
private static final int WORKFLOW_WAIT_SECONDS = 3;
/**
* History state "ok"; also the key in the history state IDs map under which finished datasets are listed.
*/
private static final String STATE_OK = "ok";
/**
* The history utils object; used to look up dataset IDs by name and to download datasets.
*/
private HistoryUtils historyUtils;
/**
* The Galaxy server instance that will run the workflows (null when the engine is not initialized properly).
*/
private GalaxyInstance galaxyInstance;
/**
* The workflows client to interact with the workflows (null when there is no Galaxy instance).
*/
private WorkflowsClient workflowsClient;
/**
* The histories client for accessing Galaxy histories (null when there is no Galaxy instance).
*/
private HistoriesClient historiesClient;
/**
* The ID of the history that is used for the input and output files.
*/
private String historyId;
/**
* The outputs of the executed workflow; assigned when the workflow is executed.
*/
private WorkflowOutputs workflowOutputs;
/**
* Mappings from output name to output ID (used when output files are not downloaded automatically).
*/
private Map<String, String> outputNameToIdsMap;
/**
* The maximum number of times to wait for the upload to finish.
*/
private int uploadMaxWaitCount;
/**
* The number of seconds to wait for the upload to finish (for each wait cycle).
*/
private int uploadWaitSeconds;
/**
* The number of seconds to wait after the upload has finished.
*/
private int waitAfterUploadSeconds;
/**
* The maximum number of times to wait for the workflow to finish.
*/
private int runWorkflowMaxWaitCount;
/**
* The number of seconds to wait for the workflow to finish (for each wait cycle).
*/
private int workflowWaitSeconds;
///**
// * The metadata for the workflow engine.
// */
//private GalaxyWorkflowEngineMetadata workflowEngineMetadata;
/**
 * Construct a workflow engine for a Galaxy server.
 *
 * The workflows and histories clients are obtained from the Galaxy instance; when no instance is given, both
 * clients stay null. All wait counts and wait times start at their default values.
 *
 * @param galaxyInstance the Galaxy instance that is used (may be null).
 * @param historyId      the ID of the history for input and output files.
 * @param historyUtils   the history utils object.
 */
public GalaxyWorkflowEngine(final GalaxyInstance galaxyInstance, final String historyId,
                            final HistoryUtils historyUtils) {
    this.galaxyInstance = galaxyInstance;
    if (galaxyInstance == null) {
        this.workflowsClient = null;
        this.historiesClient = null;
    } else {
        this.workflowsClient = galaxyInstance.getWorkflowsClient();
        this.historiesClient = galaxyInstance.getHistoriesClient();
    }
    this.historyId = historyId;
    this.historyUtils = historyUtils;

    // Start with the default polling configuration.
    uploadMaxWaitCount = UPLOAD_MAX_WAIT_COUNT;
    uploadWaitSeconds = UPLOAD_WAIT_SECONDS;
    waitAfterUploadSeconds = WAIT_AFTER_UPLOAD_SECONDS;
    runWorkflowMaxWaitCount = RUN_WORKFLOW_MAX_WAIT_COUNT;
    workflowWaitSeconds = WORKFLOW_WAIT_SECONDS;
}
/**
 * Create a Galaxy workflow object with the given name that is bound to this engine and uses a new JSON parser.
 *
 * @param workflowName the name of the workflow.
 * @return the new Galaxy workflow.
 */
@Override
public Workflow getWorkflow(final String workflowName) {
    final JSONParser jsonParser = new JSONParser();
    return new GalaxyWorkflow(workflowName, this, jsonParser);
}
/**
 * Override the maximum number of wait cycles for uploading data and for running the workflow. A negative value
 * leaves the corresponding setting unchanged.
 *
 * @param uploadMaxWaitCount      the maximum number of times to wait for the upload to finish.
 * @param runWorkflowMaxWaitCount the maximum number of times to wait for the workflow to finish.
 */
public void setWaitCounts(final int uploadMaxWaitCount, final int runWorkflowMaxWaitCount) {
    final boolean uploadCountUsable = uploadMaxWaitCount >= 0;
    final boolean workflowCountUsable = runWorkflowMaxWaitCount >= 0;
    if (uploadCountUsable) {
        this.uploadMaxWaitCount = uploadMaxWaitCount;
    }
    if (workflowCountUsable) {
        this.runWorkflowMaxWaitCount = runWorkflowMaxWaitCount;
    }
}
/**
 * Change the wait timers from their default value to something else; useful for testing.
 *
 * All three values are in seconds. A negative value leaves the corresponding timer unchanged (the same convention
 * as {@link #setWaitCounts(int, int)}), because a negative sleep time would make Thread.sleep throw an
 * IllegalArgumentException in the middle of a workflow run.
 *
 * @param uploadWaitSeconds      the number of seconds to wait for the upload to finish (for each wait cycle).
 * @param waitAfterUploadSeconds the number of seconds to wait after the upload has finished.
 * @param workflowWaitSeconds    the number of seconds to wait for the workflow to finish (for each wait cycle).
 */
protected void setWaitTimers(final int uploadWaitSeconds, final int waitAfterUploadSeconds,
                             final int workflowWaitSeconds) {
    if (uploadWaitSeconds >= 0) {
        this.uploadWaitSeconds = uploadWaitSeconds;
    }
    if (waitAfterUploadSeconds >= 0) {
        this.waitAfterUploadSeconds = waitAfterUploadSeconds;
    }
    if (workflowWaitSeconds >= 0) {
        this.workflowWaitSeconds = workflowWaitSeconds;
    }
}
/**
 * Run a workflow on the Galaxy server: make sure the workflow is on the server, upload the input files, execute
 * the workflow, handle the output files and check the results.
 *
 * @param workflow the workflow to run (cast to GalaxyWorkflow).
 * @return true only if the workflow finished in time, the downloads were successful and the result check passed;
 * false otherwise (also when there is no Galaxy instance).
 * @throws InterruptedException if the current thread is interrupted while waiting for Galaxy.
 * @throws IOException          if handling the workflow results fails.
 */
@Override
@SuppressWarnings("SpellCheckingInspection")
public boolean runWorkflow(final Workflow workflow) throws InterruptedException, IOException {
    if (galaxyInstance == null) {
        logger.error("Galaxy instance is not initialized properly.");
        return false;
    }

    logStartRunWorkflow();
    // todo: check whether the server is available and/or give a better error message when it isn't available.
    // An unavailable server has been seen to answer with an nginx "504 Gateway Time-out" HTML page.
    logger.info("Ensure the workflow is available.");
    final GalaxyWorkflow galaxyWorkflow = (GalaxyWorkflow) workflow;
    galaxyWorkflow.ensureWorkflowIsOnServer(workflowsClient);

    logger.info("Prepare the input files.");
    uploadInputFiles(workflow);
    final WorkflowInputs workflowInputs = createInputsObject(workflow);

    final boolean workflowFinished = executeWorkflow(workflowInputs);
    final boolean downloadsSuccessful = downloadOutputFiles(workflow);
    logger.trace("Download output files downloadsSuccessful: {}.", downloadsSuccessful);

    if (!workflowFinished) {
        logger.info("Timeout while waiting for workflow output file(s).");
    }
    // The outputs are checked even after a timeout to generate some logging for debugging/analysis.
    final boolean checkResults = checkWorkflowResults(workflow);

    logger.trace("workflowFinished: " + workflowFinished);
    logger.trace("downloadsSuccessful: " + downloadsSuccessful);
    logger.trace("checkResults: " + checkResults);
    return workflowFinished && downloadsSuccessful && checkResults;
}
/**
 * Log Galaxy server details when starting a workflow.
 *
 * The API key is a credential, so it is never written to the log in full: only a mask plus the last four
 * characters is logged, which is enough to tell keys apart while debugging.
 */
private void logStartRunWorkflow() {
    final String apiKey = galaxyInstance.getApiKey();
    final int visibleCharacters = 4;
    final String maskedApiKey;
    if (apiKey == null || apiKey.isEmpty()) {
        maskedApiKey = "<not set>";
    } else if (apiKey.length() <= 2 * visibleCharacters) {
        // Too short to reveal any part without giving away most of the key.
        maskedApiKey = "****";
    } else {
        maskedApiKey = "****" + apiKey.substring(apiKey.length() - visibleCharacters);
    }

    logger.info("nl.vumc.biomedbridges.galaxy.GalaxyWorkflowEngine.runWorkflow");
    logger.info("");
    logger.info("Galaxy instance URL: {}.", galaxyInstance.getGalaxyUrl());
    logger.info("Galaxy API key: {}.", maskedApiKey);
    logger.info("Galaxy history ID: {}.", historyId);
    logger.info("");
}
/**
 * Upload every input value of the workflow that is a file to the history and then wait until the history has
 * processed the uploads. A failed upload is logged as an error; the remaining files are still uploaded.
 *
 * @param workflow the workflow.
 * @throws InterruptedException if any thread has interrupted the current thread while waiting for Galaxy.
 */
private void uploadInputFiles(final Workflow workflow) throws InterruptedException {
    logger.info("- Upload the input files.");
    for (final Object candidate : workflow.getAllInputValues()) {
        if (!(candidate instanceof File)) {
            // Only files are uploaded; other input values are ignored here.
            continue;
        }
        final File fileToUpload = (File) candidate;
        final ClientResponse response = uploadInputFile(workflow, historyId, fileToUpload);
        final int httpStatus = response.getStatus();
        if (httpStatus != HttpStatus.SC_OK) {
            logger.error("Uploading file {} failed with status {}.", fileToUpload.getAbsolutePath(), httpStatus);
        }
    }
    logger.info("- Waiting for upload to history to finish.");
    waitForHistoryUpload(historyId);
}
/**
 * Upload a single input file to a history on the Galaxy server. The file type of the upload is "txt" for the
 * remove-top-and-left workflow and "tabular" for all other workflows.
 *
 * @param workflow  the workflow.
 * @param historyId the ID of the history to use for workflow input and output.
 * @param inputFile the input file to upload.
 * @return the client response from the Jersey library.
 */
private ClientResponse uploadInputFile(final Workflow workflow, final String historyId, final File inputFile) {
    final ToolsClient.FileUploadRequest request = new ToolsClient.FileUploadRequest(historyId, inputFile);
    // todo: do this based on what the Galaxy workflow needs.
    final boolean plainText = workflow.getName().equals(Constants.WORKFLOW_REMOVE_TOP_AND_LEFT);
    request.setFileType(plainText ? FILE_TYPE_TEXT : FILE_TYPE_TABULAR);
    final ToolsClient toolsClient = galaxyInstance.getToolsClient();
    return toolsClient.uploadRequest(request);
}
/**
 * Poll the history until it is ready or until the maximum number of upload wait cycles has been used, sleeping
 * for the upload wait time before each check. Afterwards the history state is logged (as an error when it is not
 * "ok") and the thread sleeps for the configured time after the upload.
 *
 * @param historyId the ID of the history with the input files.
 * @throws InterruptedException if any thread has interrupted the current thread while waiting for Galaxy.
 */
private void waitForHistoryUpload(final String historyId) throws InterruptedException {
    boolean uploadDone = false;
    for (int cycle = 0; cycle < uploadMaxWaitCount && !uploadDone; cycle++) {
        logger.info("  + Now waiting for {} seconds...", uploadWaitSeconds);
        Thread.sleep(uploadWaitSeconds * MILLISECONDS_PER_SECOND);
        uploadDone = isHistoryReady(historyId);
    }

    final HistoryDetails details = historiesClient.showHistory(historyId);
    final String historyState = details.getState();
    final Map<String, List<String>> idsPerState = details.getStateIds();
    final String stateIdsMessage = "historyDetails.getStateIds(): " + idsPerState;
    if (!STATE_OK.equals(historyState)) {
        logger.error("History upload no longer running, but not in 'ok' state. State is: '{}'.", historyState);
        logger.error(stateIdsMessage);
    } else {
        logger.debug(stateIdsMessage);
    }

    Thread.sleep(waitAfterUploadSeconds * MILLISECONDS_PER_SECOND);
}
/**
 * Check whether uploading/processing of all files in a history is ready, which means that no dataset is in the
 * "running" or "queued" state.
 *
 * A state that is missing from the state IDs map is treated as having no datasets. If the history reports no
 * state IDs at all, readiness cannot be determined and the history is considered not ready.
 *
 * @param historyId the ID of the history with the input files.
 * @return whether uploading/processing of all files in a history is ready.
 */
private boolean isHistoryReady(final String historyId) {
    final HistoryDetails historyDetails = historiesClient.showHistory(historyId);
    final Map<String, List<String>> stateIds = historyDetails.getStateIds();
    // If the input/output file count is known, it could be checked too:
    // stateIds.get(STATE_OK).size() == [n]
    final boolean finished;
    if (stateIds == null) {
        logger.warn("History {} did not report any state IDs; assuming it is not ready yet.", historyId);
        finished = false;
    } else {
        final List<String> running = stateIds.get("running");
        final List<String> queued = stateIds.get("queued");
        finished = (running == null || running.isEmpty()) && (queued == null || queued.isEmpty());
    }
    logger.debug("finished: {}", finished);
    logger.debug("History state IDs: {}.", stateIds);
    return finished;
}
/**
 * Create the workflow inputs object with the input files and parameters.
 *
 * The inputs use the existing history as destination. Each file input is linked (by input label) to the history
 * dataset that has the same name as the file. Input values that are not files are never uploaded to the history,
 * so they are skipped with an error message instead of failing with a ClassCastException.
 *
 * @param workflow the workflow.
 * @return the workflow inputs object.
 */
private WorkflowInputs createInputsObject(final Workflow workflow) {
    logger.info("- Create the workflow inputs object.");
    final WorkflowInputs inputs = new WorkflowInputs();
    inputs.setDestination(new WorkflowInputs.ExistingHistory(historyId));
    final String galaxyWorkflowId = getGalaxyWorkflowId(workflow.getName());
    logger.trace("galaxyWorkflowId: {}.", galaxyWorkflowId);
    if (galaxyWorkflowId == null) {
        // Make the root cause visible; the calls below will most likely fail without a workflow ID.
        logger.error("No Galaxy workflow found on the server for workflow name {}.", workflow.getName());
    }
    inputs.setWorkflowId(galaxyWorkflowId);
    final WorkflowDetails workflowDetails = workflowsClient.showWorkflow(galaxyWorkflowId);
    for (final Map.Entry<String, Object> inputEntry : workflow.getAllInputEntries()) {
        final Object inputValue = inputEntry.getValue();
        if (!(inputValue instanceof File)) {
            logger.error("Input {} is not a file (value: {}); it cannot be linked to a history dataset and is "
                         + "skipped.", inputEntry.getKey(), inputValue);
            continue;
        }
        final String fileName = ((File) inputValue).getName();
        final String inputId = historyUtils.getDatasetIdByName(fileName, historiesClient, historyId);
        final WorkflowInput workflowInput = new WorkflowInput(inputId, WorkflowInputs.InputSourceType.HDA);
        logger.trace("Add input file {} for input label {}.", fileName, inputEntry.getKey());
        WorkflowUtils.setInputByLabel(inputEntry.getKey(), workflowDetails, inputs, workflowInput);
    }
    if (workflow.getParameters() != null && workflow.getParameters().size() > 0) {
        addParametersToInputsObject(workflow, inputs, workflowDetails.getSteps());
    }
    return inputs;
}
/**
 * Add workflow parameters to the inputs object.
 *
 * The parameters are keyed by a 1-based step number, which is translated to a Galaxy step ID by taking the step
 * IDs in ascending order. Step numbers that are not integers or that are out of range are reported as errors and
 * their parameters are ignored.
 *
 * @param workflow      the workflow.
 * @param inputs        the inputs object.
 * @param workflowSteps the workflow steps.
 */
private void addParametersToInputsObject(final Workflow workflow, final WorkflowInputs inputs,
                                         final Map<String, WorkflowStepDefinition> workflowSteps) {
    final List<String> stepIds = new ArrayList<>(workflowSteps.keySet());
    sortStepIds(stepIds);
    for (final Object stepNumber : workflow.getParameters().keySet()) {
        final int stepIndex;
        try {
            stepIndex = Integer.parseInt(stepNumber.toString()) - 1;
        } catch (final NumberFormatException e) {
            logger.error("Step number {} is not a valid integer; its parameters are ignored.", stepNumber);
            continue;
        }
        if (stepIndex >= 0 && stepIndex < stepIds.size()) {
            final String stepId = stepIds.get(stepIndex);
            for (final Map.Entry<String, Object> entry : workflow.getParameters().get(stepNumber).entrySet()) {
                logger.trace("Set workflow step {} (id: {}) parameter {} to value {}.", stepNumber, stepId,
                             entry.getKey(), entry.getValue());
                inputs.setStepParameter(stepId, entry.getKey(), entry.getValue());
            }
        } else {
            // Step numbers are 1-based, so the valid range is 1..[number of steps].
            logger.error("No step ID found for step number {} (should be in range 1..{})", stepNumber,
                         stepIds.size());
        }
    }
}

/**
 * Sort step IDs in ascending order: numerically when every ID is a plain non-negative number (so that "9" comes
 * before "10"), and lexicographically otherwise.
 *
 * @param stepIds the step IDs to sort in place.
 */
private static void sortStepIds(final List<String> stepIds) {
    for (final String stepId : stepIds) {
        // At most 18 digits, so that Long.parseLong below cannot overflow.
        if (stepId == null || !stepId.matches("\\d{1,18}")) {
            Collections.sort(stepIds);
            return;
        }
    }
    Collections.sort(stepIds, new Comparator<String>() {
        @Override
        public int compare(final String first, final String second) {
            return Long.compare(Long.parseLong(first), Long.parseLong(second));
        }
    });
}
/**
 * Get the ID of the Galaxy workflow.
 *
 * A workflow on the server whose name is exactly equal to the requested name is preferred. If there is none, the
 * last workflow whose name starts with the requested name is used, so names that the server extended with a
 * suffix are still found. Workflows without a name are skipped.
 *
 * @param workflowName the name of the workflow.
 * @return the ID of the Galaxy workflow or null if no workflow matches (or the name is null).
 */
private String getGalaxyWorkflowId(final String workflowName) {
    if (workflowName == null) {
        return null;
    }
    com.github.jmchilton.blend4j.galaxy.beans.Workflow prefixMatch = null;
    for (final com.github.jmchilton.blend4j.galaxy.beans.Workflow candidate : workflowsClient.getWorkflows()) {
        final String candidateName = candidate.getName();
        if (candidateName == null) {
            continue;
        }
        if (candidateName.equals(workflowName)) {
            // An exact match cannot be confused with another workflow that merely shares the prefix.
            return candidate.getId();
        }
        if (candidateName.startsWith(workflowName)) {
            prefixMatch = candidate;
        }
    }
    return (prefixMatch != null) ? prefixMatch.getId() : null;
}
/**
 * Start the workflow on the server and poll the history until it is ready or until the maximum number of workflow
 * wait cycles has been used. The workflow outputs returned by the server are stored in this engine.
 *
 * @param workflowInputs the blend4j workflow inputs.
 * @return whether the history became ready before the waiting stopped.
 * @throws InterruptedException if any thread has interrupted the current thread while waiting for Galaxy.
 */
private boolean executeWorkflow(final WorkflowInputs workflowInputs) throws InterruptedException {
    workflowOutputs = workflowsClient.runWorkflow(workflowInputs);
    logger.info("Running the workflow (history ID: {}).", workflowOutputs.getHistoryId());

    int cyclesWaited = 0;
    boolean done = false;
    while (cyclesWaited < runWorkflowMaxWaitCount && !done) {
        logger.info("- Now waiting for {} seconds...", workflowWaitSeconds);
        Thread.sleep(workflowWaitSeconds * MILLISECONDS_PER_SECOND);
        done = isHistoryReady(historyId);
        cyclesWaited++;
    }

    if (!done) {
        logger.warn("Stopped waiting for the workflow to finish after {} seconds.",
                    runWorkflowMaxWaitCount * workflowWaitSeconds);
    } else {
        logger.info("Workflow seems to be finished after roughly {} seconds.", cyclesWaited * workflowWaitSeconds);
    }

    final HistoryDetails detailsAfterRun = historiesClient.showHistory(historyId);
    final Map<String, List<String>> idsPerState = detailsAfterRun.getStateIds();
    logger.debug("History state IDs after execute: {}.", idsPerState);
    logger.debug("There are {} output file(s) ready for download.", idsPerState.get(STATE_OK).size());
    return done;
}
/**
 * Handle the output files of the executed workflow. With automatic download enabled, every workflow output is
 * downloaded and added to the workflow object. Otherwise, a map from name to ID is filled with all contents of
 * the history, which allows downloading output files later.
 *
 * @param workflow the workflow.
 * @return whether all output files were downloaded successfully (true when nothing had to be downloaded).
 */
private boolean downloadOutputFiles(final Workflow workflow) {
    try {
        if (!workflow.getAutomaticDownload()) {
            outputNameToIdsMap = new HashMap<>();
            for (final HistoryContents item : historiesClient.showHistoryContents(historyId)) {
                outputNameToIdsMap.put(item.getName(), item.getId());
            }
            return true;
        }

        boolean allDownloaded = true;
        for (final String outputId : workflowOutputs.getOutputIds()) {
            // The download comes first in the expression, so it is always attempted, even after a failure.
            allDownloaded = downloadOutputFile(workflow, outputId) && allDownloaded;
        }
        return allDownloaded;
    } catch (final IOException e) {
        logger.error("Error downloading a workflow output file.", e);
        return false;
    }
}
/**
 * Look up the output ID of a workflow output file by its output name.
 *
 * @param outputName the output name.
 * @return the output ID, or null when the name to ID map has not been filled or has no entry for the name.
 */
public String getOutputIdForOutputName(final String outputName) {
    if (outputNameToIdsMap == null) {
        return null;
    }
    return outputNameToIdsMap.get(outputName);
}
/**
 * Download one workflow output to a local file and register that file as an output of the workflow. The local
 * file is created in the download directory of the workflow if there is one and as a temporary file otherwise.
 * The file is registered as output regardless of whether the download succeeded.
 *
 * @param workflow the workflow.
 * @param outputId the ID of the output file.
 * @return whether downloading was successful.
 * @throws IOException if a local file could not be created.
 */
protected boolean downloadOutputFile(final Workflow workflow, final String outputId) throws IOException {
    final Dataset dataset = historiesClient.showDataset(historyId, outputId);
    // Fall back to the output ID for datasets without a name.
    String outputName = dataset.getName();
    if (outputName == null) {
        outputName = outputId;
    }
    final String rawBaseName = String.format("workflow-runner-%s-%s-", historyId, outputName);
    final String baseName = FileUtils.cleanFileName(rawBaseName);
    final String suffix = "." + dataset.getDataTypeExt();

    final File outputFile = workflow.getDownloadDirectory() == null
                            ? File.createTempFile(baseName, suffix)
                            : new File(FileUtils.createUniqueFilePath(workflow.getDownloadDirectory(), baseName,
                                                                      suffix));
    final String localPath = outputFile.getAbsolutePath();
    logger.info("Downloading output {} to local file {}.", outputName, localPath);
    final boolean success = historyUtils.downloadDataset(galaxyInstance, historiesClient, historyId, outputId,
                                                         outputFile.getAbsolutePath());
    workflow.addOutput(outputName, outputFile);
    return success;
}
/**
 * Check the results of the workflow: log all workflow output IDs, warn when the number of outputs is not exactly
 * one and, if there is at least one output and automatic download is enabled, check whether downloading works.
 *
 * todo: is this still necessary? only if automatic download is on? last output file is downloaded twice?
 *
 * @param workflow the workflow.
 * @return whether the workflow results appear to be valid.
 * @throws IOException if reading the workflow results fails.
 */
private boolean checkWorkflowResults(final Workflow workflow) throws IOException {
    logger.info("Check outputs.");
    final List<String> outputIds = workflowOutputs.getOutputIds();
    for (final String outputId : outputIds) {
        logger.info("- Workflow output ID: {}.", outputId);
    }

    final int outputCount = outputIds.size();
    if (outputCount == 0) {
        logger.warn("No workflow output found.");
    } else if (outputCount > 1) {
        logger.warn("More than one workflow outputs found ({}).", outputCount);
    }

    if (outputCount == 0 || !workflow.getAutomaticDownload()) {
        // Nothing to verify by downloading.
        return true;
    }
    return checkDownloadingWorks();
}
/**
 * Check whether downloading an output file works by downloading the last workflow output to the fixed output file
 * path.
 *
 * The check only passes if the download itself reports success and the local file exists afterwards. The file
 * existing on its own is not sufficient, because a file left behind by an earlier run may still be at that path.
 *
 * @return whether downloading an output file works.
 */
private boolean checkDownloadingWorks() {
    // The last workflow output file is most likely to be the end result.
    final String outputDatasetId = workflowOutputs.getOutputIds().get(workflowOutputs.getOutputIds().size() - 1);
    final boolean downloaded = historyUtils.downloadDataset(galaxyInstance, historiesClient, historyId,
                                                            outputDatasetId, OUTPUT_FILE_PATH);
    final File outputFile = new File(OUTPUT_FILE_PATH);
    final boolean valid = downloaded && outputFile.exists();
    if (!downloaded) {
        logger.error("- Downloading output dataset {} failed.", outputDatasetId);
    } else if (!valid) {
        logger.error("- Output file does not exist!");
    } else {
        logger.info("- Output file exists.");
        // Only an existing, freshly downloaded file can meaningfully be reported as empty.
        if (outputFile.length() == 0) {
            logger.warn("- Output file is empty.");
        }
    }
    return valid;
}
}