/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.galaxytools; import static fr.ens.biologie.genomique.eoulsan.galaxytools.GalaxyToolXMLParserUtils.extractInputs; import static fr.ens.biologie.genomique.eoulsan.galaxytools.GalaxyToolXMLParserUtils.extractOutputs; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Document; import org.xml.sax.SAXException; import com.google.common.base.Joiner; import com.google.common.collect.HashMultiset; import com.google.common.collect.Multiset; import com.google.common.collect.Sets; import fr.ens.biologie.genomique.eoulsan.EoulsanException; import fr.ens.biologie.genomique.eoulsan.core.Naming; import fr.ens.biologie.genomique.eoulsan.core.Parameter; import fr.ens.biologie.genomique.eoulsan.core.TaskContext; import fr.ens.biologie.genomique.eoulsan.data.Data; import fr.ens.biologie.genomique.eoulsan.data.DataFile; import fr.ens.biologie.genomique.eoulsan.data.DataFormat; import fr.ens.biologie.genomique.eoulsan.design.Sample; import fr.ens.biologie.genomique.eoulsan.galaxytools.elements.ToolElement; import fr.ens.biologie.genomique.eoulsan.util.XMLUtils; /** * This class create an interpreter to tool xml file from Galaxy. * @author Sandrine Perrin * @since 2.0 */ public class GalaxyToolInterpreter { /** The tool xm lis. */ private final InputStream toolXMLis; /** The Constant TAG_FORBIDDEN. */ private final static Set<String> TAG_FORBIDDEN = Sets.newHashSet("repeat"); private static final String TMP_DIR_ENVIRONMENT_VARIABLE_NAME = "TMPDIR"; // Set DOM related to the tool XML file /** The doc. */ private final Document doc; /** Data from tool XML. */ private Map<String, ToolElement> inputs; /** The outputs. */ private Map<String, ToolElement> outputs; private ElementPorts inputPorts; private ElementPorts outputPorts; /** The step parameters. */ private final Map<String, Parameter> stepParameters; /** The tool. */ private final ToolData tool; private boolean isConfigured = false; private boolean isExecuted = false; // // Inner classes // /** * This inner class define the link between an Element an Eoulsan step port. */ private static final class ElementPort { private final ToolElement element; private final String portName; private final int fileIndex; /** * Get the DataFile linked to the Element. * @param context Step context * @return a DataFile object */ public DataFile getInputDataFile(final TaskContext context) { final Data data = context.getInputData(this.portName); return this.fileIndex == -1 ? data.getDataFile() : data.getDataFile(this.fileIndex); } /** * Get the DataFile linked to the Element. * @param context Step context * @return a DataFile object */ public DataFile getOutputDataFile(final TaskContext context, final Data inData) { final Data data = context.getOutputData(this.portName, inData); return this.fileIndex == -1 ? data.getDataFile() : data.getDataFile(this.fileIndex); } @Override public String toString() { return "ElementPort{element=" + element.getName() + ", portName=" + portName + ", fileIndex=" + fileIndex + "}"; } /** * Constructor. * @param element Tool element * @param portName Eoulsan port name * @param fileIndex file index */ public ElementPort(final ToolElement element, final String portName, final int fileIndex) { this.element = element; this.portName = portName; this.fileIndex = fileIndex; } } /** * This inner class define a collection of ElementPorts. */ private static final class ElementPorts { private Map<String, ElementPort> ports = new HashMap<>(); /** * Get an ElementPort from its name. * @param elementName the name of the element port * @return an ElementPort or null if the element name does not exists */ public ElementPort getPortElements(final String elementName) { return this.ports.get(elementName); } /** * Get the ToolElement objects that will be used to create the Eoulsan step * ports. Only one of the port of multi-files DataFormat are kept. * @return a set of ToolElement */ public Set<ToolElement> getStepElements() { final Set<ToolElement> result = new HashSet<>(); for (ElementPort e : ports.values()) { if (e.fileIndex < 1) { result.add(e.element); } } return Collections.unmodifiableSet(result); } /** * Sort ToolElements. * @param elements element to sort * @return a sorted list of ToolElement */ private static List<ToolElement> sortedElements( final Collection<ToolElement> elements) { final List<ToolElement> elementsSorted = new ArrayList<>(elements); Collections.sort(elementsSorted, new Comparator<ToolElement>() { @Override public int compare(ToolElement o1, ToolElement o2) { return o1.getName().compareTo(o2.getName()); } }); return Collections.unmodifiableList(elementsSorted); } @Override public String toString() { return this.ports.toString(); } /** * Constructor. * @param elements the element */ public ElementPorts(final Map<String, ToolElement> elements) { final Multiset<DataFormat> formatCount = HashMultiset.create(); final Map<DataFormat, String> formatPortNames = new HashMap<>(); for (ToolElement e : sortedElements(elements.values())) { // Discard parameters if (!e.isFile()) { continue; } final DataFormat format = e.getDataFormat(); if (format.getMaxFilesCount() == 1) { this.ports.put(e.getName(), new ElementPort(e, e.getValidatedName(), -1)); } else { // If the DataFormat of the element is multi-file, only keep one // element for Eoulsan step ports final String portName; if (formatPortNames.containsKey(format)) { portName = formatPortNames.get(format); } else { portName = e.getValidatedName(); formatPortNames.put(format, portName); } this.ports.put(e.getName(), new ElementPort(e, portName, formatCount.count(format))); formatCount.add(format); } } } } /** * Parse tool file to extract useful data to run tool. * @param parameters the set step parameters * @throws EoulsanException if an data missing */ public void configure(final Set<Parameter> parameters) throws EoulsanException { checkState(!isConfigured, "GalaxyToolStep, this instance has been already configured"); // Convert Set in Map for (final Parameter p : parameters) { this.stepParameters.put(p.getName(), p); } final Document localDoc = this.doc; // Extract variable settings this.inputs = extractInputs(localDoc, this.stepParameters); this.outputs = extractOutputs(localDoc, this.stepParameters); this.inputPorts = new ElementPorts(this.inputs); this.outputPorts = new ElementPorts(this.outputs); isConfigured = true; } /** * Convert command tag from tool file in string, variable are replace by * value. * @param context the context * @return the string * @throws EoulsanException the Eoulsan exception */ public ToolExecutorResult execute(final TaskContext context) throws EoulsanException { checkState(!isExecuted, "GalaxyToolStep, this instance has been already executed"); context.getLogger().info("Parsing xml file successfully."); context.getLogger().info("Tool description " + this.tool); final int variablesCount = this.inputs.size() + this.outputs.size(); final Map<String, String> variables = new HashMap<>(variablesCount); // Set a TMPDIR variable that contain the path to the temporary directory variables.put(TMP_DIR_ENVIRONMENT_VARIABLE_NAME, context.getLocalTempDirectory().getAbsolutePath()); Data inData = null; // Extract from inputs variable command for (final ToolElement ptg : this.inputs.values()) { if (ptg.isFile()) { final ElementPort inPort = this.inputPorts.getPortElements(ptg.getName()); // Extract value from context from DataFormat final Data data = context.getInputData(inPort.portName); if (inData == null || isDataNameInDesign(inData, context)) { inData = data; } final DataFile inFile = inPort.getInputDataFile(context); variables.put(ptg.getName(), inFile.toFile().getAbsolutePath()); variables.put(removeNamespace(ptg.getName()), inFile.toFile().getAbsolutePath()); } else { // Variables setting with parameters file variables.put(ptg.getName(), ptg.getValue()); } } // Extract from outputs variable command for (final ToolElement ptg : this.outputs.values()) { if (ptg.isFile()) { final ElementPort outPort = this.outputPorts.getPortElements(ptg.getName()); // Extract value from context from DataFormat final DataFile outFile = outPort.getOutputDataFile(context, inData); variables.put(ptg.getName(), outFile.toFile().getAbsolutePath()); variables.put(removeNamespace(ptg.getName()), outFile.toFile().getAbsolutePath()); } else { // Variables setting with parameters file variables.put(ptg.getName(), ptg.getValue()); } } if (variables.isEmpty()) { throw new EoulsanException("No parameter settings."); } context.getLogger().info("Tool variable settings " + Joiner.on("\t").withKeyValueSeparator("=").join(variables)); // Create the Cheetah interpreter final CheetahInterpreter cheetahInterpreter = new CheetahInterpreter(this.tool.getCommandScript(), variables); final String commandLine = cheetahInterpreter.execute(); try { // Create the executor and interpret the command tag final ToolExecutor executor = new ToolExecutor(context, this.tool, commandLine); // Execute the command final ToolExecutorResult result = executor.execute(); isExecuted = true; // TODO return result; } catch (IOException e) { throw new EoulsanException(e); } } public String getDescription() { return "Launch tool galaxy " + this.tool.getToolName() + ", version " + this.tool.getToolVersion() + " with interpreter " + this.tool.getInterpreter(); } // // Private methods // /** * Create DOM instance from tool xml file. * @return DOM instance * @throws EoulsanException if an error occurs during creation instance */ private Document buildDOM() throws EoulsanException { try (InputStream in = this.toolXMLis) { // Read the XML file final DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); final Document doc = dBuilder.parse(in); doc.getDocumentElement().normalize(); return doc; } catch (final IOException | SAXException | ParserConfigurationException e) { throw new EoulsanException(e); } } /** * Check DOM validity. * @throws EoulsanException the Eoulsan exception */ private void checkDomValidity() throws EoulsanException { final Document localDoc = this.doc; for (final String tag : TAG_FORBIDDEN) { // Check tag exists in tool file if (!XMLUtils.getElementsByTagName(localDoc, tag).isEmpty()) { // Throw exception throw new EoulsanException("Parsing tool xml: unsupported tag " + tag); } } } /** * Test if a data name is a sample name. * @param data the data to test * @param context the step context * @return true the data name is a sample name */ private boolean isDataNameInDesign(final Data data, final TaskContext context) { final String dataName = data.getName(); for (Sample sample : context.getWorkflow().getDesign().getSamples()) { // TODO Change sample.getName() to sample.getId() with the new Design API if (Naming.toValidName(sample.getName()).equals(dataName)) { return true; } } return false; } // // Getters // /** * Gets the inputs. * @return the inputs */ public Map<String, ToolElement> getInputs() { return this.inputs; } /** * Gets the outputs. * @return the outputs */ public Map<String, ToolElement> getOutputs() { return this.outputs; } /** * Gets the in data format expected associated with variable found in command * line. * @return the in data format expected */ public Set<ToolElement> getInputDataElements() { return this.inputPorts.getStepElements(); } /** * Gets the out data format expected associated with variable found in command * line. * @return the out data format expected */ public Set<ToolElement> getOutputDataElements() { return this.outputPorts.getStepElements(); } /** * Gets the tool data. * @return the tool data */ public ToolData getToolData() { return this.tool; } @Override public String toString() { return "InterpreterToolGalaxy \n[inputs=" + Joiner.on("\n").withKeyValueSeparator("=").join(this.inputs) + ", \noutputs=" + Joiner.on("\n").withKeyValueSeparator("=").join(this.outputs) + ", \ntool=" + this.tool + "]"; } /** * Remove the namespace from the name of a variable. * @param variableName variable name * @return the variable name without the namespace */ public static String removeNamespace(final String variableName) { if (variableName == null) { return null; } final int dotIndex = variableName.lastIndexOf('.'); if (dotIndex == -1) { return variableName; } return variableName.substring(dotIndex + 1); } // // Constructor // /** * Public constructor. * @param in the input stream * @throws EoulsanException the Eoulsan exception */ public GalaxyToolInterpreter(final InputStream in) throws EoulsanException { checkNotNull(in, "in argument cannot be null"); this.toolXMLis = in; this.doc = buildDOM(); this.stepParameters = new HashMap<>(); this.tool = new ToolData(this.doc); checkDomValidity(); } }