/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.modules; import static com.google.common.base.Preconditions.checkNotNull; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import com.google.common.collect.Sets; import fr.ens.biologie.genomique.eoulsan.EoulsanException; import fr.ens.biologie.genomique.eoulsan.Globals; import fr.ens.biologie.genomique.eoulsan.annotations.NoLog; import fr.ens.biologie.genomique.eoulsan.annotations.ReuseModuleInstance; import fr.ens.biologie.genomique.eoulsan.core.DataUtils; import fr.ens.biologie.genomique.eoulsan.core.Naming; import fr.ens.biologie.genomique.eoulsan.core.OutputPort; import fr.ens.biologie.genomique.eoulsan.core.OutputPorts; import fr.ens.biologie.genomique.eoulsan.core.OutputPortsBuilder; import fr.ens.biologie.genomique.eoulsan.core.ParallelizationMode; import fr.ens.biologie.genomique.eoulsan.core.Parameter; import fr.ens.biologie.genomique.eoulsan.core.StepConfigurationContext; import fr.ens.biologie.genomique.eoulsan.core.TaskContext; import fr.ens.biologie.genomique.eoulsan.core.TaskResult; import fr.ens.biologie.genomique.eoulsan.core.TaskStatus; import fr.ens.biologie.genomique.eoulsan.core.Version; import fr.ens.biologie.genomique.eoulsan.data.Data; import fr.ens.biologie.genomique.eoulsan.data.DataFile; import fr.ens.biologie.genomique.eoulsan.data.DataFormat; import fr.ens.biologie.genomique.eoulsan.data.DataFormatRegistry; import fr.ens.biologie.genomique.eoulsan.data.DataFormats; import fr.ens.biologie.genomique.eoulsan.data.protocols.DataProtocol; import fr.ens.biologie.genomique.eoulsan.data.protocols.StorageDataProtocol; import fr.ens.biologie.genomique.eoulsan.design.Design; import fr.ens.biologie.genomique.eoulsan.design.DesignUtils; import fr.ens.biologie.genomique.eoulsan.design.Sample; import fr.ens.biologie.genomique.eoulsan.io.CompressionType; /** * This class define a design module. * @since 2.0 * @author Laurent Jourdren */ @ReuseModuleInstance @NoLog public class DesignModule extends AbstractModule { public static final String MODULE_NAME = "design"; private final Design design; private final CheckerModule checkerModule; private OutputPorts outputPorts; private Set<String> designPortNames = new HashSet<>(); private Set<String> samplePortNames = new HashSet<>(); @Override public String getName() { return MODULE_NAME; } @Override public Version getVersion() { return Globals.APP_VERSION; } @Override public OutputPorts getOutputPorts() { return this.outputPorts; } @Override public ParallelizationMode getParallelizationMode() { return ParallelizationMode.NOT_NEEDED; } @Override public void configure(final StepConfigurationContext context, final Set<Parameter> stepParameters) throws EoulsanException { // Get the metadata keys of the design and the samples final Set<String> designMetadataKeys = this.design.getMetadata().keySet(); final Set<String> sampleMetadataKeys = Sets.newHashSet(DesignUtils.getAllSamplesMetadataKeys(this.design)); final OutputPortsBuilder builder = new OutputPortsBuilder(); for (DataFormat format : DataFormatRegistry.getInstance().getAllFormats()) { // Search in Design metadata if (designMetadataKeys.contains(format.getDesignMetadataKeyName())) { final String key = format.getDesignMetadataKeyName(); builder.addPort(key, !format.isOneFilePerAnalysis(), format, compressionTypeOfDesignMetadata(key)); this.designPortNames.add(key); } // Search in Sample metadata if (sampleMetadataKeys.contains(format.getSampleMetadataKeyName())) { final String key = format.getSampleMetadataKeyName(); builder.addPort(key, !format.isOneFilePerAnalysis(), format, compressionTypeOfField(key)); this.samplePortNames.add(key); } } // Create the output ports this.outputPorts = builder.create(); // Configure Checker input ports this.checkerModule.configureInputPorts(this.outputPorts); } /** * Get the compression of a field of the design. The compression returned is * the first compression found in the field. * @param fieldname the name of the field * @return a compression type */ private CompressionType compressionTypeOfField(final String fieldname) { for (Sample sample : this.design.getSamples()) { final String fieldValue = sample.getMetadata().get(fieldname); if (fieldValue != null) { final DataFile file = new DataFile(fieldValue); final CompressionType fileCompression = file.getCompressionType(); if (fileCompression != CompressionType.NONE) { return fileCompression; } } } return CompressionType.NONE; } /** * Get the compression of a metadata of the design. * @param key the key of the metadata * @return a compression type */ private CompressionType compressionTypeOfDesignMetadata(final String key) { final String value = this.design.getMetadata().get(key); if (value != null) { final DataFile file = getUnderLyingDataFile(new DataFile(value)); final CompressionType fileCompression = file.getCompressionType(); if (fileCompression != CompressionType.NONE) { return fileCompression; } } return CompressionType.NONE; } @Override public TaskResult execute(final TaskContext context, final TaskStatus status) { final Set<DataFile> files = new HashSet<>(); final Set<String> dataNames = new HashSet<>(); for (String portName : this.designPortNames) { final OutputPort port = getOutputPorts().getPort(portName); // Create DataFile object(s) List<DataFile> dataFiles = getDesignDatafilesPort(this.design, port); // Check if file has not been already processed DataFile f = dataFiles.get(0); if (files.contains(f)) { continue; } files.add(f); // Get the data object final Data data = context.getOutputData(port.getName(), port.getName()); // Set the DataFile(s) in the Data object if (port.getFormat().getMaxFilesCount() == 1) { // Mono file data DataUtils.setDataFile(data, f); } else { // Multi-file data DataUtils.setDataFiles(data, dataFiles); } } for (Sample sample : this.design.getSamples()) { for (String portName : this.samplePortNames) { final OutputPort port = getOutputPorts().getPort(portName); // Create DataFile object(s) List<DataFile> dataFiles = getSampleDatafilesPort(sample, port); // Check if file has not been already processed DataFile f = dataFiles.get(0); if (files.contains(f)) { continue; } files.add(f); // Define the name of the port final String dataListName; if (port.isList() || port.getFormat().getMaxFilesCount() > 1) { dataListName = port.getName(); } else { dataListName = Naming.toValidName(f.getBasename()); } // Get the data object final Data dataList = context.getOutputData(port.getName(), dataListName); final Data data; // Set metadata if (port.isList()) { final String dataName = Naming.toValidName(sample.getId()); // Check if the data name has already used if (dataNames.contains(dataName)) { return status.createTaskResult(new EoulsanException( "The design contains two or more sample with the same name after renaming: " + dataName + " ( original sample name: " + sample.getId() + ")")); } dataNames.add(dataName); // Add a new data to the list data = dataList.addDataToList(dataName); // Set the metadata DataUtils.setDataMetaData(data, sample); } else { data = dataList; } // Set the DataFile(s) in the Data object if (port.getFormat().getMaxFilesCount() == 1) { // Mono file data DataUtils.setDataFile(data, f); } else { // Multi-file data DataUtils.setDataFiles(data, dataFiles); // Set paired-end metadata if (DataFormats.READS_FASTQ.equals(port.getFormat()) && dataFiles.size() > 1) { data.getMetadata().setPairedEnd(true); } } } } return status.createTaskResult(); } /** * Create a list of data files from a sample and a port * @param sample the sample * @param port the port * @return a list with the data files */ private List<DataFile> getSampleDatafilesPort(final Sample sample, final OutputPort port) { checkNotNull(sample, "sample argument cannot be null"); checkNotNull(port, "port argument cannot be null"); final List<DataFile> result = new ArrayList<>(); // Get the design field name for the port String fieldName = null; for (String f : sample.getMetadata().keySet()) { if (port.getName().equals(f.trim().toLowerCase())) { fieldName = f; break; } } // Get the values in the design for the sample final List<String> fieldValues = sample.getMetadata().getAsList(fieldName); for (String value : fieldValues) { result.add(new DataFile(value)); } return result; } /** * Create a list of data files from a sample and a port * @param design the design * @param port the port * @return a list with the data files */ private List<DataFile> getDesignDatafilesPort(final Design design, final OutputPort port) { checkNotNull(design, "design argument cannot be null"); checkNotNull(port, "port argument cannot be null"); final List<DataFile> result = new ArrayList<>(); // Get the design field name for the port String fieldName = null; for (String f : design.getMetadata().keySet()) { if (port.getName().equals(f.trim().toLowerCase())) { fieldName = f; break; } } // Get the values in the design for the sample final List<String> fieldValues = design.getMetadata().getAsList(fieldName); for (String value : fieldValues) { result.add(new DataFile(value)); } return result; } /** * Get the underlying file if the file is available via a StorageDataProtocol. * @param file the input file * @return the underlying file if exist or the original file */ private static DataFile getUnderLyingDataFile(final DataFile file) { if (file == null) { return null; } try { DataProtocol protocol = file.getProtocol(); if (protocol != null && protocol instanceof StorageDataProtocol) { return ((StorageDataProtocol) protocol).getUnderLyingData(file); } } catch (IOException e) { return file; } return file; } // // Constructor // /** * Constructor. * @param design design * @param checkeModule the checker module instance */ public DesignModule(final Design design, final CheckerModule checkeModule) { checkNotNull(design, "design argument cannot be null"); checkNotNull(checkeModule, "checkerModule argument cannot be null"); this.design = design; this.checkerModule = checkeModule; } }