/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.modules.mapping.local; import static fr.ens.biologie.genomique.eoulsan.EoulsanLogger.getLogger; import static fr.ens.biologie.genomique.eoulsan.core.ParallelizationMode.OWN_PARALLELIZATION; import static fr.ens.biologie.genomique.eoulsan.data.DataFormats.MAPPER_RESULTS_SAM; import static fr.ens.biologie.genomique.eoulsan.data.DataFormats.READS_FASTQ; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; import java.io.Writer; import java.nio.charset.StandardCharsets; import fr.ens.biologie.genomique.eoulsan.annotations.LocalOnly; import fr.ens.biologie.genomique.eoulsan.bio.FastqFormat; import fr.ens.biologie.genomique.eoulsan.bio.readsmappers.MapperProcess; import fr.ens.biologie.genomique.eoulsan.bio.readsmappers.SequenceReadsMapper; import fr.ens.biologie.genomique.eoulsan.core.InputPorts; import fr.ens.biologie.genomique.eoulsan.core.InputPortsBuilder; import fr.ens.biologie.genomique.eoulsan.core.ParallelizationMode; import fr.ens.biologie.genomique.eoulsan.core.TaskContext; import fr.ens.biologie.genomique.eoulsan.core.TaskResult; import fr.ens.biologie.genomique.eoulsan.core.TaskStatus; import fr.ens.biologie.genomique.eoulsan.data.Data; import fr.ens.biologie.genomique.eoulsan.data.DataFile; import fr.ens.biologie.genomique.eoulsan.modules.mapping.AbstractReadsMapperModule; import fr.ens.biologie.genomique.eoulsan.modules.mapping.MappingCounters; import fr.ens.biologie.genomique.eoulsan.util.FileUtils; import fr.ens.biologie.genomique.eoulsan.util.LocalReporter; import fr.ens.biologie.genomique.eoulsan.util.Reporter; import fr.ens.biologie.genomique.eoulsan.util.StringUtils; /** * This class define a module for reads mapping. * @since 1.0 * @author Laurent Jourdren * @author Maria Bernard */ @LocalOnly public class ReadsMapperLocalModule extends AbstractReadsMapperModule { @Override public ParallelizationMode getParallelizationMode() { return OWN_PARALLELIZATION; } @Override public InputPorts getInputPorts() { final InputPortsBuilder builder = new InputPortsBuilder(); builder.addPort(READS_PORT_NAME, READS_FASTQ); builder.addPort(MAPPER_INDEX_PORT_NAME, getMapper().getArchiveFormat()); return builder.create(); } @Override public TaskResult execute(final TaskContext context, final TaskStatus status) { try { // Create the reporter final Reporter reporter = new LocalReporter(); final DataFile archiveIndexFile = context.getInputData(getMapper().getArchiveFormat()).getDataFile(); final File indexDir = new File(StringUtils .filenameWithoutExtension(archiveIndexFile.toUri().getPath())); // Get input data final Data inData = context.getInputData(READS_FASTQ); // Get output data final Data outData = context.getOutputData(MAPPER_RESULTS_SAM, inData); // Define final output SAM file final File samFile = outData.getDataFile().toFile(); // Get FASTQ format final FastqFormat fastqFormat = inData.getMetadata().getFastqFormat(); // Initialize the mapper final SequenceReadsMapper mapper = initMapper(context, fastqFormat, archiveIndexFile, indexDir, reporter); if (inData.getDataFileCount() < 1) { throw new IOException("No reads file found."); } if (inData.getDataFileCount() > 2) { throw new IOException( "Cannot handle more than 2 reads files at the same time."); } String logMsg = ""; // Single end mode if (inData.getDataFileCount() == 1) { // Get the source final DataFile inFile = context.getInputData(READS_FASTQ).getDataFile(0); getLogger().info("Map file: " + inFile + ", Fastq format: " + fastqFormat + ", use " + mapper.getMapperName() + " with " + mapper.getThreadsNumber() + " threads option"); // Single read mapping final MapperProcess process = mapper.mapSE(inFile); // Parse output of the mapper parseSAMResults(process.getStout(), samFile, reporter); // Wait the end of the process and do cleanup process.waitFor(); logMsg = "Mapping reads in " + fastqFormat + " with " + mapper.getMapperName() + " (" + inData.getName() + ", " + inFile.getName() + ")"; } // Paired end mode if (inData.getDataFileCount() == 2) { // Get the source final DataFile inFile1 = context.getInputData(READS_FASTQ).getDataFile(0); final DataFile inFile2 = context.getInputData(READS_FASTQ).getDataFile(1); getLogger().info("Map files: " + inFile1 + "," + inFile2 + ", Fastq format: " + fastqFormat + ", use " + mapper.getMapperName() + " with " + mapper.getThreadsNumber() + " threads option"); // Single read mapping final MapperProcess process = mapper.mapPE(inFile1, inFile2); // Parse output of the mapper parseSAMResults(process.getStout(), samFile, reporter); // Wait the end of the process and do cleanup process.waitFor(); logMsg = "Mapping reads in " + fastqFormat + " with " + mapper.getMapperName() + " (" + inData.getName() + ", " + inFile1.getName() + "," + inFile2.getName() + ")"; } // Throw an exception if an exception has occurred while mapping mapper.throwMappingException(); // Set the description of the context status.setDescription(logMsg); // Add counters for this sample to log file status.setCounters(reporter, COUNTER_GROUP); } catch (FileNotFoundException e) { return status.createTaskResult(e, "File not found: " + e.getMessage()); } catch (IOException e) { return status.createTaskResult(e, "Error while mapping reads: " + e.getMessage()); } return status.createTaskResult(); } /** * Initialize the mapper to use. * @param context Eoulsan context * @param format FASTQ format * @param archiveIndexFile genome index for the mapper as a ZIP file * @param indexDir uncompressed directory for the genome index * @param reporter reporter * @throws IOException */ private SequenceReadsMapper initMapper(final TaskContext context, final FastqFormat format, final DataFile archiveIndexFile, final File indexDir, final Reporter reporter) throws IOException { final SequenceReadsMapper mapper = getMapper(); // Set FASTQ format mapper.setFastqFormat(format); // Set mapper argument if needed if (getMapperArguments() != null) { mapper.setMapperArguments(getMapperArguments()); } // Get the number of threads to use int mapperThreads = getMapperLocalThreads(); if (mapperThreads > Runtime.getRuntime().availableProcessors() || mapperThreads < 1) { mapperThreads = Runtime.getRuntime().availableProcessors(); } // Set the number of threads mapper.setThreadsNumber(mapperThreads); // Set mapper temporary directory mapper.setTempDirectory(context.getLocalTempDirectory()); // Set mapper executable temporary directory mapper.setExecutablesTempDirectory( context.getSettings().getExecutablesTempDirectoryFile()); // Init mapper mapper.init(archiveIndexFile, indexDir, reporter, COUNTER_GROUP); // Delete the index directory at the end of the workflow context.getWorkflow().deleteOnExit(new DataFile(indexDir)); return mapper; } /** * Parse the output the mapper (in SAM format). * @param samFileInputStream SAM input stream * @param samFile output file to be written * @param reporter Eoulsan reporter for the step * @throws IOException if an error occurs while reading the sAM file */ private void parseSAMResults(final InputStream samFileInputStream, final File samFile, final Reporter reporter) throws IOException { String line; // Parse SAM result file final BufferedReader readerResults = FileUtils.createBufferedReader(samFileInputStream); final Writer writer = new OutputStreamWriter(new FileOutputStream(samFile), StandardCharsets.ISO_8859_1); int entriesParsed = 0; while ((line = readerResults.readLine()) != null) { writer.write(line); writer.write('\n'); final String trimmedLine = line.trim(); if ("".equals(trimmedLine) || trimmedLine.startsWith("@")) { continue; } final int tabPos = trimmedLine.indexOf('\t'); if (tabPos != -1) { entriesParsed++; reporter.incrCounter(COUNTER_GROUP, MappingCounters.OUTPUT_MAPPING_ALIGNMENTS_COUNTER.counterName(), 1); } } readerResults.close(); writer.close(); getLogger().info(entriesParsed + " entries parsed in " + getMapperName() + " output file"); } }