package fr.ens.biologie.genomique.eoulsan.modules.mapping.local; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import fr.ens.biologie.genomique.eoulsan.annotations.LocalOnly; import fr.ens.biologie.genomique.eoulsan.bio.ReadSequence; import fr.ens.biologie.genomique.eoulsan.bio.io.FastqWriter; import fr.ens.biologie.genomique.eoulsan.core.TaskContext; import fr.ens.biologie.genomique.eoulsan.core.TaskResult; import fr.ens.biologie.genomique.eoulsan.core.TaskStatus; import fr.ens.biologie.genomique.eoulsan.data.Data; import fr.ens.biologie.genomique.eoulsan.data.DataFile; import fr.ens.biologie.genomique.eoulsan.data.DataFormats; import fr.ens.biologie.genomique.eoulsan.modules.mapping.AbstractSAM2FASTQModule; import fr.ens.biologie.genomique.eoulsan.util.LocalReporter; import fr.ens.biologie.genomique.eoulsan.util.Reporter; import htsjdk.samtools.SAMFileHeader.SortOrder; import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; /** * This class define a module for converting SAM files into FASTQ. * @since 2.0 * @author Laurent Jourdren */ @LocalOnly public class SAM2FASTQLocalModule extends AbstractSAM2FASTQModule { @Override public TaskResult execute(final TaskContext context, final TaskStatus status) { try { // Create the reporter final Reporter reporter = new LocalReporter(); // Get input SAM data final Data inData = context.getInputData(DataFormats.MAPPER_RESULTS_SAM); // Get input SAM TMP data File samTmpFile = File.createTempFile("samTmp", ".sam", context.getLocalTempDirectory()); // Get output FASTQ data final Data outData = context.getOutputData(DataFormats.READS_FASTQ, inData); final DataFile samFile = inData.getDataFile(); final int paired = sortConvert(samFile, samTmpFile, reporter, context.getLocalTempDirectory()); final DataFile fastqFile1 = outData.getDataFile(0); final DataFile fastqFile2 = paired == 3 ? outData.getDataFile(1) : null; writeConvert(samTmpFile, fastqFile1, fastqFile2, reporter); // Set the description of the context status.setDescription("Convert alignments (" + inData.getName() + "," + outData.getName() + ")"); // Add counters for this sample to log file status.setCounters(reporter, COUNTER_GROUP); return status.createTaskResult(); } catch (final IOException e) { return status.createTaskResult(e); } } /** * Convert SAM file to FASTQ * @param samDataFile input SAM file * @param fastqDataFile1 output FASTQ file 1 * @param fastqDataFile2 output FASTQ file 2 * @param reporter reporter * @throws IOException if an error occurs */ private static void writeConvert(final File samDataFile, final DataFile fastqDataFile1, final DataFile fastqDataFile2, final Reporter reporter) throws IOException { // Open sam file final SamReader samReader = SamReaderFactory.makeDefault() .open(SamInputResource.of(new FileInputStream(samDataFile))); // Open fastq file final FastqWriter fastqWriter1 = new FastqWriter(fastqDataFile1.create()); final FastqWriter fastqWriter2 = fastqDataFile2 == null ? null : new FastqWriter(fastqDataFile2.create()); int id = 0; String seq1 = null; String seq2 = null; String qual1 = null; String qual2 = null; String currentRecordId = null; for (final SAMRecord samRecord : samReader) { if (currentRecordId != null && !currentRecordId.equals(samRecord.getReadName())) { id++; reporter.incrCounter(COUNTER_GROUP, "sorted records", 1); writeFastq(id, fastqWriter1, fastqWriter2, currentRecordId, seq1, qual1, seq2, qual2); seq1 = seq2 = qual1 = qual2 = null; } if (samRecord.getReadPairedFlag() && !samRecord.getFirstOfPairFlag()) { seq2 = samRecord.getReadString(); qual2 = samRecord.getBaseQualityString(); } else { seq1 = samRecord.getReadString(); qual1 = samRecord.getBaseQualityString(); } currentRecordId = samRecord.getReadName(); } if (seq1 != null && seq2 != null) { id++; reporter.incrCounter(COUNTER_GROUP, "sorted records", 1); writeFastq(id, fastqWriter1, fastqWriter2, currentRecordId, seq1, qual1, seq2, qual2); } samReader.close(); fastqWriter1.close(); if (fastqWriter2 != null) { fastqWriter2.close(); } } private static int sortConvert(final DataFile samDataFile, final File samFileTmp, final Reporter reporter, final File tmpDir) throws IOException { // Open sam file final SamReader samReader = SamReaderFactory.makeDefault() .open(SamInputResource.of(samDataFile.open())); // Force sort samReader.getFileHeader().setSortOrder(SortOrder.queryname); // Open sam file final SAMFileWriter samWriter = new SAMFileWriterFactory() .setCreateIndex(false).setTempDirectory(tmpDir) .makeSAMWriter(samReader.getFileHeader(), false, samFileTmp); boolean firstPair = false; boolean secondPair = false; for (final SAMRecord samRecord : samReader) { if (!firstPair && samRecord.getReadPairedFlag() && samRecord.getFirstOfPairFlag()) { firstPair = true; } if (!secondPair && samRecord.getReadPairedFlag() && samRecord.getSecondOfPairFlag()) { secondPair = true; } samRecord.setReadName(samRecord.getReadName().split(" ")[0]); samWriter.addAlignment(samRecord); reporter.incrCounter(COUNTER_GROUP, "converted records", 1); } samWriter.close(); samReader.close(); int result = 0; if (!firstPair && !secondPair) { result = 0; } if (firstPair && !secondPair) { result = 1; } if (!firstPair && secondPair) { result = 2; } if (firstPair && secondPair) { result = 3; } return result; } public static final void writeFastq(int id, FastqWriter fastqWriter1, FastqWriter fastqWriter2, String currentRecordId, String seq1, String qual1, String seq2, String qual2) throws IOException { ReadSequence read1 = seq1 == null ? null : new ReadSequence(id, currentRecordId, seq1, qual1); ReadSequence read2 = seq2 == null ? null : new ReadSequence(id, currentRecordId, seq2, qual2); if (fastqWriter2 != null) { if (seq1 != null && seq2 != null) { fastqWriter1.write(read1); fastqWriter2.write(read2); } } else { if (seq1 != null) { fastqWriter1.write(read1); } else { fastqWriter1.write(read2); } } } }