/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.modules.mapping.local;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMFormatException;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamInputResource;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import static fr.ens.biologie.genomique.eoulsan.EoulsanLogger.getLogger;
import static fr.ens.biologie.genomique.eoulsan.modules.mapping.MappingCounters.ALIGNMENTS_REJECTED_BY_FILTERS_COUNTER;
import static fr.ens.biologie.genomique.eoulsan.modules.mapping.MappingCounters.ALIGNMENTS_WITH_INVALID_SAM_FORMAT;
import static fr.ens.biologie.genomique.eoulsan.modules.mapping.MappingCounters.INPUT_ALIGNMENTS_COUNTER;
import static fr.ens.biologie.genomique.eoulsan.modules.mapping.MappingCounters.OUTPUT_FILTERED_ALIGNMENTS_COUNTER;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import com.google.common.base.Joiner;
import fr.ens.biologie.genomique.eoulsan.EoulsanException;
import fr.ens.biologie.genomique.eoulsan.annotations.LocalOnly;
import fr.ens.biologie.genomique.eoulsan.bio.SAMComparator;
import fr.ens.biologie.genomique.eoulsan.bio.alignmentsfilters.MultiReadAlignmentsFilter;
import fr.ens.biologie.genomique.eoulsan.bio.alignmentsfilters.ReadAlignmentsFilter;
import fr.ens.biologie.genomique.eoulsan.bio.alignmentsfilters.ReadAlignmentsFilterBuffer;
import fr.ens.biologie.genomique.eoulsan.core.TaskContext;
import fr.ens.biologie.genomique.eoulsan.core.TaskResult;
import fr.ens.biologie.genomique.eoulsan.core.TaskStatus;
import fr.ens.biologie.genomique.eoulsan.data.Data;
import fr.ens.biologie.genomique.eoulsan.data.DataFile;
import fr.ens.biologie.genomique.eoulsan.data.DataFormats;
import fr.ens.biologie.genomique.eoulsan.modules.mapping.AbstractSAMFilterModule;
import fr.ens.biologie.genomique.eoulsan.util.LocalReporter;
import fr.ens.biologie.genomique.eoulsan.util.Reporter;
/**
* This class define a Step for alignments filtering.
* @since 1.0
* @author Laurent Jourdren
* @author Claire Wallon
*/
@LocalOnly
public class SAMFilterLocalModule extends AbstractSAMFilterModule {
@Override
public TaskResult execute(final TaskContext context,
final TaskStatus status) {
// Create the reporter
final Reporter reporter = new LocalReporter();
try {
// Get the read filter
final MultiReadAlignmentsFilter filter =
getAlignmentsFilter(reporter, COUNTER_GROUP);
getLogger().info("Read alignments filters to apply: "
+ Joiner.on(", ").join(filter.getFilterNames()));
filterSample(context, reporter, status, filter);
} catch (IOException e) {
status.createTaskResult(e,
"Error while filtering alignments: " + e.getMessage());
} catch (EoulsanException e) {
status.createTaskResult(e,
"Error while initializing filter: " + e.getMessage());
}
return status.createTaskResult();
}
/**
* Filter a sample data in single-end mode and in paired-end mode.
* @param context Eoulsan context
* @param reporter reporter to use
* @param status task status
* @param filter alignments filter to use
* @throws IOException if an error occurs while filtering reads
*/
private static void filterSample(final TaskContext context,
final Reporter reporter, final TaskStatus status,
final ReadAlignmentsFilter filter) throws IOException {
// Get input and output data
final Data inData = context.getInputData(DataFormats.MAPPER_RESULTS_SAM);
final Data outData =
context.getOutputData(DataFormats.MAPPER_RESULTS_SAM, inData);
// Get the source
final DataFile inFile = inData.getDataFile();
// Get the dest
final DataFile outFile = outData.getDataFile();
// Filter alignments in single-end mode or in paired-end mode
filterFile(inFile, outFile, reporter, filter,
context.getLocalTempDirectory());
// Set the description of the context
status.setDescription(
"Filter SAM file (" + inData.getName() + ", " + inFile.getName() + ")");
// Add counters for this sample to log file
status.setCounters(reporter, COUNTER_GROUP);
}
/**
* Filter a file in single-end mode or paired-end mode.
* @param inFile input file
* @param outFile output file
* @param reporter reporter to use
* @param filter alignments filter to use
* @param tmpDir temporary directory
* @throws IOException if an error occurs while filtering data
*/
private static void filterFile(final DataFile inFile, final DataFile outFile,
final Reporter reporter, final ReadAlignmentsFilter filter,
final File tmpDir) throws IOException {
final List<SAMRecord> records = new ArrayList<>();
int counterInput = 0;
int counterOutput = 0;
int counterInvalid = 0;
boolean pairedEnd = false;
// Creation of a buffer object to store alignments with the same read name
final ReadAlignmentsFilterBuffer rafb =
new ReadAlignmentsFilterBuffer(filter);
getLogger().info("Filter SAM file: " + inFile);
// Get reader
final SamReader inputSam =
SamReaderFactory.makeDefault().open(SamInputResource.of(inFile.open()));
// Get Writer
final SAMFileWriter outputSam =
new SAMFileWriterFactory().setTempDirectory(tmpDir)
.makeSAMWriter(inputSam.getFileHeader(), false, outFile.create());
try {
for (SAMRecord samRecord : inputSam) {
// single-end or paired-end mode ?
if (counterInput == 0) {
if (samRecord.getReadPairedFlag()) {
pairedEnd = true;
}
}
counterInput++;
// storage and filtering of all the alignments of a read in the list
// "records"
if (!rafb.addAlignment(samRecord)) {
records.clear();
records.addAll(rafb.getFilteredAlignments());
// sort alignments of the current read
Collections.sort(records, new SAMComparator());
// writing records
for (SAMRecord r : records) {
outputSam.addAlignment(r);
counterOutput++;
}
rafb.addAlignment(samRecord);
}
}
// treatment of the last record
records.clear();
records.addAll(rafb.getFilteredAlignments());
// sort alignments of the last read
Collections.sort(records, new SAMComparator());
// writing records
for (SAMRecord r : records) {
outputSam.addAlignment(r);
counterOutput++;
}
} catch (SAMFormatException e) {
counterInvalid++;
}
// paired-end mode
if (pairedEnd) {
int nbInput = counterInput / 2;
int nbOutput = counterOutput / 2;
reporter.incrCounter(COUNTER_GROUP,
INPUT_ALIGNMENTS_COUNTER.counterName(), nbInput);
reporter.incrCounter(COUNTER_GROUP,
OUTPUT_FILTERED_ALIGNMENTS_COUNTER.counterName(), nbOutput);
reporter.incrCounter(COUNTER_GROUP,
ALIGNMENTS_WITH_INVALID_SAM_FORMAT.counterName(), counterInvalid / 2);
reporter.incrCounter(COUNTER_GROUP,
ALIGNMENTS_REJECTED_BY_FILTERS_COUNTER.counterName(),
nbInput - nbOutput);
}
// single-end mode
else {
reporter.incrCounter(COUNTER_GROUP,
INPUT_ALIGNMENTS_COUNTER.counterName(), counterInput);
reporter.incrCounter(COUNTER_GROUP,
OUTPUT_FILTERED_ALIGNMENTS_COUNTER.counterName(), counterOutput);
reporter.incrCounter(COUNTER_GROUP,
ALIGNMENTS_WITH_INVALID_SAM_FORMAT.counterName(), counterInvalid);
reporter.incrCounter(COUNTER_GROUP,
ALIGNMENTS_REJECTED_BY_FILTERS_COUNTER.counterName(),
counterInput - counterOutput);
}
// Close files
inputSam.close();
outputSam.close();
}
}