/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*/
/**
* $Id$
*/
package picard.sam;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.filter.*;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.Interval;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.SamOrBam;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
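/**
* From a SAM or BAM file, produce a new SAM or BAM file containing only the reads selected by the
* chosen filter: reads kept or dropped by alignment status, reads kept or dropped by a list of read
* names provided in a file (one read name per line), reads accepted by a JavaScript expression, or
* read pairs that overlap a list of intervals.
* <p/>
* $Id$
*/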
@CommandLineProgramProperties(
usage = FilterSamReads.USAGE_SUMMARY + FilterSamReads.USAGE_DETAILS,
usageShort = FilterSamReads.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class FilterSamReads extends CommandLineProgram {
/**
* One-line description of the tool. The class annotation uses it as the short usage text and as
* the leading part of the full usage text.
*/
static final String USAGE_SUMMARY = "Subset read data from a SAM or BAM file";
/**
* Long-form usage text that the class annotation appends to USAGE_SUMMARY. The text carries HTML
* markup (h4, pre, br, hr) and an example command line.
*/
static final String USAGE_DETAILS = "This tool takes a SAM or BAM file and subsets it to a new file that either excludes or " +
"only includes either aligned or unaligned reads (set using FILTER), or specific reads based on a list of reads names " +
"supplied in the READ_LIST_FILE. " +
"" +
"<h4>Usage example:</h4>" +
"<pre>" +
"java -jar picard.jar FilterSamReads \\<br /> " +
" I=input.bam \\ <br /> " +
" O=output.bam \\<br /> " +
" READ_LIST_FILE=read_names.txt" +
" FILTER=filter_value" +
"</pre> " +
"For information on the SAM format, please see: http://samtools.sourceforge.net" +
"<hr />";
/** Logger shared by all methods of this class; also drives the progress logger in filterReads. */
private static final Log log = Log.getInstance(FilterSamReads.class);
/**
 * Filtering modes that can be selected with the FILTER option. Each constant carries the
 * explanatory text that is shown next to its name.
 */
protected /* <- used in test */ enum Filter {
    includeAligned("OUTPUT SAM/BAM will contain aligned reads only. INPUT SAM/BAM must be in queryname SortOrder. (Note that *both* first and second of paired reads must be aligned to be included in the OUTPUT SAM or BAM)"),
    excludeAligned("OUTPUT SAM/BAM will contain un-mapped reads only. INPUT SAM/BAM must be in queryname SortOrder. (Note that *both* first and second of pair must be aligned to be excluded from the OUTPUT SAM or BAM)"),
    includeReadList("OUTPUT SAM/BAM will contain reads that are supplied in the READ_LIST_FILE file"),
    excludeReadList("OUTPUT bam will contain reads that are *not* supplied in the READ_LIST_FILE file"),
    includeJavascript("OUTPUT bam will contain reads that hava been accepted by the JAVASCRIPT_FILE script."),
    includePairedIntervals("OUTPUT SAM/BAM will contain any reads (and their mate) that overlap with an interval. INPUT SAM/BAM and INTERVAL_LIST must be in coordinate SortOrder. Only aligned reads will be output.");

    /** Explanatory text attached to the constant. */
    private final String description;

    Filter(final String text) {
        description = text;
    }

    /**
     * Renders the constant as its name followed by the description in square brackets.
     *
     * @return {@code name [description]}
     */
    @Override
    public String toString() {
        return String.format("%s [%s]", name(), description);
    }
}
/** Required input file. doWork() opens it for reading and filterReads() reuses its header for OUTPUT. */
@Option(doc = "The SAM or BAM file that will be filtered.",
optional = false,
shortName = StandardOptionDefinitions.INPUT_SHORT_NAME)
public File INPUT;
/** Required filtering mode; doWork() switches on this value to pick the record filter. */
@Option(doc = "Filter.", optional = false)
public Filter FILTER = null;
/**
* File handed to ReadNameFilter. customCommandLineValidation() rejects the command line when this
* is null and FILTER is includeReadList or excludeReadList.
*/
@Option(doc = "Read List File containing reads that will be included or excluded from the OUTPUT SAM or BAM file.",
optional = true,
shortName = "RLF")
public File READ_LIST_FILE;
/**
* File whose intervals are handed to IntervalKeepPairFilter. customCommandLineValidation() rejects
* the command line when this is null and FILTER is includePairedIntervals.
* NOTE(review): the option text mentions exclusion, but no Filter value in this file excludes by
* interval - confirm the wording.
*/
@Option(doc = "Interval List File containing intervals that will be included or excluded from the OUTPUT SAM or BAM file.",
optional = true,
shortName = "IL")
public File INTERVAL_LIST;
/**
* When non-null, filterReads() writes this sort order into the OUTPUT header; when null the sort
* order found in the INPUT header is kept.
*/
@Option(
doc = "SortOrder of the OUTPUT SAM or BAM file, otherwise use the SortOrder of the INPUT file.",
optional = true, shortName = "SO")
public SAMFileHeader.SortOrder SORT_ORDER;
/**
* Defaults to true. When true, doWork() calls writeReadsFile() for INPUT before filtering and for
* OUTPUT after filtering; the resulting ".reads" files are placed in OUTPUT's parent directory.
*/
@Option(
doc = "Create .reads files (for debugging purposes)",
optional = true)
public boolean WRITE_READS_FILES = true;
/** Required destination file. doWork() deletes it when filtering fails with an exception. */
@Option(doc = "SAM or BAM file to write read excluded results to",
optional = false, shortName = "O")
public File OUTPUT;
/** Script file passed to JavascriptSamRecordFilter when FILTER is includeJavascript. */
@Option(shortName = "JS",
doc = "Filters a SAM or BAM file with a javascript expression using the java javascript-engine. "
+ " The script puts the following variables in the script context: "
+ " 'record' a SamRecord ( https://samtools.github.io/htsjdk/javadoc/htsjdk/htsjdk/samtools/SAMRecord.html ) and "
+ " 'header' a SAMFileHeader ( https://samtools.github.io/htsjdk/javadoc/htsjdk/htsjdk/samtools/SAMFileHeader.html )."
+ " Last value of the script should be a boolean to tell wether we should accept or reject the record.",
optional = true)
public File JAVASCRIPT_FILE = null;
/**
 * Copies every record produced by {@code filteringIterator} into OUTPUT.
 * <p/>
 * The OUTPUT header is the INPUT header, with its sort order replaced by SORT_ORDER when that
 * option is set. The writer is told the records are presorted only when the resulting sort order
 * equals the sort order declared in the INPUT header.
 * <p/>
 * Both the iterator and the writer are closed before this method returns, including when an
 * exception is thrown while copying.
 *
 * @param filteringIterator iterator over the records that passed the selected filter; it is
 *                          always closed by this method
 * @throws UnsupportedOperationException if FILTER is includePairedIntervals and the INPUT header
 *                                       does not declare coordinate sort order
 */
private void filterReads(final FilteringSamIterator filteringIterator) {
    try {
        // get OUTPUT header from INPUT and overwrite it if necessary
        final SAMFileHeader fileHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT);
        final SAMFileHeader.SortOrder inputSortOrder = fileHeader.getSortOrder();

        // The requirement is on the INPUT ordering, so it must be checked against the order read
        // from the INPUT header, not against the (possibly overridden) OUTPUT order.
        if (FILTER == Filter.includePairedIntervals && inputSortOrder != SAMFileHeader.SortOrder.coordinate) {
            throw new UnsupportedOperationException("Input must be coordinate sorted to use includePairedIntervals");
        }

        if (SORT_ORDER != null) {
            fileHeader.setSortOrder(SORT_ORDER);
        }

        final boolean presorted = inputSortOrder.equals(fileHeader.getSortOrder());
        log.info("Filtering [presorted=" + presorted + "] " + INPUT.getName() + " -> OUTPUT=" +
                OUTPUT.getName() + " [sortorder=" + fileHeader.getSortOrder().name() + "]");

        // create OUTPUT file
        final SAMFileWriter outputWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(fileHeader, presorted, OUTPUT);
        final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Written");
        try {
            while (filteringIterator.hasNext()) {
                final SAMRecord rec = filteringIterator.next();
                outputWriter.addAlignment(rec);
                progress.record(rec);
            }
        } finally {
            // release the output handle even when copying fails part-way
            outputWriter.close();
        }

        log.info(new DecimalFormat("#,###").format(progress.getCount()) + " SAMRecords written to " + OUTPUT.getName());
    } finally {
        filteringIterator.close();
    }
}
/**
 * Write out a text dump of a SAM or BAM file for debugging purposes.
 * <p/>
 * The dump is named after the base name of {@code samOrBamFile} with a ".reads" suffix and is
 * created in the parent directory of OUTPUT. It contains one line per record, holding that
 * record's {@code toString()} value.
 *
 * @param samOrBamFile The SAM or BAM file whose records are dumped
 * @throws IOException if writing the dump or closing either file fails
 */
private void writeReadsFile(final File samOrBamFile) throws IOException {
    final File readsFile =
            new File(OUTPUT.getParentFile(), IOUtil.basename(samOrBamFile) + ".reads");
    IOUtil.assertFileIsWritable(readsFile);

    // try-with-resources closes the writer first, then the reader, even if a write fails
    try (final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(samOrBamFile);
         final BufferedWriter bw = IOUtil.openFileForBufferedWriting(readsFile, false)) {
        for (final SAMRecord rec : reader) {
            bw.write(rec.toString() + "\n");
        }
    }

    IOUtil.assertFileIsReadable(readsFile);
}
/**
 * Loads the intervals stored in an interval-list file.
 *
 * @param intervalFile the interval-list file; it is asserted to be readable before parsing
 * @return the intervals reported by the parsed interval list
 */
private List<Interval> getIntervalList (final File intervalFile) throws IOException {
    IOUtil.assertFileIsReadable(intervalFile);
    final IntervalList parsedIntervals = IntervalList.fromFile(intervalFile);
    return parsedIntervals.getIntervals();
}
/**
 * Runs the tool: validates INPUT and OUTPUT, optionally dumps the INPUT records, builds the
 * filtering iterator selected by FILTER, writes the surviving records to OUTPUT and optionally
 * dumps the OUTPUT records.
 * <p/>
 * The reader opened on INPUT is closed before this method returns. On any exception OUTPUT is
 * deleted if it exists and the failure is logged.
 *
 * @return 0 on success, 1 if any exception was raised
 */
@Override
protected int doWork() {
    try {
        IOUtil.assertFileIsReadable(INPUT);
        IOUtil.assertFileIsWritable(OUTPUT);
        if (WRITE_READS_FILES) {
            writeReadsFile(INPUT);
        }

        List<Interval> intervalList = new ArrayList<>();
        if (INTERVAL_LIST != null) {
            intervalList = getIntervalList(INTERVAL_LIST);
        }

        // filterReads() closes the iterator; the reader itself has to be closed here.
        try (final SamReader samReader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT)) {
            final FilteringSamIterator filteringIterator;
            switch (FILTER) {
                case includeAligned:
                    filteringIterator = new FilteringSamIterator(samReader.iterator(),
                            new AlignedFilter(true), true);
                    break;
                case excludeAligned:
                    filteringIterator = new FilteringSamIterator(samReader.iterator(),
                            new AlignedFilter(false), true);
                    break;
                case includeReadList:
                    filteringIterator = new FilteringSamIterator(samReader.iterator(),
                            new ReadNameFilter(READ_LIST_FILE, true));
                    break;
                case excludeReadList:
                    filteringIterator = new FilteringSamIterator(samReader.iterator(),
                            new ReadNameFilter(READ_LIST_FILE, false));
                    break;
                case includeJavascript:
                    filteringIterator = new FilteringSamIterator(samReader.iterator(),
                            new JavascriptSamRecordFilter(
                                    JAVASCRIPT_FILE,
                                    samReader.getFileHeader()));
                    break;
                case includePairedIntervals:
                    filteringIterator = new FilteringSamIterator(samReader.iterator(),
                            new IntervalKeepPairFilter(intervalList), false);
                    break;
                default:
                    throw new UnsupportedOperationException(FILTER.name() + " has not been implemented!");
            }

            filterReads(filteringIterator);
        }

        IOUtil.assertFileIsReadable(OUTPUT);
        if (WRITE_READS_FILES) {
            writeReadsFile(OUTPUT);
        }
        return 0;

    } catch (Exception e) {
        // do not leave a partial OUTPUT behind after a failure
        if (OUTPUT.exists() && !OUTPUT.delete()) {
            log.warn("Failed to delete " + OUTPUT.getAbsolutePath());
        }

        log.error(e, "Failed to filter " + INPUT.getName());
        return 1;
    }
}
/**
 * Checks option combinations that the option parser cannot express on its own.
 * <p/>
 * Rejected combinations, reported one at a time in this order: INPUT equal to OUTPUT; a read-list
 * filter without READ_LIST_FILE; includePairedIntervals without INTERVAL_LIST; includeJavascript
 * without JAVASCRIPT_FILE.
 *
 * @return a single-element array holding the error message for the first failed check, otherwise
 *         whatever the superclass validation returns
 */
@Override
protected String[] customCommandLineValidation() {
    if (INPUT.equals(OUTPUT)) {
        return new String[]{"INPUT file and OUTPUT file must differ!"};
    }

    if ((FILTER == Filter.includeReadList || FILTER == Filter.excludeReadList) &&
            READ_LIST_FILE == null) {
        return new String[]{"A READ_LIST_FILE must be specified when using the " + FILTER.name() + " option"};
    }

    if (FILTER == Filter.includePairedIntervals && INTERVAL_LIST == null) {
        return new String[]{"A INTERVAL_LIST must be specified when using the " + FILTER.name() + " option"};
    }

    // Without this check a missing script only surfaces later, when doWork() builds the filter.
    if (FILTER == Filter.includeJavascript && JAVASCRIPT_FILE == null) {
        return new String[]{"A JAVASCRIPT_FILE must be specified when using the " + FILTER.name() + " option"};
    }

    return super.customCommandLineValidation();
}
/**
 * Command-line entry point: runs a fresh FilterSamReads instance and exits the JVM with the
 * status it reports.
 *
 * @param args main arguments
 */
public static void main(final String[] args) {
    final int exitStatus = new FilterSamReads().instanceMain(args);
    System.exit(exitStatus);
}
}