/*
 * The MIT License
 *
 * Copyright (c) 2011 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * $Id$
 */
package picard.sam;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.filter.AlignedFilter;
import htsjdk.samtools.filter.FilteringIterator;
import htsjdk.samtools.filter.ReadNameFilter;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.SamOrBam;

import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.text.DecimalFormat;

/**
 * From a SAM or BAM file, produce a new SAM or BAM by filtering aligned reads or a list of read
 * names provided in a file (one readname per line)
 * <p/>
 * $Id$
 */
@CommandLineProgramProperties(
        usage = "Produces a new SAM or BAM file by including or excluding aligned reads " +
                "or a list of reads names supplied in the READ_LIST_FILE from the INPUT SAM or BAM file.\n",
        usageShort = "Creates a new SAM or BAM file by including or excluding aligned reads",
        programGroup = SamOrBam.class
)
public class FilterSamReads extends CommandLineProgram {

    private static final Log log = Log.getInstance(FilterSamReads.class);

    /**
     * The available filtering modes. Each constant carries a human-readable description that is
     * appended to its name by {@link #toString()}.
     */
    private static enum Filter {
        includeAligned("OUTPUT SAM/BAM will contain aligned reads only. INPUT SAM/BAM must be in queryname SortOrder. (Note that *both* first and second of paired reads must be aligned to be included in the OUTPUT SAM or BAM)"),
        excludeAligned("OUTPUT SAM/BAM will contain un-mapped reads only. INPUT SAM/BAM must be in queryname SortOrder. " +
                "(Note that *both* first and second of pair must be aligned to be excluded from the OUTPUT SAM or BAM)"),
        includeReadList("OUTPUT SAM/BAM will contain reads that are supplied in the READ_LIST_FILE file"),
        excludeReadList("OUTPUT bam will contain reads that are *not* supplied in the READ_LIST_FILE file");

        private final String description;

        Filter(final String description) {
            this.description = description;
        }

        @Override
        public String toString() {
            return this.name() + " [" + description + "]";
        }
    }

    @Option(doc = "The SAM or BAM file that will be filtered.",
            optional = false,
            shortName = StandardOptionDefinitions.INPUT_SHORT_NAME)
    public File INPUT;

    @Option(doc = "Filter.", optional = false)
    public Filter FILTER = null;

    @Option(doc = "Read List File containing reads that will be included or excluded from the OUTPUT SAM or BAM file.",
            optional = true,
            shortName = "RLF")
    public File READ_LIST_FILE;

    @Option(
            doc = "SortOrder of the OUTPUT SAM or BAM file, otherwise use the SortOrder of the INPUT file.",
            optional = true, shortName = "SO")
    public SAMFileHeader.SortOrder SORT_ORDER;

    @Option(
            doc = "Create .reads files (for debugging purposes)",
            optional = true)
    public boolean WRITE_READS_FILES = true;

    @Option(doc = "SAM or BAM file to write read excluded results to",
            optional = false, shortName = "O")
    public File OUTPUT;

    /**
     * Writes every record produced by the supplied iterator to OUTPUT.
     * <p/>
     * The output header is taken from INPUT (read through a separate reader so that the header of
     * the reader being iterated is not modified) and its sort order is replaced by SORT_ORDER when
     * that option is set. The iterator, the writer and the header reader are always closed, even
     * when writing fails part-way through.
     *
     * @param filteringIterator iterator over the records that passed the filter; it is closed
     *                          by this method
     */
    private void filterReads(final FilteringIterator filteringIterator) {
        final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Written");

        try {
            // get OUTPUT header from INPUT and overwrite it if necessary
            final SAMFileReader inputReader = new SAMFileReader(INPUT);
            try {
                // Capture the input sort order before the header is (possibly) modified below.
                final SAMFileHeader.SortOrder inputSortOrder = inputReader.getFileHeader().getSortOrder();
                final SAMFileHeader outputHeader = inputReader.getFileHeader();
                if (SORT_ORDER != null) {
                    outputHeader.setSortOrder(SORT_ORDER);
                }

                // Records may be written as-is only when the requested order equals the input order.
                final boolean presorted = inputSortOrder.equals(outputHeader.getSortOrder());
                log.info("Filtering [presorted=" + presorted + "] " + INPUT.getName() +
                        " -> OUTPUT=" + OUTPUT.getName() +
                        " [sortorder=" + outputHeader.getSortOrder().name() + "]");

                // create OUTPUT file
                final SAMFileWriter outputWriter =
                        new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader, presorted, OUTPUT);
                try {
                    while (filteringIterator.hasNext()) {
                        final SAMRecord rec = filteringIterator.next();
                        outputWriter.addAlignment(rec);
                        progress.record(rec);
                    }
                } finally {
                    // Release the handle on OUTPUT even on failure so that the caller can delete it.
                    outputWriter.close();
                }
            } finally {
                inputReader.close();
            }
        } finally {
            filteringIterator.close();
        }

        log.info(new DecimalFormat("#,###").format(progress.getCount()) +
                " SAMRecords written to " + OUTPUT.getName());
    }

    /**
     * Write out a ".reads" file for debugging purposes. The file is created next to OUTPUT, is
     * named after the base name of the given SAM or BAM file, and contains one line per record
     * holding that record's {@code toString()} value.
     *
     * @param samOrBamFile The SAM or BAM file whose records are going to be listed
     * @throws IOException if the reads file cannot be written
     */
    private void writeReadsFile(final File samOrBamFile) throws IOException {
        final File readsFile =
                new File(OUTPUT.getParentFile(), IOUtil.basename(samOrBamFile) + ".reads");
        IOUtil.assertFileIsWritable(readsFile);

        final SAMFileReader reader = new SAMFileReader(samOrBamFile);
        try {
            final BufferedWriter bw = IOUtil.openFileForBufferedWriting(readsFile, false);
            try {
                for (final SAMRecord rec : reader) {
                    bw.write(rec.toString() + "\n");
                }
            } finally {
                bw.close();
            }
        } finally {
            reader.close();
        }

        IOUtil.assertFileIsReadable(readsFile);
    }

    /**
     * Builds the filtering iterator that corresponds to the selected FILTER.
     *
     * @param reader an open reader over INPUT; the caller remains responsible for closing it
     * @return an iterator that yields only the records accepted by FILTER
     * @throws UnsupportedOperationException if FILTER has no implementation
     */
    private FilteringIterator createFilteringIterator(final SAMFileReader reader) {
        switch (FILTER) {
            case includeAligned:
                return new FilteringIterator(reader.iterator(), new AlignedFilter(true), true);
            case excludeAligned:
                return new FilteringIterator(reader.iterator(), new AlignedFilter(false), true);
            case includeReadList:
                return new FilteringIterator(reader.iterator(), new ReadNameFilter(READ_LIST_FILE, true));
            case excludeReadList:
                return new FilteringIterator(reader.iterator(), new ReadNameFilter(READ_LIST_FILE, false));
            default:
                throw new UnsupportedOperationException(FILTER.name() + " has not been implemented!");
        }
    }

    /**
     * Runs the filter: validates INPUT and OUTPUT, optionally writes the ".reads" debug files, and
     * writes the filtered records to OUTPUT.
     *
     * @return 0 on success; 1 if any step failed, in which case OUTPUT is deleted if it exists
     */
    @Override
    protected int doWork() {
        try {
            IOUtil.assertFileIsReadable(INPUT);
            IOUtil.assertFileIsWritable(OUTPUT);
            if (WRITE_READS_FILES) {
                writeReadsFile(INPUT);
            }

            // Keep a reference to the reader that backs the iterator so it can be closed afterwards.
            final SAMFileReader recordReader = new SAMFileReader(INPUT);
            try {
                filterReads(createFilteringIterator(recordReader));
            } finally {
                recordReader.close();
            }

            IOUtil.assertFileIsReadable(OUTPUT);
            if (WRITE_READS_FILES) {
                writeReadsFile(OUTPUT);
            }
            return 0;
        } catch (Exception e) {
            // Do not leave a partial or invalid OUTPUT behind.
            if (OUTPUT.exists() && !OUTPUT.delete()) {
                log.warn("Failed to delete " + OUTPUT.getAbsolutePath());
            }
            log.error(e, "Failed to filter " + INPUT.getName());
            return 1;
        }
    }

    /**
     * Rejects command lines where INPUT and OUTPUT are the same file, or where a read-list filter
     * is selected without a READ_LIST_FILE.
     *
     * @return the validation error messages, or the result of the superclass validation when
     *         these checks pass
     */
    @Override
    protected String[] customCommandLineValidation() {
        if (INPUT.equals(OUTPUT)) {
            return new String[]{"INPUT file and OUTPUT file must differ!"};
        }

        if ((FILTER.equals(Filter.includeReadList) ||
                FILTER.equals(Filter.excludeReadList)) &&
                READ_LIST_FILE == null) {
            return new String[]{"A READ_LIST_FILE must be specified when using the " + FILTER.name() + " option"};
        }

        return super.customCommandLineValidation();
    }

    /**
     * Stock main method.
     *
     * @param args main arguments
     */
    public static void main(final String[] args) {
        System.exit(new FilterSamReads().instanceMain(args));
    }
}