package picard.analysis;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;

import java.io.File;
import java.util.Arrays;
import java.util.Collection;

/**
 * Super class that is designed to provide some consistent structure between subclasses that
 * simply iterate once over a coordinate sorted BAM and collect information from the records
 * as they go in order to produce some kind of output.
 *
 * Subclasses implement {@link #setup(SAMFileHeader, File)}, {@link #acceptRead(SAMRecord, ReferenceSequence)}
 * and {@link #finish()}; the iteration itself is driven by
 * {@link #makeItSo(File, File, boolean, long, Collection)}.
 *
 * @author Tim Fennell
 */
public abstract class SinglePassSamProgram extends CommandLineProgram {
    @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="Input SAM or BAM file.")
    public File INPUT;

    @Option(shortName="O", doc="File to write the output to.")
    public File OUTPUT;

    @Option(shortName=StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc="Reference sequence fasta", optional=true)
    public File REFERENCE_SEQUENCE;

    @Option(doc="If true (default), then the sort order in the header file will be ignored.",
            shortName = StandardOptionDefinitions.ASSUME_SORTED_SHORT_NAME)
    public boolean ASSUME_SORTED = true;

    /** A value of zero (the default) or less disables the limit; see the check in makeItSo(). */
    @Option(doc="Stop after processing N reads, mainly for debugging.")
    public long STOP_AFTER = 0;

    private static final Log log = Log.getInstance(SinglePassSamProgram.class);

    /**
     * Final implementation of doWork() that checks and loads the input and optionally reference
     * sequence files and then runs the subclass through the setup() acceptRead() and finish() steps.
     *
     * @return always 0; failures are reported by exceptions thrown from makeItSo()
     */
    @Override
    protected final int doWork() {
        makeItSo(INPUT, REFERENCE_SEQUENCE, ASSUME_SORTED, STOP_AFTER, Arrays.asList(this));
        return 0;
    }

    /**
     * Opens the input, validates it, and makes a single pass over its records, feeding every
     * record to each of the supplied programs. The steps are, in order:
     * <ol>
     *   <li>open the input and (optionally) the reference, comparing sequence dictionaries when the
     *       input's dictionary is not empty;</li>
     *   <li>check the sort order declared in the header;</li>
     *   <li>call setup() on every program;</li>
     *   <li>call acceptRead() on every program for every record, until the input is exhausted or
     *       one of the early-termination conditions is met;</li>
     *   <li>close the input and call finish() on every program.</li>
     * </ol>
     * The input reader is closed even when one of the steps above throws; finish() is only
     * invoked after a pass that completed without an exception.
     *
     * @param input             the SAM or BAM file to read; must be readable
     * @param referenceSequence the reference sequence file, or null if no reference should be loaded
     * @param assumeSorted      if true, a header sort order other than coordinate only produces a warning;
     *                          if false it causes a PicardException
     * @param stopAfter         if greater than zero, stop once this many records have been processed
     * @param programs          the programs that should receive the records
     * @throws PicardException  if the header's sort order is not coordinate and assumeSorted is false
     */
    public static void makeItSo(final File input,
                                final File referenceSequence,
                                final boolean assumeSorted,
                                final long stopAfter,
                                final Collection<SinglePassSamProgram> programs) {

        // Setup the standard inputs
        IOUtil.assertFileIsReadable(input);
        final SAMFileReader in = new SAMFileReader(input);

        // Everything from here on may throw (validation, subclass callbacks, I/O while iterating),
        // so the reader is released in a finally block rather than only on the success path.
        try {
            // Optionally load up the reference sequence and double check sequence dictionaries
            final ReferenceSequenceFileWalker walker = openReferenceWalker(referenceSequence, in.getFileHeader());

            // Check on the sort order of the BAM file
            checkSortOrder(input, in.getFileHeader(), assumeSorted);

            // Call the abstract setup method!
            boolean anyUseNoRefReads = false;
            for (final SinglePassSamProgram program : programs) {
                program.setup(in.getFileHeader(), input);
                anyUseNoRefReads = anyUseNoRefReads || program.usesNoRefReads();
            }

            final ProgressLogger progress = new ProgressLogger(log);

            for (final SAMRecord rec : in) {
                // Records without a reference index, or runs without a reference file, get a null 'ref'.
                final ReferenceSequence ref;
                if (walker == null || rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                    ref = null;
                }
                else {
                    ref = walker.get(rec.getReferenceIndex());
                }

                for (final SinglePassSamProgram program : programs) {
                    program.acceptRead(rec, ref);
                }

                progress.record(rec);

                // See if we need to terminate early?
                if (stopAfter > 0 && progress.getCount() >= stopAfter) {
                    break;
                }

                // And see if we're into the unmapped reads at the end. Note that the record which
                // triggers this check has already been handed to every program above.
                if (!anyUseNoRefReads && rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                    break;
                }
            }
        }
        finally {
            in.close();
        }

        for (final SinglePassSamProgram program : programs) {
            program.finish();
        }
    }

    /**
     * Builds the reference walker for the given reference file and, when the input header carries a
     * non-empty sequence dictionary, asserts that it equals the reference's dictionary.
     *
     * @param referenceSequence the reference file, or null
     * @param header            the header of the input SAM/BAM file
     * @return a walker over the reference, or null when referenceSequence is null
     */
    private static ReferenceSequenceFileWalker openReferenceWalker(final File referenceSequence,
                                                                   final SAMFileHeader header) {
        if (referenceSequence == null) {
            return null;
        }

        IOUtil.assertFileIsReadable(referenceSequence);
        final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(referenceSequence);

        if (!header.getSequenceDictionary().isEmpty()) {
            SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(),
                                                         walker.getSequenceDictionary());
        }

        return walker;
    }

    /**
     * Verifies that the header declares coordinate sort order. When it does not, either logs a
     * warning (assumeSorted is true) or throws (assumeSorted is false).
     *
     * @param input        the input file, used only for the error message
     * @param header       the header of the input SAM/BAM file
     * @param assumeSorted whether a non-coordinate sort order should be tolerated
     * @throws PicardException if the sort order is not coordinate and assumeSorted is false
     */
    private static void checkSortOrder(final File input, final SAMFileHeader header, final boolean assumeSorted) {
        final SortOrder sort = header.getSortOrder();
        if (sort == SortOrder.coordinate) {
            return;
        }

        if (assumeSorted) {
            log.warn("File reports sort order '" + sort + "', assuming it's coordinate sorted anyway.");
        }
        else {
            throw new PicardException("File " + input.getAbsolutePath() + " should be coordinate sorted but " +
                                      "the header says the sort order is " + sort + ". If you believe the file " +
                                      "to be coordinate sorted you may pass ASSUME_SORTED=true");
        }
    }

    /** Can be overridden and set to false if the section of unmapped reads at the end of the file isn't needed. */
    protected boolean usesNoRefReads() { return true; }

    /** Should be implemented by subclasses to do one-time initialization work. */
    protected abstract void setup(final SAMFileHeader header, final File samFile);

    /**
     * Should be implemented by subclasses to accept SAMRecords one at a time.
     * If the read has a reference sequence and a reference sequence file was supplied to the program
     * it will be passed as 'ref'. Otherwise 'ref' may be null.
     */
    protected abstract void acceptRead(final SAMRecord rec, final ReferenceSequence ref);

    /** Should be implemented by subclasses to do one-time finalization work. */
    protected abstract void finish();
}