/* * The MIT License * * Copyright (c) 2009 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.analysis; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.reference.ReferenceSequence; import htsjdk.samtools.util.CollectionUtil; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Log; import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; import picard.cmdline.programgroups.Metrics; import picard.util.IlluminaUtil; import java.io.File; import java.util.List; import java.util.Set; /** * A command line tool to read a BAM file and produce standard alignment metrics that would be applicable to any alignment. * Metrics to include, but not limited to: * <ul> * <li>Total number of reads (total, period, no exclusions)</li> * <li>Total number of PF reads (PF == does not fail vendor check flag)</li> * <li>Number of PF noise reads (does not fail vendor check and has noise attr set)</li> * <li>Total aligned PF reads (any PF read that has a sequence and position)</li> * <li>High quality aligned PF reads (high quality == mapping quality >= 20)</li> * <li>High quality aligned PF bases (actual aligned bases, calculate off alignment blocks)</li> * <li>High quality aligned PF Q20 bases (subset of above where base quality >= 20)</li> * <li>Median mismatches in HQ aligned PF reads (how many aligned bases != ref on average)</li> * <li>Reads aligned in pairs (vs. reads aligned with mate unaligned/not present)</li> * <li>Read length (how to handle mixed lengths?)</li> * <li>Bad Cycles - how many machine cycles yielded combined no-call and mismatch rates of >= 80%</li> * <li>Strand balance - reads mapped to positive strand / total mapped reads</li> * </ul> * Metrics are written for the first read of a pair, the second read, and combined for the pair. * * @author Doug Voet (dvoet at broadinstitute dot org) */ @CommandLineProgramProperties( usage = CollectAlignmentSummaryMetrics.USAGE, usageShort = CollectAlignmentSummaryMetrics.USAGE, programGroup = Metrics.class ) public class CollectAlignmentSummaryMetrics extends SinglePassSamProgram { static final String USAGE = "Produces from a SAM or BAM a file containing summary alignment metrics"; private static final Log log = Log.getInstance(CollectAlignmentSummaryMetrics.class); // Usage and parameters @Option(doc="Paired end reads above this insert size will be considered chimeric along with inter-chromosomal pairs.") public int MAX_INSERT_SIZE = 100000; @Option(doc="List of adapter sequences to use when processing the alignment metrics") public List<String> ADAPTER_SEQUENCE = CollectionUtil.makeList( IlluminaUtil.IlluminaAdapterPair.SINGLE_END.get5PrimeAdapter(), IlluminaUtil.IlluminaAdapterPair.SINGLE_END.get3PrimeAdapter(), IlluminaUtil.IlluminaAdapterPair.PAIRED_END.get5PrimeAdapter(), IlluminaUtil.IlluminaAdapterPair.PAIRED_END.get3PrimeAdapter(), IlluminaUtil.IlluminaAdapterPair.INDEXED.get5PrimeAdapter(), IlluminaUtil.IlluminaAdapterPair.INDEXED.get3PrimeAdapter() ); @Option(shortName="LEVEL", doc="The level(s) at which to accumulate metrics. ") private Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS); @Option(shortName="BS", doc="Whether the SAM or BAM file consists of bisulfite sequenced reads. ") public boolean IS_BISULFITE_SEQUENCED = false; private AlignmentSummaryMetricsCollector collector; /** Required main method implementation. */ public static void main(final String[] argv) { new CollectAlignmentSummaryMetrics().instanceMainWithExit(argv); } /** Silly method that is necessary to give unit test access to call doWork() */ protected final int testDoWork() { return doWork(); } @Override protected void setup(final SAMFileHeader header, final File samFile) { IOUtil.assertFileIsWritable(OUTPUT); if (header.getSequenceDictionary().isEmpty()) { log.warn(INPUT.getAbsoluteFile() + " has no sequence dictionary. If any reads " + "in the file are aligned then alignment summary metrics collection will fail."); } final boolean doRefMetrics = REFERENCE_SEQUENCE != null; collector = new AlignmentSummaryMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), doRefMetrics, ADAPTER_SEQUENCE, MAX_INSERT_SIZE, IS_BISULFITE_SEQUENCED); } @Override protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) { collector.acceptRecord(rec, ref); } @Override protected void finish() { collector.finish(); final MetricsFile<AlignmentSummaryMetrics, Comparable<?>> file = getMetricsFile(); collector.addAllLevelsToFile(file); file.write(OUTPUT); } }