/* * The MIT License * * Copyright (c) 2015 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.analysis; import htsjdk.samtools.util.Histogram; import htsjdk.samtools.util.IntervalList; import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; import picard.cmdline.programgroups.Metrics; import static picard.cmdline.StandardOptionDefinitions.MINIMUM_MAPPING_QUALITY_SHORT_NAME; /** * Computes a number of metrics that are useful for evaluating coverage and performance of whole genome sequencing * experiments, same implementation as CollectWgsMetrics, with different defaults: lacks baseQ and mappingQ filters * and has much higher coverage cap. * * @author farjoun */ @CommandLineProgramProperties( usage = CollectRawWgsMetrics.USAGE_SUMMARY + CollectRawWgsMetrics.USAGE_DETAILS, usageShort = CollectRawWgsMetrics.USAGE_SUMMARY, programGroup = Metrics.class ) public class CollectRawWgsMetrics extends CollectWgsMetrics{ static final String USAGE_SUMMARY = "Collect whole genome sequencing-related metrics. "; static final String USAGE_DETAILS = "This tool computes metrics that are useful for evaluating coverage and performance " + "of whole genome sequencing experiments. These metrics include the percentages of reads that pass" + " minimal base- and mapping- quality filters as well as coverage (read-depth) levels. " + "<br /><br /> " + "The histogram output is optional and for a given run, displays two separate outputs on the y-axis while using a single set" + " of values for the x-axis. Specifically, the first column in the histogram table (x-axis) is labeled 'coverage' and " + "represents different possible coverage depths. However, it also represents the range of values for the base quality scores " + "and thus should probably be labeled 'sequence depth and base quality scores'. The second and third columns (y-axes) " + "correspond to the numbers of bases at a specific sequence depth 'count' and the numbers of bases at a particular base " + "quality score 'baseq_count' respectively." + "<br /><br />" + "Although similar to the CollectWgsMetrics tool, the default thresholds for CollectRawWgsMetrics are less stringent. " + "For example, the CollectRawWgsMetrics have base and mapping quality score thresholds set to '3' and '0' respectively, " + "while the CollectWgsMetrics tool has the default threshold values set to '20' (at time of writing). Nevertheless, both " + "tools enable the user to input specific threshold values." + "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>" + "<h4>Usage example:</h4>" + "<pre>" + "java -jar picard.jar CollectRawWgsMetrics \\<br />" + " I=input.bam \\<br />" + " O=raw_wgs_metrics.txt \\<br />" + " R=reference_sequence.fasta \\<br />" + " INCLUDE_BQ_HISTOGRAM=true" + "</pre>" + "<hr />" + "Please see " + "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics'>" + "the WgsMetrics documentation</a> for detailed explanations of the output metrics." + "<hr />"; @Option(shortName=MINIMUM_MAPPING_QUALITY_SHORT_NAME, doc="Minimum mapping quality for a read to contribute coverage.") public int MINIMUM_MAPPING_QUALITY = 0; @Option(shortName="Q", doc="Minimum base quality for a base to contribute coverage.") public int MINIMUM_BASE_QUALITY = 3; @Option(shortName="CAP", doc="Treat bases with coverage exceeding this value as if they had coverage at this value.") public int COVERAGE_CAP = 100000; @Option(doc="At positions with coverage exceeding this value, completely ignore reads that accumulate beyond this value (so that they will not be considered for PCT_EXC_CAPPED). Used to keep memory consumption in check, but could create bias if set too low") public int LOCUS_ACCUMULATION_CAP = 200000; // rename the class so that in the metric file it is annotated differently. public static class RawWgsMetrics extends WgsMetrics { public RawWgsMetrics() { super(); } public RawWgsMetrics(final IntervalList intervals, final Histogram<Integer> highQualityDepthHistogram, final Histogram<Integer> unfilteredDepthHistogram, final double pctExcludedByMapq, final double pctExcludedByDupes, final double pctExcludedByPairing, final double pctExcludedByBaseq, final double pctExcludedByOverlap, final double pctExcludedByCapping, final double pctTotal, final int coverageCap, final Histogram<Integer> unfilteredBaseQHistogram, final int sampleSize) { super(intervals, highQualityDepthHistogram, unfilteredDepthHistogram, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq, pctExcludedByOverlap, pctExcludedByCapping, pctTotal, coverageCap, unfilteredBaseQHistogram, sampleSize); } } @Override protected WgsMetrics generateWgsMetrics(final IntervalList intervals, final Histogram<Integer> highQualityDepthHistogram, final Histogram<Integer> unfilteredDepthHistogram, final double pctExcludedByMapq, final double pctExcludedByDupes, final double pctExcludedByPairing, final double pctExcludedByBaseq, final double pctExcludedByOverlap, final double pctExcludedByCapping, final double pctTotal, final int coverageCap, final Histogram<Integer> unfilteredBaseQHistogram, final int sampleSize) { return new RawWgsMetrics( intervals, highQualityDepthHistogram, unfilteredDepthHistogram, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq, pctExcludedByOverlap, pctExcludedByCapping, pctTotal, coverageCap, unfilteredBaseQHistogram, sampleSize); } }