/* * The MIT License * * Copyright (c) 2016 Nils Homer * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.analysis; import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SamReader; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.util.Histogram; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.IntervalList; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.StringUtil; import picard.PicardException; import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; import picard.cmdline.programgroups.Alpha; import picard.filter.CountingFilter; import picard.filter.CountingPairedFilter; import picard.util.RExecutor; import java.io.File; import java.util.List; @CommandLineProgramProperties( usage = CollectWgsMetricsWithNonZeroCoverage.USAGE_SUMMARY + CollectWgsMetricsWithNonZeroCoverage.USAGE_DETAILS, usageShort = CollectWgsMetricsWithNonZeroCoverage.USAGE_SUMMARY, programGroup = Alpha.class ) public class CollectWgsMetricsWithNonZeroCoverage extends CollectWgsMetrics { static final String USAGE_SUMMARY = "Collect metrics about coverage and performance of whole genome sequencing (WGS) experiments. "; static final String USAGE_DETAILS = "This tool collects metrics about the percentages of reads that pass base- and mapping- quality " + "filters as well as coverage (read-depth) levels. Both minimum base- and mapping-quality values as well as the maximum " + "read depths (coverage cap) are user defined. This extends CollectWgsMetrics by including metrics related only to sites" + "with non-zero (>0) coverage." + "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>" + "<h4>Usage Example:</h4>" + "<pre>" + "java -jar picard.jar CollectWgsMetricsWithNonZeroCoverage \\<br /> " + " I=input.bam \\<br /> "+ " O=collect_wgs_metrics.txt \\<br /> " + " CHART=collect_wgs_metrics.pdf \\<br /> " + " R=reference_sequence.fasta " + "</pre>" + "Please see the " + "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetricsWithNonZeroCoverage.WgsMetricsWithNonZeroCoverage'>" + "WgsMetricsWithNonZeroCoverage</a> documentation for detailed explanations of the output metrics." + "<hr />"; @Option(shortName = "CHART", doc = "A file (with .pdf extension) to write the chart to.") public File CHART_OUTPUT; private final Log log = Log.getInstance(CollectWgsMetricsWithNonZeroCoverage.class); // Store this here since we need access to it in the doWork method private WgsMetricsWithNonZeroCoverageCollector collector = null; private SamReader samReader = null; /** Metrics for evaluating the performance of whole genome sequencing experiments. */ public static class WgsMetricsWithNonZeroCoverage extends WgsMetrics { public enum Category { WHOLE_GENOME, NON_ZERO_REGIONS } /** One of either WHOLE_GENOME or NON_ZERO_REGIONS */ @MergeByAssertEquals public Category CATEGORY; public WgsMetricsWithNonZeroCoverage() { super(); } public WgsMetricsWithNonZeroCoverage(final IntervalList intervals, final Histogram<Integer> highQualityDepthHistogram, final Histogram<Integer> unfilteredDepthHistogram, final double pctExcludedByMapq, final double pctExcludedByDupes, final double pctExcludedByPairing, final double pctExcludedByBaseq, final double pctExcludedByOverlap, final double pctExcludedByCapping, final double pctTotal, final int coverageCap, final Histogram<Integer> unfilteredBaseQHistogram, final int sampleSize) { super(intervals, highQualityDepthHistogram, unfilteredDepthHistogram, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq, pctExcludedByOverlap, pctExcludedByCapping, pctTotal, coverageCap, unfilteredBaseQHistogram, sampleSize); } } public static void main(final String[] args) { new CollectWgsMetrics().instanceMainWithExit(args); } @Override protected SamReader getSamReader() { if (this.samReader == null) { this.samReader = super.getSamReader(); } return this.samReader; } @Override protected int doWork() { IOUtil.assertFileIsWritable(CHART_OUTPUT); IOUtil.assertFileIsReadable(INPUT); // Initialize the SamReader, so the header is available prior to super.doWork, for getIntervalsToExamine call. */ getSamReader(); this.collector = new WgsMetricsWithNonZeroCoverageCollector(this, COVERAGE_CAP, getIntervalsToExamine()); super.doWork(); final List<SAMReadGroupRecord> readGroups = getSamFileHeader().getReadGroups(); final String plotSubtitle = (readGroups.size() == 1) ? StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary()) : ""; if (collector.areHistogramsEmpty()) { log.warn("No valid bases found in input file. No plot will be produced."); } else { final int rResult = RExecutor.executeFromClasspath("picard/analysis/wgsHistogram.R", OUTPUT.getAbsolutePath(), CHART_OUTPUT.getAbsolutePath(), INPUT.getName(), plotSubtitle); if (rResult != 0) { throw new PicardException("R script wgsHistogram.R failed with return code " + rResult); } } return 0; } @Override protected WgsMetrics generateWgsMetrics(final IntervalList intervals, final Histogram<Integer> highQualityDepthHistogram, final Histogram<Integer> unfilteredDepthHistogram, final double pctExcludedByMapq, final double pctExcludedByDupes, final double pctExcludedByPairing, final double pctExcludedByBaseq, final double pctExcludedByOverlap, final double pctExcludedByCapping, final double pctTotal, final int coverageCap, final Histogram<Integer> unfilteredBaseQHistogram, final int sampleSize) { return new WgsMetricsWithNonZeroCoverage( intervals, highQualityDepthHistogram, unfilteredDepthHistogram, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq, pctExcludedByOverlap, pctExcludedByCapping, pctTotal, coverageCap, unfilteredBaseQHistogram, sampleSize); } @Override protected WgsMetricsCollector getCollector(final int coverageCap, final IntervalList intervals) { assert(coverageCap == this.collector.coverageCap); return this.collector; } protected class WgsMetricsWithNonZeroCoverageCollector extends WgsMetricsCollector { Histogram<Integer> highQualityDepthHistogram; Histogram<Integer> highQualityDepthHistogramNonZero; public WgsMetricsWithNonZeroCoverageCollector(final CollectWgsMetricsWithNonZeroCoverage metrics, final int coverageCap, final IntervalList intervals) { super(metrics, coverageCap, intervals); } @Override public void addToMetricsFile(final MetricsFile<WgsMetrics, Integer> file, final boolean includeBQHistogram, final CountingFilter dupeFilter, final CountingFilter mapqFilter, final CountingPairedFilter pairFilter) { highQualityDepthHistogram = getDepthHistogram(); highQualityDepthHistogramNonZero = getDepthHistogramNonZero(); // calculate metrics the same way as in CollectWgsMetrics final WgsMetricsWithNonZeroCoverage metrics = (WgsMetricsWithNonZeroCoverage) getMetrics(dupeFilter, mapqFilter, pairFilter); metrics.CATEGORY = WgsMetricsWithNonZeroCoverage.Category.WHOLE_GENOME; // set count of the coverage-zero bin to 0 and re-calculate metrics // note we don't need to update the base quality histogram; there are no bases in the depth = 0 bin highQualityDepthHistogramArray[0] = 0; unfilteredDepthHistogramArray[0] = 0; final WgsMetricsWithNonZeroCoverage metricsNonZero = (WgsMetricsWithNonZeroCoverage) getMetrics(dupeFilter, mapqFilter, pairFilter); metricsNonZero.CATEGORY = WgsMetricsWithNonZeroCoverage.Category.NON_ZERO_REGIONS; file.addMetric(metrics); file.addMetric(metricsNonZero); file.addHistogram(highQualityDepthHistogram); file.addHistogram(highQualityDepthHistogramNonZero); if (includeBQHistogram) { addBaseQHistogram(file); } } protected Histogram<Integer> getDepthHistogram() { return getHistogram(highQualityDepthHistogramArray, "coverage", "count_WHOLE_GENOME"); } private Histogram<Integer> getDepthHistogramNonZero() { final Histogram<Integer> depthHistogram = new Histogram<>("coverage", "count_NON_ZERO_REGIONS"); // do not include the zero-coverage bin for (int i = 1; i < highQualityDepthHistogramArray.length; ++i) { depthHistogram.increment(i, highQualityDepthHistogramArray[i]); } return depthHistogram; } public boolean areHistogramsEmpty() { return (highQualityDepthHistogram.isEmpty() || highQualityDepthHistogramNonZero.isEmpty()); } } }