/* * The MIT License * * Copyright (c) 2016 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.analysis; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.reference.ReferenceSequence; import htsjdk.samtools.util.AbstractLocusInfo; import htsjdk.samtools.util.AbstractRecordAndOffset; import htsjdk.samtools.util.Histogram; import htsjdk.samtools.util.IntervalList; import htsjdk.samtools.util.SequenceUtil; import picard.filter.CountingFilter; import picard.filter.CountingPairedFilter; /** * Class for collecting data on reference coverage, base qualities and excluded bases from one AbstractLocusInfo object for * CollectWgsMetrics. * <p> * The shared code for forming result for CollectWgsMetrics is abstracted into this class. * Classes that extend this collector implement their logic in addInfo() method. * @author Mariia_Zueva@epam.com, EPAM Systems, Inc. <www.epam.com> */ public abstract class AbstractWgsMetricsCollector<T extends AbstractRecordAndOffset> { /** * The source CollectWgsMetrics object */ final CollectWgsMetrics collectWgsMetrics; /** Count of sites with a given depth of coverage. Includes all but quality 2 bases. * We draw depths from this histogram when we calculate the theoretical het sensitivity. */ protected final long[] unfilteredDepthHistogramArray; /** Count of bases observed with a given base quality. Includes all but quality 2 bases. * We draw bases from this histogram when we calculate the theoretical het sensitivity. */ protected final long[] unfilteredBaseQHistogramArray; /** * Count of sites with a given depth of coverage. * Excludes bases with quality below MINIMUM_BASE_QUALITY (default 20). */ protected final long[] highQualityDepthHistogramArray; /** * Number of aligned bases that were filtered out because they were of low base quality (default is < 20). */ long basesExcludedByBaseq = 0; /** * Number of aligned bases that were filtered out because they were the second observation from an insert with overlapping reads. */ long basesExcludedByOverlap = 0; /** * Number of aligned bases that were filtered out because they would have raised coverage above the capped value (default cap = 250x). */ long basesExcludedByCapping = 0; /** * Positions with coverage exceeding this value are treated as if they had coverage at this value */ protected final int coverageCap; protected final IntervalList intervals; /** * This value indicates that processing will stop after specified int the metric amount of genomic bases. */ private final boolean usingStopAfter; /** * The number of processed genomic bases */ protected long counter = 0; /** * Creates a collector and initializes the inner data structures * * @param collectWgsMetrics CollectWgsMetrics, that creates this collector * @param coverageCap coverage cap */ AbstractWgsMetricsCollector(CollectWgsMetrics collectWgsMetrics, final int coverageCap, final IntervalList intervals) { if (coverageCap <= 0) { throw new IllegalArgumentException("Coverage cap must be positive."); } this.collectWgsMetrics = collectWgsMetrics; unfilteredDepthHistogramArray = new long[coverageCap + 1]; highQualityDepthHistogramArray = new long[coverageCap + 1]; unfilteredBaseQHistogramArray = new long[Byte.MAX_VALUE]; this.coverageCap = coverageCap; this.intervals = intervals; this.usingStopAfter = collectWgsMetrics.STOP_AFTER > 0; } /** * Accumulates the data from AbstractLocusInfo in inner structures * @param info {@link htsjdk.samtools.util.AbstractLocusInfo} with aligned to reference position reads * @param ref {@link htsjdk.samtools.reference.ReferenceSequence} * @param referenceBaseN true if current the value of reference base represents a no call */ public abstract void addInfo(final AbstractLocusInfo<T> info, final ReferenceSequence ref, boolean referenceBaseN); /** * Adds collected metrics and depth histogram to file * @param file MetricsFile for result of collector's work * @param dupeFilter counting filter for duplicate reads * @param mapqFilter counting filter for mapping quality * @param pairFilter counting filter for reads without a mapped mate pair */ public void addToMetricsFile(final MetricsFile<CollectWgsMetrics.WgsMetrics, Integer> file, final boolean includeBQHistogram, final CountingFilter dupeFilter, final CountingFilter mapqFilter, final CountingPairedFilter pairFilter) { final CollectWgsMetrics.WgsMetrics metrics = getMetrics(dupeFilter, mapqFilter, pairFilter); // add them to the file file.addMetric(metrics); file.addHistogram(getHighQualityDepthHistogram()); if (includeBQHistogram) addBaseQHistogram(file); } protected void addBaseQHistogram(final MetricsFile<CollectWgsMetrics.WgsMetrics, Integer> file) { file.addHistogram(getUnfilteredBaseQHistogram()); } protected Histogram<Integer> getHighQualityDepthHistogram() { return getHistogram(highQualityDepthHistogramArray, "coverage", "high_quality_coverage_count"); } protected Histogram<Integer> getUnfilteredDepthHistogram() { return getHistogram(unfilteredDepthHistogramArray, "coverage", "unfiltered_coverage_count"); } protected Histogram<Integer> getUnfilteredBaseQHistogram() { return getHistogram(unfilteredBaseQHistogramArray, "baseq", "unfiltered_baseq_count"); } protected Histogram<Integer> getHistogram(final long[] array, final String binLabel, final String valueLabel) { final Histogram<Integer> histogram = new Histogram<>(binLabel, valueLabel); for (int i = 0; i < array.length; ++i) { histogram.increment(i, array[i]); } return histogram; } /** * Creates CollectWgsMetrics.WgsMetrics - the object holding the result of CollectWgsMetrics * * @param dupeFilter counting filter for duplicate reads * @param mapqFilter counting filter for mapping quality * @param pairFilter counting filter for reads without a mapped mate pair * @return CollectWgsMetrics.WgsMetrics with set fields */ protected CollectWgsMetrics.WgsMetrics getMetrics(final CountingFilter dupeFilter, final CountingFilter mapqFilter, final CountingPairedFilter pairFilter) { return collectWgsMetrics.generateWgsMetrics( this.intervals, getHighQualityDepthHistogram(), getUnfilteredDepthHistogram(), collectWgsMetrics.getBasesExcludedBy(mapqFilter), collectWgsMetrics.getBasesExcludedBy(dupeFilter), collectWgsMetrics.getBasesExcludedBy(pairFilter), basesExcludedByBaseq, basesExcludedByOverlap, basesExcludedByCapping, coverageCap, getUnfilteredBaseQHistogram(), collectWgsMetrics.SAMPLE_SIZE ); } /** * @return true, of number of processed loci exceeded the threshold, otherwise false */ boolean isTimeToStop(final long processedLoci) { return usingStopAfter && processedLoci > collectWgsMetrics.STOP_AFTER - 1; } /** * Sets the counter to the current number of processed loci. Counter, must be updated * from outside, since we are skipping a no call reference positions outside of the collector * * @param counter number of processed loci */ public void setCounter(long counter) { this.counter = counter; } /** * Checks if reference base at given position is unknown. * * @param position to check the base * @param ref reference sequence * @return true if reference base at position represents a no call, otherwise false */ boolean isReferenceBaseN(final int position, final ReferenceSequence ref) { final byte base = ref.getBases()[position - 1]; return SequenceUtil.isNoCall(base); } }