/* * The MIT License * * Copyright (c) 2009 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.analysis; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.metrics.MetricBase; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.reference.ReferenceSequence; import htsjdk.samtools.util.IOUtil; import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.Metrics; import java.io.File; /** * Command line program to calibrate quality yield metrics * * @author Martha Borkan */ @CommandLineProgramProperties( usage = CollectQualityYieldMetrics.USAGE_SUMMARY + CollectQualityYieldMetrics.USAGE_DETAILS, usageShort = CollectQualityYieldMetrics.USAGE_SUMMARY, programGroup = Metrics.class ) public class CollectQualityYieldMetrics extends SinglePassSamProgram { static final String USAGE_SUMMARY = "Collect metrics about reads that pass quality thresholds and Illumina-specific filters. "; static final String USAGE_DETAILS = "This tool evaluates the overall quality of reads within a bam file containing one read group. " + "The output indicates the total numbers of bases within a read group that pass a minimum base quality score threshold and " + "(in the case of Illumina data) pass Illumina quality filters as described in the " + "<a href='https://www.broadinstitute.org/gatk/guide/article?id=6329'>GATK Dictionary entry</a>. " + "<br />" + "<h4>Note on base quality score options</h4>" + "If the quality score of read bases has been modified in a previous data processing step such as " + "<a href='https://www.broadinstitute.org/gatk/guide/article?id=44'>GATK Base Recalibration</a> " + "and an OQ tag is available, this tool can be set to use the OQ value instead of the primary quality value for the evaluation. " + "<br /><br />" + "Note that the default behaviour of this program changed as of November 6th 2015 to no longer include secondary and " + "supplemental alignments in the computation. <br />" + "<h4>Usage Example:</h4>" + "<pre>" + "java -jar picard.jar CollectQualityYieldMetrics \\<br /> " + " I=input.bam \\<br /> "+ " O=quality_yield_metrics.txt \\<br />" + "</pre>" + "Please see " + "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectQualityYieldMetrics.QualityYieldMetrics'>" + "the QualityYieldMetrics documentation</a> for details and explanations of the output metrics." + "<hr />"; @Option(shortName = StandardOptionDefinitions.USE_ORIGINAL_QUALITIES_SHORT_NAME, doc = "If available in the OQ tag, use the original quality scores " + "as inputs instead of the quality scores in the QUAL field.") public boolean USE_ORIGINAL_QUALITIES = true; @Option(doc="If true, include bases from secondary alignments in metrics. Setting to true may cause double-counting " + "of bases if there are secondary alignments in the input file.") public boolean INCLUDE_SECONDARY_ALIGNMENTS = false; @Option(doc="If true, include bases from supplemental alignments in metrics. Setting to true may cause double-counting " + "of bases if there are supplemental alignments in the input file.") public boolean INCLUDE_SUPPLEMENTAL_ALIGNMENTS = false; // The metrics to be accumulated private final QualityYieldMetrics metrics = new QualityYieldMetrics(); /** Ensure that we get all reads regardless of alignment status. */ @Override protected boolean usesNoRefReads() { return true; } @Override protected void setup(final SAMFileHeader header, final File samFile) { IOUtil.assertFileIsWritable(OUTPUT); } @Override protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) { if (!INCLUDE_SECONDARY_ALIGNMENTS && rec.getNotPrimaryAlignmentFlag()) return; if (!INCLUDE_SUPPLEMENTAL_ALIGNMENTS && rec.getSupplementaryAlignmentFlag()) return; final int length = rec.getReadLength(); metrics.TOTAL_READS++; metrics.TOTAL_BASES += length; final boolean isPfRead = !rec.getReadFailsVendorQualityCheckFlag(); if (isPfRead) { metrics.PF_READS++; metrics.PF_BASES += length; } final byte[] quals; if (USE_ORIGINAL_QUALITIES) { byte[] tmp = rec.getOriginalBaseQualities(); if (tmp == null) tmp = rec.getBaseQualities(); quals = tmp; } else { quals = rec.getBaseQualities(); } // add up quals, and quals >= 20 for (final int qual : quals) { metrics.Q20_EQUIVALENT_YIELD += qual; if (qual >= 20) metrics.Q20_BASES++; if (qual >= 30) metrics.Q30_BASES++; if (isPfRead) { metrics.PF_Q20_EQUIVALENT_YIELD += qual; if (qual >= 20) metrics.PF_Q20_BASES++; if (qual >= 30) metrics.PF_Q30_BASES++; } } } @Override protected void finish() { final MetricsFile<QualityYieldMetrics, Integer> metricsFile = getMetricsFile(); metrics.READ_LENGTH = metrics.TOTAL_READS == 0 ? 0 : (int) (metrics.TOTAL_BASES / metrics.TOTAL_READS); metrics.Q20_EQUIVALENT_YIELD = metrics.Q20_EQUIVALENT_YIELD / 20; metrics.PF_Q20_EQUIVALENT_YIELD = metrics.PF_Q20_EQUIVALENT_YIELD / 20; metricsFile.addMetric(metrics); metricsFile.write(OUTPUT); } /** A set of metrics used to describe the general quality of a BAM file */ public static class QualityYieldMetrics extends MetricBase { /** The total number of reads in the input file */ public long TOTAL_READS = 0; /** The number of reads that are PF - pass filter */ public long PF_READS = 0; /** The average read length of all the reads (will be fixed for a lane) */ public int READ_LENGTH = 0; /** The total number of bases in all reads */ public long TOTAL_BASES; /** The total number of bases in all PF reads */ public long PF_BASES = 0; /** The number of bases in all reads that achieve quality score 20 or higher */ public long Q20_BASES = 0; /** The number of bases in PF reads that achieve quality score 20 or higher */ public long PF_Q20_BASES = 0; /** The number of bases in all reads that achieve quality score 30 or higher */ public long Q30_BASES = 0; /** The number of bases in PF reads that achieve quality score 30 or higher */ public long PF_Q30_BASES = 0; /** The sum of quality scores of all bases divided by 20 */ public long Q20_EQUIVALENT_YIELD = 0; /** The sum of quality scores of all bases divided by 20 */ public long PF_Q20_EQUIVALENT_YIELD = 0; } }