/* * The MIT License * * Copyright (c) 2009 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.analysis.directed; import htsjdk.samtools.AlignmentBlock; import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.metrics.MetricBase; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.reference.ReferenceSequence; import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.util.CollectionUtil; import htsjdk.samtools.util.CoordMath; import htsjdk.samtools.util.FormatUtil; import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.IntervalList; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.OverlapDetector; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.samtools.util.SequenceUtil; import htsjdk.samtools.util.StringUtil; import picard.PicardException; import picard.analysis.MetricAccumulationLevel; import picard.metrics.MultilevelMetrics; import picard.metrics.PerUnitMetricCollector; import picard.metrics.SAMRecordMultiLevelCollector; import java.io.File; import java.io.IOException; import java.io.PrintWriter; import java.lang.reflect.Field; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; /** * TargetMetrics, are metrics to measure how well we hit specific targets (or baits) when using a targeted sequencing process like hybrid selection * or Targeted PCR Techniques (TSCA). TargetMetrics at the moment are the metrics that are shared by both HybridSelection and TargetedPcrMetrics. * * TargetMetricsCollector collects for a run these common metrics and can be sub-classed to provide metrics more specific to a targeted sequencing * run. * * Note: Probe is the name I've used to indicate the bait set or amplicon set (e.g. the individual technological units used to target specific * sites). * * @author Jonathan Burke */ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetrics> extends SAMRecordMultiLevelCollector<METRIC_TYPE, Integer> { // What is considered "near" to the bait private static final int NEAR_PROBE_DISTANCE = 250; //If perTargetCoverage != null then coverage is computed for each specified target and output to this file private final File perTargetCoverage; //The name of the set of probes used private final String probeSetName; private static final Log log = Log.getInstance(TargetMetricsCollector.class); //The interval list indicating the regions targeted by all probes private final IntervalList allProbes; //The interval list of the the regions we intend to cover private final IntervalList allTargets; // Overlap detector for finding overlaps between reads and the experimental targets private final OverlapDetector<Interval> targetDetector; // Overlap detector for finding overlaps between the reads and the baits (and the near bait space) private final OverlapDetector<Interval> probeDetector; private Map<Interval,Double> intervalToGc = null; //The number of bases within all unique intervals in allProbes private final long probeTerritory; //The number of bases within all unique intervals found in allTargets private final long targetTerritory; private final long genomeSize; //A map of coverage by target in which coverage is reset every read, this is done //so that we can calculate overlap for a read once and the resulting coverage is //than added to the cumulative coverage of every collector that collects //information on that read private Map<Interval, Coverage> coverageByTargetForRead; private Coverage [] cov; //Converts a targetMetric into a more specific metric of METRIC_TYPE public abstract METRIC_TYPE convertMetric(final TargetMetrics targetMetrics); /** * Since the targeted metrics (HsMetrics, TargetedPcrMetrics,...) share many of the same values as TargetMetrics, this copy will copy all public attributes in targetMetrics * to the outputMetrics' attributes of the same name. If no matching attribute exists in the outputMetrics or the attribute of the target metrics class also is found * in targetKeys then it's value is not copied. Further more, targetKeys and outputKeys are attribute name arrays synchronized by the index. * For each target key, targetMetrics.<targetKeys[i]> is assigned to outputMetrics.<outputKeys[i]> * * @param targetMetrics A metric with values to be copied * @param outputMetrics A metrics intended to receive values from targetMetrics * @param targetKeys Specific names of attributes of targetMetrics to copy to outputMetrics, each key has a corresponding one in outputKeys * @param outputKeys Specific names of the destination attributes of outputMetrics that will be filled with values of outputMetrics, each key has a corresponding one in targetKeys * @param <MT> The type of metric of outputMetrics */ protected static <MT extends MetricBase> void reflectiveCopy(final TargetMetrics targetMetrics, final MT outputMetrics, final String [] targetKeys, final String [] outputKeys) { if(targetKeys == null || outputKeys == null) { if(outputKeys != null) { throw new PicardException("Target keys is null but output keys == " + StringUtil.join(",", outputKeys)); } if(targetKeys != null) { throw new PicardException("Output keys is null but target keys == " + StringUtil.join(",", targetKeys)); } } else { if(targetKeys.length != outputKeys.length) { throw new PicardException("Target keys and output keys do not have the same length: " + "targetKeys == (" + StringUtil.join(",", targetKeys) + ") " + "outputKeys == (" + StringUtil.join(",", outputKeys) + ")"); } } final Class mtClass = outputMetrics.getClass(); final Set<Field> targetSet = CollectionUtil.makeSet(TargetMetrics.class.getFields()); for(final String targetKey : targetKeys) { if(targetSet.contains(targetKey)) { targetSet.remove(targetKey); } } final Set<String> outputSet = new HashSet<String>(); for(final Field field : outputMetrics.getClass().getFields()) { outputSet.add(field.getName()); } for(final Field field : targetSet) { if(outputSet.contains(field.getName())) { try { final Field outputField = mtClass.getField(field.getName()); outputField.set(outputMetrics, field.get(targetMetrics)); } catch (Exception e) { throw new PicardException("Exception while copying targetMetrics to " + outputMetrics.getClass().getName(), e); } } } for(int i = 0; i < targetKeys.length; i++) { try { Field targetMetricField = TargetMetrics.class.getField(targetKeys[i]); Field outputMetricField = mtClass.getField(outputKeys[i]); outputMetricField.set(outputMetrics, targetMetricField.get(targetMetrics)); } catch(final Exception exc) { throw new PicardException("Exception while copying TargetMetrics." + targetKeys[i] + " to " + mtClass.getName() + "." + outputKeys[i], exc); } } } public TargetMetricsCollector(final Set<MetricAccumulationLevel> accumulationLevels, final List<SAMReadGroupRecord> samRgRecords, final ReferenceSequenceFile refFile, final File perTargetCoverage, final IntervalList targetIntervals, final IntervalList probeIntervals, final String probeSetName) { this.perTargetCoverage = perTargetCoverage; this.probeSetName = probeSetName; this.allProbes = probeIntervals; this.allTargets = targetIntervals; final List<Interval> uniqueBaits = this.allProbes.getUniqueIntervals(); this.probeDetector = new OverlapDetector<Interval>(-NEAR_PROBE_DISTANCE, 0); this.probeDetector.addAll(uniqueBaits, uniqueBaits); this.probeTerritory = Interval.countBases(uniqueBaits); final List<Interval> uniqueTargets = this.allTargets.getUniqueIntervals(); targetDetector = new OverlapDetector<Interval>(0,0); this.targetDetector.addAll(uniqueTargets, uniqueTargets); this.targetTerritory = Interval.countBases(uniqueTargets); // Populate the coverage by target map int i = 0; cov = new Coverage[uniqueTargets.size()]; this.coverageByTargetForRead = new LinkedHashMap<Interval, Coverage>(uniqueTargets.size() * 2, 0.5f); for (final Interval target : uniqueTargets) { final Coverage coverage = new Coverage(target, 0); this.coverageByTargetForRead.put(target, coverage); cov[i++] = coverage; } long genomeSizeAccumulator = 0; for (final SAMSequenceRecord seq : this.allProbes.getHeader().getSequenceDictionary().getSequences()) { genomeSizeAccumulator += seq.getSequenceLength(); } this.genomeSize = genomeSizeAccumulator; if (refFile != null) { intervalToGc = new HashMap<Interval,Double>(); for (final Interval target : uniqueTargets) { final ReferenceSequence rs = refFile.getSubsequenceAt(target.getSequence(), target.getStart(), target.getEnd()); intervalToGc.put(target,SequenceUtil.calculateGc(rs.getBases())); } } setup(accumulationLevels, samRgRecords); } @Override protected PerUnitMetricCollector<METRIC_TYPE, Integer, SAMRecord> makeChildCollector(final String sample, final String library, final String readGroup) { final PerUnitTargetMetricCollector collector = new PerUnitTargetMetricCollector(probeSetName, coverageByTargetForRead.keySet(), sample, library, readGroup, probeTerritory, targetTerritory, genomeSize, intervalToGc); if (this.probeSetName != null) { collector.setBaitSetName(probeSetName); } return collector; } @Override protected PerUnitMetricCollector<METRIC_TYPE, Integer, SAMRecord> makeAllReadCollector() { final PerUnitTargetMetricCollector collector = (PerUnitTargetMetricCollector) makeChildCollector(null, null, null); if (perTargetCoverage != null) { collector.setPerTargetOutput(perTargetCoverage); } return collector; } /** * Collect the Target Metrics for one unit of "accumulation" (i.e. for one sample, or for one library ...) */ public class PerUnitTargetMetricCollector implements PerUnitMetricCollector<METRIC_TYPE, Integer, SAMRecord> { private final Map<Interval,Double> intervalToGc; private File perTargetOutput; // A Map to accumulate per-bait-region (i.e. merge of overlapping targets) coverage. */ private final Map<Interval, Coverage> coverageByTarget; private final TargetMetrics metrics = new TargetMetrics(); /** * Constructor that parses the squashed reference to genome reference file and stores the * information in a map for later use. */ public PerUnitTargetMetricCollector(final String probeSetName, final Set<Interval> coverageTargets, final String sample, final String library, final String readGroup, final long probeTerritory, final long targetTerritory, final long genomeSize, final Map<Interval, Double> intervalToGc) { this.metrics.SAMPLE = sample; this.metrics.LIBRARY = library; this.metrics.READ_GROUP = readGroup; this.metrics.PROBE_SET = probeSetName; metrics.PROBE_TERRITORY = probeTerritory; metrics.TARGET_TERRITORY = targetTerritory; metrics.GENOME_SIZE = genomeSize; this.coverageByTarget = new LinkedHashMap<Interval, Coverage>(coverageTargets.size() * 2, 0.5f); for (Interval target : coverageTargets) { this.coverageByTarget.put(target, new Coverage(target,0)); } this.intervalToGc = intervalToGc; } /** If set, the metrics collector will output per target coverage information to this file. */ public void setPerTargetOutput(final File perTargetOutput) { this.perTargetOutput = perTargetOutput; } /** Sets the name of the bait set explicitly instead of inferring it from the bait file. */ public void setBaitSetName(final String name) { this.metrics.PROBE_SET = name; } /** Adds information about an individual SAMRecord to the statistics. */ public void acceptRecord(final SAMRecord rec) { // Just plain avoid records that are marked as not-primary if (rec.isSecondaryOrSupplementary()) return; this.metrics.TOTAL_READS += 1; // Check for PF reads if (rec.getReadFailsVendorQualityCheckFlag()) { return; } // Prefetch the list of target and bait overlaps here as they're needed multiple times. final Collection<Interval> targets; final Collection<Interval> probes; if (!rec.getReadUnmappedFlag()) { final Interval read = new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd()); targets = targetDetector.getOverlaps(read); probes = probeDetector.getOverlaps(read); } else { targets = null; probes = null; } ++this.metrics.PF_READS; this.metrics.PF_BASES += rec.getReadLength(); // And now calculate the values we need for HS_LIBRARY_SIZE if (rec.getReadPairedFlag() && rec.getFirstOfPairFlag() && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag()) { if (probes != null && !probes.isEmpty()) { ++this.metrics.PF_SELECTED_PAIRS; if (!rec.getDuplicateReadFlag()) ++this.metrics.PF_SELECTED_UNIQUE_PAIRS; } } // Check for reads that are marked as duplicates if (rec.getDuplicateReadFlag()) { return; } else { ++this.metrics.PF_UNIQUE_READS; } // Don't bother with reads that didn't align uniquely if (rec.getReadUnmappedFlag() || rec.getMappingQuality() == 0) { return; } this.metrics.PF_UQ_READS_ALIGNED += 1; for (final AlignmentBlock block : rec.getAlignmentBlocks()) { this.metrics.PF_UQ_BASES_ALIGNED += block.getLength(); } final boolean mappedInPair = rec.getReadPairedFlag() && !rec.getMateUnmappedFlag(); // Find the target overlaps if (targets != null && !targets.isEmpty()) { for (final Interval target : targets) { final Coverage coverage = this.coverageByTarget.get(target); for (final AlignmentBlock block : rec.getAlignmentBlocks()) { final int end = CoordMath.getEnd(block.getReferenceStart(), block.getLength()); for (int pos=block.getReferenceStart(); pos<=end; ++ pos) { if (pos >= target.getStart() && pos <= target.getEnd()) { ++this.metrics.ON_TARGET_BASES; if (mappedInPair) ++this.metrics.ON_TARGET_FROM_PAIR_BASES; coverage.addBase(pos - target.getStart()); } } } } } // Now do the bait overlaps int mappedBases = 0; for (final AlignmentBlock block : rec.getAlignmentBlocks()) mappedBases += block.getLength(); int onBaitBases = 0; if (probes != null && !probes.isEmpty()) { for (final Interval bait : probes) { for (final AlignmentBlock block : rec.getAlignmentBlocks()) { final int end = CoordMath.getEnd(block.getReferenceStart(), block.getLength()); for (int pos=block.getReferenceStart(); pos<=end; ++pos) { if (pos >= bait.getStart() && pos <= bait.getEnd()) ++onBaitBases; } } } this.metrics.ON_PROBE_BASES += onBaitBases; this.metrics.NEAR_PROBE_BASES += (mappedBases - onBaitBases); } else { this.metrics.OFF_PROBE_BASES += mappedBases; } } @Override public void finish() { metrics.PCT_PF_READS = metrics.PF_READS / (double) metrics.TOTAL_READS; metrics.PCT_PF_UQ_READS = metrics.PF_UNIQUE_READS / (double) metrics.TOTAL_READS; metrics.PCT_PF_UQ_READS_ALIGNED = metrics.PF_UQ_READS_ALIGNED / (double) metrics.PF_UNIQUE_READS; final double denominator = (metrics.ON_PROBE_BASES + metrics.NEAR_PROBE_BASES + metrics.OFF_PROBE_BASES); metrics.PCT_SELECTED_BASES = (metrics.ON_PROBE_BASES + metrics.NEAR_PROBE_BASES) / denominator; metrics.PCT_OFF_PROBE = metrics.OFF_PROBE_BASES / denominator; metrics.ON_PROBE_VS_SELECTED = metrics.ON_PROBE_BASES / (double) (metrics.ON_PROBE_BASES + metrics.NEAR_PROBE_BASES); metrics.MEAN_PROBE_COVERAGE = metrics.ON_PROBE_BASES / (double) metrics.PROBE_TERRITORY; metrics.FOLD_ENRICHMENT = (metrics.ON_PROBE_BASES/ denominator) / ((double) metrics.PROBE_TERRITORY / metrics.GENOME_SIZE); calculateTargetCoverageMetrics(); calculateGcMetrics(); } /** Calculates how much additional sequencing is needed to raise 80% of bases to the mean for the lane. */ private void calculateTargetCoverageMetrics() { final short[] depths = new short[(int) this.metrics.TARGET_TERRITORY]; // may not use entire array int zeroCoverageTargets = 0; int depthIndex = 0; double totalCoverage = 0; int basesConsidered = 0; for (final Coverage c : this.coverageByTarget.values()) { if (!c.hasCoverage()) { ++zeroCoverageTargets; continue; } final short[] targetDepths = c.getDepths(); basesConsidered += targetDepths.length; for (final short depth : targetDepths) { depths[depthIndex++] = depth; totalCoverage += depth; } } this.metrics.MEAN_TARGET_COVERAGE = totalCoverage / basesConsidered; // Sort the array (ASCENDING) and then find the base the coverage value that lies at the 80% // line, which is actually at 20% into the array now Arrays.sort(depths); // Note. basesConsidered can be between 0 and depths.length inclusive. indexOf80thPercentile will be -1 in the latter case final int indexOf80thPercentile = Math.max((depths.length - 1 - basesConsidered) + (int) (basesConsidered * 0.2), 0); final int coverageAt80thPercentile = depths[indexOf80thPercentile]; this.metrics.FOLD_80_BASE_PENALTY = this.metrics.MEAN_TARGET_COVERAGE / coverageAt80thPercentile; this.metrics.ZERO_CVG_TARGETS_PCT = zeroCoverageTargets / (double) allTargets.getIntervals().size(); // Now do the "how many bases at X" calculations. int totalTargetBases = 0; int targetBases2x = 0; int targetBases10x = 0; int targetBases20x = 0; int targetBases30x = 0; int targetBases40x = 0; int targetBases50x = 0; int targetBases100x = 0; for (final Coverage c : this.coverageByTarget.values()) { for (final short depth : c.getDepths()) { ++totalTargetBases; if (depth >= 2) { ++targetBases2x; if (depth >=10) { ++targetBases10x; if (depth >= 20) { ++targetBases20x; if (depth >=30) { ++targetBases30x; if (depth >=40) { ++targetBases40x; if (depth >=50) { ++targetBases50x; if (depth >=100) { ++targetBases100x; } } } } } } } } } this.metrics.PCT_TARGET_BASES_2X = (double) targetBases2x / (double) totalTargetBases; this.metrics.PCT_TARGET_BASES_10X = (double) targetBases10x / (double) totalTargetBases; this.metrics.PCT_TARGET_BASES_20X = (double) targetBases20x / (double) totalTargetBases; this.metrics.PCT_TARGET_BASES_30X = (double) targetBases30x / (double) totalTargetBases; this.metrics.PCT_TARGET_BASES_40X = (double) targetBases40x / (double) totalTargetBases; this.metrics.PCT_TARGET_BASES_50X = (double) targetBases50x / (double) totalTargetBases; this.metrics.PCT_TARGET_BASES_100X = (double) targetBases100x / (double) totalTargetBases; } private void calculateGcMetrics() { if (this.intervalToGc != null) { log.info("Calculating GC metrics"); // Setup the output file if we're outputting per-target coverage FormatUtil fmt = new FormatUtil(); final PrintWriter out; try { if (perTargetOutput != null) { out = new PrintWriter(perTargetOutput); out.println("chrom\tstart\tend\tlength\tname\t%gc\tmean_coverage\tnormalized_coverage"); } else { out = null; } } catch (IOException ioe) { throw new RuntimeIOException(ioe); } final int bins = 101; final long[] targetBasesByGc = new long[bins]; final long[] alignedBasesByGc = new long[bins]; for (final Map.Entry<Interval,Coverage> entry : this.coverageByTarget.entrySet()) { final Interval interval = entry.getKey(); final Coverage cov = entry.getValue(); final double gcDouble = this.intervalToGc.get(interval); final int gc = (int) Math.round(gcDouble * 100); targetBasesByGc[gc] += interval.length(); alignedBasesByGc[gc] += cov.getTotal(); if (out != null) { final double coverage = cov.getTotal() / (double) interval.length(); out.println(interval.getSequence() + "\t" + interval.getStart() + "\t" + interval.getEnd() + "\t" + interval.length() + "\t" + interval.getName() + "\t" + fmt.format(gcDouble) + "\t" + fmt.format(coverage) + "\t" + fmt.format(coverage / this.metrics.MEAN_TARGET_COVERAGE) ); } } if (out != null) out.close(); // Total things up long totalTarget = 0; long totalBases = 0; for (int i=0; i<targetBasesByGc.length; ++i) { totalTarget += targetBasesByGc[i]; totalBases += alignedBasesByGc[i]; } // Re-express things as % of the totals and calculate dropout metrics for (int i=0; i<targetBasesByGc.length; ++i) { final double targetPct = targetBasesByGc[i] / (double) totalTarget; final double alignedPct = alignedBasesByGc[i] / (double) totalBases; double dropout = (alignedPct - targetPct) * 100d; if (dropout < 0) { dropout = Math.abs(dropout); if (i <=50) this.metrics.AT_DROPOUT += dropout; if (i >=50) this.metrics.GC_DROPOUT += dropout; } } } } @Override public void addMetricsToFile(MetricsFile<METRIC_TYPE, Integer> hsMetricsComparableMetricsFile) { hsMetricsComparableMetricsFile.addMetric(convertMetric(this.metrics)); } } /** * A simple class that is used to store the coverage information about an interval. * * @author Tim Fennell */ public static class Coverage { private final Interval interval; private final short[] depths; /** Constructs a new coverage object for the provided mapping with the desired padding either side. */ public Coverage(final Interval i, final int padding) { this.interval = i; this.depths = new short[interval.length() + 2*padding]; } /** Adds a single point of depth at the desired offset into the coverage array. */ public void addBase(final int offset) { if (offset >= 0 && offset < this.depths.length) { // Prevent overflow if depth is too great, while avoiding doubling memory requirement. if (this.depths[offset] < Short.MAX_VALUE) { this.depths[offset] += 1; } } } /** Returns true if any base in the range has coverage of > 1 */ public boolean hasCoverage() { for (final short s : depths) { if (s > 1) return true; } return false; } /** Gets the coverage depths as an array of shorts. */ public short[] getDepths() { return this.depths; } public int getTotal() { int total = 0; for (int i=0; i<depths.length; ++i) total += depths[i]; return total; } @Override public String toString() { return "TargetedMetricCollector(interval=" + interval + ", depths = [" + StringUtil.intValuesToString(this.depths) + "])"; } } } /** * For a sequencing run targeting specific regions of the genome this metric class holds metrics describing * how well those regions were targeted. */ class TargetMetrics extends MultilevelMetrics { /** The name of the PROBE_SET (BAIT SET, AMPLICON SET, ...) used in this metrics collection run */ public String PROBE_SET; /** The number of unique bases covered by the intervals of all probes in the probe set */ public long PROBE_TERRITORY; /** The number of unique bases covered by the intervals of all targets that should be covered */ public long TARGET_TERRITORY; /** The number of bases in the reference genome used for alignment. */ public long GENOME_SIZE; /** The total number of reads in the SAM or BAM file examined. */ public long TOTAL_READS; /** The number of reads that pass the vendor's filter. */ public long PF_READS; /** The number of bases in the SAM or BAM file to be examined */ public long PF_BASES; /** The number of PF reads that are not marked as duplicates. */ public long PF_UNIQUE_READS; // Tracks the number of read pairs that we see that are PF (used to calculate library size) */ public long PF_SELECTED_PAIRS; // Tracks the number of unique PF reads pairs we see (used to calc library size) public long PF_SELECTED_UNIQUE_PAIRS; /** The number of PF unique reads that are aligned with mapping score > 0 to the reference genome. */ public long PF_UQ_READS_ALIGNED; /** The number of PF unique bases that are aligned with mapping score > 0 to the reference genome. */ public long PF_UQ_BASES_ALIGNED; /** The number of PF aligned probed that mapped to a baited region of the genome. */ public long ON_PROBE_BASES; /** The number of PF aligned bases that mapped to within a fixed interval of a probed region, but not on a baited region. */ public long NEAR_PROBE_BASES; /** The number of PF aligned bases that mapped to neither on or near a probe. */ public long OFF_PROBE_BASES; /** The number of PF aligned bases that mapped to a targeted region of the genome. */ public long ON_TARGET_BASES; /** The number of PF aligned bases that are mapped in pair to a targeted region of the genome. */ public long ON_TARGET_FROM_PAIR_BASES; //metrics below here are derived after collection /** PF reads / total reads. The percent of reads passing filter. */ public double PCT_PF_READS; /** PF Unique Reads / Total Reads. */ public double PCT_PF_UQ_READS; /** PF Reads Aligned / PF Reads. */ public double PCT_PF_UQ_READS_ALIGNED; /** On+Near Bait Bases / PF Bases Aligned. */ public double PCT_SELECTED_BASES; /** The percentage of aligned PF bases that mapped neither on or near a probe. */ public double PCT_OFF_PROBE; /** The percentage of on+near probe bases that are on as opposed to near. */ public double ON_PROBE_VS_SELECTED; /** The mean coverage of all probes in the experiment. */ public double MEAN_PROBE_COVERAGE; /** The fold by which the probed region has been amplified above genomic background. */ public double FOLD_ENRICHMENT; /** The mean coverage of targets that recieved at least coverage depth = 2 at one base. */ public double MEAN_TARGET_COVERAGE; /** The number of targets that did not reach coverage=2 over any base. */ public double ZERO_CVG_TARGETS_PCT; /** * The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to * the mean coverage level in those targets. */ public double FOLD_80_BASE_PENALTY; /** The percentage of ALL target bases acheiving 2X or greater coverage. */ public double PCT_TARGET_BASES_2X; /** The percentage of ALL target bases acheiving 10X or greater coverage. */ public double PCT_TARGET_BASES_10X; /** The percentage of ALL target bases acheiving 20X or greater coverage. */ public double PCT_TARGET_BASES_20X; /** The percentage of ALL target bases acheiving 30X or greater coverage. */ public double PCT_TARGET_BASES_30X; /** The percentage of ALL target bases acheiving 40X or greater coverage. */ public double PCT_TARGET_BASES_40X; /** The percentage of ALL target bases acheiving 50X or greater coverage. */ public double PCT_TARGET_BASES_50X; /** The percentage of ALL target bases acheiving 100X or greater coverage. */ public double PCT_TARGET_BASES_100X; /** * A measure of how undercovered <= 50% GC regions are relative to the mean. For each GC bin [0..50] * we calculate a = % of target territory, and b = % of aligned reads aligned to these targets. * AT DROPOUT is then abs(sum(a-b when a-b < 0)). E.g. if the value is 5% this implies that 5% of total * reads that should have mapped to GC<=50% regions mapped elsewhere. */ public double AT_DROPOUT; /** * A measure of how undercovered >= 50% GC regions are relative to the mean. For each GC bin [50..100] * we calculate a = % of target territory, and b = % of aligned reads aligned to these targets. * GC DROPOUT is then abs(sum(a-b when a-b < 0)). E.g. if the value is 5% this implies that 5% of total * reads that should have mapped to GC>=50% regions mapped elsewhere. */ public double GC_DROPOUT; }