/* * The MIT License * * Copyright (c) 2009 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.analysis.directed; import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.util.IntervalList; import picard.analysis.MetricAccumulationLevel; import picard.sam.DuplicationMetrics; import java.io.File; import java.util.List; import java.util.Set; /** * Calculates HS metrics for a given SAM or BAM file. Requires the input of a list of * target intervals and a list of bait intervals. Can be invoked either on an entire * iterator of SAMRecords or be passed SAMRecords one at a time. * * @author Jonathan Burke */ public class HsMetricCollector extends TargetMetricsCollector<HsMetrics> { public HsMetricCollector(final Set<MetricAccumulationLevel> accumulationLevels, final List<SAMReadGroupRecord> samRgRecords, final ReferenceSequenceFile refFile, final File perTargetCoverage, final IntervalList targetIntervals, final IntervalList probeIntervals, final String probeSetName) { super(accumulationLevels, samRgRecords, refFile, perTargetCoverage, targetIntervals, probeIntervals, probeSetName); } @Override public HsMetrics convertMetric(final TargetMetrics targetMetrics) { final HsMetrics hsMetrics = new HsMetrics(); TargetMetricsCollector.reflectiveCopy(targetMetrics, hsMetrics, new String[]{"PROBE_SET", "PROBE_TERRITORY", "ON_PROBE_BASES", "NEAR_PROBE_BASES", "OFF_PROBE_BASES", "PCT_OFF_PROBE", "ON_PROBE_VS_SELECTED", "MEAN_PROBE_COVERAGE"}, new String[]{"BAIT_SET", "BAIT_TERRITORY", "ON_BAIT_BASES", "NEAR_BAIT_BASES", "OFF_BAIT_BASES", "PCT_OFF_BAIT", "ON_BAIT_VS_SELECTED", "MEAN_BAIT_COVERAGE"} ); hsMetrics.BAIT_DESIGN_EFFICIENCY = (double) hsMetrics.TARGET_TERRITORY / (double) hsMetrics.BAIT_TERRITORY; hsMetrics.PCT_USABLE_BASES_ON_BAIT = hsMetrics.ON_BAIT_BASES / (double) targetMetrics.PF_BASES; hsMetrics.PCT_USABLE_BASES_ON_TARGET = hsMetrics.ON_TARGET_BASES / (double) targetMetrics.PF_BASES; hsMetrics.HS_LIBRARY_SIZE = DuplicationMetrics.estimateLibrarySize(targetMetrics.PF_SELECTED_PAIRS, targetMetrics.PF_SELECTED_UNIQUE_PAIRS); //need HSLIBRARY_SIZE hsMetrics.HS_PENALTY_10X = calculateHsPenalty(hsMetrics.HS_LIBRARY_SIZE, targetMetrics, 10); hsMetrics.HS_PENALTY_20X = calculateHsPenalty(hsMetrics.HS_LIBRARY_SIZE, targetMetrics, 20); hsMetrics.HS_PENALTY_30X = calculateHsPenalty(hsMetrics.HS_LIBRARY_SIZE, targetMetrics, 30); hsMetrics.HS_PENALTY_40X = calculateHsPenalty(hsMetrics.HS_LIBRARY_SIZE, targetMetrics, 40); hsMetrics.HS_PENALTY_50X = calculateHsPenalty(hsMetrics.HS_LIBRARY_SIZE, targetMetrics, 50); hsMetrics.HS_PENALTY_100X = calculateHsPenalty(hsMetrics.HS_LIBRARY_SIZE, targetMetrics, 100); return hsMetrics; } /** * Attempts to calculate the HS penalty incurred by the library in order to get 80% * of target bases (in non-zero-covered targets) to a specific target coverage (e.g. 20X). * * @param coverageGoal the desired coverage target (e.g. 20X) * @return the hs penalty - a multiplier that tells if you want, e.g. 20X coverage, then you will * need to produce this many PF aligned bases per target bases in your design! */ private double calculateHsPenalty(final Long librarySize, final TargetMetrics targetMetrics, final int coverageGoal) { if (librarySize == null) return 0; final double meanCoverage = targetMetrics.ON_TARGET_FROM_PAIR_BASES / (double) targetMetrics.TARGET_TERRITORY; final double fold80 = targetMetrics.FOLD_80_BASE_PENALTY; final long pairs = targetMetrics.PF_SELECTED_PAIRS; final long uniquePairs = targetMetrics.PF_SELECTED_UNIQUE_PAIRS; final double onTargetPct = (double) targetMetrics.ON_TARGET_BASES / (double) targetMetrics.PF_UQ_BASES_ALIGNED; final double uniquePairGoalMultiplier = (coverageGoal / meanCoverage) * fold80; double pairMultiplier = uniquePairGoalMultiplier; double increment = 1; boolean goingUp = uniquePairGoalMultiplier >= 1; double finalPairMultiplier = -1; // Converge "pairMultiplier" to the number that gives us a uniquePairMultiplier equal // to the coverage multiplier we desire. If we can't get there with 1000X coverage, // we're not going to get there! for (int i=0; i<10000; ++i) { final double uniquePairMultiplier = DuplicationMetrics.estimateRoi(librarySize, pairMultiplier, pairs, uniquePairs); if (Math.abs(uniquePairMultiplier - uniquePairGoalMultiplier) / uniquePairGoalMultiplier <= 0.001) { finalPairMultiplier = pairMultiplier; break; } else if ((uniquePairMultiplier > uniquePairGoalMultiplier && goingUp) || (uniquePairMultiplier < uniquePairGoalMultiplier && !goingUp)){ increment /= 2; goingUp = !goingUp; } pairMultiplier += (goingUp ? increment : -increment); } if (finalPairMultiplier == -1) { return -1; } else { final double uniqueFraction = (uniquePairs * uniquePairGoalMultiplier) / (pairs * finalPairMultiplier); return (1 / uniqueFraction) * fold80 * (1 / onTargetPct); } } }