/*
 * The MIT License
 *
 * Copyright (c) 2015 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package picard.analysis;

import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.Histogram;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;

/**
 * Created by davidben on 5/18/15.
 */
public class TheoreticalSensitivityTest {

    private final static File TEST_DIR = new File("testdata/picard/analysis/TheoreticalSensitivity/");
    private final static File DEPTH = new File(TEST_DIR, "Solexa332667_DepthDist.histo");
    private final static File BASEQ = new File(TEST_DIR, "Solexa332667_BaseQ.histo");

    @Test
    public void testRouletteWheel() throws Exception {
        //test that a deterministic roulette wheel only gives one value
        final double[] deterministicWeights = {0.0, 1.0, 0.0};
        final TheoreticalSensitivity.RouletteWheel deterministicWheel = new TheoreticalSensitivity.RouletteWheel(deterministicWeights);
        for (int n = 0; n < 10; n++) Assert.assertEquals(deterministicWheel.draw(), 1);

        //test the sums of this deterministic wheel: a sum of n 1's equals n
        final List<ArrayList<Integer>> deterministicSums = deterministicWheel.sampleCumulativeSums(10, 1, false);
        for (int n = 0; n < 10; n++) Assert.assertEquals(deterministicSums.get(n).get(0), (Integer) n);
    }

    @Test
    public void testProportionsAboveThresholds() throws Exception {
        final List<ArrayList<Integer>> sums = new ArrayList<ArrayList<Integer>>();
        sums.add(new ArrayList<Integer>(Arrays.asList(0, 0, 0)));
        sums.add(new ArrayList<Integer>(Arrays.asList(10, 10)));
        sums.add(new ArrayList<Integer>(Arrays.asList(5, 11, -2, 4)));
        final List<Double> thresholds = Arrays.asList(-1.0, 1.0, 6.0);
        Assert.assertEquals(sums.size(), 3);
        Assert.assertEquals(thresholds.size(), 3);

        //proportions.get(i).get(j) is the fraction of sums.get(i) lying above thresholds.get(j)
        final List<ArrayList<Double>> proportions = TheoreticalSensitivity.proportionsAboveThresholds(sums, thresholds);
        Assert.assertEquals(proportions.size(), 3);

        Assert.assertEquals(proportions.get(0).get(0), (double) 3 / 3);
        Assert.assertEquals(proportions.get(0).get(1), (double) 0 / 3);
        Assert.assertEquals(proportions.get(0).get(2), (double) 0 / 3);
        Assert.assertEquals(proportions.get(1).get(0), (double) 2 / 2);
        Assert.assertEquals(proportions.get(1).get(1), (double) 2 / 2);
        Assert.assertEquals(proportions.get(1).get(2), (double) 2 / 2);
        Assert.assertEquals(proportions.get(2).get(0), (double) 3 / 4);
        Assert.assertEquals(proportions.get(2).get(1), (double) 3 / 4);
        Assert.assertEquals(proportions.get(2).get(2), (double) 1 / 4);
    }
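    //For intuition about the RouletteWheel under test: roulette-wheel selection
    //is inverse-CDF sampling over (possibly unnormalized) weights. The helper
    //below is a minimal sketch of that technique, not a copy of the production
    //implementation in TheoreticalSensitivity, and is not called by any test.
    private static int rouletteDrawSketch(final double[] weights, final java.util.Random rng) {
        double total = 0.0;
        for (final double weight : weights) total += weight;
        final double x = rng.nextDouble() * total;  //uniform point along the cumulative weight axis
        double cumulative = 0.0;
        for (int i = 0; i < weights.length; i++) {
            cumulative += weights[i];
            if (x < cumulative) return i;  //x landed in bin i
        }
        return weights.length - 1;  //guard against floating-point rounding at the top edge
    }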
    @Test
    public void testHetAltDepthDistribution() throws Exception {
        final int N = 6;
        final double p = 0.5;
        final List<ArrayList<Double>> distribution = TheoreticalSensitivity.hetAltDepthDistribution(N);
        for (int n = 0; n < N - 1; n++) {
            for (int m = 0; m <= n; m++) {
                //TODO: Java has no built-in binomial coefficient; when this is in hellbender, use Apache Commons
                //compute the binomial coefficient C(n, m) = n! / (m! (n - m)!) by hand
                int binomialCoefficient = 1;
                for (int i = n; i > (n - m); i--) binomialCoefficient *= i;
                for (int i = m; i > 0; i--) binomialCoefficient /= i;

                //P(m alts out of n reads) = C(n, m) p^m (1 - p)^(n - m), which equals C(n, m) p^n for p = 1/2
                Assert.assertEquals(distribution.get(n).get(m), binomialCoefficient * Math.pow(p, n));
            }
        }
    }
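    //A dependency-free alternative for the TODO above, shown as a sketch: the
    //multiplicative formula computes C(n, k) exactly, because after step i the
    //running product is the integer C(n - k + i, i), so each division is exact.
    //In hellbender, with Apache Commons Math on the classpath, the equivalent
    //call would be org.apache.commons.math3.util.CombinatoricsUtils.binomialCoefficientDouble(n, k).
    //This helper is illustrative only and is not used by the tests.
    private static long binomialCoefficientSketch(final int n, final int k) {
        long result = 1;
        for (int i = 1; i <= k; i++) {
            result = result * (n - k + i) / i;  //division is exact at every step
        }
        return result;
    }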
    //test that large-sample sums from the RouletteWheel converge to a normal distribution
    //using the empirical CDF as measured by proportionsAboveThresholds
    @Test
    public void testCentralLimitTheorem() throws Exception {
        //use a RouletteWheel that gives 0, 1, 2 with equal probability
        final double[] weights = {1.0, 1.0, 1.0};
        final TheoreticalSensitivity.RouletteWheel wheel = new TheoreticalSensitivity.RouletteWheel(weights);
        final int sampleSize = 1000;
        final int numSummands = 100;

        //the mean and standard deviation of a single roulette draw and of many draws:
        //for a uniform draw from {0, 1, 2}, E[X] = 1 and Var[X] = E[X^2] - E[X]^2 = 5/3 - 1 = 2/3
        final double muSingleDraw = 1.0;
        final double sigmaSingleDraw = Math.sqrt(2.0 / 3.0);
        final double mu = numSummands * muSingleDraw;
        final double sigma = Math.sqrt(numSummands) * sigmaSingleDraw;

        //sample cumulative sums of draws from the wheel, sampleSize samples for each number of summands
        final List<ArrayList<Integer>> sums = wheel.sampleCumulativeSums(numSummands, sampleSize, false);

        //we only want the last set of sums, those with numSummands summands
        sums.subList(0, sums.size() - 1).clear();
        Assert.assertEquals(sums.size(), 1);

        //test whether the number of elements within one standard deviation agrees with the normal distribution
        final List<Double> thresholds = Arrays.asList(mu - sigma, mu + sigma);

        //sums is 1 x sampleSize, thresholds is a 2-vector, so proportions is 1 x 2
        final List<ArrayList<Double>> proportions = TheoreticalSensitivity.proportionsAboveThresholds(sums, thresholds);
        final double empiricalProportionWithinOneSigma = proportions.get(0).get(0) - proportions.get(0).get(1);

        //the proportion within one sigma for the normal distribution;
        //whether any one element falls within one sigma is a Bernoulli variable,
        //which gives the sampling standard deviation of the empirical proportion
        final double theoreticalProportionWithinOneSigma = 0.682689492;
        final double samplingStandardDeviationOfProportion = Math.sqrt(theoreticalProportionWithinOneSigma * (1 - theoreticalProportionWithinOneSigma) / sampleSize);

        Assert.assertEquals(empiricalProportionWithinOneSigma, theoreticalProportionWithinOneSigma, 5 * samplingStandardDeviationOfProportion);
    }

    //Put it all together for deterministic quality and depths
    @Test
    public void testDeterministicQualityAndDepth() throws Exception {
        final double logOddsThreshold = 0.0;
        final double tolerance = 0.001;
        final int sampleSize = 1; //quality is deterministic, hence no sampling error
        for (int q = 5; q < 10; q++) {
            for (int n = 5; n < 10; n++) {
                //alts required to call at the log odds ratio threshold of 0 used above:
                //each alt base contributes q/10 to the log10 odds, which must exceed n*log10(2)
                final double minAltCount = 10 * n * Math.log10(2) / q;
                double expectedResult = 0.0;
                final List<ArrayList<Double>> altCountProbabilities = TheoreticalSensitivity.hetAltDepthDistribution(n + 1);
                for (int altCount = n; altCount > minAltCount; altCount--) {
                    expectedResult += altCountProbabilities.get(n).get(altCount);
                }

                //deterministic weights that always yield q are 0.0 for 0 through q - 1 and 1.0 for q
                final double[] qualityDistribution = new double[q + 1];
                Arrays.fill(qualityDistribution, 0.0);
                qualityDistribution[qualityDistribution.length - 1] = 1.0;
                final double[] depthDistribution = new double[n + 1];
                Arrays.fill(depthDistribution, 0.0);
                depthDistribution[depthDistribution.length - 1] = 1.0;

                final double result = TheoreticalSensitivity.hetSNPSensitivity(depthDistribution, qualityDistribution, sampleSize, logOddsThreshold);
                Assert.assertEquals(result, expectedResult, tolerance);
            }
        }
    }

    @Test
    public void testHetSensDistributions() throws Exception {
        //Expect theoretical sensitivity to be close to 0.9617 for Solexa-332667
        final double tolerance = 0.02;
        final double expectedResult = 0.9617;
        final int maxDepth = 500;
        final double[] depthDistribution = new double[maxDepth + 1];
        final double[] qualityDistribution = new double[50];

        final Scanner scanDepth = new Scanner(DEPTH);
        for (int i = 0; scanDepth.hasNextDouble(); i++) {
            depthDistribution[i] = scanDepth.nextDouble();
        }
        scanDepth.close();

        final Scanner scanBaseQ = new Scanner(BASEQ);
        for (int j = 0; scanBaseQ.hasNextDouble(); j++) {
            qualityDistribution[j] = scanBaseQ.nextDouble();
        }
        scanBaseQ.close();

        final int sampleSize = 1_000;
        final double logOddsThreshold = 3.0;
        final double result = TheoreticalSensitivity.hetSNPSensitivity(depthDistribution, qualityDistribution, sampleSize, logOddsThreshold);
        Assert.assertEquals(result, expectedResult, tolerance);
    }

    @DataProvider(name = "hetSensDataProvider")
    public Object[][] hetSensDataProvider() {
        final File wgsMetricsFile = new File(TEST_DIR, "test_Solexa-332667.wgs_metrics");
        final File targetedMetricsFile = new File(TEST_DIR, "test_25103070136.targeted_pcr_metrics");
        //These magic numbers come from a separate implementation of the code in R.
        return new Object[][]{
                {0.897_342_54, wgsMetricsFile},
                {0.956_186_66, targetedMetricsFile}
        };
    }

    @Test(dataProvider = "hetSensDataProvider")
    public void testHetSensTargeted(final double expected, final File metricsFile) throws Exception {
        final double tolerance = 0.000_000_01;

        final MetricsFile metrics = new MetricsFile();
        metrics.read(new FileReader(metricsFile));
        final List<Histogram> histograms = metrics.getAllHistograms();
        final Histogram depthHistogram = histograms.get(0);
        final Histogram qualityHistogram = histograms.get(1);
        final double[] depthDistribution = TheoreticalSensitivity.normalizeHistogram(depthHistogram);
        final double[] qualityDistribution = TheoreticalSensitivity.normalizeHistogram(qualityHistogram);

        final int sampleSize = 1_000;
        final double logOddsThreshold = 3.0;
        final double result = TheoreticalSensitivity.hetSNPSensitivity(depthDistribution, qualityDistribution, sampleSize, logOddsThreshold);
        Assert.assertEquals(result, expected, tolerance);
    }
}