package picard.vcf;

/*
 * The MIT License
 *
 * Copyright (c) 2015 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

import htsjdk.samtools.metrics.MetricsFile;
import org.testng.Assert;
import org.testng.annotations.Test;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;

/**
 * Test for CollectVariantCallingMetrics
 *
 * @author Joel Thibault (thibault at broadinstitute dot org)
 */
public class CollectVariantCallingMetricsTest {
    private static final File TEST_DATA_DIR = new File("testdata/picard/vcf");

    /**
     * Runs CollectVariantCallingMetrics on {@code mini.vcf} against {@code mini.dbsnp.vcf} and checks
     * the summary metrics plus the detail metrics reported for sample {@code HG00160}.
     *
     * @throws IOException if one of the generated metrics files cannot be opened or closed
     */
    @Test
    public void testMetricsTiny() throws IOException {
        final File dbSnpFile = new File(TEST_DATA_DIR, "mini.dbsnp.vcf");
        final File vcfFile = new File(TEST_DATA_DIR, "mini.vcf");

        final File outFile = new File(TEST_DATA_DIR, "vcmetrics_tiny");
        final File summaryFile = new File(TEST_DATA_DIR, "vcmetrics_tiny.variant_calling_summary_metrics");
        final File detailFile = new File(TEST_DATA_DIR, "vcmetrics_tiny.variant_calling_detail_metrics");

        summaryFile.deleteOnExit();
        detailFile.deleteOnExit();

        final CollectVariantCallingMetrics program = new CollectVariantCallingMetrics();
        program.INPUT = vcfFile;
        program.DBSNP = dbSnpFile;
        program.OUTPUT = outFile;

        Assert.assertEquals(program.doWork(), 0);

        final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Comparable<?>> summary = new MetricsFile<>();
        // Close the reader here instead of relying on the callee to do it.
        try (FileReader summaryReader = new FileReader(summaryFile)) {
            summary.read(summaryReader);
        }

        boolean parsedSummary = false;
        for (final CollectVariantCallingMetrics.VariantCallingSummaryMetrics metrics : summary.getMetrics()) {
            Assert.assertEquals(metrics.TOTAL_SNPS, 597);
            Assert.assertEquals(metrics.NOVEL_SNPS, 265);
            Assert.assertEquals(metrics.NUM_IN_DB_SNP, 332);

            Assert.assertEquals(metrics.PCT_DBSNP, 0.5561140179634094, 0.01);
            Assert.assertEquals(metrics.DBSNP_TITV, 3.955224, 0.01);
            Assert.assertEquals(metrics.NOVEL_TITV, 3.206349, 0.01);

            Assert.assertEquals(metrics.TOTAL_INDELS, 29);
            Assert.assertEquals(metrics.NOVEL_INDELS, 11);
            Assert.assertEquals(metrics.NUM_IN_DB_SNP_INDELS, 18);

            Assert.assertEquals(metrics.PCT_DBSNP_INDELS, 0.62069, 0.01);
            Assert.assertEquals(metrics.DBSNP_INS_DEL_RATIO, 0.125, 0.01);
            Assert.assertEquals(metrics.NOVEL_INS_DEL_RATIO, 0.375, 0.01);

            Assert.assertEquals(metrics.NUM_SINGLETONS, 245);

            parsedSummary = true;
        }

        Assert.assertTrue(parsedSummary, "Did not parse summary metrics.");

        final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Comparable<?>> detail = new MetricsFile<>();
        try (FileReader detailReader = new FileReader(detailFile)) {
            detail.read(detailReader);
        }
        final List<CollectVariantCallingMetrics.VariantCallingDetailMetrics> detailMetrics = detail.getMetrics();

        // Track whether the sample was actually seen so the per-sample checks cannot pass vacuously.
        boolean parsedSampleDetail = false;
        for (final CollectVariantCallingMetrics.VariantCallingDetailMetrics metrics : detailMetrics) {
            if (!"HG00160".equals(metrics.SAMPLE_ALIAS)) {
                continue;
            }

            Assert.assertEquals(metrics.HET_HOMVAR_RATIO, 0.72549, 0.0001);
            Assert.assertEquals(metrics.TOTAL_GQ0_VARIANTS, 2);
            // Ratio value: compare with a tolerance like every other fractional metric in this test.
            Assert.assertEquals(metrics.PCT_GQ0_VARIANTS, 0.022727, 0.0001);

            Assert.assertEquals(metrics.TOTAL_SNPS, 81);
            Assert.assertEquals(metrics.NUM_IN_DB_SNP, 44);
            Assert.assertEquals(metrics.NOVEL_SNPS, 37);

            Assert.assertEquals(metrics.PCT_DBSNP, 0.543210, 0.01);
            Assert.assertEquals(metrics.DBSNP_TITV, 6.333333, 0.01);
            Assert.assertEquals(metrics.NOVEL_TITV, 2.7, 0.01);

            Assert.assertEquals(metrics.TOTAL_INDELS, 6);
            Assert.assertEquals(metrics.NOVEL_INDELS, 3);
            Assert.assertEquals(metrics.NUM_IN_DB_SNP_INDELS, 3);

            Assert.assertEquals(metrics.PCT_DBSNP_INDELS, 0.5, 0.01);
            Assert.assertEquals(metrics.DBSNP_INS_DEL_RATIO, 0.0, 0.01);
            Assert.assertEquals(metrics.NOVEL_INS_DEL_RATIO, 0.0, 0.01);

            Assert.assertEquals(metrics.TOTAL_MULTIALLELIC_SNPS, 0.0, 0.01);
            Assert.assertEquals(metrics.NUM_IN_DB_SNP_MULTIALLELIC, 0, 0.01);
            Assert.assertEquals(metrics.TOTAL_COMPLEX_INDELS, 1.0, 0.01);
            Assert.assertEquals(metrics.NUM_IN_DB_SNP_COMPLEX_INDELS, 0, 0.01);

            Assert.assertEquals(metrics.SNP_REFERENCE_BIAS, 0.510204, 0.01);

            Assert.assertEquals(metrics.NUM_SINGLETONS, 3);

            parsedSampleDetail = true;
        }

        Assert.assertTrue(parsedSampleDetail, "Did not find detail metrics for sample HG00160.");
        Assert.assertEquals(detailMetrics.size(), 50, "Did not parse the desired number of detail metrics.");
    }

    /**
     * Runs CollectVariantCallingMetrics with {@code GVCF_INPUT = true} on {@code mini_gvcf.vcf} against
     * {@code mini.dbsnp.vcf} and checks the summary metrics plus the detail metrics reported for
     * sample {@code NA12878}.
     *
     * @throws IOException if one of the generated metrics files cannot be opened or closed
     */
    @Test
    public void testMetricsTinyGVCF() throws IOException {
        final File dbSnpFile = new File(TEST_DATA_DIR, "mini.dbsnp.vcf");
        final File vcfFile = new File(TEST_DATA_DIR, "mini_gvcf.vcf");

        final File outFile = new File(TEST_DATA_DIR, "vcmetrics_tiny_gvcf");
        final File summaryFile = new File(outFile + ".variant_calling_summary_metrics");
        final File detailFile = new File(outFile + ".variant_calling_detail_metrics");

        summaryFile.deleteOnExit();
        detailFile.deleteOnExit();

        final CollectVariantCallingMetrics program = new CollectVariantCallingMetrics();
        program.INPUT = vcfFile;
        program.DBSNP = dbSnpFile;
        program.OUTPUT = outFile;
        program.GVCF_INPUT = true;

        Assert.assertEquals(program.doWork(), 0);

        final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Comparable<?>> summary = new MetricsFile<>();
        // Close the reader here instead of relying on the callee to do it.
        try (FileReader summaryReader = new FileReader(summaryFile)) {
            summary.read(summaryReader);
        }

        boolean parsedSummary = false;
        for (final CollectVariantCallingMetrics.VariantCallingSummaryMetrics metrics : summary.getMetrics()) {
            Assert.assertEquals(metrics.TOTAL_SNPS, 20);
            Assert.assertEquals(metrics.NOVEL_SNPS, 19);
            Assert.assertEquals(metrics.NUM_IN_DB_SNP, 1);
            Assert.assertEquals(metrics.FILTERED_SNPS, 0);

            Assert.assertEquals(metrics.PCT_DBSNP, 0.05, 0.001);
            Assert.assertEquals(metrics.DBSNP_TITV, 0D, 0.01);
            Assert.assertEquals(metrics.NOVEL_TITV, 12D / (19 - 12), 0.01);

            Assert.assertEquals(metrics.TOTAL_INDELS, 7);
            Assert.assertEquals(metrics.NOVEL_INDELS, 7);
            Assert.assertEquals(metrics.NUM_IN_DB_SNP_INDELS, 0);
            Assert.assertEquals(metrics.NOVEL_INS_DEL_RATIO, 3 / 4D, 0.01);

            Assert.assertEquals(metrics.PCT_DBSNP_INDELS, 0, 0.01);
            Assert.assertEquals(metrics.DBSNP_INS_DEL_RATIO, 0, 0.01);

            Assert.assertEquals(metrics.NUM_SINGLETONS, 8);

            parsedSummary = true;
        }

        Assert.assertTrue(parsedSummary, "Did not parse summary metrics.");

        final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Comparable<?>> detail = new MetricsFile<>();
        try (FileReader detailReader = new FileReader(detailFile)) {
            detail.read(detailReader);
        }
        final List<CollectVariantCallingMetrics.VariantCallingDetailMetrics> detailMetrics = detail.getMetrics();

        // Track whether the sample was actually seen so the per-sample checks cannot pass vacuously.
        boolean parsedSampleDetail = false;
        for (final CollectVariantCallingMetrics.VariantCallingDetailMetrics metrics : detailMetrics) {
            if (!"NA12878".equals(metrics.SAMPLE_ALIAS)) {
                continue;
            }

            Assert.assertEquals(metrics.HET_HOMVAR_RATIO, 0.421053, 0.0001);
            Assert.assertEquals(metrics.TOTAL_GQ0_VARIANTS, 2);
            // Ratio value: compare with a tolerance like every other fractional metric in this test.
            Assert.assertEquals(metrics.PCT_GQ0_VARIANTS, 0.074074, 0.0001);

            Assert.assertEquals(metrics.TOTAL_SNPS, 20);
            Assert.assertEquals(metrics.NUM_IN_DB_SNP, 1);
            Assert.assertEquals(metrics.NOVEL_SNPS, 19);

            Assert.assertEquals(metrics.PCT_DBSNP, 1D / 20, 0.01);
            Assert.assertEquals(metrics.DBSNP_TITV, 0D, 0.01);
            Assert.assertEquals(metrics.NOVEL_TITV, 12D / (19 - 12), 0.01);

            Assert.assertEquals(metrics.TOTAL_INDELS, 7);
            Assert.assertEquals(metrics.NOVEL_INDELS, 7);
            Assert.assertEquals(metrics.NUM_IN_DB_SNP_INDELS, 0);

            Assert.assertEquals(metrics.PCT_DBSNP_INDELS, 0, 0.01);
            Assert.assertEquals(metrics.DBSNP_INS_DEL_RATIO, 0.0, 0.01);
            Assert.assertEquals(metrics.NOVEL_INS_DEL_RATIO, 3 / 4D, 0.01);

            Assert.assertEquals(metrics.TOTAL_MULTIALLELIC_SNPS, 0.0, 0.01);
            Assert.assertEquals(metrics.NUM_IN_DB_SNP_MULTIALLELIC, 0, 0.01);
            Assert.assertEquals(metrics.TOTAL_COMPLEX_INDELS, 0, 0.01);
            Assert.assertEquals(metrics.NUM_IN_DB_SNP_COMPLEX_INDELS, 0, 0.01);

            Assert.assertEquals(metrics.NUM_SINGLETONS, 8);

            parsedSampleDetail = true;
        }

        Assert.assertTrue(parsedSampleDetail, "Did not find detail metrics for sample NA12878.");
        Assert.assertEquals(detailMetrics.size(), 1, "Did not parse the expected number of detail metrics.");
    }
}