package picard.vcf; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.util.FormatUtil; import htsjdk.samtools.util.IOUtil; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.GenotypeBuilder; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import picard.vcf.GenotypeConcordanceStates.CallState; import picard.vcf.GenotypeConcordanceStates.TruthAndCallStates; import picard.vcf.GenotypeConcordanceStates.TruthState; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; public class GenotypeConcordanceTest { private static final File OUTPUT_DATA_PATH = IOUtil.createTempDir("GenotypeConcordanceTest", null); private static final File TEST_DATA_PATH = new File("testdata/picard/vcf/"); // Test VCFs private static final File CEU_TRIOS_SNPS_VCF = new File(TEST_DATA_PATH, "CEUTrio-snps.vcf"); private static final File CEU_TRIOS_INDELS_VCF = new File(TEST_DATA_PATH, "CEUTrio-indels.vcf"); // Test that we notice a difference on the first line private static final File CEU_TRIOS_SNPS_FIRST_LINE_DIFF_VCF = new File(TEST_DATA_PATH, "CEUTrio-snps_first_line_diff.vcf"); // Test that we notice a difference on the last line private static final File CEU_TRIOS_SNPS_LAST_LINE_DIFF_VCF = new File(TEST_DATA_PATH, "CEUTrio-snps_last_line_diff.vcf"); // Test that we notice a deleted line private static final File CEU_TRIOS_SNPS_DEL_LINE_VCF = new File(TEST_DATA_PATH, "CEUTrio-snps_del_line.vcf"); // Existing/expected base metrics file names private static final String CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_GC = "CEUTrio-snps_vs_CEUTrio-snps_GtConcordanceDiff"; private static final String CEU_TRIOS_INDELS_VS_CEU_TRIOS_INDELS_GC = "CEUTrio-indels_vs_CEUTrio-indels_GtConcordanceDiff"; private static final String CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_FIRST_LINE_DIFF_GC = "CEUTrio-snps_CEUTrio-snps_first_line_GtConcordanceDiff"; private static final String CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_LAST_LINE_DIFF_GC = "CEUTrio-snps_CEUTrio-snps_last_line_GtConcordanceDiff"; private static final String CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_DEL_LINE_GC = "CEUTrio-snps_CEUTrio-snps_del_line_GtConcordanceDiff"; private static final String CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_GC_ALL_ROWS = "CEUTrio-snps_vs_CEUTrio-snps_GtConcordanceDiff_AllRows"; private static final String CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_GC_MIN_GQ = "CEUTrio-snps_vs_CEUTrio-snps_GtConcordanceDiff_MinGq"; private static final String CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_GC_MIN_DP = "CEUTrio-snps_vs_CEUTrio-snps_GtConcordanceDiff_MinDp"; private static final File INTERVALS_FILE = new File(TEST_DATA_PATH, "IntervalListChr1Small.interval_list"); private static final String TRUTH_SAMPLE_NAME = "Foo"; private static final String CALL_SAMPLE_NAME = "Foo"; // A [ref] / T at 10 private static final String snpLoc = "chr1"; private static final int snpLocStart = 10; private static final int snpLocStop = 10; private static final Allele Aref = Allele.create("A", true); private static final Allele C = Allele.create("C"); private static final Allele G = Allele.create("G"); private static final Allele T = Allele.create("T"); private static final Allele AA = Allele.create("AA"); private static final Allele AAA = Allele.create("AAA"); private static final Allele AAAA = Allele.create("AAAA"); private static final Allele AAAAA = Allele.create("AAAAA"); @AfterClass public void teardown() { IOUtil.deleteDirectoryTree(OUTPUT_DATA_PATH); } @DataProvider(name = "genotypeConcordanceTestFileData") public Object[][] getGenotypeConcordanceTestFileData() { return new Object[][]{ {CEU_TRIOS_SNPS_VCF, "NA12878", CEU_TRIOS_SNPS_VCF, "NA12878", null, null, false, CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_GC}, {CEU_TRIOS_INDELS_VCF, "NA12878", CEU_TRIOS_INDELS_VCF, "NA12878", null, null, false, CEU_TRIOS_INDELS_VS_CEU_TRIOS_INDELS_GC}, {CEU_TRIOS_SNPS_VCF, "NA12878", CEU_TRIOS_SNPS_FIRST_LINE_DIFF_VCF, "NA12878", null, null, false, CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_FIRST_LINE_DIFF_GC}, {CEU_TRIOS_SNPS_VCF, "NA12878", CEU_TRIOS_SNPS_LAST_LINE_DIFF_VCF, "NA12878", null, null, false, CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_LAST_LINE_DIFF_GC}, {CEU_TRIOS_SNPS_VCF, "NA12878", CEU_TRIOS_SNPS_DEL_LINE_VCF, "NA12878", null, null, false, CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_DEL_LINE_GC}, {CEU_TRIOS_SNPS_VCF, "NA12878", CEU_TRIOS_SNPS_VCF, "NA12878", null, null, true, CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_GC_ALL_ROWS}, {CEU_TRIOS_SNPS_VCF, "NA12878", CEU_TRIOS_SNPS_VCF, "NA12891", 40, null, false, CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_GC_MIN_GQ}, {CEU_TRIOS_SNPS_VCF, "NA12878", CEU_TRIOS_SNPS_VCF, "NA12891", null, 40, false, CEU_TRIOS_SNPS_VS_CEU_TRIOS_SNPS_GC_MIN_DP} }; } @Test(dataProvider = "genotypeConcordanceTestFileData") public void testGenotypeConcordance(final File vcf1, final String sample1, final File vcf2, final String sample2, final Integer minGq, final Integer minDp, final boolean outputAllRows, final String expectedOutputFileBaseName) throws Exception { final File outputBaseFileName = new File(OUTPUT_DATA_PATH, "actualGtConc"); final File outputSummaryFile = new File(outputBaseFileName.getAbsolutePath() + GenotypeConcordance.SUMMARY_METRICS_FILE_EXTENSION); final File outputDetailsFile = new File(outputBaseFileName.getAbsolutePath() + GenotypeConcordance.DETAILED_METRICS_FILE_EXTENSION); final File outputContingencyFile = new File(outputBaseFileName.getAbsolutePath() + GenotypeConcordance.CONTINGENCY_METRICS_FILE_EXTENSION); outputSummaryFile.deleteOnExit(); outputDetailsFile.deleteOnExit(); outputContingencyFile.deleteOnExit(); final GenotypeConcordance genotypeConcordance = new GenotypeConcordance(); genotypeConcordance.TRUTH_VCF = vcf1; genotypeConcordance.TRUTH_SAMPLE = sample1; genotypeConcordance.CALL_VCF = vcf2; genotypeConcordance.CALL_SAMPLE = sample2; if (minGq != null) genotypeConcordance.MIN_GQ = minGq; if (minDp != null) genotypeConcordance.MIN_DP = minDp; genotypeConcordance.OUTPUT_ALL_ROWS = outputAllRows; genotypeConcordance.OUTPUT = outputBaseFileName; Assert.assertEquals(genotypeConcordance.instanceMain(new String[0]), 0); assertMetricsFileEqual(outputSummaryFile, new File(TEST_DATA_PATH, expectedOutputFileBaseName + GenotypeConcordance.SUMMARY_METRICS_FILE_EXTENSION)); assertMetricsFileEqual(outputDetailsFile, new File(TEST_DATA_PATH, expectedOutputFileBaseName + GenotypeConcordance.DETAILED_METRICS_FILE_EXTENSION)); assertMetricsFileEqual(outputContingencyFile, new File(TEST_DATA_PATH, expectedOutputFileBaseName + GenotypeConcordance.CONTINGENCY_METRICS_FILE_EXTENSION)); } private void assertMetricsFileEqual(final File actualMetricsFile, final File expectedMetricsFile) throws FileNotFoundException { // Actual metrics file final MetricsFile<GenotypeConcordanceSummaryMetrics, Comparable<?>> actual = new MetricsFile<GenotypeConcordanceSummaryMetrics, Comparable<?>>(); actual.read(new FileReader(actualMetricsFile)); // Expected metrics file final MetricsFile<GenotypeConcordanceSummaryMetrics, Comparable<?>> expected = new MetricsFile<GenotypeConcordanceSummaryMetrics, Comparable<?>>(); expected.read(new FileReader(expectedMetricsFile)); // Note - cannot use .equals as it calls .areHeadersEqual and they are not since the timestamp (at a minimum is different) Assert.assertTrue(expected.areMetricsEqual(actual)); Assert.assertTrue(expected.areHistogramsEqual(actual)); } @Test public void testGenotypeConcordanceDetails() throws Exception { final File outputBaseFileName = new File(OUTPUT_DATA_PATH, "actualGtConc"); final File outputSummaryFile = new File(outputBaseFileName.getAbsolutePath() + GenotypeConcordance.SUMMARY_METRICS_FILE_EXTENSION); final File outputDetailsFile = new File(outputBaseFileName.getAbsolutePath() + GenotypeConcordance.DETAILED_METRICS_FILE_EXTENSION); outputSummaryFile.deleteOnExit(); outputDetailsFile.deleteOnExit(); final GenotypeConcordance genotypeConcordance = new GenotypeConcordance(); genotypeConcordance.TRUTH_VCF = CEU_TRIOS_SNPS_VCF; genotypeConcordance.TRUTH_SAMPLE = "NA12878"; genotypeConcordance.CALL_VCF = CEU_TRIOS_SNPS_VCF; genotypeConcordance.CALL_SAMPLE = "NA12878"; genotypeConcordance.OUTPUT = outputBaseFileName; Assert.assertEquals(genotypeConcordance.instanceMain(new String[0]), 0); final Map<TruthAndCallStates, Integer> nonZeroCounts = new HashMap<TruthAndCallStates, Integer>(); nonZeroCounts.put(new TruthAndCallStates(TruthState.HET_REF_VAR1, CallState.HET_REF_VAR1), 104); nonZeroCounts.put(new TruthAndCallStates(TruthState.HOM_VAR1, CallState.HOM_VAR1), 59); nonZeroCounts.put(new TruthAndCallStates(TruthState.VC_FILTERED, CallState.VC_FILTERED), 40); GenotypeConcordanceCounts concordanceCounts = genotypeConcordance.getSnpCounter(); assertNonZeroCountsAgree(concordanceCounts, nonZeroCounts); final FormatUtil fmt = new FormatUtil(); final GenotypeConcordanceScheme scheme = new GenotypeConcordanceScheme(); concordanceCounts.validateCountsAgainstScheme(scheme); Assert.assertEquals(fmt.format(concordanceCounts.getSensitivity(scheme, GenotypeConcordanceCounts.HET_TRUTH_STATES)), "1"); Assert.assertEquals(fmt.format(concordanceCounts.Ppv(scheme, GenotypeConcordanceCounts.HET_CALL_STATES)), "1"); Assert.assertEquals(fmt.format(concordanceCounts.getSensitivity(scheme, GenotypeConcordanceCounts.HOM_VAR_TRUTH_STATES)), "1"); Assert.assertEquals(fmt.format(concordanceCounts.Ppv(scheme, GenotypeConcordanceCounts.HOM_VAR_CALL_STATES)), "1"); Assert.assertEquals(fmt.format(concordanceCounts.getSensitivity(scheme, GenotypeConcordanceCounts.VAR_TRUTH_STATES)), "1"); Assert.assertEquals(fmt.format(concordanceCounts.Ppv(scheme, GenotypeConcordanceCounts.VAR_CALL_STATES)), "1"); // Now run it again with different samples genotypeConcordance.TRUTH_VCF = CEU_TRIOS_SNPS_VCF; genotypeConcordance.TRUTH_SAMPLE = "NA12878"; genotypeConcordance.CALL_VCF = CEU_TRIOS_SNPS_VCF; genotypeConcordance.CALL_SAMPLE = "NA12891"; Assert.assertEquals(genotypeConcordance.instanceMain(new String[0]), 0); nonZeroCounts.clear(); nonZeroCounts.put(new TruthAndCallStates(TruthState.HOM_REF, CallState.HET_REF_VAR1), 31); nonZeroCounts.put(new TruthAndCallStates(TruthState.HET_REF_VAR1, CallState.HOM_REF), 30); nonZeroCounts.put(new TruthAndCallStates(TruthState.HET_REF_VAR1, CallState.HET_REF_VAR1), 50); nonZeroCounts.put(new TruthAndCallStates(TruthState.HET_REF_VAR1, CallState.HOM_VAR1), 24); nonZeroCounts.put(new TruthAndCallStates(TruthState.HOM_VAR1, CallState.HET_REF_VAR1), 18); nonZeroCounts.put(new TruthAndCallStates(TruthState.HOM_VAR1, CallState.HOM_VAR1), 41); nonZeroCounts.put(new TruthAndCallStates(TruthState.VC_FILTERED, CallState.VC_FILTERED), 49); concordanceCounts = genotypeConcordance.getSnpCounter(); assertNonZeroCountsAgree(concordanceCounts, nonZeroCounts); Assert.assertEquals(fmt.format(concordanceCounts.getSensitivity(scheme, GenotypeConcordanceCounts.HET_TRUTH_STATES)), "0.711538"); Assert.assertEquals(fmt.format(concordanceCounts.Ppv(scheme, GenotypeConcordanceCounts.HET_CALL_STATES)), "0.686869"); Assert.assertEquals(fmt.format(concordanceCounts.getSensitivity(scheme, GenotypeConcordanceCounts.HOM_VAR_TRUTH_STATES)), "0.766234"); Assert.assertEquals(fmt.format(concordanceCounts.Ppv(scheme, GenotypeConcordanceCounts.HOM_VAR_CALL_STATES)), "0.730337"); Assert.assertEquals(fmt.format(concordanceCounts.getSensitivity(scheme, GenotypeConcordanceCounts.VAR_TRUTH_STATES)), "0.734807"); Assert.assertEquals(fmt.format(concordanceCounts.Ppv(scheme, GenotypeConcordanceCounts.VAR_CALL_STATES)), "0.707447"); } private void assertNonZeroCountsAgree(final GenotypeConcordanceCounts counter, final Map<TruthAndCallStates, Integer> expectedCountMap) { for (final TruthState truthState : TruthState.values()) { for (final CallState callState : CallState.values()) { Integer expectedCount = expectedCountMap.get(new TruthAndCallStates(truthState, callState)); if (expectedCount == null) expectedCount = 0; Assert.assertEquals(counter.getCount(truthState, callState), expectedCount.intValue()); } } } @Test public void testGenotypeConcordanceDetailsWithIntervals() throws Exception { final File outputBaseFileName = new File(OUTPUT_DATA_PATH, "actualGtConc"); final File outputSummaryFile = new File(outputBaseFileName.getAbsolutePath() + GenotypeConcordance.SUMMARY_METRICS_FILE_EXTENSION); final File outputDetailsFile = new File(outputBaseFileName.getAbsolutePath() + GenotypeConcordance.DETAILED_METRICS_FILE_EXTENSION); outputSummaryFile.deleteOnExit(); outputDetailsFile.deleteOnExit(); final GenotypeConcordance genotypeConcordance = new GenotypeConcordance(); genotypeConcordance.TRUTH_VCF = CEU_TRIOS_SNPS_VCF; genotypeConcordance.TRUTH_SAMPLE = "NA12878"; genotypeConcordance.CALL_VCF = CEU_TRIOS_SNPS_VCF; genotypeConcordance.CALL_SAMPLE = "NA12878"; genotypeConcordance.INTERVALS = Collections.singletonList(INTERVALS_FILE); genotypeConcordance.OUTPUT = outputBaseFileName; Assert.assertEquals(genotypeConcordance.instanceMain(new String[0]), 0); final Map<TruthAndCallStates, Integer> nonZeroCounts = new HashMap<TruthAndCallStates, Integer>(); nonZeroCounts.put(new TruthAndCallStates(TruthState.HET_REF_VAR1, CallState.HET_REF_VAR1), 1); nonZeroCounts.put(new TruthAndCallStates(TruthState.VC_FILTERED, CallState.VC_FILTERED), 2); GenotypeConcordanceCounts concordanceCounts = genotypeConcordance.getSnpCounter(); assertNonZeroCountsAgree(concordanceCounts, nonZeroCounts); final FormatUtil fmt = new FormatUtil(); final GenotypeConcordanceScheme scheme = new GenotypeConcordanceScheme(); concordanceCounts.validateCountsAgainstScheme(scheme); Assert.assertEquals(fmt.format(concordanceCounts.getSensitivity(scheme, GenotypeConcordanceCounts.HET_TRUTH_STATES)), "1"); Assert.assertEquals(fmt.format(concordanceCounts.Ppv(scheme, GenotypeConcordanceCounts.HET_CALL_STATES)), "1"); Assert.assertEquals(fmt.format(concordanceCounts.getSensitivity(scheme, GenotypeConcordanceCounts.HOM_VAR_TRUTH_STATES)), "?"); Assert.assertEquals(fmt.format(concordanceCounts.Ppv(scheme, GenotypeConcordanceCounts.HOM_VAR_CALL_STATES)), "?"); Assert.assertEquals(fmt.format(concordanceCounts.getSensitivity(scheme, GenotypeConcordanceCounts.VAR_TRUTH_STATES)), "1"); Assert.assertEquals(fmt.format(concordanceCounts.Ppv(scheme, GenotypeConcordanceCounts.VAR_CALL_STATES)), "1"); // Now run it again with different samples genotypeConcordance.TRUTH_VCF = CEU_TRIOS_SNPS_VCF; genotypeConcordance.TRUTH_SAMPLE = "NA12878"; genotypeConcordance.CALL_VCF = CEU_TRIOS_SNPS_VCF; genotypeConcordance.CALL_SAMPLE = "NA12891"; genotypeConcordance.INTERVALS = Collections.singletonList(INTERVALS_FILE); Assert.assertEquals(genotypeConcordance.instanceMain(new String[0]), 0); nonZeroCounts.clear(); nonZeroCounts.put(new TruthAndCallStates(TruthState.HOM_REF, CallState.HET_REF_VAR1), 1); nonZeroCounts.put(new TruthAndCallStates(TruthState.HET_REF_VAR1, CallState.HET_REF_VAR1), 1); nonZeroCounts.put(new TruthAndCallStates(TruthState.VC_FILTERED, CallState.VC_FILTERED), 2); concordanceCounts = genotypeConcordance.getSnpCounter(); assertNonZeroCountsAgree(concordanceCounts, nonZeroCounts); Assert.assertEquals(fmt.format(concordanceCounts.getSensitivity(scheme, GenotypeConcordanceCounts.HET_TRUTH_STATES)), "1"); Assert.assertEquals(fmt.format(concordanceCounts.Ppv(scheme, GenotypeConcordanceCounts.HET_CALL_STATES)), "0.5"); Assert.assertEquals(fmt.format(concordanceCounts.getSensitivity(scheme, GenotypeConcordanceCounts.HOM_VAR_TRUTH_STATES)), "?"); Assert.assertEquals(fmt.format(concordanceCounts.Ppv(scheme, GenotypeConcordanceCounts.HOM_VAR_CALL_STATES)), "?"); Assert.assertEquals(fmt.format(concordanceCounts.getSensitivity(scheme, GenotypeConcordanceCounts.VAR_TRUTH_STATES)), "1"); Assert.assertEquals(fmt.format(concordanceCounts.Ppv(scheme, GenotypeConcordanceCounts.VAR_CALL_STATES)), "0.5"); } @DataProvider(name = "genotypeConcordanceDetermineStateDataProvider") public Object[][] genotypeConcordanceDetermineStateDataProvider() { final Object[][] originalUnitTestData = new Object[][]{ {Aref, Aref, TruthState.HOM_REF, Aref, Aref, CallState.HOM_REF}, {Aref, Aref, TruthState.HOM_REF, Aref, C, CallState.HET_REF_VAR1}, {Aref, Aref, TruthState.HOM_REF, Aref, G, CallState.HET_REF_VAR1}, {Aref, Aref, TruthState.HOM_REF, Aref, T, CallState.HET_REF_VAR1}, {Aref, Aref, TruthState.HOM_REF, C, G, CallState.HET_VAR1_VAR2}, {Aref, Aref, TruthState.HOM_REF, C, T, CallState.HET_VAR1_VAR2}, {Aref, Aref, TruthState.HOM_REF, G, T, CallState.HET_VAR1_VAR2}, {Aref, Aref, TruthState.HOM_REF, C, C, CallState.HOM_VAR1}, {Aref, Aref, TruthState.HOM_REF, G, G, CallState.HOM_VAR1}, {Aref, Aref, TruthState.HOM_REF, T, T, CallState.HOM_VAR1}, //--- {Aref, C, TruthState.HET_REF_VAR1, Aref, Aref, CallState.HOM_REF}, {Aref, G, TruthState.HET_REF_VAR1, Aref, Aref, CallState.HOM_REF}, {Aref, T, TruthState.HET_REF_VAR1, Aref, Aref, CallState.HOM_REF}, {Aref, C, TruthState.HET_REF_VAR1, Aref, C, CallState.HET_REF_VAR1}, {Aref, C, TruthState.HET_REF_VAR1, Aref, G, CallState.HET_REF_VAR2}, {Aref, C, TruthState.HET_REF_VAR1, Aref, T, CallState.HET_REF_VAR2}, {Aref, G, TruthState.HET_REF_VAR1, Aref, C, CallState.HET_REF_VAR2}, {Aref, G, TruthState.HET_REF_VAR1, Aref, G, CallState.HET_REF_VAR1}, {Aref, G, TruthState.HET_REF_VAR1, Aref, T, CallState.HET_REF_VAR2}, {Aref, T, TruthState.HET_REF_VAR1, Aref, C, CallState.HET_REF_VAR2}, {Aref, T, TruthState.HET_REF_VAR1, Aref, G, CallState.HET_REF_VAR2}, {Aref, T, TruthState.HET_REF_VAR1, Aref, T, CallState.HET_REF_VAR1}, {Aref, C, TruthState.HET_REF_VAR1, C, G, CallState.HET_VAR1_VAR2}, {Aref, C, TruthState.HET_REF_VAR1, C, T, CallState.HET_VAR1_VAR2}, {Aref, C, TruthState.HET_REF_VAR1, G, T, CallState.HET_VAR3_VAR4}, // Why isn't this called HET_VAR2_VAR3??? {Aref, G, TruthState.HET_REF_VAR1, C, G, CallState.HET_VAR1_VAR2}, {Aref, G, TruthState.HET_REF_VAR1, C, T, CallState.HET_VAR3_VAR4}, {Aref, G, TruthState.HET_REF_VAR1, G, T, CallState.HET_VAR1_VAR2}, {Aref, T, TruthState.HET_REF_VAR1, C, G, CallState.HET_VAR3_VAR4}, {Aref, T, TruthState.HET_REF_VAR1, C, T, CallState.HET_VAR1_VAR2}, {Aref, T, TruthState.HET_REF_VAR1, G, T, CallState.HET_VAR1_VAR2}, {Aref, C, TruthState.HET_REF_VAR1, C, C, CallState.HOM_VAR1}, {Aref, C, TruthState.HET_REF_VAR1, G, G, CallState.HOM_VAR2}, {Aref, C, TruthState.HET_REF_VAR1, T, T, CallState.HOM_VAR2}, {Aref, G, TruthState.HET_REF_VAR1, C, C, CallState.HOM_VAR2}, {Aref, G, TruthState.HET_REF_VAR1, G, G, CallState.HOM_VAR1}, {Aref, G, TruthState.HET_REF_VAR1, T, T, CallState.HOM_VAR2}, {Aref, T, TruthState.HET_REF_VAR1, C, C, CallState.HOM_VAR2}, {Aref, T, TruthState.HET_REF_VAR1, G, G, CallState.HOM_VAR2}, {Aref, T, TruthState.HET_REF_VAR1, T, T, CallState.HOM_VAR1}, //--- {C, G, TruthState.HET_VAR1_VAR2, Aref, Aref, CallState.HOM_REF}, {C, T, TruthState.HET_VAR1_VAR2, Aref, Aref, CallState.HOM_REF}, {G, T, TruthState.HET_VAR1_VAR2, Aref, Aref, CallState.HOM_REF}, {C, G, TruthState.HET_VAR1_VAR2, Aref, C, CallState.HET_REF_VAR1}, {C, G, TruthState.HET_VAR1_VAR2, Aref, G, CallState.HET_REF_VAR1}, {C, G, TruthState.HET_VAR1_VAR2, Aref, T, CallState.HET_REF_VAR3}, {C, T, TruthState.HET_VAR1_VAR2, Aref, C, CallState.HET_REF_VAR1}, {C, T, TruthState.HET_VAR1_VAR2, Aref, G, CallState.HET_REF_VAR3}, {C, T, TruthState.HET_VAR1_VAR2, Aref, T, CallState.HET_REF_VAR1}, {G, T, TruthState.HET_VAR1_VAR2, Aref, C, CallState.HET_REF_VAR3}, {G, T, TruthState.HET_VAR1_VAR2, Aref, G, CallState.HET_REF_VAR1}, {G, T, TruthState.HET_VAR1_VAR2, Aref, T, CallState.HET_REF_VAR1}, {C, G, TruthState.HET_VAR1_VAR2, C, C, CallState.HOM_VAR1}, {C, G, TruthState.HET_VAR1_VAR2, G, G, CallState.HOM_VAR1}, {C, G, TruthState.HET_VAR1_VAR2, T, T, CallState.HOM_VAR3}, {C, T, TruthState.HET_VAR1_VAR2, C, C, CallState.HOM_VAR1}, {C, T, TruthState.HET_VAR1_VAR2, G, G, CallState.HOM_VAR3}, {C, T, TruthState.HET_VAR1_VAR2, T, T, CallState.HOM_VAR1}, {G, T, TruthState.HET_VAR1_VAR2, C, C, CallState.HOM_VAR3}, {G, T, TruthState.HET_VAR1_VAR2, G, G, CallState.HOM_VAR1}, {G, T, TruthState.HET_VAR1_VAR2, T, T, CallState.HOM_VAR1}, {C, G, TruthState.HET_VAR1_VAR2, C, G, CallState.HET_VAR1_VAR2}, {C, G, TruthState.HET_VAR1_VAR2, C, T, CallState.HET_VAR1_VAR3}, {C, G, TruthState.HET_VAR1_VAR2, G, T, CallState.HET_VAR1_VAR3}, {C, T, TruthState.HET_VAR1_VAR2, C, G, CallState.HET_VAR1_VAR3}, {C, T, TruthState.HET_VAR1_VAR2, C, T, CallState.HET_VAR1_VAR2}, {C, T, TruthState.HET_VAR1_VAR2, G, T, CallState.HET_VAR1_VAR3}, {G, T, TruthState.HET_VAR1_VAR2, C, G, CallState.HET_VAR1_VAR3}, {G, T, TruthState.HET_VAR1_VAR2, C, T, CallState.HET_VAR1_VAR3}, {G, T, TruthState.HET_VAR1_VAR2, G, T, CallState.HET_VAR1_VAR2}, //--- {C, C, TruthState.HOM_VAR1, Aref, Aref, CallState.HOM_REF}, {G, G, TruthState.HOM_VAR1, Aref, Aref, CallState.HOM_REF}, {T, T, TruthState.HOM_VAR1, Aref, Aref, CallState.HOM_REF}, {C, C, TruthState.HOM_VAR1, Aref, C, CallState.HET_REF_VAR1}, {C, C, TruthState.HOM_VAR1, Aref, G, CallState.HET_REF_VAR2}, {C, C, TruthState.HOM_VAR1, Aref, T, CallState.HET_REF_VAR2}, {G, G, TruthState.HOM_VAR1, Aref, C, CallState.HET_REF_VAR2}, {G, G, TruthState.HOM_VAR1, Aref, G, CallState.HET_REF_VAR1}, {G, G, TruthState.HOM_VAR1, Aref, T, CallState.HET_REF_VAR2}, {T, T, TruthState.HOM_VAR1, Aref, C, CallState.HET_REF_VAR2}, {T, T, TruthState.HOM_VAR1, Aref, G, CallState.HET_REF_VAR2}, {T, T, TruthState.HOM_VAR1, Aref, T, CallState.HET_REF_VAR1}, {C, C, TruthState.HOM_VAR1, C, C, CallState.HOM_VAR1}, {C, C, TruthState.HOM_VAR1, G, G, CallState.HOM_VAR2}, {C, C, TruthState.HOM_VAR1, T, T, CallState.HOM_VAR2}, {G, G, TruthState.HOM_VAR1, C, C, CallState.HOM_VAR2}, {G, G, TruthState.HOM_VAR1, G, G, CallState.HOM_VAR1}, {G, G, TruthState.HOM_VAR1, T, T, CallState.HOM_VAR2}, {T, T, TruthState.HOM_VAR1, C, C, CallState.HOM_VAR2}, {T, T, TruthState.HOM_VAR1, G, G, CallState.HOM_VAR2}, {T, T, TruthState.HOM_VAR1, T, T, CallState.HOM_VAR1}, {C, C, TruthState.HOM_VAR1, C, G, CallState.HET_VAR1_VAR2}, {C, C, TruthState.HOM_VAR1, C, T, CallState.HET_VAR1_VAR2}, {C, C, TruthState.HOM_VAR1, G, T, CallState.HET_VAR3_VAR4}, {G, G, TruthState.HOM_VAR1, C, G, CallState.HET_VAR1_VAR2}, {G, G, TruthState.HOM_VAR1, C, T, CallState.HET_VAR3_VAR4}, {G, G, TruthState.HOM_VAR1, G, T, CallState.HET_VAR1_VAR2}, {T, T, TruthState.HOM_VAR1, C, G, CallState.HET_VAR3_VAR4}, {T, T, TruthState.HOM_VAR1, C, T, CallState.HET_VAR1_VAR2}, {T, T, TruthState.HOM_VAR1, G, T, CallState.HET_VAR1_VAR2}, // Some Indel Cases {AA, AA, TruthState.HOM_VAR1, AAAA, AAAAA, CallState.HET_VAR3_VAR4}, {AA, AAA, TruthState.HET_VAR1_VAR2, AAAA, AAAAA, CallState.HET_VAR3_VAR4}, // Mixed Cases {C, AA, TruthState.IS_MIXED, AAAA, AAAAA, CallState.HET_VAR1_VAR2}, {AA, C, TruthState.IS_MIXED, AAAA, AAAAA, CallState.HET_VAR1_VAR2}, {AA, AA, TruthState.HOM_VAR1, C, AAAAA, CallState.IS_MIXED}, {AA, AAA, TruthState.HET_VAR1_VAR2, AAAA, C, CallState.IS_MIXED}, // No Call cases {Allele.NO_CALL, Aref, TruthState.NO_CALL, Aref, Aref, CallState.HOM_REF}, {Aref, Allele.NO_CALL, TruthState.NO_CALL, Aref, Aref, CallState.HOM_REF}, {Allele.NO_CALL, Allele.NO_CALL, TruthState.NO_CALL, Aref, Aref, CallState.HOM_REF}, {Aref, Aref, TruthState.HOM_REF, Allele.NO_CALL, Aref, CallState.NO_CALL}, {Aref, Aref, TruthState.HOM_REF, Aref, Allele.NO_CALL, CallState.NO_CALL}, {Aref, Aref, TruthState.HOM_REF, Allele.NO_CALL, Allele.NO_CALL, CallState.NO_CALL} }; // Rebuild a new set of unit test data with all permutations of alleles. final List<Object[]> allPermutationUnitTestDataList = new ArrayList<Object[]>(); for (final Object[] unitTestData : originalUnitTestData) { allPermutationUnitTestDataList.add(unitTestData); final Allele truthAllele1 = (Allele) unitTestData[0]; final Allele truthAllele2 = (Allele) unitTestData[1]; final TruthState expectedTruthState = (TruthState) unitTestData[2]; final Allele callAllele1 = (Allele) unitTestData[3]; final Allele callAllele2 = (Allele) unitTestData[4]; final CallState expectedCallState = (CallState) unitTestData[5]; if (!callAllele1.equals(callAllele2)) { allPermutationUnitTestDataList.add(new Object[]{truthAllele1, truthAllele2, expectedTruthState, callAllele2, callAllele1, expectedCallState}); } if (!truthAllele1.equals(truthAllele2)) { allPermutationUnitTestDataList.add(new Object[]{truthAllele2, truthAllele1, expectedTruthState, callAllele1, callAllele2, expectedCallState}); if (!callAllele1.equals(callAllele2)) { allPermutationUnitTestDataList.add(new Object[]{truthAllele2, truthAllele1, expectedTruthState, callAllele2, callAllele1, expectedCallState}); } } } Object[][] allPermutationUnitTestData = new Object[allPermutationUnitTestDataList.size()][]; allPermutationUnitTestData = allPermutationUnitTestDataList.toArray(allPermutationUnitTestData); return allPermutationUnitTestData; } @Test(dataProvider = "genotypeConcordanceDetermineStateDataProvider") public void testGenotypeConcordanceDetermineState(final Allele truthAllele1, final Allele truthAllele2, final TruthState expectedTruthState, final Allele callAllele1, final Allele callAllele2, final CallState expectedCallState) throws Exception { final List<Allele> truthAlleles = makeUniqueListOfAlleles(truthAllele1, truthAllele2); final Genotype truthGt = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(truthAllele1, truthAllele2)); final VariantContext truthVariantContext = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, truthAlleles).genotypes(truthGt).make(); final List<Allele> callAlleles = makeUniqueListOfAlleles(callAllele1, callAllele2); final Genotype callGt = GenotypeBuilder.create(CALL_SAMPLE_NAME, Arrays.asList(callAllele1, callAllele2)); final VariantContext callVariantContext = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, callAlleles).genotypes(callGt).make(); testGenotypeConcordanceDetermineState(truthVariantContext, expectedTruthState, callVariantContext, expectedCallState, 0, 0); } @Test public void testGenotypeConcordanceDetermineStateNull() throws Exception { final List<Allele> alleles = makeUniqueListOfAlleles(Aref, C); final Genotype gt1 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)); final VariantContext vc1 = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(gt1).make(); testGenotypeConcordanceDetermineState(null, TruthState.MISSING, null, CallState.MISSING, 0, 0); testGenotypeConcordanceDetermineState(vc1, TruthState.HET_REF_VAR1, null, CallState.MISSING, 0, 0); testGenotypeConcordanceDetermineState(null, TruthState.MISSING, vc1, CallState.HET_REF_VAR1, 0, 0); } @Test public void testGenotypeConcordanceDetermineStateFilter() throws Exception { final Set<String> filters = new HashSet<String>(Arrays.asList("BAD!")); // Filtering on the variant context final List<Allele> alleles1 = makeUniqueListOfAlleles(Aref, C); final Genotype gt1 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)); final VariantContext vcFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles1).genotypes(gt1).filters(filters).make(); final List<Allele> alleles2 = makeUniqueListOfAlleles(Aref, T); final Genotype gt2 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, T)); final VariantContext vcNotFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles2).genotypes(gt2).make(); testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0); testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcFiltered, CallState.VC_FILTERED, 0, 0); testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcFiltered, CallState.VC_FILTERED, 0, 0); // Filtering on the genotype final List<String> gtFilters = new ArrayList<String>(Arrays.asList("WICKED")); final List<Allele> alleles3 = makeUniqueListOfAlleles(Aref, C); final Genotype gt3 = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).filters(gtFilters).make(); final VariantContext vcGtFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles3).genotypes(gt3).make(); testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0); testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcGtFiltered, CallState.GT_FILTERED, 0, 0); testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcGtFiltered, CallState.GT_FILTERED, 0, 0); } @Test public void testGenotypeConcordanceDetermineStateDp() throws Exception { final List<Allele> allelesNormal = makeUniqueListOfAlleles(Aref, C); final Genotype gtNormal = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)); final VariantContext vcNormal = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesNormal).genotypes(gtNormal).make(); final List<Allele> allelesLowDp = makeUniqueListOfAlleles(Aref, C); final Genotype gtLowDp = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).DP(4).make(); final VariantContext vcLowDp = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesLowDp).genotypes(gtLowDp).make(); testGenotypeConcordanceDetermineState(vcLowDp, TruthState.LOW_DP, vcNormal, CallState.HET_REF_VAR1, 0, 20); testGenotypeConcordanceDetermineState(vcLowDp, TruthState.HET_REF_VAR1, vcLowDp, CallState.HET_REF_VAR1, 0, 2); testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowDp, CallState.LOW_DP, 0, 20); testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowDp, CallState.HET_REF_VAR1, 0, 2); testGenotypeConcordanceDetermineState(vcLowDp, TruthState.LOW_DP, vcLowDp, CallState.LOW_DP, 0, 20); testGenotypeConcordanceDetermineState(vcLowDp, TruthState.HET_REF_VAR1, vcLowDp, CallState.HET_REF_VAR1, 0, 2); } @Test public void testGenotypeConcordanceDetermineStateGq() throws Exception { final List<Allele> allelesNormal = makeUniqueListOfAlleles(Aref, C); final Genotype gtNormal = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)); final VariantContext vcNormal = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesNormal).genotypes(gtNormal).make(); final List<Allele> allelesLowGq = makeUniqueListOfAlleles(Aref, C); final Genotype gtLowGq = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).GQ(4).make(); final VariantContext vcLowGq = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesLowGq).genotypes(gtLowGq).make(); testGenotypeConcordanceDetermineState(vcLowGq, TruthState.LOW_GQ, vcNormal, CallState.HET_REF_VAR1, 20, 0); testGenotypeConcordanceDetermineState(vcLowGq, TruthState.HET_REF_VAR1, vcLowGq, CallState.HET_REF_VAR1, 2, 0); testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowGq, CallState.LOW_GQ, 20, 0); testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowGq, CallState.HET_REF_VAR1, 2, 0); testGenotypeConcordanceDetermineState(vcLowGq, TruthState.LOW_GQ, vcLowGq, CallState.LOW_GQ, 20, 0); testGenotypeConcordanceDetermineState(vcLowGq, TruthState.HET_REF_VAR1, vcLowGq, CallState.HET_REF_VAR1, 2, 0); } /** * Test method to determine that the expected truth and call states are returned for a pair of truth and call variant contexts. * @param truthVariantContext * @param expectedTruthState * @param callVariantContext * @param expectedCallState * @param minGq * @param minDp */ private void testGenotypeConcordanceDetermineState(final VariantContext truthVariantContext, final TruthState expectedTruthState, final VariantContext callVariantContext, final CallState expectedCallState, final int minGq, final int minDp) { final GenotypeConcordance genotypeConcordance = new GenotypeConcordance(); genotypeConcordance.TRUTH_SAMPLE = TRUTH_SAMPLE_NAME; genotypeConcordance.CALL_SAMPLE = CALL_SAMPLE_NAME; final TruthAndCallStates truthAndCallStates = genotypeConcordance.determineState(truthVariantContext, TRUTH_SAMPLE_NAME, callVariantContext, CALL_SAMPLE_NAME, minGq, minDp); Assert.assertEquals(truthAndCallStates.truthState, expectedTruthState); Assert.assertEquals(truthAndCallStates.callState, expectedCallState); } /** * Simple method to return a list of unique alleles. */ private List<Allele> makeUniqueListOfAlleles(final Allele... alleles) { final Set<Allele> uniqueAlleles = new HashSet<Allele>(); for (final Allele allele : alleles) { if (!allele.equals(Allele.NO_CALL)) { uniqueAlleles.add(allele); } } if (!uniqueAlleles.contains(Aref)) uniqueAlleles.add(Aref); return new ArrayList<Allele>(uniqueAlleles); } }