package picard.vcf.MendelianViolations; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.TestUtil; import htsjdk.tribble.index.IndexFactory; import htsjdk.tribble.readers.AsciiLineReader; import htsjdk.tribble.readers.LineIteratorImpl; import htsjdk.variant.vcf.VCFCodec; import org.testng.Assert; import org.testng.annotations.Test; import picard.pedigree.Sex; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.regex.Pattern; import static picard.vcf.MendelianViolations.MendelianViolationDetector.MendelianViolation.*; /** * Created by farjoun on 5/13/16. */ public class FindMendelianViolationsTest { private static final File TEST_DATA_DIR = new File("testdata/picard/vcf"); @Test public void testFindMedelianViolations() throws IOException { final File vcfFile = new File(TEST_DATA_DIR, "CEUTrio_plus_FAKE.vcf"); final File vcfIndexFile = new File(TEST_DATA_DIR, "CEUTrio_plus_FAKE.vcf.idx"); if (vcfFile.lastModified() > vcfIndexFile.lastModified()) { if (vcfIndexFile.exists()) { System.err.println("Deleting " + vcfIndexFile); vcfIndexFile.delete(); } IndexFactory.createDynamicIndex(vcfFile, new VCFCodec()).writeBasedOnFeatureFile(vcfFile); } final File pedFile = new File(TEST_DATA_DIR, "NA12878.ped"); final File directoryForViolations = TestUtil.getTempDirectory("MendelianViolations", "temp"); directoryForViolations.mkdir(); final File violationsNA12878 = new File(directoryForViolations, "NA12878.vcf"); final File violationsFAKE = new File(directoryForViolations, "FAKE.vcf"); violationsNA12878.deleteOnExit(); violationsFAKE.deleteOnExit(); directoryForViolations.deleteOnExit(); final File resultantMetrics = File.createTempFile("MendelianViolations", "file"); resultantMetrics.deleteOnExit(); final FindMendelianViolations program = new FindMendelianViolations(); program.INPUT = vcfFile; program.TRIOS = pedFile; program.OUTPUT = resultantMetrics; program.VCF_DIR = directoryForViolations; program.MIN_DP = 10; Assert.assertEquals(program.doWork(), 0); final MetricsFile<MendelianViolationMetrics, Comparable<?>> summary = new MetricsFile<>(); summary.read(new FileReader(resultantMetrics)); Assert.assertEquals(summary.getMetrics().size(), 2); MendelianViolationMetrics metrics = summary.getMetrics().get(0); final MendelianViolationMetrics mv = new MendelianViolationMetrics(); mv.FAMILY_ID = "NA12878"; mv.OFFSPRING = "NA12878"; mv.OFFSPRING_SEX = Sex.Female; mv.FATHER = "NA12891"; mv.MOTHER = "NA12892"; mv.NUM_VARIANT_SITES = grep(vcfFile, "GoodVariant") + grep(vcfFile, "GoodVariantNA12878"); mv.NUM_DIPLOID_DENOVO = grep(vcfFile, "DiploidDenovoBOTH") + grep(vcfFile, "DiploidDenovoNA12878"); mv.NUM_HOMREF_HOMVAR_HOM = grep(vcfFile, "HomrefHomvarHomBOTH") + grep(vcfFile, "HomrefHomvarHomNA12878"); mv.NUM_HOMVAR_HOMVAR_HET = grep(vcfFile, "HomvarHomvarHetBOTH") + grep(vcfFile, "HomrefHomvarHetNA12878"); mv.NUM_HOM_HET_HOM = grep(vcfFile, "HomHetHomBOTH") + grep(vcfFile, "HomHetHomNA12878"); mv.NUM_HAPLOID_DENOVO = grep(vcfFile, "HaploidDenovoBOTH") + grep(vcfFile, "HaploidDenovoNA12878"); mv.NUM_HAPLOID_OTHER = grep(vcfFile, "HaploidOtherBOTH") + grep(vcfFile, "HaploidOtherNA12878"); mv.NUM_OTHER = grep(vcfFile, "OtherBOTH") + grep(vcfFile, "OtherNA12878"); mv.calculateDerivedFields(); Assert.assertEquals(mv.TOTAL_MENDELIAN_VIOLATIONS, mv.NUM_DIPLOID_DENOVO + mv.NUM_HOMVAR_HOMVAR_HET + mv.NUM_HOMREF_HOMVAR_HOM + mv.NUM_HOM_HET_HOM + mv.NUM_HAPLOID_DENOVO + mv.NUM_HAPLOID_OTHER + mv.NUM_OTHER ); Assert.assertEquals(metrics, mv); Assert.assertEquals(grepMv(violationsNA12878, Diploid_Denovo.name()), mv.NUM_DIPLOID_DENOVO); Assert.assertEquals(grepMv(violationsNA12878, HomVar_HomVar_Het.name()), mv.NUM_HOMVAR_HOMVAR_HET); Assert.assertEquals(grepMv(violationsNA12878, HomRef_HomVar_Hom.name()), mv.NUM_HOMREF_HOMVAR_HOM); Assert.assertEquals(grepMv(violationsNA12878, Hom_Het_Hom.name()), mv.NUM_HOM_HET_HOM); Assert.assertEquals(grepMv(violationsNA12878, Haploid_Denovo.name()), mv.NUM_HAPLOID_DENOVO); Assert.assertEquals(grepMv(violationsNA12878, Haploid_Other.name()), mv.NUM_HAPLOID_OTHER); Assert.assertEquals(grepMv(violationsNA12878, Other.name()), mv.NUM_OTHER); metrics = summary.getMetrics().get(1); mv.FAMILY_ID = "FAKE"; mv.OFFSPRING = "FAKE"; mv.OFFSPRING_SEX = Sex.Male; mv.NUM_DIPLOID_DENOVO = grep(vcfFile, "DiploidDenovoBOTH") + grep(vcfFile, "DiploidDenovoFAKE"); mv.NUM_HOMREF_HOMVAR_HOM = grep(vcfFile, "HomrefHomvarHomBOTH") + grep(vcfFile, "HomrefHomvarHomFAKE"); mv.NUM_HOMVAR_HOMVAR_HET = grep(vcfFile, "HomvarHomvarHetBOTH") + grep(vcfFile, "HomvarHomvarHetFAKE"); mv.NUM_HAPLOID_DENOVO = grep(vcfFile, "HaploidDenovoBOTH") + grep(vcfFile, "HaploidDenovoFAKE"); mv.NUM_HAPLOID_OTHER = grep(vcfFile, "HaploidOtherBOTH") + grep(vcfFile, "HaploidOtherFAKE"); mv.NUM_OTHER = grep(vcfFile, "OtherBOTH") + grep(vcfFile, "OtherFAKE"); mv.calculateDerivedFields(); Assert.assertEquals(metrics, mv); Assert.assertEquals(mv.TOTAL_MENDELIAN_VIOLATIONS, mv.NUM_DIPLOID_DENOVO + mv.NUM_HOMVAR_HOMVAR_HET + mv.NUM_HOMREF_HOMVAR_HOM + mv.NUM_HOM_HET_HOM + mv.NUM_HAPLOID_DENOVO + mv.NUM_HAPLOID_OTHER + mv.NUM_OTHER ); Assert.assertEquals(grepMv(violationsFAKE, Diploid_Denovo.name()), mv.NUM_DIPLOID_DENOVO); Assert.assertEquals(grepMv(violationsFAKE, HomVar_HomVar_Het.name()), mv.NUM_HOMVAR_HOMVAR_HET); Assert.assertEquals(grepMv(violationsFAKE, HomRef_HomVar_Hom.name()), mv.NUM_HOMREF_HOMVAR_HOM); Assert.assertEquals(grepMv(violationsFAKE, Hom_Het_Hom.name()), mv.NUM_HOM_HET_HOM); Assert.assertEquals(grepMv(violationsFAKE, Haploid_Denovo.name()), mv.NUM_HAPLOID_DENOVO); Assert.assertEquals(grepMv(violationsFAKE, Haploid_Other.name()), mv.NUM_HAPLOID_OTHER); Assert.assertEquals(grepMv(violationsFAKE, Other.name()), mv.NUM_OTHER); TestUtil.recursiveDelete(directoryForViolations); } /** returns the number of lines in the file that contain a regular expression (decorated with "MV=" and * expected to be in an INFO field in a vcf) * * @param file File to examine * @param regex String containing a regular expression to look for in the file * @return the number of lines that contain regex */ private int grep(final File file, final String regex) { int results = 0; final Pattern pattern = Pattern.compile(".*"+regex+".*"); try (final LineIteratorImpl li = new LineIteratorImpl(new AsciiLineReader(IOUtil.openFileForReading(file)))) { while (li.hasNext()) { final String line = li.next(); if (pattern.matcher(line).matches()) { results++; } } } catch (final IOException e) { e.printStackTrace(); } return results; } private int grepMv(final File file, final String regex) { return grep(file, "[;\t]MV="+regex+"[;\t]"); } }