/* * The MIT License * * Copyright (c) 2016 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.vcf.MendelianViolations; import htsjdk.samtools.util.CollectionUtil; import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.ProgressLogger; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.vcf.VCFConstants; import picard.pedigree.Sex; import picard.vcf.processor.VariantProcessor; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; /** * @author mccowan */ class MendelianViolationDetector implements VariantProcessor.Accumulator<MendelianViolationDetector.Result> { private final Set<String> SKIP_CHROMS, MALE_CHROMS, FEMALE_CHROMS; private final List<MendelianViolationMetrics> trios; private final List<Interval> parIntervals; private final double MIN_HET_FRACTION; private final int MIN_GQ; private final int MIN_DP; private final ProgressLogger logger; private final MendelianViolationsByFamily familyToViolations; MendelianViolationDetector(final Set<String> skip_chroms, final Set<String> male_chroms, final Set<String> female_chroms, final double min_het_fraction, final int min_gq, final int min_dp, final List<MendelianViolationMetrics> trios, final List<Interval> parIntervals, final ProgressLogger logger) { SKIP_CHROMS = skip_chroms; MALE_CHROMS = male_chroms; FEMALE_CHROMS = female_chroms; MIN_HET_FRACTION = min_het_fraction; MIN_GQ = min_gq; MIN_DP = min_dp; this.trios = trios; this.parIntervals = parIntervals; this.logger = logger; familyToViolations = new MendelianViolationsByFamily(); } /** A little enum to describe the different type of Mendelian violations possible at a bi-allelic site. */ enum MendelianViolation { Diploid_Denovo("Parents are both Homozygous Ref and offspring is Heterozygous."), HomVar_HomVar_Het("Parents are both Homozygous Variant and offspring is Heterozygous."), HomRef_HomVar_Hom("One parent is Homozygous Ref, the other Homozygous Variant and the offspring is Homozygous."), Hom_Het_Hom("One parent is Homozygous (Ref or Var), the other is Heterozygous and the offspring is incompatibly Homozygous."), Haploid_Denovo("Offspring variant genotype is expected to be haploid and does not match an expected parental allele."), Haploid_Other("Offspring reference genotype is expected to be haploid and does not match an expected parental allele."), Other("Other unclassified violation of allele transmission."); private final String description; MendelianViolation(final String desc) { this.description = desc; } public String getDescription() { return description; } } public final static String MENDELIAN_VIOLATION_KEY = "MV"; public final static String ORIGINAL_AC = "AC_Orig"; public final static String ORIGINAL_AF = "AF_Orig"; public final static String ORIGINAL_AN = "AN_Orig"; @Override public void accumulate(final VariantContext ctx) { logger.record(ctx.getContig(), ctx.getStart()); final String variantChrom = ctx.getContig(); final int variantPos = ctx.getStart(); // Skip anything a little too funky if (ctx.isFiltered()) return; if (!ctx.isVariant()) return; if (SKIP_CHROMS.contains(variantChrom)) return; for (final MendelianViolationMetrics trio : trios) { final Genotype momGt = ctx.getGenotype(trio.MOTHER); final Genotype dadGt = ctx.getGenotype(trio.FATHER); final Genotype kidGt = ctx.getGenotype(trio.OFFSPRING); // if any genotype: // - has a non-snp allele; or // - lacks a reference allele // // then ignore this trio if (CollectionUtil.makeList(momGt, dadGt, kidGt).stream().anyMatch(gt -> gt.isHetNonRef() || Stream.concat(Stream.of(ctx.getReference()), gt.getAlleles().stream()).anyMatch(a -> a.length() != 1 || a.isSymbolic()))) { continue; } // if between the trio there are more than 2 alleles including the reference, continue if (Stream.concat( Collections.singleton(ctx.getReference()).stream(), CollectionUtil.makeList(momGt, dadGt, kidGt) .stream() .flatMap(gt -> gt.getAlleles().stream())) .collect(Collectors.toSet()).size() > 2) continue; // Test to make sure: // 1) That the site is in fact variant in the trio // 2) that the offspring doesn't have a really wacky het allele balance if (!isVariant(momGt, dadGt, kidGt)) continue; if (kidGt.isHet()) { final int[] ad = kidGt.getAD(); if (ad == null) continue; final List<Integer> adOfAlleles = kidGt.getAlleles().stream() .map(a->ad[ctx.getAlleleIndex(a)]).collect(Collectors.toList()); final double minAlleleFraction = Math.min(adOfAlleles.get(0), adOfAlleles.get(1)) / (double) (adOfAlleles.get(0) + adOfAlleles.get(1)); if (minAlleleFraction < MIN_HET_FRACTION) continue; } /////////////////////////////////////////////////////////////// // Determine whether the offspring should be haploid at this // locus and which is the parental donor of the haploid genotype /////////////////////////////////////////////////////////////// boolean haploid = false; Genotype haploidParentalGenotype = null; if (FEMALE_CHROMS.contains(variantChrom) && trio.OFFSPRING_SEX != Sex.Unknown) { if (trio.OFFSPRING_SEX == Sex.Female) { // famale haploid = false; } else if (isInPseudoAutosomalRegion(variantChrom, variantPos)) { // male but in PAR on X, so diploid haploid = false; } else { // male, out of PAR on X, haploid haploid = true; haploidParentalGenotype = momGt; } } // the PAR on the male chromosome should be masked so that reads // align to the female chromosomes instead, so there's no point // of worrying about that here. if (MALE_CHROMS.contains(variantChrom)) { if (trio.OFFSPRING_SEX == Sex.Male) { haploid = true; haploidParentalGenotype = dadGt; } else { continue; } } // We only want to look at sites where we have high enough confidence that the genotypes we are looking at are // interesting. We want to ensure that parents are always GQ>=MIN_GQ, and that the kid is either GQ>=MIN_GQ or in the // case where kid is het that the phred-scaled-likelihood of being reference is >=MIN_GQ. if (haploid && (haploidParentalGenotype.isNoCall() || haploidParentalGenotype.getGQ() < MIN_GQ)) continue; if (!haploid && (momGt.isNoCall() || momGt.getGQ() < MIN_GQ || dadGt.isNoCall() || dadGt.getGQ() < MIN_GQ)) continue; if (kidGt.isNoCall()) continue; if (momGt.isHomRef() && dadGt.isHomRef() && !kidGt.isHomRef()) { if (kidGt.getPL()[0] < MIN_GQ) continue; } else if (kidGt.getGQ() < MIN_GQ) continue; // Also filter on the DP for each of the samples - it's possible to miss hets when DP is too low if (haploid && (kidGt.getDP() < MIN_DP || haploidParentalGenotype.getDP() < MIN_DP)) continue; if (!haploid && (kidGt.getDP() < MIN_DP || momGt.getDP() < MIN_DP || dadGt.getDP() < MIN_DP)) continue; trio.NUM_VARIANT_SITES++; /////////////////////////////////////////////////////////////// // First test for haploid violations /////////////////////////////////////////////////////////////// MendelianViolation type = null; if (haploid) { if (kidGt.isHet()) continue; // Should not see heterozygous calls at haploid regions if (!haploidParentalGenotype.getAlleles().contains(kidGt.getAllele(0))) { if (kidGt.isHomRef()) { type = MendelianViolation.Haploid_Other; trio.NUM_HAPLOID_OTHER++; } else { type = MendelianViolation.Haploid_Denovo; trio.NUM_HAPLOID_DENOVO++; } } } /////////////////////////////////////////////////////////////// // Then test for diploid mendelian violations /////////////////////////////////////////////////////////////// else if (isMendelianViolation(momGt, dadGt, kidGt)) { if (momGt.isHomRef() && dadGt.isHomRef() && !kidGt.isHomRef()) { trio.NUM_DIPLOID_DENOVO++; type = MendelianViolation.Diploid_Denovo; } else if (momGt.isHomVar() && dadGt.isHomVar() && kidGt.isHet()) { trio.NUM_HOMVAR_HOMVAR_HET++; type = MendelianViolation.HomVar_HomVar_Het; } else if (kidGt.isHom() && ((momGt.isHomRef() && dadGt.isHomVar()) || (momGt.isHomVar() && dadGt.isHomRef()))) { trio.NUM_HOMREF_HOMVAR_HOM++; type = MendelianViolation.HomRef_HomVar_Hom; } else if (kidGt.isHom() && ((momGt.isHom() && dadGt.isHet()) || (momGt.isHet() && dadGt.isHom()))) { trio.NUM_HOM_HET_HOM++; type = MendelianViolation.Hom_Het_Hom; } else { trio.NUM_OTHER++; type = MendelianViolation.Other; } } // Output a record into the family's violation VCF if (type != null) { // Create a new Context subsetted to the three samples final VariantContextBuilder builder = new VariantContextBuilder(ctx); builder.genotypes(ctx.getGenotypes().subsetToSamples(CollectionUtil.makeSet(trio.MOTHER, trio.FATHER, trio.OFFSPRING))); builder.attribute(MENDELIAN_VIOLATION_KEY, type.name()); // Copy over some useful attributes from the full context if (ctx.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) builder.attribute(ORIGINAL_AC, ctx.getAttribute(VCFConstants.ALLELE_COUNT_KEY)); if (ctx.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) builder.attribute(ORIGINAL_AF, ctx.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)); if (ctx.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY)) builder.attribute(ORIGINAL_AN, ctx.getAttribute(VCFConstants.ALLELE_NUMBER_KEY)); // Write out the variant record familyToViolations.get(trio.FAMILY_ID).add(builder.make()); } } } @Override public Result result() { return new Result(trios, familyToViolations); } /** Tests to ensure that a locus is bi-allelic within the given set of samples. */ private boolean isVariant(final Genotype... gts) { for (final Genotype gt : gts) { if (gt.isCalled() && !gt.isHomRef()) return true; } return false; } /** Tests whether the alleles of the offspring are possible given the alleles of the parents. */ private boolean isMendelianViolation(final Genotype p1, final Genotype p2, final Genotype off) { final Allele offAllele1 = off.getAllele(0); final Allele offAllele2 = off.getAllele(1); if(p1.getAlleles().contains(offAllele1) && p2.getAlleles().contains(offAllele2)) return false; if(p2.getAlleles().contains(offAllele1) && p1.getAlleles().contains(offAllele2)) return false; return true; } /** * Tests whether the variant is within one of the pseudo-autosomal regions */ private boolean isInPseudoAutosomalRegion(final String chr, final int pos) { for (final Interval par : parIntervals) { if (par.getContig().equals(chr) && pos >= par.getStart() && pos <= par.getEnd()) return true; } return false; } /** Represents the result of the work this class does. */ static class Result { private final Collection<MendelianViolationMetrics> metrics; private final MendelianViolationsByFamily violations; Result(final Collection<MendelianViolationMetrics> metrics, final MendelianViolationsByFamily violations) { this.metrics = metrics; this.violations = violations; } public Collection<MendelianViolationMetrics> metrics() { return metrics; } MendelianViolationsByFamily violations() { return violations; } public static MendelianViolationDetector.Result merge(final Collection<MendelianViolationDetector.Result> results) { final Collection<Collection<MendelianViolationMetrics>> metricCollections = new ArrayList<>(); final Collection<MendelianViolationsByFamily> violationCollections = new ArrayList<>(); for (final MendelianViolationDetector.Result result : results) { metricCollections.add(result.metrics()); violationCollections.add(result.violations()); } return new MendelianViolationDetector.Result(mergeMetrics(metricCollections), mergeViolations(violationCollections)); } private static MendelianViolationsByFamily mergeViolations(final Collection<MendelianViolationsByFamily> resultsToReduce) { final MendelianViolationsByFamily masterFamilyViolationsMap = new MendelianViolationsByFamily(); for (final Map<String, Collection<VariantContext>> childFamilyViolationsMap : resultsToReduce) { for (final String childFamily : childFamilyViolationsMap.keySet()) { masterFamilyViolationsMap.get(childFamily).addAll(childFamilyViolationsMap.get(childFamily)); } } return masterFamilyViolationsMap; } /** Flattens out the provided metrics, collates them by "sample", and merges those collations. */ private static Collection<MendelianViolationMetrics> mergeMetrics(final Collection<Collection<MendelianViolationMetrics>> resultsToReduce) { final Collection<MendelianViolationMetrics> allMetrics = new ArrayList<>(); resultsToReduce.forEach(allMetrics::addAll); final Map<String, List<MendelianViolationMetrics>> sampleToMetricsMap = allMetrics .stream() .collect(Collectors .groupingBy(m -> String.format("%s|%s|%s|%s", m.FAMILY_ID, m.FATHER, m.MOTHER, m.OFFSPRING))); return sampleToMetricsMap .values() .stream() .map(a-> (MendelianViolationMetrics) new MendelianViolationMetrics().merge(a)) .collect(Collectors.<MendelianViolationMetrics, List<MendelianViolationMetrics>>toCollection(ArrayList<MendelianViolationMetrics>::new)); } } }