/* * The MIT License * * Copyright (c) 2015 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.fingerprint; import htsjdk.samtools.util.QualityUtil; import picard.util.MathUtil; import static java.lang.Math.log10; /** * Represents the probability of the underlying haplotype of the contaminating sample given the data. * By convention the alleles stored for each SNP are in phase. * * @author Yossi Farjoun */ public class HaplotypeProbabilitiesFromContaminatorSequence extends HaplotypeProbabilitiesFromSequence { public double contamination; // for each model (contGenotype, mainGenotype) there's a likelihood of the data. These need to be collected separately // and only collated once all the data is in. double[][] likelihoodMap = {{1, 1, 1}, {1, 1, 1}, {1, 1, 1}}; public HaplotypeProbabilitiesFromContaminatorSequence(final HaplotypeBlock haplotypeBlock, final double contamination) { super(haplotypeBlock); assert (contamination <= 1.0); assert (contamination >= 0.0); this.contamination = contamination; } /** * Adds a base observation with the observed quality to the evidence for this haplotype * based on the fact that the SNP is part of the haplotype. */ public void addToProbs(final Snp snp, final byte base, final byte qual) { assertSnpPartOfHaplotype(snp); // Skip bases that don't match either expected allele for this SNP final boolean altAllele; if (base == snp.getAllele1()) { this.obsAllele1++; altAllele = false; } else if (base == snp.getAllele2()) { this.obsAllele2++; altAllele = true; } else { this.obsAlleleOther++; return; } final double pErr = QualityUtil.getErrorProbabilityFromPhredScore(qual); // we need to keep the 9 models separate until all the reads have been seen. // Once we have seen all the reads, we add across the mainGeno and the three likelihoods // are the likelihoods of the contaminator, the main sample's genotype needs to be summed over in each case: for (final Genotype contGeno : Genotype.values()) { for (final Genotype mainGeno : Genotype.values()) { //theta is the expected frequency of the alternate allele final double theta = 0.5 * ((1 - contamination) * mainGeno.v + contamination * contGeno.v); likelihoodMap[contGeno.v][mainGeno.v] *= (( altAllele ? theta : (1 - theta)) * (1 - pErr) + (!altAllele ? theta : (1 - theta)) * pErr); } } } //a function needed to update the logLikelihoods from the likelihoodMap. private void updateLikelihoods() { final double[] ll = new double[Genotype.values().length]; for (final Genotype contGeno : Genotype.values()) { // p(a | g_c) = \sum_g_m { P(g_m) \prod_i P(a_i| g_m, g_c)} ll[contGeno.v] = log10(MathUtil.sum(MathUtil.multiply(this.getPriorProbablities(), likelihoodMap[contGeno.v]))); } setLogLikelihoods(ll); } @Override public void merge(final HaplotypeProbabilities other) { super.merge(other); if (!this.getHaplotype().equals(other.getHaplotype())) { throw new IllegalArgumentException("Mismatched haplotypes in call to HaplotypeProbabilities.merge(): " + getHaplotype() + ", " + other.getHaplotype()); } if (!(other instanceof HaplotypeProbabilitiesFromContaminatorSequence)) { throw new IllegalArgumentException("Can only merge HaplotypeProbabilities of same class."); } final HaplotypeProbabilitiesFromContaminatorSequence o = (HaplotypeProbabilitiesFromContaminatorSequence) other; if (o.contamination != this.contamination) { throw new IllegalArgumentException("Can only merge HaplotypeProbabilitiesFromContaminatorSequence with the same contamination value."); } for (final Genotype contGeno : Genotype.values()) { this.likelihoodMap[contGeno.v] = MathUtil.multiply(this.likelihoodMap[contGeno.v], o.likelihoodMap[contGeno.v]); } } @Override public double[] getLogLikelihoods() { updateLikelihoods(); return super.getLogLikelihoods(); } }