package picard.fingerprint;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.QualityUtil;
import htsjdk.variant.variantcontext.Allele;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.util.MathUtil;
import java.util.Collections;
import java.util.List;
import static picard.util.TestNGUtil.assertEqualDoubleArrays;
import static java.lang.Math.log10;
/**
* Basic tests for HaplotypeProbabilities and derived classes
*
* @author yossi farjoun
*/
public class HaplotypeProbabilitiesTest {
static Snp snp1, snp2;
static HaplotypeBlock hb1, hb2;
@BeforeTest
public static void initializeHaplotypeBlock() {
snp1 = new Snp("SNP1", "test", 1, (byte) 'A', (byte) 'T', 0.25, Collections.<String>emptyList());
snp2 = new Snp("SNP2", "test", 2, (byte) 'A', (byte) 'G', 0.5, Collections.<String>emptyList());
hb1 = new HaplotypeBlock(.25);
hb1.addSnp(snp1);
hb2 = new HaplotypeBlock(.4);
hb2.addSnp(snp1);
hb2.addSnp(snp2);
}
@DataProvider(name = "dataTestpEvidenceGivenPriorFromGLs")
public Object[][] dataTestpEvidenceGivenPriorFromGLs() {
return new Object[][]{
new Object[]{
new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb1),
Collections.singletonList(snp1),
Collections.singletonList(false),
Collections.singletonList(new double[]{0, -1, -2})},
new Object[]{
new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb1),
Collections.singletonList(snp1),
Collections.singletonList(true),
Collections.singletonList(new double[]{0, -1, -2})},
new Object[]{
new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb2),
CollectionUtil.makeList(snp1, snp2),
CollectionUtil.makeList(false, false),
CollectionUtil.makeList(new double[]{0, -1, -2}, new double[]{0, -1, -2})},
new Object[]{
new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb2),
CollectionUtil.makeList(snp1, snp2),
CollectionUtil.makeList(false, false),
CollectionUtil.makeList(new double[]{0, -1, -2}, new double[]{-1, 0, -1})},
new Object[]{
new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb2),
CollectionUtil.makeList(snp1, snp2),
CollectionUtil.makeList(false, false),
CollectionUtil.makeList(new double[]{0, -1, -2}, new double[]{-2, -1, 0})},
};
}
@Test(dataProvider = "dataTestpEvidenceGivenPriorFromGLs")
public void testpEvidenceGivenPriorFromGLs(final HaplotypeProbabilitiesFromGenotypeLikelihoods hp, final List<Snp> snps, final List<Boolean> swaps, final List<double[]> GLs) throws Exception {
for (int i = 0; i < snps.size(); ++i) {
final Allele a = Allele.create(swaps.get(i) ? snps.get(i).getAllele2() : snps.get(i).getAllele1());
final Allele b = Allele.create(swaps.get(i) ? snps.get(i).getAllele1() : snps.get(i).getAllele2());
hp.addToLogLikelihoods(snps.get(i), CollectionUtil.makeList(a, b), GLs.get(i));
}
final double[] logLikelihood = new double[3];
for (int genotype = 0; genotype < 3; genotype++) {
logLikelihood[genotype] = log10(hp.getHaplotype().getHaplotypeFrequency(genotype));
for (int i = 0; i < GLs.size(); i++) {
final double[] genotypeLogLikelihoods = GLs.get(i);
if (swaps.get(i))
logLikelihood[genotype] += genotypeLogLikelihoods[2 - genotype];
else
logLikelihood[genotype] += genotypeLogLikelihoods[genotype];
}
}
assertEqualDoubleArrays(hp.getPosteriorProbabilities(), MathUtil.pNormalizeLogProbability(logLikelihood), 1e-10);
}
@DataProvider(name = "dataTestHaplotypeProbabilitiesFromSequenceAddToProbs")
public Object[][] dataTestHaplotypeProbabilitiesFromSequenceAddToProbs() {
return new Object[][]{
{new HaplotypeProbabilitiesFromSequence(hb1), snp1, new byte[]{}, 7},
{new HaplotypeProbabilitiesFromSequence(hb1), snp1, new byte[]{'A'}, 7},
{new HaplotypeProbabilitiesFromSequence(hb1), snp1, new byte[]{'G'}, 7},
{new HaplotypeProbabilitiesFromSequence(hb1), snp1, new byte[]{'T'}, 7},
{new HaplotypeProbabilitiesFromSequence(hb1), snp1, new byte[]{'A', 'T', 'A', 'A', 'A', 'A', 'A', 'A'}, 7},
{new HaplotypeProbabilitiesFromSequence(hb1), snp1, new byte[]{'A', 'T', 'A', 'A', 'G', 'A', 'A', 'A'}, 7},
{new HaplotypeProbabilitiesFromSequence(hb1), snp1, new byte[]{'T', 'T', 'A', 'A', 'A', 'T', 'T', 'A', 'T'}, 7},
{new HaplotypeProbabilitiesFromSequence(hb1), snp1, new byte[]{'T', 'A', 'T', 'T', 'T', 'T', 'T', 'T', 'A'}, 7}
};
}
@Test(dataProvider = "dataTestHaplotypeProbabilitiesFromSequenceAddToProbs")
public void testHaplotypeProbabilitiesFromSequenceAddToProbs(final HaplotypeProbabilitiesFromSequence hp, final Snp snp, final byte[] bases, final int qual) throws Exception {
for (final byte base : bases) {
hp.addToProbs(snp, base, (byte) qual);
}
final double pError = QualityUtil.getErrorProbabilityFromPhredScore(qual);
final double[] logLikelihood = new double[3];
for (int genotype = 0; genotype < 3; genotype++) {
logLikelihood[genotype] = log10(hp.getHaplotype().getHaplotypeFrequency(genotype));
for (final byte a : bases) {
final double theta = 0.5 * genotype;
if (a == snp.getAllele1())
logLikelihood[genotype] += log10((1 - theta) * (1 - pError) + theta * pError);
if (a == snp.getAllele2())
logLikelihood[genotype] += log10((1 - theta) * (pError) + theta * (1 - pError));
}
}
final double[] posterior = MathUtil.pNormalizeLogProbability(logLikelihood);
assertEqualDoubleArrays(hp.getPosteriorProbabilities(), posterior, 1e-10);
}
@DataProvider(name = "dataTestHaplotypeProbabilitiesFromContaminatorSequenceAddToProbs")
public Object[][] dataTestHaplotypeProbabilitiesFromContaminatorSequenceAddToProbs() {
return new Object[][]{
{new HaplotypeProbabilitiesFromContaminatorSequence(hb1, .1), snp1, 0, 0},
{new HaplotypeProbabilitiesFromContaminatorSequence(hb1, .1), snp1, 0, 1},
{new HaplotypeProbabilitiesFromContaminatorSequence(hb1, .1), snp1, 0, 76},
{new HaplotypeProbabilitiesFromContaminatorSequence(hb1, .1), snp1, 3, 76},
{new HaplotypeProbabilitiesFromContaminatorSequence(hb1, .1), snp1, 7, 76},
{new HaplotypeProbabilitiesFromContaminatorSequence(hb1, .1), snp1, 35, 76},
{new HaplotypeProbabilitiesFromContaminatorSequence(hb1, .1), snp1, 40, 76},
{new HaplotypeProbabilitiesFromContaminatorSequence(hb1, .1), snp1, 45, 76},
{new HaplotypeProbabilitiesFromContaminatorSequence(hb1, .1), snp1, 69, 76},
{new HaplotypeProbabilitiesFromContaminatorSequence(hb1, .1), snp1, 73, 76},
{new HaplotypeProbabilitiesFromContaminatorSequence(hb1, .1), snp1, 76, 76}
};
}
static final int[] genotypes = {0, 1, 2};
@Test(dataProvider = "dataTestHaplotypeProbabilitiesFromContaminatorSequenceAddToProbs")
public void testHaplotypeProbabilitiesFromContaminatorSequenceAddToProbs(final HaplotypeProbabilitiesFromContaminatorSequence hp, final Snp snp, final int nAlt, final int nTotal) throws Exception {
final byte qual = 7;
for (int i = 0; i < nAlt; i++) {
hp.addToProbs(snp, snp.getAllele2(), qual);
}
for (int i = nAlt; i < nTotal; i++) {
hp.addToProbs(snp, snp.getAllele1(), qual);
}
final double pError = QualityUtil.getErrorProbabilityFromPhredScore(qual);
final double[] unnormalizedLikelihood = {0d, 0d, 0d};
for (final int contG : genotypes) {
for (final int mainG : genotypes) {
final double pAlt = (hp.contamination * contG + (1 - hp.contamination) * mainG) / 2;
double l = hp.getHaplotype().getHaplotypeFrequency(mainG);
for (int i = 0; i < nAlt; i++) {
l *= pAlt * (1 - pError) + (1 - pAlt) * pError;
}
for (int i = nAlt; i < nTotal; i++) {
l *= pAlt * (pError) + (1 - pAlt) * (1 - pError);
}
unnormalizedLikelihood[contG] += l;
}
}
final double[] likelihood = MathUtil.pNormalizeVector(unnormalizedLikelihood);
assertEqualDoubleArrays(hp.getLikelihoods(), likelihood, 1e-10);
}
}