/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package htsjdk.tribble.util.popgen;
/**
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2004 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
/**
* This class calculates a HardyWeinberg p-value given three values representing
* the observed frequences of homozygous and heterozygous genotypes in the
* test population.
*
* @author Bob Handsaker
*/
public final class HardyWeinbergCalculation {
/**
* This class is not instantiable.
*/
private HardyWeinbergCalculation() {
}
/**
* Calculates exact two-sided hardy-weinberg p-value. Parameters
* are number of genotypes, number of rare alleles observed and
* number of heterozygotes observed.
*
* (c) 2003 Jan Wigginton, Goncalo Abecasis (goncalo@umich.edu)
*/
public static double hwCalculate(int obsAA, int obsAB, int obsBB) {
int diplotypes = obsAA + obsAB + obsBB;
int rare = (obsAA * 2) + obsAB;
int hets = obsAB;
//make sure "rare" allele is really the rare allele
if (rare > diplotypes) {
rare = (2 * diplotypes) - rare;
}
//make sure numbers aren't screwy
if (hets > rare) {
return -1;
}
double[] tailProbs = new double[rare + 1];
for (int z = 0; z < tailProbs.length; z++) {
tailProbs[z] = 0;
}
//start at midpoint
//cast to long and back to int to avoid overflow at large numbers
int mid = (int) (((long) rare * ((2 * diplotypes) - rare)) / (2 * diplotypes));
//check to ensure that midpoint and rare alleles have same parity
if (((rare & 1) ^ (mid & 1)) != 0) {
mid++;
}
int het = mid;
int hom_r = (rare - mid) / 2;
int hom_c = diplotypes - het - hom_r;
//Calculate probability for each possible observed heterozygote
//count up to a scaling constant, to avoid underflow and overflow
tailProbs[mid] = 1.0;
double sum = tailProbs[mid];
for (het = mid; het > 1; het -= 2) {
tailProbs[het - 2] = (tailProbs[het] * het * (het - 1.0)) / (4.0 * (hom_r + 1.0) * (hom_c +
1.0));
sum += tailProbs[het - 2];
//2 fewer hets for next iteration -> add one rare and one common homozygote
hom_r++;
hom_c++;
}
het = mid;
hom_r = (rare - mid) / 2;
hom_c = diplotypes - het - hom_r;
for (het = mid; het <= (rare - 2); het += 2) {
tailProbs[het + 2] = (tailProbs[het] * 4.0 * hom_r * hom_c) / ((het + 2.0) * (het +
1.0));
sum += tailProbs[het + 2];
//2 more hets for next iteration -> subtract one rare and one common homozygote
hom_r--;
hom_c--;
}
for (int z = 0; z < tailProbs.length; z++) {
tailProbs[z] /= sum;
}
double top = tailProbs[hets];
for (int i = hets + 1; i <= rare; i++) {
top += tailProbs[i];
}
double otherSide = tailProbs[hets];
for (int i = hets - 1; i >= 0; i--) {
otherSide += tailProbs[i];
}
if ((top > 0.5) && (otherSide > 0.5)) {
return 1.0;
}
if (top < otherSide) {
return top * 2;
}
return otherSide * 2;
}
}