package org.molgenis.data.annotation.core.entity.impl.gavin;
import org.molgenis.data.annotation.core.entity.impl.snpeff.Impact;
import static org.molgenis.data.annotation.core.entity.impl.gavin.Category.*;
import static org.molgenis.data.annotation.core.entity.impl.gavin.Judgment.Classification.*;
import static org.molgenis.data.annotation.core.entity.impl.gavin.Judgment.Method.calibrated;
import static org.molgenis.data.annotation.core.entity.impl.gavin.Judgment.Method.genomewide;
import static org.molgenis.data.annotation.core.entity.impl.snpeff.Impact.*;
public class GavinAlgorithm
{
public static final String NAME = "GavinAnnotator";
public static final double GENOMEWIDE_MAF_THRESHOLD = 0.003456145;
public static final int GENOMEWIDE_CADD_THRESHOLD = 15;
/**
* Sensitivity is more important than specificity, so we can adjust this parameter to globally adjust the thresholds.
* <ul>
* <li>At a setting of 1, ie. default behaviour, GAVIN is 89% sensitive and 83% specific</li>
* <li>at a setting of 5, a more sensitive setting, GAVIN is 92% sensitive and 78% specific</li>
* </ul>
* Technically we lose more than we gain, but having >90% sensitivity is really important.
* Better to have a few more false positives than to miss a true positive.
*/
public static final int EXTRA_SENSITIVITY_FACTOR = 5;
/**
* Classifies a variant using the given GavinThresholds.
*
* @param impact putative impact of the variant, determined by snpEff
* @param caddScaled scaled cadd score for the variant
* @param exacMAF maf of the variant, determined by exac
* @param gene gene of the variant
* @param gavinThresholds the {@link GavinThresholds} for this gene
* @return the {@link Judgment} of the gavin algorithm
*/
public Judgment classifyVariant(Impact impact, Double caddScaled, Double exacMAF, String gene,
GavinThresholds gavinThresholds)
{
if (gavinThresholds == null)
{
//if we have no data for this gene, immediately fall back to the genomewide method
return genomewideClassifyVariant(impact, caddScaled, exacMAF, gene);
}
gavinThresholds = gavinThresholds.withExtraSensitivity(EXTRA_SENSITIVITY_FACTOR);
// CADD score based classification, calibrated
if (caddScaled != null)
{
switch (gavinThresholds.getCategory())
{
case C1:
case C2:
if (gavinThresholds.isAboveMeanPathogenicCADDScore(caddScaled))
{
return Judgment.create(Pathogenic, calibrated, gene,
"Variant CADD score of " + caddScaled + " is greater than " + gavinThresholds
.getMeanPathogenicCADDScore()
+ " in a gene for which CADD scores are informative.");
}
if (gavinThresholds.isBelowMeanPopulationCADDScore(caddScaled))
{
return Judgment.create(Benign, calibrated, gene,
"Variant CADD score of " + caddScaled + " is less than " + gavinThresholds
.getMeanPopulationCADDScore()
+ " in a gene for which CADD scores are informative.");
}
//this rule does not classify apparently, just continue onto the next rules
break;
case C3:
case C4:
case C5:
if (gavinThresholds.isAboveSpec95thPerCADDThreshold(caddScaled))
{
return Judgment.create(Pathogenic, calibrated, gene,
"Variant CADD score of " + caddScaled + " is greater than " + gavinThresholds
.getSpec95thPerCADDThreshold() + " for this gene.");
}
if (gavinThresholds.isBelowSens95PerCADDThreshold(caddScaled))
{
return Judgment.create(Benign, calibrated, gene,
"Variant CADD score of " + caddScaled + " is less than " + gavinThresholds
.getSens95thPerCADDThreshold() + " for this gene.");
}
//this rule does not classify apparently, just continue onto the next rules
break;
}
}
// MAF-based classification, calibrated
if (gavinThresholds.isAbovePathoMAFThreshold(exacMAF))
{
return Judgment.create(Benign, calibrated, gene,
"Variant MAF of " + exacMAF + " is greater than " + gavinThresholds.getPathoMAFThreshold() + ".");
}
String mafReason =
"the variant MAF of " + exacMAF + " is less than a MAF of " + gavinThresholds.getPathoMAFThreshold()
+ ".";
// Impact based classification, calibrated
if (impact != null)
{
if (gavinThresholds.getCategory() == I1 && impact == HIGH)
{
return Judgment.create(Pathogenic, calibrated, gene,
"Variant is of high impact, while there are no known high impact variants in the population. Also, "
+ mafReason);
}
if (gavinThresholds.getCategory() == I2 && (impact == MODERATE || impact == HIGH))
{
return Judgment.create(Pathogenic, calibrated, gene,
"Variant is of high/moderate impact, while there are no known high/moderate impact variants in the population. Also, "
+ mafReason);
}
if (gavinThresholds.getCategory() == I3 && (impact == LOW || impact == MODERATE || impact == HIGH))
{
return Judgment.create(Pathogenic, calibrated, gene,
"Variant is of high/moderate/low impact, while there are no known high/moderate/low impact variants in the population. Also, "
+ mafReason);
}
if (impact == MODIFIER)
{
return Judgment.create(Benign, calibrated, gene,
"Variant is of 'modifier' impact, and therefore unlikely to be pathogenic. However, "
+ mafReason);
}
}
//if everything so far has failed, we can still fall back to the genome-wide method
return genomewideClassifyVariant(impact, caddScaled, exacMAF, gene);
}
/**
* Classifies a variant based on genome-wide thresholds
*
* @param impact putative impact of the variant, determined by snpEff
* @param caddScaled scaled cadd score for the variant
* @param exacMAF maf of the variant, determined by exac
* @param gene gene of the variant
* @return the {@link Judgment} of the gavin algorithm
*/
public Judgment genomewideClassifyVariant(Impact impact, Double caddScaled, Double exacMAF, String gene)
{
exacMAF = exacMAF != null ? exacMAF : 0;
if (exacMAF > GENOMEWIDE_MAF_THRESHOLD)
{
return Judgment.create(Benign, genomewide, gene,
"Variant MAF of " + exacMAF + " is not rare enough to generally be considered pathogenic.");
}
if (impact == MODIFIER)
{
return Judgment.create(Benign, genomewide, gene,
"Variant is of 'modifier' impact, and therefore unlikely to be pathogenic.");
}
if (caddScaled != null && caddScaled > GENOMEWIDE_CADD_THRESHOLD)
{
return Judgment.create(Pathogenic, genomewide, gene,
"Variant MAF of " + exacMAF + " is rare enough to be potentially pathogenic and its CADD score of "
+ caddScaled + " is greater than a global threshold of " + GENOMEWIDE_CADD_THRESHOLD + ".");
}
if (caddScaled != null && caddScaled <= GENOMEWIDE_CADD_THRESHOLD)
{
return Judgment.create(Benign, genomewide, gene,
"Variant CADD score of " + caddScaled + " is less than a global threshold of "
+ GENOMEWIDE_CADD_THRESHOLD + ", although the variant MAF of " + exacMAF
+ " is rare enough to be potentially pathogenic.");
}
return Judgment.create(VOUS, genomewide, gene,
"Unable to classify variant as benign or pathogenic. The combination of " + impact
+ " impact, a CADD score of " + caddScaled + " and MAF of " + exacMAF + " in " + gene
+ " is inconclusive.");
}
}