/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package htsjdk.variant.variantcontext;
import htsjdk.tribble.util.ParsingUtils;
import htsjdk.variant.vcf.VCFConstants;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
/**
* This class encompasses all the basic information about a genotype. It is immutable.
*
* @author Mark DePristo
*/
public abstract class Genotype implements Comparable<Genotype> {
/**
* A list of genotype field keys corresponding to values we
* manage inline in the Genotype object. They must not appear in the
* extended attributes map
*/
public final static Collection<String> PRIMARY_KEYS = Arrays.asList(
VCFConstants.GENOTYPE_FILTER_KEY,
VCFConstants.GENOTYPE_KEY,
VCFConstants.GENOTYPE_QUALITY_KEY,
VCFConstants.DEPTH_KEY,
VCFConstants.GENOTYPE_ALLELE_DEPTHS,
VCFConstants.GENOTYPE_PL_KEY);
public final static String PHASED_ALLELE_SEPARATOR = "|";
public final static String UNPHASED_ALLELE_SEPARATOR = "/";
private final String sampleName;
private GenotypeType type = null;
private final String filters;
protected Genotype(final String sampleName, final String filters) {
this.sampleName = sampleName;
this.filters = filters;
}
/**
* @return the alleles for this genotype. Cannot be null. May be empty
*/
public abstract List<Allele> getAlleles();
/**
* Returns how many times allele appears in this genotype object?
*
* @param allele
* @return a value >= 0 indicating how many times the allele occurred in this sample's genotype
*/
public int countAllele(final Allele allele) {
int c = 0;
for ( final Allele a : getAlleles() )
if ( a.equals(allele) )
c++;
return c;
}
/**
* Get the ith allele in this genotype
*
* @param i the ith allele, must be < the ploidy, starting with 0
* @return the allele at position i, which cannot be null
*/
public abstract Allele getAllele(int i);
/**
* Are the alleles phased w.r.t. the global phasing system?
*
* @return true if yes
*/
public abstract boolean isPhased();
/**
* What is the ploidy of this sample?
*
* @return the ploidy of this genotype. 0 if the site is no-called.
*/
public int getPloidy() {
return getAlleles().size();
}
/**
* @return the sequencing depth of this sample, or -1 if this value is missing
*/
public abstract int getDP();
/**
* @return the count of reads, one for each allele in the surrounding Variant context,
* matching the corresponding allele, or null if this value is missing. MUST
* NOT BE MODIFIED!
*/
public abstract int[] getAD();
/**
* Returns the name associated with this sample.
*
* @return a non-null String
*/
public String getSampleName() {
return sampleName;
}
/**
* Returns a phred-scaled quality score, or -1 if none is available
* @return
*/
public abstract int getGQ();
/**
* Does the PL field have a value?
* @return true if there's a PL field value
*/
public boolean hasPL() {
return getPL() != null;
}
/**
* Does the AD field have a value?
* @return true if there's a AD field value
*/
public boolean hasAD() {
return getAD() != null;
}
/**
* Does the GQ field have a value?
* @return true if there's a GQ field value
*/
public boolean hasGQ() {
return getGQ() != -1;
}
/**
* Does the DP field have a value?
* @return true if there's a DP field value
*/
public boolean hasDP() {
return getDP() != -1;
}
// ---------------------------------------------------------------------------------------------------------
//
// The type of this genotype
//
// ---------------------------------------------------------------------------------------------------------
/**
* @return the high-level type of this sample's genotype
*/
public GenotypeType getType() {
if ( type == null ) {
type = determineType();
}
return type;
}
/**
* Internal code to determine the type of the genotype from the alleles vector
* @return the type
*/
protected GenotypeType determineType() {
// TODO -- this code is slow and could be optimized for the diploid case
final List<Allele> alleles = getAlleles();
if ( alleles.isEmpty() )
return GenotypeType.UNAVAILABLE;
boolean sawNoCall = false, sawMultipleAlleles = false;
Allele observedAllele = null;
for ( final Allele allele : alleles ) {
if ( allele.isNoCall() )
sawNoCall = true;
else if ( observedAllele == null )
observedAllele = allele;
else if ( !allele.equals(observedAllele) )
sawMultipleAlleles = true;
}
if ( sawNoCall ) {
if ( observedAllele == null )
return GenotypeType.NO_CALL;
return GenotypeType.MIXED;
}
if ( observedAllele == null )
throw new IllegalStateException("BUG: there are no alleles present in this genotype but the alleles list is not null");
return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR;
}
/**
* @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false.
*/
public boolean isHom() { return isHomRef() || isHomVar(); }
/**
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
*/
public boolean isHomRef() { return getType() == GenotypeType.HOM_REF; }
/**
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
*/
public boolean isHomVar() { return getType() == GenotypeType.HOM_VAR; }
/**
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
*/
public boolean isHet() { return getType() == GenotypeType.HET; }
/**
* @return true if we're het (observed alleles differ) and neither allele is reference; if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
*/
public boolean isHetNonRef() { return (getType() == GenotypeType.HET && getAllele(0).isNonReference() && getAllele(1).isNonReference()); }
/**
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
*/
public boolean isNoCall() { return getType() == GenotypeType.NO_CALL; }
/**
* @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
*/
public boolean isCalled() { return getType() != GenotypeType.NO_CALL && getType() != GenotypeType.UNAVAILABLE; }
/**
* @return true if this genotype is comprised of both calls and no-calls.
*/
public boolean isMixed() { return getType() == GenotypeType.MIXED; }
/**
* @return true if the type of this genotype is set.
*/
public boolean isAvailable() { return getType() != GenotypeType.UNAVAILABLE; }
// ------------------------------------------------------------------------------
//
// methods for getting genotype likelihoods for a genotype object, if present
//
// ------------------------------------------------------------------------------
/**
* @return Returns true if this Genotype has PL field values
*/
public boolean hasLikelihoods() {
return getPL() != null;
}
/**
* Convenience function that returns a string representation of the PL field of this
* genotype, or . if none is available.
*
* @return a non-null String representation for the PL of this sample
*/
public String getLikelihoodsString() {
return hasLikelihoods() ? getLikelihoods().toString() : VCFConstants.MISSING_VALUE_v4;
}
/**
* Returns the GenotypesLikelihoods data associated with this Genotype, or null if missing
* @return null or a GenotypesLikelihood object for this sample's PL field
*/
public GenotypeLikelihoods getLikelihoods() {
return hasLikelihoods() ? GenotypeLikelihoods.fromPLs(getPL()) : null;
}
/**
* Are all likelihoods for this sample non-informative?
*
* Returns true if all PLs are 0 => 0,0,0 => true
* 0,0,0,0,0,0 => true
* 0,10,100 => false
*
* @return true if all samples PLs are equal and == 0
*/
public boolean isNonInformative() {
if ( getPL() == null )
return true;
else {
for ( final int PL : getPL() ) {
if ( PL != 0 )
return false;
}
return true;
}
}
/**
* Unsafe low-level accessor the PL field itself, may be null.
*
* @return a pointer to the underlying PL data. MUST NOT BE MODIFIED!
*/
public abstract int[] getPL();
// ---------------------------------------------------------------------------------------------------------
//
// Many different string representations
//
// ---------------------------------------------------------------------------------------------------------
/**
* Return a VCF-like string representation for the alleles of this genotype.
*
* Does not append the reference * marker on the alleles.
*
* @return a string representing the genotypes, or null if the type is unavailable.
*/
public String getGenotypeString() {
return getGenotypeString(true);
}
/**
* Return a VCF-like string representation for the alleles of this genotype.
*
* If ignoreRefState is true, will not append the reference * marker on the alleles.
*
* @return a string representing the genotypes, or null if the type is unavailable.
*/
public String getGenotypeString(boolean ignoreRefState) {
if ( getPloidy() == 0 )
return "NA";
// Notes:
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
// 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele)
// 3. So that everything is deterministic with regards to integration tests, we sort Alleles (when the genotype isn't phased, of course)
return ParsingUtils.join(isPhased() ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR,
ignoreRefState ? (Collection) getAlleleStrings() : (isPhased() ? getAlleles() : ParsingUtils.sortList(getAlleles())));
}
/**
* Utility that returns a list of allele strings corresponding to the alleles in this sample
* @return
*/
protected List<String> getAlleleStrings() {
final List<String> al = new ArrayList<String>(getPloidy());
for ( Allele a : getAlleles() )
al.add(a.getBaseString());
return al;
}
public String toString() {
return String.format("[%s %s%s%s%s%s%s%s]",
getSampleName(),
getGenotypeString(false),
toStringIfExists(VCFConstants.GENOTYPE_QUALITY_KEY, getGQ()),
toStringIfExists(VCFConstants.DEPTH_KEY, getDP()),
toStringIfExists(VCFConstants.GENOTYPE_ALLELE_DEPTHS, getAD()),
toStringIfExists(VCFConstants.GENOTYPE_PL_KEY, getPL()),
toStringIfExists(VCFConstants.GENOTYPE_FILTER_KEY, getFilters()),
sortedString(getExtendedAttributes()));
}
public String toBriefString() {
return String.format("%s:Q%d", getGenotypeString(false), getGQ());
}
// ---------------------------------------------------------------------------------------------------------
//
// Comparison operations
//
// ---------------------------------------------------------------------------------------------------------
/**
* comparable genotypes -> compareTo on the sample names
* @param genotype
* @return
*/
@Override
public int compareTo(final Genotype genotype) {
return getSampleName().compareTo(genotype.getSampleName());
}
public boolean sameGenotype(final Genotype other) {
return sameGenotype(other, true);
}
public boolean sameGenotype(final Genotype other, boolean ignorePhase) {
if (getPloidy() != other.getPloidy())
return false; // gotta have the same number of allele to be equal
// By default, compare the elements in the lists of alleles, element-by-element
Collection<Allele> thisAlleles = this.getAlleles();
Collection<Allele> otherAlleles = other.getAlleles();
if (ignorePhase) { // do not care about order, only identity of Alleles
thisAlleles = new TreeSet<Allele>(thisAlleles); //implemented Allele.compareTo()
otherAlleles = new TreeSet<Allele>(otherAlleles);
}
return thisAlleles.equals(otherAlleles);
}
// ---------------------------------------------------------------------------------------------------------
//
// get routines for extended attributes
//
// ---------------------------------------------------------------------------------------------------------
/**
* Returns the extended attributes for this object
* @return is never null, but is often isEmpty()
*/
public abstract Map<String, Object> getExtendedAttributes();
/**
* Is key associated with a value (even a null one) in the extended attributes?
*
* Note this will not return true for the inline attributes DP, GQ, AD, or PL
*
* @param key a non-null string key to check for an association
* @return true if key has a value in the extendedAttributes
*/
public boolean hasExtendedAttribute(final String key) {
return getExtendedAttributes().containsKey(key);
}
/**
* Get the extended attribute value associated with key, if possible
*
* @param key a non-null string key to fetch a value for
* @param defaultValue the value to return if key isn't in the extended attributes
* @return a value (potentially) null associated with key, or defaultValue if no association exists
*/
public Object getExtendedAttribute(final String key, final Object defaultValue) {
return hasExtendedAttribute(key) ? getExtendedAttributes().get(key) : defaultValue;
}
/**
* Same as #getExtendedAttribute with a null default
*
* @param key
* @return
*/
public Object getExtendedAttribute(final String key) {
return getExtendedAttribute(key, null);
}
/**
* Returns the filter string associated with this Genotype.
*
* @return If this result == null, then the genotype is considered PASSing filters
* If the result != null, then the genotype has failed filtering for the reason(s)
* specified in result. To be reference compliant multiple filter field
* string values can be encoded with a ; separator.
*/
public final String getFilters() {
return filters;
}
/**
* Is this genotype filtered or not?
*
* @return returns false if getFilters() == null
*/
public final boolean isFiltered() {
return getFilters() != null;
}
@Deprecated public boolean hasLog10PError() { return hasGQ(); }
@Deprecated public double getLog10PError() { return getGQ() / -10.0; }
@Deprecated public int getPhredScaledQual() { return getGQ(); }
@Deprecated
public String getAttributeAsString(String key, String defaultValue) {
Object x = getExtendedAttribute(key);
if ( x == null ) return defaultValue;
if ( x instanceof String ) return (String)x;
return String.valueOf(x); // throws an exception if this isn't a string
}
@Deprecated
public int getAttributeAsInt(String key, int defaultValue) {
Object x = getExtendedAttribute(key);
if ( x == null || x == VCFConstants.MISSING_VALUE_v4 ) return defaultValue;
if ( x instanceof Integer ) return (Integer)x;
return Integer.valueOf((String)x); // throws an exception if this isn't a string
}
@Deprecated
public double getAttributeAsDouble(String key, double defaultValue) {
Object x = getExtendedAttribute(key);
if ( x == null ) return defaultValue;
if ( x instanceof Double ) return (Double)x;
return Double.valueOf((String)x); // throws an exception if this isn't a string
}
/**
* A totally generic getter, that allows you to specific keys that correspond
* to even inline values (GQ, for example). Can be very expensive. Additionally,
* all int[] are converted inline into List<Integer> for convenience.
*
* @param key
* @return
*/
public Object getAnyAttribute(final String key) {
if (key.equals(VCFConstants.GENOTYPE_KEY)) {
return getAlleles();
} else if (key.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
return getGQ();
} else if (key.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
return Arrays.asList(getAD());
} else if (key.equals(VCFConstants.GENOTYPE_PL_KEY)) {
return Arrays.asList(getPL());
} else if (key.equals(VCFConstants.DEPTH_KEY)) {
return getDP();
} else {
return getExtendedAttribute(key);
}
}
public boolean hasAnyAttribute(final String key) {
if (key.equals(VCFConstants.GENOTYPE_KEY)) {
return isAvailable();
} else if (key.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
return hasGQ();
} else if (key.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
return hasAD();
} else if (key.equals(VCFConstants.GENOTYPE_PL_KEY)) {
return hasPL();
} else if (key.equals(VCFConstants.DEPTH_KEY)) {
return hasDP();
} else {
return hasExtendedAttribute(key);
}
}
// TODO -- add getAttributesAsX interface here
// ------------------------------------------------------------------------------
//
// private utilities
//
// ------------------------------------------------------------------------------
/**
* a utility method for generating sorted strings from a map key set.
* @param c the map
* @param <T> the key type
* @param <V> the value type
* @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys
*/
protected static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
// NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS
final List<T> t = new ArrayList<T>(c.keySet());
Collections.sort(t);
final List<String> pairs = new ArrayList<String>();
for (final T k : t) {
pairs.add(k + "=" + c.get(k));
}
return pairs.isEmpty() ? "" : " {" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}";
}
/**
* Returns a display name for field name with value v if this isn't -1. Otherwise returns ""
* @param name of the field ("AD")
* @param v the value of the field, or -1 if missing
* @return a non-null string for display if the field is not missing
*/
protected final static String toStringIfExists(final String name, final int v) {
return v == -1 ? "" : " " + name + " " + v;
}
/**
* Returns a display name for field name with String value v if this isn't null. Otherwise returns ""
* @param name of the field ("FT")
* @param v the value of the field, or null if missing
* @return a non-null string for display if the field is not missing
*/
protected final static String toStringIfExists(final String name, final String v) {
return v == null ? "" : " " + name + " " + v;
}
/**
* Returns a display name for field name with values vs if this isn't null. Otherwise returns ""
* @param name of the field ("AD")
* @param vs the value of the field, or null if missing
* @return a non-null string for display if the field is not missing
*/
protected final static String toStringIfExists(final String name, final int[] vs) {
if ( vs == null )
return "";
else {
StringBuilder b = new StringBuilder();
b.append(" ").append(name).append(" ");
for ( int i = 0; i < vs.length; i++ ) {
if ( i != 0 ) b.append(",");
b.append(vs[i]);
}
return b.toString();
}
}
/**
* Does the attribute map have a mapping involving a forbidden key (i.e.,
* one that's managed inline by this Genotypes object?
*
* @param attributes the extended attributes key
* @return
*/
protected final static boolean hasForbiddenKey(final Map<String, Object> attributes) {
for ( final String forbidden : PRIMARY_KEYS)
if ( attributes.containsKey(forbidden) )
return true;
return false;
}
protected final static boolean isForbiddenKey(final String key) {
return PRIMARY_KEYS.contains(key);
}
}