/*
* The MIT License (MIT)
*
* Copyright (c) 2007-2015 Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broad.igv.variant.vcf;
import org.apache.log4j.Logger;
import org.broad.igv.variant.Allele;
import org.broad.igv.variant.Genotype;
import org.broad.igv.variant.Variant;
import org.broad.igv.variant.VariantTrack;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import java.util.*;
/**
* @author Jim Robinson, jacob
* @date Aug 1, 2011
*/
public class VCFVariant implements Variant {
private static Logger log = Logger.getLogger(Variant.class);
VariantContext variantContext;
List<Allele> alternateAlleles;
// private ZygosityCount zygosityCount;
String chr;
private double[] alleleFreqs;
private int[] alleleCounts;
private double methylationRate = Double.NaN; // <= signals unknown / not applicable
private double coveredSampleFraction = Double.NaN;
Map<String, VCFGenotype> genotypeMap;
private int start = -1;
private int totalAlleleCount = 0;
public VCFVariant(VariantContext variantContext, String chr) {
this.variantContext = variantContext;
this.chr = chr;
init();
}
private void init() {
// Copy the genotype map. Calls to variantContext.getGenotype() are expensive
genotypeMap = new HashMap<String, VCFGenotype>();
for (String sample : getSampleNames()) {
htsjdk.variant.variantcontext.Genotype genotype = variantContext.getGenotype(sample);
VCFGenotype vcfGenotype = genotype == null ? null : new VCFGenotype(genotype);
genotypeMap.put(sample, vcfGenotype);
}
// zygosityCount = new ZygosityCount();
// for (String sample : getSampleNames()) {
// Genotype genotype = getGenotype(sample);
// zygosityCount.incrementCount(genotype);
// }
String afString = null;
String[] alleleFreqKeys = {"AF", "GMAF"};
try {
for (String alleleFreqKey : alleleFreqKeys) {
afString = variantContext.getAttributeAsString(alleleFreqKey, "-1");
alleleFreqs = parseDoubleArrayString(afString);
if (alleleFreqs[0] >= 0) break;
}
} catch (NumberFormatException e) {
alleleFreqs = new double[]{-1};
log.error("Error parsing allele frequency: " + afString);
}
String acKey = "AC";
String acString = variantContext.getAttributeAsString(acKey, null);
if (acString != null) {
try {
alleleCounts = parseIntArrayString(acString);
} catch (NumberFormatException e) {
log.error("Error parsing allele counts:" + acString);
}
}
String anKey = "AN";
String anString = variantContext.getAttributeAsString(anKey, null);
if(anString != null) {
try {
totalAlleleCount = Integer.parseInt(anString);
} catch (NumberFormatException e) {
log.error("Error parsing 'AN' attribute: " + anString);
}
}
}
/**
* Allele frequency is a comma separated list of doubles
* We strip away brackets and parentheses
*
* @param afString
* @return
*/
private double[] parseDoubleArrayString(String afString) throws NumberFormatException {
afString = afString.replaceAll("[\\[\\]\\(\\)]", "");
String[] tokens = afString.split(",");
double[] result = new double[tokens.length];
for (int ii = 0; ii < tokens.length; ii++) {
result[ii] = Double.parseDouble(tokens[ii].trim());
}
return result;
}
private int[] parseIntArrayString(String afString) throws NumberFormatException {
afString = afString.replaceAll("[\\[\\]\\(\\)]", "");
String[] tokens = afString.split(",");
int[] result = new int[tokens.length];
for (int ii = 0; ii < tokens.length; ii++) {
result[ii] = Integer.parseInt(tokens[ii].trim());
}
return result;
}
/**
* Compute the average methylation rate for those samples with data (i.e. with methylation rate recorded).
*/
private void computeMethylationRate() {
double methTotal = 0;
int samplesWithData = 0;
final int size = getSampleNames().size();
if (size > 0) {
for (String sample : getSampleNames()) {
Genotype genotype = getGenotype(sample);
double mr = genotype.getAttributeAsDouble("MR");
double goodBaseCount = genotype.getAttributeAsDouble("MR");
if (!Double.isNaN(mr) && !Double.isNaN(goodBaseCount) && goodBaseCount > VariantTrack.METHYLATION_MIN_BASE_COUNT) {
methTotal += mr;
samplesWithData++;
}
}
methylationRate = samplesWithData == 0 ? 0 : methTotal / samplesWithData;
coveredSampleFraction = ((double) samplesWithData) / size;
}
}
public String getID() {
return variantContext.getID();
}
public boolean isFiltered() {
return variantContext.isFiltered();
}
public String getAttributeAsString(String key) {
return variantContext.getAttributeAsString(key, null);
}
public String getReference() {
return variantContext.getReference().toString();
}
public List<Allele> getAlternateAlleles() {
if (alternateAlleles == null) {
List<htsjdk.variant.variantcontext.Allele> tmp = variantContext.getAlternateAlleles();
alternateAlleles = new ArrayList<Allele>(tmp.size());
for (htsjdk.variant.variantcontext.Allele a : tmp) {
alternateAlleles.add(new VCFAllele(a.getBases()));
}
}
return alternateAlleles;
}
public double getPhredScaledQual() {
return variantContext.getPhredScaledQual();
}
public String getType() {
return variantContext.getType().toString();
}
/**
* Return the allele frequency as annotated with an AF or GMAF attribute. A value of -1 indicates
* no annotation (unknown allele frequency).
*/
public double[] getAlleleFreqs() {
return alleleFreqs;
}
@Override
public double getAlternateAlleleFrequency() {
double af = 0;
double[] afreqs = getAlleleFreqs();
if (afreqs != null) {
for (int i = 0; i < afreqs.length; i++) {
af += afreqs[i];
}
}
return af;
}
@Override
public int[] getAlleleCounts() {
return alleleCounts;
}
public int getTotalAlleleCount() {
return totalAlleleCount;
}
public double getAlleleFraction() {
if(alleleCounts != null && alleleCounts.length > 0 && totalAlleleCount > 0) {
double ac = 0;
for(int i=0; i<alleleCounts.length; i++) {
ac += alleleCounts[i];
}
return ac / totalAlleleCount;
}
else {
return -1;
}
}
/**
* Return the methylation rate as annoted with a MR attribute. A value of -1 indicates
* no annotation (unknown methylation rate). This option is only applicable for dna methylation data.
*/
public double getMethlationRate() {
if (Double.isNaN(methylationRate)) {
computeMethylationRate();
}
return methylationRate;
}
public double getCoveredSampleFraction() {
if (Double.isNaN(coveredSampleFraction)) {
computeMethylationRate();
}
return coveredSampleFraction;
}
public Collection<String> getSampleNames() {
return variantContext.getSampleNames();
}
public Map<String, Object> getAttributes() {
return variantContext.getAttributes();
}
@Override
public Genotype getGenotype(String sample) {
return genotypeMap.get(sample);
}
public Collection<String> getFilters() {
return variantContext.getFilters();
}
@Override
public String getChr() {
return chr;
}
@Override
public String getContig() {
return chr;
}
@Override
public int getStart() {
if (this.start < 0) {
calcStart();
}
return this.start;
}
@Override
public int getEnd() {
return variantContext.getEnd();
}
@Override
public String toString() {
return String.format("VCFVariant[%s:%d-%d]", getChr(), getStart(), getEnd());
}
@Override
public String getPositionString() {
if (variantContext.getStart() == variantContext.getEnd()) {
return String.valueOf(variantContext.getStart());
} else {
return String.format("%d-%d", variantContext.getStart(), variantContext.getEnd());
}
}
public String getSource() {
return variantContext.getSource();
}
public VariantContext getVariantContext() {
return variantContext;
}
public static VariantContext getVariantContext(Variant variant) {
if (variant instanceof VCFVariant) {
return ((VCFVariant) variant).getVariantContext();
} else {
List<htsjdk.variant.variantcontext.Allele> alleleList = new ArrayList<htsjdk.variant.variantcontext.Allele>(variant.getAlternateAlleles().size() + 1);
alleleList.add(htsjdk.variant.variantcontext.Allele.create(variant.getReference(), true));
for (Allele all : variant.getAlternateAlleles()) {
alleleList.add(htsjdk.variant.variantcontext.Allele.create(all.getBases(), false));
}
VariantContextBuilder vcb = new VariantContextBuilder(variant.getID(), variant.getChr(), variant.getStart(), variant.getEnd(), alleleList);
return vcb.make();
}
}
/**
* VCFs specify padding bases at the beginning of indels so they can be positioned properly.
* We display the variant only where it actually differs from the reference. So we find the longest
* common prefix between the reference and variants
*/
private void calcStart() {
int prefixLength = 0;
if (variantContext.getType() == VariantContext.Type.INDEL || variantContext.getType() == VariantContext.Type.MIXED) {
prefixLength = findCommonPrefixLength();
}
this.start = (variantContext.getStart() - 1) + prefixLength;
}
/**
* Find the length of the common prefix between the reference and ALL
* variant alleles
*
* @return
*/
private int findCommonPrefixLength() {
String ref = variantContext.getReference().getDisplayString();
int prefixLength = 0;
boolean foundmisMatch = false;
for (int refPos = 0; refPos < ref.length(); refPos++) {
char refChar = ref.charAt(refPos);
for (Allele var : getAlternateAlleles()) {
byte[] varBases = var.getBases();
if (refPos >= varBases.length || varBases[refPos] != refChar) {
foundmisMatch = true;
break;
}
}
if (foundmisMatch) {
break;
} else {
prefixLength++;
}
}
return prefixLength;
}
}