/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package htsjdk.variant.variantcontext.writer;
import htsjdk.variant.bcf2.BCF2Type;
import htsjdk.variant.bcf2.BCF2Utils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* See #BCFWriter for documentation on this classes role in encoding BCF2 files
*
* @author Mark DePristo
* @since 06/12
*/
public abstract class BCF2FieldWriter {
private final VCFHeader header;
private final BCF2FieldEncoder fieldEncoder;
protected BCF2FieldWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
this.header = header;
this.fieldEncoder = fieldEncoder;
}
protected VCFHeader getHeader() { return header; }
protected BCF2FieldEncoder getFieldEncoder() {
return fieldEncoder;
}
protected String getField() { return getFieldEncoder().getField(); }
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
fieldEncoder.writeFieldKey(encoder);
}
public void done(final BCF2Encoder encoder, final VariantContext vc) throws IOException { } // TODO -- overload done so that we null out values and test for correctness
@Override
public String toString() {
return "BCF2FieldWriter " + getClass().getSimpleName() + " with encoder " + getFieldEncoder();
}
// --------------------------------------------------------------------------------
//
// Sites writers
//
// --------------------------------------------------------------------------------
public static abstract class SiteWriter extends BCF2FieldWriter {
protected SiteWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
super(header, fieldEncoder);
}
public abstract void site(final BCF2Encoder encoder, final VariantContext vc) throws IOException;
}
public static class GenericSiteWriter extends SiteWriter {
public GenericSiteWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
super(header, fieldEncoder);
}
@Override
public void site(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
final Object rawValue = vc.getAttribute(getField(), null);
final BCF2Type type = getFieldEncoder().getType(rawValue);
if ( rawValue == null ) {
// the value is missing, just write in null
encoder.encodeType(0, type);
} else {
final int valueCount = getFieldEncoder().numElements(vc, rawValue);
encoder.encodeType(valueCount, type);
getFieldEncoder().encodeValue(encoder, rawValue, type, valueCount);
}
}
}
// --------------------------------------------------------------------------------
//
// Genotypes writers
//
// --------------------------------------------------------------------------------
public static abstract class GenotypesWriter extends BCF2FieldWriter {
int nValuesPerGenotype = -1;
BCF2Type encodingType = null;
protected GenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
super(header, fieldEncoder);
if ( fieldEncoder.hasConstantNumElements() ) {
nValuesPerGenotype = getFieldEncoder().numElements();
}
}
@Override
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
// writes the key information
super.start(encoder, vc);
// only update if we need to
if ( ! getFieldEncoder().hasConstantNumElements() ) {
if ( getFieldEncoder().hasContextDeterminedNumElements() )
// we are cheap -- just depends on genotype of allele counts
nValuesPerGenotype = getFieldEncoder().numElements(vc);
else
// we have to go fishing through the values themselves (expensive)
nValuesPerGenotype = computeMaxSizeOfGenotypeFieldFromValues(vc);
}
encoder.encodeType(nValuesPerGenotype, encodingType);
}
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
final Object fieldValue = g.getExtendedAttribute(getField(), null);
getFieldEncoder().encodeValue(encoder, fieldValue, encodingType, nValuesPerGenotype);
}
protected int numElements(final VariantContext vc, final Genotype g) {
return getFieldEncoder().numElements(vc, g.getExtendedAttribute(getField()));
}
private final int computeMaxSizeOfGenotypeFieldFromValues(final VariantContext vc) {
int size = -1;
for ( final Genotype g : vc.getGenotypes() ) {
size = Math.max(size, numElements(vc, g));
}
return size;
}
}
public static class StaticallyTypeGenotypesWriter extends GenotypesWriter {
public StaticallyTypeGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
super(header, fieldEncoder);
encodingType = getFieldEncoder().getStaticType();
}
}
public static class IntegerTypeGenotypesWriter extends GenotypesWriter {
public IntegerTypeGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
super(header, fieldEncoder);
}
@Override
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
// the only value that is dynamic are integers
final List<Integer> values = new ArrayList<Integer>(vc.getNSamples());
for ( final Genotype g : vc.getGenotypes() ) {
for ( final Integer i : BCF2Utils.toList(Integer.class, g.getExtendedAttribute(getField(), null)) ) {
if ( i != null ) values.add(i);
}
}
encodingType = BCF2Utils.determineIntegerType(values);
super.start(encoder, vc);
}
}
public static class IGFGenotypesWriter extends GenotypesWriter {
final IntGenotypeFieldAccessors.Accessor ige;
public IGFGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder, final IntGenotypeFieldAccessors.Accessor ige) {
super(header, fieldEncoder);
this.ige = ige;
if ( ! (fieldEncoder instanceof BCF2FieldEncoder.IntArray) )
throw new IllegalArgumentException("BUG: IntGenotypesWriter requires IntArray encoder for field " + getField());
}
@Override
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
// TODO
// TODO this piece of code consumes like 10% of the runtime alone because fo the vc.getGenotypes() iteration
// TODO
encodingType = BCF2Type.INT8;
for ( final Genotype g : vc.getGenotypes() ) {
final int[] pls = ige.getValues(g);
final BCF2Type plsType = getFieldEncoder().getType(pls);
encodingType = BCF2Utils.maxIntegerType(encodingType, plsType);
if ( encodingType == BCF2Type.INT32 )
break; // stop early
}
super.start(encoder, vc);
}
@Override
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
getFieldEncoder().encodeValue(encoder, ige.getValues(g), encodingType, nValuesPerGenotype);
}
@Override
protected int numElements(final VariantContext vc, final Genotype g) {
return ige.getSize(g);
}
}
public static class FTGenotypesWriter extends StaticallyTypeGenotypesWriter {
public FTGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
super(header, fieldEncoder);
}
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
final String fieldValue = g.getFilters();
getFieldEncoder().encodeValue(encoder, fieldValue, encodingType, nValuesPerGenotype);
}
@Override
protected int numElements(final VariantContext vc, final Genotype g) {
return getFieldEncoder().numElements(vc, g.getFilters());
}
}
public static class GTWriter extends GenotypesWriter {
final Map<Allele, Integer> alleleMapForTriPlus = new HashMap<Allele, Integer>(5);
Allele ref, alt1;
public GTWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
super(header, fieldEncoder);
}
@Override
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
if ( vc.getNAlleles() > BCF2Utils.MAX_ALLELES_IN_GENOTYPES )
throw new IllegalStateException("Current BCF2 encoder cannot handle sites " +
"with > " + BCF2Utils.MAX_ALLELES_IN_GENOTYPES + " alleles, but you have "
+ vc.getNAlleles() + " at " + vc.getChr() + ":" + vc.getStart());
encodingType = BCF2Type.INT8;
buildAlleleMap(vc);
nValuesPerGenotype = vc.getMaxPloidy(2);
super.start(encoder, vc);
}
@Override
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
final int samplePloidy = g.getPloidy();
for ( int i = 0; i < nValuesPerGenotype; i++ ) {
if ( i < samplePloidy ) {
// we encode the actual allele
final Allele a = g.getAllele(i);
final int offset = getAlleleOffset(a);
final int encoded = ((offset+1) << 1) | (g.isPhased() ? 0x01 : 0x00);
encoder.encodeRawBytes(encoded, encodingType);
} else {
// we need to pad with missing as we have ploidy < max for this sample
encoder.encodeRawBytes(encodingType.getMissingBytes(), encodingType);
}
}
}
/**
* Fast path code to determine the offset.
*
* Inline tests for == against ref (most common, first test)
* == alt1 (second most common, second test)
* == NO_CALL (third)
* and finally in the map from allele => offset for all alt 2+ alleles
*
* @param a the allele whose offset we wish to determine
* @return the offset (from 0) of the allele in the list of variant context alleles (-1 means NO_CALL)
*/
private final int getAlleleOffset(final Allele a) {
if ( a == ref ) return 0;
else if ( a == alt1 ) return 1;
else if ( a == Allele.NO_CALL ) return -1;
else {
final Integer o = alleleMapForTriPlus.get(a);
if ( o == null ) throw new IllegalStateException("BUG: Couldn't find allele offset for allele " + a);
return o;
}
}
private final void buildAlleleMap(final VariantContext vc) {
// these are fast path options to determine the offsets for
final int nAlleles = vc.getNAlleles();
ref = vc.getReference();
alt1 = nAlleles > 1 ? vc.getAlternateAllele(0) : null;
if ( nAlleles > 2 ) {
// for multi-allelics we need to clear the map, and add additional looks
alleleMapForTriPlus.clear();
final List<Allele> alleles = vc.getAlleles();
for ( int i = 2; i < alleles.size(); i++ ) {
alleleMapForTriPlus.put(alleles.get(i), i);
}
}
}
}
}