/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package htsjdk.variant.variantcontext;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* Lazy-loading GenotypesContext. A lazy-loading context has access to a
* {@link LazyParser} and the unparsed genotype data. If the user attempts to manipulate
* the genotypes contained in this context, we decode the data and become a full-blown
* GenotypesContext. However, if the user never does this, we are spared a lot of expense
* decoding the genotypes unnecessarily.
*/
public class LazyGenotypesContext extends GenotypesContext {
    /**
     * Placeholder list passed to the superclass constructor; {@link #decode()}
     * later installs the parsed genotypes in its place.
     */
    private static final ArrayList<Genotype> EMPTY = new ArrayList<Genotype>(0);

    /** The LazyParser used to decode {@link #unparsedGenotypeData} when it is first needed. */
    final LazyParser parser;

    /** The still-encoded genotypes data; set to {@code null} once it has been decoded. */
    Object unparsedGenotypeData;

    /**
     * The number of genotypes contained in the unparsed genotypes data
     * (known already in the parser). Lets {@link #isEmpty()} and {@link #size()}
     * answer without forcing a decode.
     */
    final int nUnparsedGenotypes;

    /**
     * True once the values in {@link #unparsedGenotypeData} have been decoded.
     */
    boolean loaded = false;

    /**
     * Simple lazy parser interface. Provide an object implementing this
     * interface to LazyGenotypesContext, and its parse method will be called
     * when the use of the lazy context requires the underlying genotypes data
     * to be parsed into Genotype objects. The data argument is the data that was
     * handed to the LazyGenotypesContext holding the encoded genotypes data.
     */
    public interface LazyParser {
        /**
         * Decodes the encoded genotypes data.
         *
         * @param data the unparsed genotypes data originally given to the LazyGenotypesContext
         * @return the decoded genotypes together with their sample lookup structures
         */
        LazyData parse(Object data);
    }

    /**
     * Holder for the result of a {@link LazyParser#parse(Object)} call: the data used
     * in the full GenotypesContext constructor
     *
     * {@link GenotypesContext#GenotypesContext(java.util.ArrayList, java.util.Map, java.util.List)}
     */
    public static class LazyData {
        final ArrayList<Genotype> genotypes;
        final Map<String, Integer> sampleNameToOffset;
        final List<String> sampleNamesInOrder;

        /**
         * Bundles the three pieces of decoded data.
         *
         * @param genotypes          the decoded genotypes
         * @param sampleNamesInOrder the sample names list to install in the context
         * @param sampleNameToOffset the sample-name-to-offset map to install in the context
         */
        public LazyData(final ArrayList<Genotype> genotypes,
                        final List<String> sampleNamesInOrder,
                        final Map<String, Integer> sampleNameToOffset) {
            this.genotypes = genotypes;
            this.sampleNameToOffset = sampleNameToOffset;
            this.sampleNamesInOrder = sampleNamesInOrder;
        }
    }

    /**
     * Creates a new lazy loading genotypes context using the LazyParser to create
     * genotypes data on demand.
     *
     * @param parser               the parser to be used to load on-demand genotypes data
     * @param unparsedGenotypeData the encoded genotypes data that we will decode if necessary
     * @param nUnparsedGenotypes   the number of genotypes that will be produced if / when we
     *                             actually decode the genotypes data
     */
    public LazyGenotypesContext(final LazyParser parser,
                                final Object unparsedGenotypeData,
                                final int nUnparsedGenotypes) {
        super(EMPTY);
        this.parser = parser;
        this.unparsedGenotypeData = unparsedGenotypeData;
        this.nUnparsedGenotypes = nUnparsedGenotypes;
    }

    /**
     * Overrides the genotypes accessor. The first call decodes the genotypes data and
     * stores the results; later calls return the stored list directly. Note that
     * notToBeDirectlyAccessedGenotypes may diverge from what decode would produce
     * if the genotypes themselves are replaced after the first decode.
     *
     * @return the genotypes list held in notToBeDirectlyAccessedGenotypes after decoding
     */
    @Override
    protected ArrayList<Genotype> getGenotypes() {
        decode();
        return notToBeDirectlyAccessedGenotypes;
    }

    /**
     * Force us to decode the genotypes, if not already done. Does nothing when
     * {@link #loaded} is already true.
     */
    public void decode() {
        if (loaded) {
            return;
        }

        final LazyData decoded = parser.parse(unparsedGenotypeData);
        notToBeDirectlyAccessedGenotypes = decoded.genotypes;
        sampleNamesInOrder = decoded.sampleNamesInOrder;
        sampleNameToOffset = decoded.sampleNameToOffset;
        loaded = true;
        // don't hold the unparsed data any longer
        unparsedGenotypeData = null;

        // warning -- this path allows us to create a VariantContext that doesn't run validateGenotypes()
        // That said, it's not such an important routine -- it's just checking that the genotypes
        // are well formed w.r.t. the alleles list, but this will be enforced within the VCFCodec
    }

    /**
     * Overrides the ensure* functionality. If the data has already been decoded,
     * defer to the super class; otherwise decoding installs the sample name map
     * supplied by the parser and nothing further is done.
     */
    @Override
    protected synchronized void ensureSampleNameMap() {
        if (loaded) {
            super.ensureSampleNameMap();
        } else {
            // will load up all of the necessary data
            decode();
        }
    }

    /**
     * Overrides the ensure* functionality. If the data has already been decoded,
     * defer to the super class; otherwise decoding installs the sample ordering
     * supplied by the parser and nothing further is done.
     */
    @Override
    protected synchronized void ensureSampleOrdering() {
        if (loaded) {
            super.ensureSampleOrdering();
        } else {
            // will load up all of the necessary data
            decode();
        }
    }

    /**
     * Invalidates the sample name map, decoding the genotypes data first if that
     * has not happened yet.
     */
    @Override
    protected void invalidateSampleNameMap() {
        // if the cache is invalidated, and we haven't loaded our data yet, do so
        if (!loaded) {
            decode();
        }
        super.invalidateSampleNameMap();
    }

    /**
     * Invalidates the sample ordering, decoding the genotypes data first if that
     * has not happened yet.
     */
    @Override
    protected void invalidateSampleOrdering() {
        // if the cache is invalidated, and we haven't loaded our data yet, do so
        if (!loaded) {
            decode();
        }
        super.invalidateSampleOrdering();
    }

    /**
     * @return whether this context is empty; before decoding this is answered from
     *         {@link #nUnparsedGenotypes} alone
     */
    @Override
    public boolean isEmpty() {
        // optimization -- we know the number of samples in the unparsed data, so use it here to
        // avoid parsing just to know if the genotypes context is empty
        if (loaded) {
            return super.isEmpty();
        }
        return nUnparsedGenotypes == 0;
    }

    /**
     * @return the size of this context; before decoding this is {@link #nUnparsedGenotypes}
     */
    @Override
    public int size() {
        // optimization -- we know the number of samples in the unparsed data, so use it here to
        // avoid parsing just to know the size of the context
        if (loaded) {
            return super.size();
        }
        return nUnparsedGenotypes;
    }

    /**
     * @return the encoded genotypes data, or {@code null} once {@link #decode()} has
     *         decoded it and released the reference
     */
    public Object getUnparsedGenotypeData() {
        return unparsedGenotypeData;
    }
}