/*
* The MIT License
*
* Copyright (c) 2014 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.vcf;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.PeekableIterator;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextComparator;
import htsjdk.variant.vcf.VCFFileReader;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.VcfOrBcf;
import picard.vcf.GenotypeConcordanceStates.*;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import static htsjdk.variant.variantcontext.VariantContext.Type.*;
/**
* Calculates the concordance between genotype data for two samples in two different VCFs - one being considered the truth (or reference)
* the other being the call. The concordance is broken into separate results sections for SNPs and indels. Summary and detailed statistics
* are reported
*
* @author Tim Fennell
* @author George Grant
*/
@CommandLineProgramProperties(
usage = "Calculates the concordance between genotype data for two samples in two different VCFs - one being considered the truth (or reference) " +
"the other being considered the call. The concordance is broken into separate results sections for SNPs and indels. Summary and detailed statistics are reported\n\n" +
"Note that for any pair of variants to compare, only the alleles for the samples under interrogation are considered " +
"and MNP, Symbolic, and Mixed classes of variants are not included.",
usageShort = "Calculates the concordance between genotype data for two samples in two different VCFs",
programGroup = VcfOrBcf.class
)
public class GenotypeConcordance extends CommandLineProgram {
@Option(shortName = "TV", doc="The VCF containing the truth sample")
public File TRUTH_VCF;
@Option(shortName = "CV", doc="The VCF containing the call sample")
public File CALL_VCF;
@Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Basename for the two metrics files that are to be written." +
" Resulting files will be <OUTPUT>" + SUMMARY_METRICS_FILE_EXTENSION + " and <OUTPUT>" + DETAILED_METRICS_FILE_EXTENSION + ".")
public File OUTPUT;
@Option(shortName = "TS", doc="The name of the truth sample within the truth VCF")
public String TRUTH_SAMPLE;
@Option(shortName = "CS", doc="The name of the call sample within the call VCF")
public String CALL_SAMPLE;
@Option(doc="One or more interval list files that will be used to limit the genotype concordance.")
public List<File> INTERVALS;
@Option(doc="If true, multiple interval lists will be intersected. If false multiple lists will be unioned.")
public boolean INTERSECT_INTERVALS = true;
@Option(doc="Genotypes below this genotype quality will have genotypes classified as LowGq.")
public int MIN_GQ = 0;
@Option(doc="Genotypes below this depth will have genotypes classified as LowDp.")
public int MIN_DP = 0;
@Option(doc="If true, output all rows in detailed statistics even when count == 0. When false only output rows with non-zero counts.")
public boolean OUTPUT_ALL_ROWS = false;
@Option(doc="If true, use the VCF index, else iterate over the entire VCF.", optional = true)
public boolean USE_VCF_INDEX = false;
private final Log log = Log.getInstance(GenotypeConcordance.class);
private final ProgressLogger progress = new ProgressLogger(log, 10000, "checked", "variants");
public static final String SUMMARY_METRICS_FILE_EXTENSION = ".genotype_concordance_summary_metrics";
public static final String DETAILED_METRICS_FILE_EXTENSION = ".genotype_concordance_detail_metrics";
public static final String CONTINGENCY_METRICS_FILE_EXTENSION = ".genotype_concordance_contingency_metrics";
protected GenotypeConcordanceCounts snpCounter;
public GenotypeConcordanceCounts getSnpCounter() { return snpCounter; }
protected GenotypeConcordanceCounts indelCounter;
public GenotypeConcordanceCounts getIndelCounter() { return indelCounter; }
// TODO: add optimization if the samples are in the same file
// TODO: add option for auto-detect pairs based on same sample name
// TODO: allow multiple sample-pairs in one pass
public static void main(final String[] args) {
new GenotypeConcordance().instanceMainWithExit(args);
}
@Override protected int doWork() {
IOUtil.assertFileIsReadable(TRUTH_VCF);
IOUtil.assertFileIsReadable(CALL_VCF);
final File summaryMetricsFile = new File(OUTPUT + SUMMARY_METRICS_FILE_EXTENSION);
final File detailedMetricsFile = new File(OUTPUT + DETAILED_METRICS_FILE_EXTENSION);
final File contingencyMetricsFile = new File(OUTPUT + CONTINGENCY_METRICS_FILE_EXTENSION);
IOUtil.assertFileIsWritable(summaryMetricsFile);
IOUtil.assertFileIsWritable(detailedMetricsFile);
IOUtil.assertFileIsWritable(contingencyMetricsFile);
final boolean usingIntervals = this.INTERVALS != null && this.INTERVALS.size() > 0;
IntervalList intervals = null;
SAMSequenceDictionary intervalsSamSequenceDictionary = null;
if (usingIntervals) {
log.info("Loading up region lists.");
long genomeBaseCount = 0;
for (final File f : INTERVALS) {
IOUtil.assertFileIsReadable(f);
final IntervalList tmpIntervalList = IntervalList.fromFile(f);
if (genomeBaseCount == 0) { // Don't count the reference length more than once.
intervalsSamSequenceDictionary = tmpIntervalList.getHeader().getSequenceDictionary();
genomeBaseCount = intervalsSamSequenceDictionary.getReferenceLength();
}
if (intervals == null) intervals = tmpIntervalList;
else if (INTERSECT_INTERVALS) intervals = IntervalList.intersection(intervals, tmpIntervalList);
else intervals = IntervalList.union(intervals, tmpIntervalList);
}
if (intervals != null) {
intervals = intervals.uniqued();
}
log.info("Finished loading up region lists.");
}
final VCFFileReader truthReader = new VCFFileReader(TRUTH_VCF, USE_VCF_INDEX);
final VCFFileReader callReader = new VCFFileReader(CALL_VCF, USE_VCF_INDEX);
// Check that the samples actually exist in the files!
if (!truthReader.getFileHeader().getGenotypeSamples().contains(TRUTH_SAMPLE)) {
throw new PicardException("File " + TRUTH_VCF.getAbsolutePath() + " does not contain genotypes for sample " + TRUTH_SAMPLE);
}
if (!callReader.getFileHeader().getGenotypeSamples().contains(CALL_SAMPLE)) {
throw new PicardException("File " + CALL_VCF.getAbsolutePath() + " does not contain genotypes for sample " + CALL_SAMPLE);
}
// Verify that both VCFs have the same Sequence Dictionary
SequenceUtil.assertSequenceDictionariesEqual(truthReader.getFileHeader().getSequenceDictionary(), callReader.getFileHeader().getSequenceDictionary());
if (usingIntervals) {
// If using intervals, verify that the sequence dictionaries agree with those of the VCFs
SequenceUtil.assertSequenceDictionariesEqual(intervalsSamSequenceDictionary, truthReader.getFileHeader().getSequenceDictionary());
}
// Build the pair of iterators over the regions of interest
final Iterator<VariantContext> truthIterator, callIterator;
if (usingIntervals) {
truthIterator = new ByIntervalListVariantContextIterator(truthReader, intervals);
callIterator = new ByIntervalListVariantContextIterator(callReader, intervals);
}
else {
truthIterator = truthReader.iterator();
callIterator = callReader.iterator();
}
// Now do the iteration and count things up
final PairedVariantSubContextIterator pairedIterator = new PairedVariantSubContextIterator(truthIterator, TRUTH_SAMPLE, callIterator, CALL_SAMPLE, truthReader.getFileHeader().getSequenceDictionary());
snpCounter = new GenotypeConcordanceCounts();
indelCounter = new GenotypeConcordanceCounts();
// A map to keep track of the count of Truth/Call States which we could not successfully classify
final Map<String, Integer> unClassifiedStatesMap = new HashMap<String, Integer>();
log.info("Starting iteration over variants.");
while (pairedIterator.hasNext()) {
final VcTuple tuple = pairedIterator.next();
final VariantContext.Type truthVariantContextType = tuple.truthVariantContext != null ? tuple.truthVariantContext.getType() : NO_VARIATION;
final VariantContext.Type callVariantContextType = tuple.callVariantContext != null ? tuple.callVariantContext.getType() : NO_VARIATION;
// A flag to keep track of whether we have been able to successfully classify the Truth/Call States.
// Unclassified include MIXED/MNP/Symbolic...
boolean stateClassified = false;
final TruthAndCallStates truthAndCallStates = determineState(tuple.truthVariantContext, TRUTH_SAMPLE, tuple.callVariantContext, CALL_SAMPLE, MIN_GQ, MIN_DP);
if (truthVariantContextType == SNP) {
if ((callVariantContextType == SNP) || (callVariantContextType == MIXED) || (callVariantContextType == NO_VARIATION)) {
// Note. If truth is SNP and call is MIXED, the event will be logged in the indelCounter, with row = MIXED
snpCounter.increment(truthAndCallStates);
stateClassified = true;
}
}
else if (truthVariantContextType == INDEL) {
// Note. If truth is Indel and call is MIXED, the event will be logged in the indelCounter, with row = MIXED
if ((callVariantContextType == INDEL) || (callVariantContextType == MIXED) || (callVariantContextType == NO_VARIATION)) {
indelCounter.increment(truthAndCallStates);
stateClassified = true;
}
}
else if (truthVariantContextType == MIXED) {
// Note. If truth is MIXED and call is SNP, the event will be logged in the snpCounter, with column = MIXED
if (callVariantContextType == SNP) {
snpCounter.increment(truthAndCallStates);
stateClassified = true;
}
// Note. If truth is MIXED and call is INDEL, the event will be logged in the snpCounter, with column = MIXED
else if (callVariantContextType == INDEL) {
indelCounter.increment(truthAndCallStates);
stateClassified = true;
}
}
else if (truthVariantContextType == NO_VARIATION) {
if (callVariantContextType == SNP) {
snpCounter.increment(truthAndCallStates);
stateClassified = true;
}
else if (callVariantContextType == INDEL) {
indelCounter.increment(truthAndCallStates);
stateClassified = true;
}
}
if (!stateClassified) {
final String condition = truthVariantContextType + " " + callVariantContextType;
Integer count = unClassifiedStatesMap.get(condition);
if (count == null) count = 0;
unClassifiedStatesMap.put(condition, ++count);
}
final VariantContext variantContextForLogging = tuple.truthVariantContext != null ? tuple.truthVariantContext : tuple.callVariantContext;
progress.record(variantContextForLogging.getChr(), variantContextForLogging.getStart());
}
// Calculate and store the summary-level metrics
final MetricsFile<GenotypeConcordanceSummaryMetrics,?> genotypeConcordanceSummaryMetricsFile = getMetricsFile();
GenotypeConcordanceSummaryMetrics summaryMetrics = new GenotypeConcordanceSummaryMetrics(SNP, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE);
genotypeConcordanceSummaryMetricsFile.addMetric(summaryMetrics);
summaryMetrics = new GenotypeConcordanceSummaryMetrics(INDEL, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE);
genotypeConcordanceSummaryMetricsFile.addMetric(summaryMetrics);
genotypeConcordanceSummaryMetricsFile.write(summaryMetricsFile);
// Calculate and store the detailed metrics for both SNP and indels
final MetricsFile<GenotypeConcordanceDetailMetrics,?> genotypeConcordanceDetailMetrics = getMetricsFile();
outputDetailMetricsFile(SNP, genotypeConcordanceDetailMetrics, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE);
outputDetailMetricsFile(INDEL, genotypeConcordanceDetailMetrics, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE);
genotypeConcordanceDetailMetrics.write(detailedMetricsFile);
// Calculate and score the contingency metrics
final MetricsFile<GenotypeConcordanceContingencyMetrics,?> genotypeConcordanceContingencyMetricsFile = getMetricsFile();
GenotypeConcordanceContingencyMetrics contingencyMetrics = new GenotypeConcordanceContingencyMetrics(SNP, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE);
genotypeConcordanceContingencyMetricsFile.addMetric(contingencyMetrics);
contingencyMetrics = new GenotypeConcordanceContingencyMetrics(INDEL, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE);
genotypeConcordanceContingencyMetricsFile.addMetric(contingencyMetrics);
genotypeConcordanceContingencyMetricsFile.write(contingencyMetricsFile);
for (final String condition : unClassifiedStatesMap.keySet()) {
log.info("Uncovered truth/call Variant Context Type Counts: " + condition + " " + unClassifiedStatesMap.get(condition));
}
return 0;
}
/**
* Outputs the detailed statistics tables for SNP and Indel match categories.
**/
private void outputDetailMetricsFile(final VariantContext.Type variantType, final MetricsFile<GenotypeConcordanceDetailMetrics,?> genotypeConcordanceDetailMetricsFile,
final GenotypeConcordanceCounts counter, final String truthSampleName, final String callSampleName) {
final GenotypeConcordanceScheme scheme = new GenotypeConcordanceScheme();
for (final TruthState truthState : TruthState.values()) {
for (final CallState callState : CallState.values()) {
final int count = counter.getCount(truthState, callState);
final String contingencyValues = scheme.getContingencyStateString(truthState, callState);
if (count > 0 || OUTPUT_ALL_ROWS) {
final GenotypeConcordanceDetailMetrics detailMetrics = new GenotypeConcordanceDetailMetrics();
detailMetrics.VARIANT_TYPE = variantType;
detailMetrics.TRUTH_SAMPLE = truthSampleName;
detailMetrics.CALL_SAMPLE = callSampleName;
detailMetrics.TRUTH_STATE = truthState;
detailMetrics.CALL_STATE = callState;
detailMetrics.COUNT = count;
detailMetrics.CONTINGENCY_VALUES = contingencyValues;
genotypeConcordanceDetailMetricsFile.addMetric(detailMetrics);
}
}
}
}
/**
* A method to determine the truth and call states for a pair of variant contexts representing truth and call.
* A variety of variant and genotype-level checks are first used to determine if either of the the variant contexts
* are filtered and after that a comparison of the called genotype alleles to determine appropriate truth and call state
*
* Note that this method does NOT check for SNP versus Indel. It is assumed that that check is done by the caller and the results
* of this method are stored by SNP/Indel.
* Note that if a variant context has BOTH GQ and DP less than the specified threshold, then it will be of Truth/Call State LOW_GQ
*
* @param truthContext A variant context representing truth
* @param truthSample The name of the truth sample
* @param callContext A variant context representing the call
* @param callSample The name of the call sample
* @param minGq Threshold for filtering by genotype attribute GQ
* @param minDp Threshold for filtering by genotype attribute DP
* @return TruthAndCallStates object containing the TruthState and CallState determined here.
*/
final TruthAndCallStates determineState(final VariantContext truthContext, final String truthSample, final VariantContext callContext, final String callSample, final int minGq, final int minDp) {
TruthState truthState = null;
CallState callState = null;
// TODO: what about getPloidy()
Genotype truthGenotype = null, callGenotype = null;
// Site level checks
if (truthContext == null) truthState = TruthState.MISSING;
else if (truthContext.isMixed()) truthState = TruthState.IS_MIXED;
else if (truthContext.isFiltered()) truthState = TruthState.VC_FILTERED;
else {
// Genotype level checks
truthGenotype = truthContext.getGenotype(truthSample);
if (truthGenotype.isNoCall()) truthState = TruthState.NO_CALL;
else if (truthGenotype.isFiltered()) truthState = TruthState.GT_FILTERED;
else if ((truthGenotype.getGQ() != -1) && (truthGenotype.getGQ() < minGq)) truthState = TruthState.LOW_GQ;
else if ((truthGenotype.getDP() != -1) && (truthGenotype.getDP() < minDp)) truthState = TruthState.LOW_DP;
// Note. Genotype.isMixed means that it is called on one chromosome and NOT on the other
else if ((truthGenotype.isMixed())) truthState = TruthState.NO_CALL;
}
// Site level checks
if (callContext == null) callState = CallState.MISSING;
else if (callContext.isMixed()) callState = CallState.IS_MIXED;
else if (callContext.isFiltered()) callState = CallState.VC_FILTERED;
else {
// Genotype level checks
callGenotype = callContext.getGenotype(callSample);
if (callGenotype.isNoCall()) callState = CallState.NO_CALL;
else if (callGenotype.isFiltered()) callState = CallState.GT_FILTERED;
else if ((callGenotype.getGQ() != -1) && (callGenotype.getGQ() < minGq)) callState = CallState.LOW_GQ;
else if ((callGenotype.getDP() != -1) && (callGenotype.getDP() < minDp)) callState = CallState.LOW_DP;
// Note. Genotype.isMixed means that it is called on one chromosome and NOT on the other
else if ((callGenotype.isMixed())) callState = CallState.NO_CALL;
}
// initialize the reference
String truthRef = (truthContext != null) ? truthContext.getReference().getBaseString() : null;
String callRef = (callContext != null) ? callContext.getReference().getBaseString() : null;
String truthAllele1 = null;
String truthAllele2 = null;
if (null == truthState) {
// Truth State not yet determined - will need to use truth genotypes below
if (truthGenotype.getAlleles().size() != 2) {
throw new IllegalStateException("Genotype for Variant Context: " + truthContext + " does not have exactly 2 alleles");
}
truthAllele1 = truthGenotype.getAllele(0).getBaseString();
truthAllele2 = truthGenotype.getAllele(1).getBaseString();
}
String callAllele1 = null;
String callAllele2 = null;
if (null == callState) {
if (callGenotype.getAlleles().size() != 2) {
throw new IllegalStateException("Genotype for Variant Context: " + callContext + " does not have exactly 2 alleles");
}
callAllele1 = callGenotype.getAllele(0).getBaseString();
callAllele2 = callGenotype.getAllele(1).getBaseString();
}
if ((truthRef != null && callRef != null) && (!truthRef.equals(callRef))) {
// This is for dealing with indel conditions, where we can have truth being TCAA*/T, call being TCAACAA*/TCAA (*=ref)
// So, we want to verify that both refs start with the shorter substring of the two
// and then we want to pad the shorter's ref and alleles, so that TCAA*/T becomes TCAACAA*/TCAA (i.e. tacking on the CAA)
if (truthRef.length() < callRef.length()) {
// Truth reference is shorter than call reference
final String suffix = getStringSuffix(callRef, truthRef, "Ref alleles mismatch between: " + truthContext + " and " + callContext);
truthRef = truthRef + suffix;
if (null == truthState) {
truthAllele1 = truthGenotype.getAllele(0).getBaseString() + suffix;
truthAllele2 = truthGenotype.getAllele(1).getBaseString() + suffix;
}
}
else if (truthRef.length() > callRef.length()) {
// call reference is shorter than truth:
final String suffix = getStringSuffix(truthRef, callRef, "Ref alleles mismatch between: " + truthContext + " and " + callContext);
callRef = callRef + suffix;
if (null == callState) {
callAllele1 = callGenotype.getAllele(0).getBaseString() + suffix;
callAllele2 = callGenotype.getAllele(1).getBaseString() + suffix;
}
}
else {
// Same length - so they must just disagree...
throw new IllegalStateException("Ref alleles mismatch between: " + truthContext + " and " + callContext);
}
}
final OrderedSet<String> allAlleles = new OrderedSet<String>();
if (truthContext != null || callContext != null) {
// Store the refAllele as the first (0th index) allele in allAlleles (only can do if at least one context is non-null)
allAlleles.smartAdd(truthContext == null ? callRef : truthRef); // zeroth allele;
}
if (null == truthState) {
// If truthState is not null, it has not yet been determined, and the truthContext has genotypes (i.e. the alleles are valid)
allAlleles.smartAdd(truthAllele1);
allAlleles.smartAdd(truthAllele2);
}
/**
* if either of the call alleles is in allAlleles, with index > 1, we need to make sure that allele has index 1.
* this is because of the following situations:
*
* REF TRUTH CALL-GT TRUTH-STATE CALL-STATE
* A C/G C/A HET_VAR1_VAR2 HET_REF_VAR1
* A G/C C/A HET_VAR1_VAR2 HET_REF_VAR1
* A G/C G/A HET_VAR1_VAR2 HET_REF_VAR1
* A G/C G/A HET_VAR1_VAR2 HET_REF_VAR1
*
* so, in effect, the order of the alleles in the TRUTH doesn't determine the assignment of allele to Var1 and Var2,
* only once the call is known can this assignment be made.
*/
if (null == callState) {
// If callState is not null, it has not yet been determined, and the callContext has genotypes (i.e. the alleles are valid)
if (allAlleles.indexOf(callAllele1) > 1 || allAlleles.indexOf(callAllele2) > 1) {
allAlleles.remove(2);
allAlleles.remove(1);
allAlleles.smartAdd(truthAllele2);
allAlleles.smartAdd(truthAllele1);
}
allAlleles.smartAdd(callAllele1);
allAlleles.smartAdd(callAllele2);
}
// Truth
if (null == truthState) {
final int allele0idx = allAlleles.indexOf(truthAllele1);
final int allele1idx = allAlleles.indexOf(truthAllele2);
if (allele0idx == allele1idx) { //HOM
truthState = TruthState.getHom(allele0idx);
} else { //HET
truthState = TruthState.getVar(allele0idx, allele1idx);
}
}
// Call
if (null == callState) {
final int allele0idx = allAlleles.indexOf(callAllele1);
final int allele1idx = allAlleles.indexOf(callAllele2);
if (allele0idx == allele1idx) { //HOM
callState = CallState.getHom(allele0idx);
} else { //HET
callState = CallState.getHet(allele0idx, allele1idx);
}
if (null == callState) {
throw new IllegalStateException("This should never happen... Could not classify the call variant: " + callGenotype);
}
}
return new TruthAndCallStates(truthState, callState);
}
final String getStringSuffix(final String longerString, final String shorterString, final String errorMsg) {
// Truth reference is shorter than call reference
if (!longerString.startsWith(shorterString)) {
throw new IllegalStateException(errorMsg);
}
return longerString.substring(shorterString.length());
}
}
/** like a list, but if you ask for an index of an item, it will first add that item.
also, same item cannot be added more than once (like a set)
*/
class OrderedSet<T> extends ArrayList<T> {
public int smartIndexOf(final T o) {
smartAdd(o);
return super.indexOf(o);
}
public boolean smartAdd(final T o) {
if (!this.contains(o)) {
return add(o);
}
return false;
}
}
/** Little class to hold a pair of VariantContexts that are in sync with one another. */
class VcTuple {
public final VariantContext truthVariantContext;
public final VariantContext callVariantContext;
VcTuple(final VariantContext truthVariantContext, final VariantContext callVariantContext) {
this.truthVariantContext = truthVariantContext;
this.callVariantContext = callVariantContext;
}
}
/** Iterator that takes a pair of iterators over VariantContexts and iterates over them in tandem. */
class PairedVariantSubContextIterator implements Iterator<VcTuple> {
private final PeekableIterator<VariantContext> truthIterator;
private final String truthSample;
private final PeekableIterator<VariantContext> callIterator;
private final String callSample;
private final VariantContextComparator comparator;
PairedVariantSubContextIterator(final Iterator<VariantContext> truthIterator, final String truthSample,
final Iterator<VariantContext> callIterator, final String callSample,
final SAMSequenceDictionary dict) {
this.truthIterator = new PeekableIterator<VariantContext>(truthIterator);
this.truthSample = truthSample;
this.callIterator = new PeekableIterator<VariantContext>(callIterator);
this.callSample = callSample;
this.comparator = new VariantContextComparator(dict);
}
@Override
public boolean hasNext() {
return this.truthIterator.hasNext() || this.callIterator.hasNext();
}
@Override
public VcTuple next() {
if (!hasNext()) throw new IllegalStateException("next() called while hasNext() is false.");
final VariantContext truthVariantContext = this.truthIterator.hasNext() ? this.truthIterator.peek() : null;
final VariantContext callVariantContext = this.callIterator.hasNext() ? this.callIterator.peek() : null;
// If one or the other is null because there is no next, just return a one-sided tuple
if (truthVariantContext == null) {
return new VcTuple(null, this.callIterator.next().subContextFromSample(callSample));
}
else if (callVariantContext == null) {
return new VcTuple(this.truthIterator.next().subContextFromSample(truthSample), null);
}
// Otherwise check the ordering and do the right thing
final int ordering = this.comparator.compare(truthVariantContext, callVariantContext);
if (ordering == 0) {
return new VcTuple(this.truthIterator.next().subContextFromSample(truthSample), this.callIterator.next().subContextFromSample(callSample));
}
else if (ordering < 0) {
return new VcTuple(this.truthIterator.next().subContextFromSample(truthSample), null);
}
else {
return new VcTuple(null, this.callIterator.next().subContextFromSample(callSample));
}
}
@Override public void remove() {
throw new UnsupportedOperationException();
}
}