GenotypeConcordance.java example

Explorer

picard-master
- src
  - main
    - java
      - picard
        PicardException.java
        Test.java
        analysis
        AbstractWgsMetricsCollector.java
        AdapterUtility.java
        AlignmentSummaryMetrics.java
        AlignmentSummaryMetricsCollector.java
        BaseDistributionByCycleMetrics.java
        ChimeraUtil.java
        CollectAlignmentSummaryMetrics.java
        CollectBaseDistributionByCycle.java
        CollectGcBiasMetrics.java
        CollectInsertSizeMetrics.java
        CollectJumpingLibraryMetrics.java
        CollectMultipleMetrics.java
        CollectOxoGMetrics.java
        CollectQualityYieldMetrics.java
        CollectRawWgsMetrics.java
        CollectRnaSeqMetrics.java
        CollectRrbsMetrics.java
        CollectWgsMetrics.java
        CollectWgsMetricsWithNonZeroCoverage.java
        CompareMetrics.java
        CounterManager.java
        FastWgsMetricsCollector.java
        FingerprintingDetailMetrics.java
        FingerprintingSummaryMetrics.java
        GcBiasDetailMetrics.java
        GcBiasMetricsCollector.java
        GcBiasSummaryMetrics.java
        GcBiasUtils.java
        InsertSizeMetrics.java
        JumpingLibraryMetrics.java
        MeanQualityByCycle.java
        MergeableMetricBase.java
        MetricAccumulationLevel.java
        QualityScoreDistribution.java
        RnaSeqMetrics.java
        RrbsCpgDetailMetrics.java
        RrbsMetrics.java
        RrbsMetricsCollector.java
        RrbsSummaryMetrics.java
        SinglePassSamProgram.java
        TheoreticalSensitivity.java
        WgsMetricsProcessor.java
        WgsMetricsProcessorImpl.java
        artifacts
        ArtifactCounter.java
        CollectSequencingArtifactMetrics.java
        ContextAccumulator.java
        ConvertSequencingArtifactToOxoG.java
        ErrorSummaryMetrics.java
        SequencingArtifactMetrics.java
        Transition.java
        directed
        CollectHsMetrics.java
        CollectTargetedMetrics.java
        CollectTargetedPcrMetrics.java
        HsMetricCollector.java
        HsMetrics.java
        InsertSizeMetricsCollector.java
        RnaSeqMetricsCollector.java
        TargetMetricsCollector.java
        TargetedPcrMetrics.java
        TargetedPcrMetricsCollector.java
        replicates
        CollectIndependentReplicateMetrics.java
        IndependentReplicateMetric.java
        annotation
        AnnotationException.java
        Gene.java
        GeneAnnotationReader.java
        LocusFunction.java
        RefFlatReader.java
        cmdline
        ClassFinder.java
        CommandLineDefaults.java
        CommandLineParseException.java
        CommandLineParser.java
        CommandLineParserDefinitionException.java
        CommandLineProgram.java
        CommandLineProgramGroup.java
        CommandLineProgramProperties.java
        CreateHtmlDocForProgram.java
        CreateHtmlDocForStandardOptions.java
        NestedOptions.java
        Option.java
        PicardCommandLine.java
        PositionalArguments.java
        StandardOptionDefinitions.java
        programgroups
        Alpha.java
        Fasta.java
        Fingerprinting.java
        Illumina.java
        Intervals.java
        Metrics.java
        None.java
        SamOrBam.java
        Testing.java
        VcfOrBcf.java
        fastq
        BamToBfq.java
        BamToBfqWriter.java
        Casava18ReadNameEncoder.java
        IlluminaReadNameEncoder.java
        ReadNameEncoder.java
        filter
        CountingDuplicateFilter.java
        CountingFilter.java
        CountingMapQFilter.java
        CountingPairedFilter.java
        fingerprint
        CheckFingerprint.java
        CrosscheckReadGroupFingerprints.java
        DiploidGenotype.java
        DiploidHaplotype.java
        Fingerprint.java
        FingerprintChecker.java
        FingerprintResults.java
        HaplotypeBlock.java
        HaplotypeMap.java
        HaplotypeProbabilities.java
        HaplotypeProbabilitiesFromContaminatorSequence.java
        HaplotypeProbabilitiesFromGenotype.java
        HaplotypeProbabilitiesFromGenotypeLikelihoods.java
        HaplotypeProbabilitiesFromSequence.java
        HaplotypeProbabilitiesUsingLogLikelihoods.java
        HaplotypeProbabilityOfNormalGivenTumor.java
        LocusResult.java
        MatchResults.java
        Snp.java
        illumina
        CheckIlluminaDirectory.java
        ClusterDataToSamConverter.java
        CollectIlluminaBasecallingMetrics.java
        CollectIlluminaLaneMetrics.java
        CustomAdapterPair.java
        ExtractIlluminaBarcodes.java
        IlluminaBasecallingMetrics.java
        IlluminaBasecallsConverter.java
        IlluminaBasecallsToFastq.java
        IlluminaBasecallsToSam.java
        IlluminaLaneMetrics.java
        IlluminaPhasingMetrics.java
        LanePhasingMetricsCollector.java
        MarkIlluminaAdapters.java
        parser
        BarcodeParser.java
        BclData.java
        BclParser.java
        ClusterData.java
        ClusterIntensityFileReader.java
        CycleIlluminaFileMap.java
        FilterParser.java
        FourChannelIntensityData.java
        IlluminaData.java
        IlluminaDataProvider.java
        IlluminaDataProviderFactory.java
        IlluminaDataType.java
        IlluminaFileMap.java
        IlluminaFileNotFoundException.java
        IlluminaFileUtil.java
        IlluminaMetricsCode.java
        IlluminaParser.java
        IlluminaTextIterator.java
        IntensityChannel.java
        MultiTileBclFileUtil.java
        MultiTileBclParser.java
        MultiTileFileUtil.java
        MultiTileFilterParser.java
        MultiTileLocsParser.java
        MultiTileParser.java
        OutputMapping.java
        ParameterizedFileUtil.java
        PerTileCycleParser.java
        PerTileFileUtil.java
        PerTileParser.java
        PerTilePerCycleFileUtil.java
        PosParser.java
        Range.java
        ReadData.java
        ReadDescriptor.java
        ReadStructure.java
        ReadType.java
        Tile.java
        TileIndex.java
        TileMetricsUtil.java
        TilePhasingValue.java
        TileTemplateRead.java
        fakers
        BarcodeFileFaker.java
        BciFileFaker.java
        BclFileFaker.java
        ClocsFileFaker.java
        FileFaker.java
        FilterFileFaker.java
        LocsFileFaker.java
        MultiTileBclFileFaker.java
        MultiTileLocsFileFaker.java
        PosFileFaker.java
        readers
        AbstractIlluminaPositionFileReader.java
        BarcodeFileReader.java
        BaseBclReader.java
        BclIndexReader.java
        BclQualityEvaluationStrategy.java
        BclReader.java
        CbclReader.java
        ClocsFileReader.java
        FilterFileReader.java
        LocsFileReader.java
        MMapBackedIteratorFactory.java
        PosFileReader.java
        TileMetricsOutReader.java
        quality
        CollectHiSeqXPfFailMetrics.java
        metrics
        GcBiasMetrics.java
        MultiLevelCollector.java
        MultilevelMetrics.java
        PerUnitMetricCollector.java
        SAMRecordAndReference.java
        SAMRecordAndReferenceMultiLevelCollector.java
        SAMRecordMultiLevelCollector.java
        pedigree
        PedFile.java
        Sex.java
        reference
        ExtractSequences.java
        NonNFastaSize.java
        NormalizeFasta.java
        sam
        AbstractAlignmentMerger.java
        AddCommentsToBam.java
        AddOrReplaceReadGroups.java
        BamIndexStats.java
        BestEndMapqPrimaryAlignmentStrategy.java
        BestMapqPrimaryAlignmentSelectionStrategy.java
        BuildBamIndex.java
        CalculateReadGroupChecksum.java
        CheckTerminatorBlock.java
        CleanSam.java
        CompareSAMs.java
        CreateSequenceDictionary.java
        DownsampleSam.java
        DuplicationMetrics.java
        EarliestFragmentPrimaryAlignmentSelectionStrategy.java
        FastqToSam.java
        FilterSamReads.java
        FixMateInformation.java
        GatherBamFiles.java
        HitsForInsert.java
        MergeBamAlignment.java
        MergeSamFiles.java
        MostDistantPrimaryAlignmentSelectionStrategy.java
        MultiHitAlignedReadIterator.java
        PositionBasedDownsampleSam.java
        PrimaryAlignmentSelectionStrategy.java
        ReorderSam.java
        ReplaceSamHeader.java
        RevertOriginalBaseQualitiesAndAddMateCigar.java
        RevertSam.java
        SamAlignmentMerger.java
        SamFormatConverter.java
        SamToFastq.java
        SetNmAndUqTags.java
        SetNmMdAndUqTags.java
        SortSam.java
        SplitSamByLibrary.java
        ValidateSamFile.java
        ViewSam.java
        markduplicates
        ElcDuplicatesFinder.java
        ElcDuplicatesFinderResolver.java
        ElcHashBasedDuplicatesFinder.java
        ElcIdenticalBasesDuplicatesFinder.java
        EstimateLibraryComplexity.java
        MarkDuplicates.java
        MarkDuplicatesWithMateCigar.java
        MarkDuplicatesWithMateCigarIterator.java
        SimpleMarkDuplicatesWithMateCigar.java
        UmiAwareDuplicateSetIterator.java
        UmiAwareMarkDuplicatesWithMateCigar.java
        UmiGraph.java
        UmiMetrics.java
        util
        AbstractMarkDuplicatesCommandLineProgram.java
        AbstractOpticalDuplicateFinderCommandLineProgram.java
        DiskBasedReadEndsForMarkDuplicatesMap.java
        LibraryIdGenerator.java
        MarkQueue.java
        MemoryBasedReadEndsForMarkDuplicatesMap.java
        OpticalDuplicateFinder.java
        PhysicalLocationForMateCigar.java
        PhysicalLocationForMateCigarSet.java
        ReadEnds.java
        ReadEndsForMarkDuplicates.java
        ReadEndsForMarkDuplicatesCodec.java
        ReadEndsForMarkDuplicatesMap.java
        ReadEndsForMarkDuplicatesWithBarcodes.java
        ReadEndsForMarkDuplicatesWithBarcodesCodec.java
        ReadEndsForMateCigar.java
        RepresentativeReadIndexerCodec.java
        SamRecordWithOrdinalAndSetDuplicateReadFlag.java
        util
        PhysicalLocation.java
        PhysicalLocationInt.java
        PhysicalLocationShort.java
        ReadNameParser.java
        RepresentativeReadIndexer.java
        util
        AbstractInputParser.java
        AdapterMarker.java
        AdapterPair.java
        AsyncIterator.java
        AtomicIterator.java
        BaitDesigner.java
        BasicInputParser.java
        BedToIntervalList.java
        CircularByteBuffer.java
        ClippingUtility.java
        CsvInputParser.java
        DbSnpBitSetUtil.java
        DelimitedTextFileWithHeaderIterator.java
        FifoBuffer.java
        FileChannelJDKBugWorkAround.java
        IlluminaUtil.java
        IntervalListScatterer.java
        IntervalListToBed.java
        IntervalListTools.java
        Iterators.java
        LiftOverIntervalList.java
        MathUtil.java
        MetricsDoclet.java
        QuerySortedReadPairIteratorUtil.java
        RExecutor.java
        ScatterIntervalsByNs.java
        TabbedInputParser.java
        TabbedTextFileWithHeaderParser.java
        UnsignedTypeUtil.java
        VariantType.java
        vcf
        AccumulateVariantCallingMetrics.java
        ByIntervalListVariantContextIterator.java
        CallingMetricAccumulator.java
        CollectVariantCallingMetrics.java
        FixVcfHeader.java
        GA4GHScheme.java
        GA4GHSchemeWithMissingAsHomRef.java
        GatherVcfs.java
        GenotypeConcordance.java
        GenotypeConcordanceContingencyMetrics.java
        GenotypeConcordanceCounts.java
        GenotypeConcordanceDetailMetrics.java
        GenotypeConcordanceScheme.java
        GenotypeConcordanceSchemeFactory.java
        GenotypeConcordanceStateCodes.java
        GenotypeConcordanceStates.java
        GenotypeConcordanceSummaryMetrics.java
        GvcfMetricAccumulator.java
        LiftoverVcf.java
        MakeSitesOnlyVcf.java
        MendelianViolations
        FindMendelianViolations.java
        MendelianViolationDetector.java
        MendelianViolationMetrics.java
        MendelianViolationsByFamily.java
        MergeVcfs.java
        PairedVariantSubContextIterator.java
        RenameSampleInVcf.java
        SortVcf.java
        SplitVcfs.java
        UpdateVcfSequenceDictionary.java
        VcfFormatConverter.java
        VcfToIntervalList.java
        filter
        AlleleBalanceFilter.java
        DepthFilter.java
        FilterApplyingVariantIterator.java
        FilterVcf.java
        FisherStrandFilter.java
        GenotypeFilter.java
        GenotypeQualityFilter.java
        QdFilter.java
        VariantFilter.java
        processor
        VariantAccumulatorExecutor.java
        VariantIteratorProducer.java
        VariantProcessor.java
        VcfFileSegment.java
        VcfFileSegmentGenerator.java
        util
        PredicateFilterDecoratingClosableIterator.java
  - test
    - java
      - picard
        analysis
        AbstractWgsMetricsCollectorTest.java
        CollectAlignmentSummaryMetricsTest.java
        CollectGcBiasMetricsTest.java
        CollectInsertSizeMetricsTest.java
        CollectJumpingLibraryMetricsTest.java
        CollectMultipleMetricsTest.java
        CollectOxoGMetricsTest.java
        CollectQualityYieldMetricsTest.java
        CollectRnaSeqMetricsTest.java
        CollectWgsMetricsTest.java
        CollectWgsMetricsTestUtils.java
        CollectWgsMetricsWithNonZeroCoverageTest.java
        CounterManagerTest.java
        FastWgsMetricsCollectorTest.java
        MergeableMetricBaseTest.java
        MultiLevelCollectorTest.java
        TheoreticalSensitivityTest.java
        WgsMetricsProcessorImplTest.java
        WgsMetricsTest.java
        artifacts
        CollectSequencingArtifactMetricsTest.java
        directed
        CollectHsMetricsTest.java
        CollectTargetedMetricsTest.java
        replicates
        CollectIndependentReplicatesMetricTest.java
        cmdline
        CommandLineParserTest.java
        CommandLineProgramTest.java
        PicardCommandLineTest.java
        fastq
        BamToBfqTest.java
        fingerprint
        FingerprintCheckerTest.java
        HaplotypeMapTest.java
        HaplotypeProbabilitiesTest.java
        HaplotypeProbabilityOfNormalGivenTumorTest.java
        illumina
        CheckIlluminaDirectoryTest.java
        CollectIlluminaBasecallingMetricsTest.java
        ExtractIlluminaBarcodesTest.java
        IlluminaBasecallsToFastqTest.java
        IlluminaBasecallsToSamAdapterClippingTest.java
        IlluminaBasecallsToSamTest.java
        IlluminaLaneMetricsCollectorTest.java
        ReadStructureTest.java
        parser
        BclParserTest.java
        BinTdUtil.java
        CycleIlluminaFileMapTest.java
        FilterParserTest.java
        IlluminaDataProviderFactoryTest.java
        IlluminaDataProviderTest.java
        IlluminaFileUtilTest.java
        PerTileParserTest.java
        PerTilePerCycleParserTest.java
        PosParserTest.java
        fakers
        BclFileFakerTest.java
        readers
        AbstractIlluminaPositionFileReaderTest.java
        BclReaderTest.java
        CbclReaderTest.java
        ClocsFileReaderTest.java
        FilterFileReaderTest.java
        IlluminaFileUtilTest.java
        LocsFileReaderTest.java
        MMapBackedIteratorFactoryTest.java
        PosFileReaderTest.java
        metrics
        CollectRrbsMetricsTest.java
        pedigree
        PedFileTest.java
        reference
        NonNFastaSizeTest.java
        sam
        AbstractAlignmentMergerTest.java
        AddCommentsToBamTest.java
        CleanSamTest.java
        CompareSAMsTest.java
        CreateSequenceDictionaryTest.java
        DuplicationMetricsTest.java
        FastqToSamTest.java
        FilterSamReadsTest.java
        FixMateInformationTest.java
        GatherBamFilesTest.java
        MergeBamAlignmentTest.java
        MergeSamFilesTest.java
        PositionBasedDownsampleSamTest.java
        RevertSamTest.java
        SamFileConverterTest.java
        SamToFastqTest.java
        SetNmMdAndUqTagsTest.java
        SplitSamByLibraryTest.java
        ViewSamTest.java
        markduplicates
        AbstractMarkDuplicatesCommandLineProgramTest.java
        AbstractMarkDuplicatesCommandLineProgramTester.java
        AsIsMarkDuplicatesTester.java
        BySumOfBaseQAndInOriginalOrderMDTester.java
        ElcHashBasedDuplicatesFinderTest.java
        ElcIdenticalBasesDuplicatesFinderTest.java
        EstimateLibraryComplexityTest.java
        MarkDuplicateWithMissingBarcodeTest.java
        MarkDuplicateWithMissingReadOneBarcodeTest.java
        MarkDuplicateWithMissingReadTwoBarcodeTest.java
        MarkDuplicateWithMissingSampleBarcodeTest.java
        MarkDuplicatesTagRepresentativeReadIndexTest.java
        MarkDuplicatesTagRepresentativeReadIndexTester.java
        MarkDuplicatesTest.java
        MarkDuplicatesTester.java
        MarkDuplicatesWithMateCigarTest.java
        MarkDuplicatesWithMateCigarTester.java
        QuerySortedMarkDuplicatesTester.java
        SimpleMarkDuplicatesWithMateCigarTest.java
        SimpleMarkDuplicatesWithMateCigarTester.java
        UmiAwareMarkDuplicatesWithMateCigarTest.java
        UmiAwareMarkDuplicatesWithMateCigarTester.java
        util
        OpticalDuplicateFinderTest.java
        testers
        CleanSamTester.java
        SamFileTester.java
        ValidateSamTester.java
        util
        ReadNameParserTests.java
        util
        BedToIntervalListTest.java
        ClippingUtilityTest.java
        DelimitedTextFileWithHeaderIteratorTest.java
        FifoBufferTest.java
        FileChannelJDKBugWorkAroundTest.java
        IlluminaUtilTest.java
        IntervalListScattererTest.java
        IntervalListToBedTest.java
        MathUtilTest.java
        MergingIteratorTest.java
        QuerySortedReadPairIteratorUtilTest.java
        RExecutorTest.java
        ScatterIntervalsByNsTest.java
        TabbedTextFileWithHeaderParserTest.java
        TestNGUtil.java
        TextFileParsersTest.java
        UnsignedTypeUtilTest.java
        vcf
        AbstractVcfMergingClpTester.java
        AccumulateVariantCallingMetricsTest.java
        ByIntervalListVariantContextIteratorTest.java
        CallingMetricAccumulatorTest.java
        CollectVariantCallingMetricsTest.java
        FixVcfHeaderTest.java
        GenotypeConcordanceGA4GHSchemeTest.java
        GenotypeConcordanceGA4GHSchemeWithMissingTest.java
        GenotypeConcordanceTest.java
        LiftoverVcfTest.java
        MendelianViolations
        FindMendelianViolationsTest.java
        MergeVcfsTest.java
        SortVcfsTest.java
        SplitVcfsTest.java
        UpdateVcfSequenceDictionaryTest.java
        VariantContextComparatorTest.java
        VcfFormatConverterTest.java
        VcfTestUtils.java
        filter
        TestFilterVcf.java
        processor
        AccumulatorExecutorTest.java
        ByWholeContigTest.java
        ThreadsafeTest.java
        VcfFileSegmentGeneratorTest.java
        WidthLimitingDecoratorTest.java

/*
 * The MIT License
 *
 * Copyright (c) 2015 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package picard.vcf;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.*;
import htsjdk.tribble.Tribble;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.*;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.VcfOrBcf;
import picard.vcf.GenotypeConcordanceStates.CallState;
import picard.vcf.GenotypeConcordanceStates.ContingencyState;
import picard.vcf.GenotypeConcordanceStates.TruthAndCallStates;
import picard.vcf.GenotypeConcordanceStates.TruthState;
import picard.vcf.PairedVariantSubContextIterator.VcfTuple;

import java.io.File;
import java.util.*;

import static htsjdk.variant.variantcontext.VariantContext.Type.*;
import static htsjdk.variant.vcf.VCFConstants.MISSING_VALUE_v4;
import static picard.vcf.GenotypeConcordanceStateCodes.*;

/**
 * Calculates the concordance between genotype data for two samples in two different VCFs - one being considered the truth (or reference)
 * the other being the call.  The concordance is broken into separate results sections for SNPs and indels.  Summary and detailed statistics
 * are reported
 *
 * @author Tim Fennell
 * @author George Grant
 */
@CommandLineProgramProperties(
        usage = GenotypeConcordance.USAGE_SUMMARY + GenotypeConcordance.USAGE_DETAILS,
        usageShort =  GenotypeConcordance.USAGE_SUMMARY,
        programGroup = VcfOrBcf.class
)
public class GenotypeConcordance extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Evaluate genotype concordance between callsets.";
    static final String USAGE_DETAILS = "This tool evaluates the concordance between genotype calls for samples in different " +
            "callsets where one is being considered as the truth (aka standard, or reference) and the other as the call that is being " +
            "evaluated for accuracy. <br />" +
            "<h4>Usage example:</h4>" +
            "<pre>" +
            "java -jar picard.jar GenotypeConcordance \\<br />" +
            "      CALL_VCF=input.vcf \\<br />" +
            "      CALL_SAMPLE=sample_name \\<br />" +
            "      O=gc_concordance.vcf \\<br />" +
            "      TRUTH_VCF=truth_set.vcf \\<br />" +
            "      TRUTH_SAMPLE=truth_sample#" +
            "</pre>" +
            "" +
            "<h4>Output Metrics:</h4>" +
            "<ul>" +
            "<li>Output metrics include GenotypeConcordanceContingencyMetrics, GenotypeConcordanceSummaryMetrics, and " +
            "GenotypeConcordanceDetailMetrics.  For each set of metrics, the data is broken into separate sections for " +
            "SNPs and INDELs.  Note that only SNP and INDEL variants are considered, MNP, Symbolic, and Mixed classes" +
            " of variants are not included. </li>" +
            "<li>GenotypeConcordanceContingencyMetrics enumerate the constituents of each contingent in a callset " +
            "including true-positive (TP), true-negative (TN), false-positive (FP), and false-negative (FN) calls. See " +
            "http://broadinstitute.github.io/picard/picard-metric-definitions.html#GenotypeConcordanceContingencyMetrics" +
            " for more details.</li>" +
            "<li>GenotypeConcordanceDetailMetrics include the numbers of SNPs and INDELs for each contingent genotype as well " +
            "as the number of validated genotypes. See " +
            "http://broadinstitute.github.io/picard/picard-metric-definitions.html#GenotypeConcordanceDetailMetrics for more details.</li>" +
            "<li>GenotypeConcordanceSummaryMetrics provide specific details for the variant caller performance on a callset including: " +
            "values for sensitivity, specificity, and positive predictive values. See " +
            "http://broadinstitute.github.io/picard/picard-metric-definitions.html#GenotypeConcordanceSummaryMetrics for more details.</li>" +
            "</ul>" +
            "<br /><br />" +
            "Useful definitions applicable to alleles and genotypes:<br /> " +
            "<ul>"+
            "<li>Truthset - A callset (typically in VCF format) containing variant calls and genotypes that have been cross-validated " +
            "with multiple technologies e.g. Genome In A Bottle Consortium (GIAB) (https://sites.stanford.edu/abms/giab)</li>" +
            "<li>TP - True positives are variant calls that match a 'truthset'</li>" +
            "<li>FP - False-positives are reference sites miscalled as variant</li>" +
            "<li>FN - False-negatives are variant sites miscalled as reference</li>" +
            "<li>TN - True negatives are correctly called reference sites</li>" +
            "<li>Validated genotypes - are TP sites where the exact genotype (HET or HOM-VAR) has been validated </li> "     +
            "</ul>"+
            "" +
            "<h4>VCF Output:</h4>" +
            "<ul>" +
            "<li>The concordance state will be stored in the \"CONC_ST\" tag in the INFO field.</li>" +
            "<li>The truth sample name will be \"truth\" and call sample name will be \"call\".</li>" +
            "</ul>" +
            "<hr />"
            ;
    @Option(shortName = "TV", doc="The VCF containing the truth sample")
    public File TRUTH_VCF;

    @Option(shortName = "CV", doc="The VCF containing the call sample")
    public File CALL_VCF;

    @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Basename for the two metrics files that are to be written." +
            " Resulting files will be <OUTPUT>" + SUMMARY_METRICS_FILE_EXTENSION + " and <OUTPUT>" + DETAILED_METRICS_FILE_EXTENSION + ".")
    public File OUTPUT;

    @Option(doc = "Output a VCF annotated with concordance information.")
    public boolean OUTPUT_VCF = false;

    @Option(shortName = "TS", doc="The name of the truth sample within the truth VCF. Not required if only one sample exists.", optional = true)
    public String TRUTH_SAMPLE = null;

    @Option(shortName = "CS", doc="The name of the call sample within the call VCF. Not required if only one sample exists.", optional = true)
    public String CALL_SAMPLE = null;

    @Option(doc="One or more interval list files that will be used to limit the genotype concordance.  Note - if intervals are specified, the VCF files must be indexed.")
    public List<File> INTERVALS;

    @Option(doc="If true, multiple interval lists will be intersected. If false multiple lists will be unioned.")
    public boolean INTERSECT_INTERVALS = true;

    @Option(doc="Genotypes below this genotype quality will have genotypes classified as LowGq.")
    public int MIN_GQ = 0;

    @Option(doc="Genotypes below this depth will have genotypes classified as LowDp.")
    public int MIN_DP = 0;

    @Option(doc="If true, output all rows in detailed statistics even when count == 0.  When false only output rows with non-zero counts.")
    public boolean OUTPUT_ALL_ROWS = false;

    @Option(doc="If true, use the VCF index, else iterate over the entire VCF.", optional = true)
    public boolean USE_VCF_INDEX = false;

    @Option(shortName = "MISSING_HOM", doc="Default is false, which follows the GA4GH Scheme. If true, missing sites in the truth set will be " +
            "treated as HOM_REF sites and sites missing in both the truth and call sets will be true negatives. Useful when hom ref sites are left out of the truth set. " +
            "This flag can only be used with a high confidence interval list.")
    public boolean MISSING_SITES_HOM_REF = false;

    private final Log log = Log.getInstance(GenotypeConcordance.class);
    private final ProgressLogger progress = new ProgressLogger(log, 10000, "checked", "variants");

    public static final String SUMMARY_METRICS_FILE_EXTENSION     = ".genotype_concordance_summary_metrics";
    public static final String DETAILED_METRICS_FILE_EXTENSION    = ".genotype_concordance_detail_metrics";
    public static final String CONTINGENCY_METRICS_FILE_EXTENSION = ".genotype_concordance_contingency_metrics";
    public static final String OUTPUT_VCF_FILE_EXTENSION          = ".genotype_concordance.vcf.gz";

    protected GenotypeConcordanceCounts snpCounter;
    public GenotypeConcordanceCounts getSnpCounter() { return snpCounter; }

    protected GenotypeConcordanceCounts indelCounter;
    public GenotypeConcordanceCounts getIndelCounter() { return indelCounter; }

    public static final String CONTINGENCY_STATE_TAG = "CONC_ST";
    public static final VCFHeaderLine CONTINGENCY_STATE_HEADER_LINE = new VCFInfoHeaderLine(CONTINGENCY_STATE_TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "The genotype concordance contingency state(s)");
    public static final String OUTPUT_VCF_TRUTH_SAMPLE_NAME = "truth";
    public static final String OUTPUT_VCF_CALL_SAMPLE_NAME = "call";

    public static void main(final String[] args) {
        new GenotypeConcordance().instanceMainWithExit(args);
    }

    @Override
    protected String[] customCommandLineValidation() {
        // Note - If the user specifies to use INTERVALS, the code will fail if the vcfs are not indexed, so we set USE_VCF_INDEX to true and check that the vcfs are indexed.
        IOUtil.assertFileIsReadable(TRUTH_VCF);
        IOUtil.assertFileIsReadable(CALL_VCF);
        final boolean usingIntervals = this.INTERVALS != null && !this.INTERVALS.isEmpty();
        final List<String> errors = new ArrayList<String>();
        if (usingIntervals) {
            USE_VCF_INDEX = true;
        }
        if (USE_VCF_INDEX) {
            // Index file is required either because we are using intervals, or because user-set parameter
            if (!indexExists(TRUTH_VCF)) {
                errors.add("The index file was not found for the TRUTH VCF.  Note that if intervals are specified, the VCF files must be indexed.");
            }
            if (!indexExists(CALL_VCF)) {
                errors.add("The index file was not found for the CALL VCF.  Note that if intervals are specified, the VCF files must be indexed.");
            }
        }
        if (MISSING_SITES_HOM_REF) {
            //If you are using this flag you must include a high confidence interval list where missing sites are hom_ref.
            if (!usingIntervals) {
                errors.add("You cannot use the MISSING_HOM option without also supplying an interval list over which missing " +
                        "sites are considered confident homozygous reference calls.");
            }
        }

        if (errors.isEmpty()) {
            return null;
        } else {
            return errors.toArray(new String[errors.size()]);
        }
    }

    /**
     * Determines whether an index file exists for the given vcf file using standard extension suffixes
     *
     * @param vcf  the vcf file to investigate
     * @return true if an index file exists, false otherwise
     */
    private boolean indexExists(final File vcf) {
        return Tribble.indexFile(vcf).exists() || Tribble.tabixIndexFile(vcf).exists();
    }

    @Override protected int doWork() {
        final File summaryMetricsFile     = new File(OUTPUT + SUMMARY_METRICS_FILE_EXTENSION);
        final File detailedMetricsFile    = new File(OUTPUT + DETAILED_METRICS_FILE_EXTENSION);
        final File contingencyMetricsFile = new File(OUTPUT + CONTINGENCY_METRICS_FILE_EXTENSION);
        IOUtil.assertFileIsWritable(summaryMetricsFile);
        IOUtil.assertFileIsWritable(detailedMetricsFile);
        IOUtil.assertFileIsWritable(contingencyMetricsFile);

        final GenotypeConcordanceSchemeFactory schemeFactory = new GenotypeConcordanceSchemeFactory();
        final GenotypeConcordanceScheme scheme = schemeFactory.getScheme(MISSING_SITES_HOM_REF);

        final boolean usingIntervals = this.INTERVALS != null && !this.INTERVALS.isEmpty();
        IntervalList intervals = null;
        SAMSequenceDictionary intervalsSamSequenceDictionary = null;
        if (usingIntervals) {
            log.info("Starting to load intervals list(s).");
            long genomeBaseCount = 0;
            for (final File f : INTERVALS) {
                IOUtil.assertFileIsReadable(f);
                final IntervalList tmpIntervalList = IntervalList.fromFile(f);
                if (genomeBaseCount == 0) {         // Don't count the reference length more than once.
                    intervalsSamSequenceDictionary = tmpIntervalList.getHeader().getSequenceDictionary();
                    genomeBaseCount = intervalsSamSequenceDictionary.getReferenceLength();
                }

                if (intervals == null)        intervals = tmpIntervalList;
                else if (INTERSECT_INTERVALS) intervals = IntervalList.intersection(intervals, tmpIntervalList);
                else intervals =              IntervalList.union(intervals, tmpIntervalList);
            }
            if (intervals != null) {
                intervals = intervals.uniqued();
            }
            log.info("Finished loading up intervals list(s).");
        }
        final VCFFileReader truthReader = new VCFFileReader(TRUTH_VCF, USE_VCF_INDEX);
        final VCFFileReader callReader = new VCFFileReader(CALL_VCF, USE_VCF_INDEX);

        if (TRUTH_SAMPLE == null) {
            if (truthReader.getFileHeader().getNGenotypeSamples() > 1) {
                throw new PicardException("TRUTH_SAMPLE is required when the TRUTH_VCF has more than one sample");
            }
            TRUTH_SAMPLE = truthReader.getFileHeader().getGenotypeSamples().get(0);
        }

        if (CALL_SAMPLE == null) {
            if (callReader.getFileHeader().getNGenotypeSamples() > 1) {
                throw new PicardException("CALL_SAMPLE is required when the CALL_VCF has more than one sample");
            }
            CALL_SAMPLE = callReader.getFileHeader().getGenotypeSamples().get(0);
        }

        // Check that the samples actually exist in the files!
        if (!truthReader.getFileHeader().getGenotypeSamples().contains(TRUTH_SAMPLE)) {
            throw new PicardException("File " + TRUTH_VCF.getAbsolutePath() + " does not contain genotypes for sample " + TRUTH_SAMPLE);
        }
        if (!callReader.getFileHeader().getGenotypeSamples().contains(CALL_SAMPLE)) {
            throw new PicardException("File " + CALL_VCF.getAbsolutePath() + " does not contain genotypes for sample " + CALL_SAMPLE);
        }

        // Verify that both VCFs have the same Sequence Dictionary
        SequenceUtil.assertSequenceDictionariesEqual(truthReader.getFileHeader().getSequenceDictionary(), callReader.getFileHeader().getSequenceDictionary());

        final Optional<VariantContextWriter> writer = getVariantContextWriter(truthReader, callReader);

        if (usingIntervals) {
            // If using intervals, verify that the sequence dictionaries agree with those of the VCFs
            SequenceUtil.assertSequenceDictionariesEqual(intervalsSamSequenceDictionary, truthReader.getFileHeader().getSequenceDictionary());
        }

        // Build the pair of iterators over the regions of interest
        final Iterator<VariantContext> truthIterator, callIterator;
        if (usingIntervals) {
            truthIterator = new ByIntervalListVariantContextIterator(truthReader, intervals);
            callIterator = new ByIntervalListVariantContextIterator(callReader, intervals);
        }
        else {
            truthIterator = truthReader.iterator();
            callIterator = callReader.iterator();
        }

        // Now do the iteration and count things up
        final PairedVariantSubContextIterator pairedIterator = new PairedVariantSubContextIterator(truthIterator, TRUTH_SAMPLE, callIterator, CALL_SAMPLE, truthReader.getFileHeader().getSequenceDictionary());
        snpCounter   = new GenotypeConcordanceCounts();
        indelCounter = new GenotypeConcordanceCounts();

        // A map to keep track of the count of Truth/Call States which we could not successfully classify
        final Map<String, Integer> unClassifiedStatesMap = new HashMap<String, Integer>();

        log.info("Starting iteration over variants.");
        while (pairedIterator.hasNext()) {
            final VcfTuple tuple = pairedIterator.next();
            final VariantContext.Type truthVariantContextType = tuple.leftVariantContext.map(VariantContext::getType).orElse(NO_VARIATION);
            final VariantContext.Type callVariantContextType  = tuple.rightVariantContext.map(VariantContext::getType).orElse(NO_VARIATION);

            final boolean stateClassified = classifyVariants(tuple.leftVariantContext, TRUTH_SAMPLE,
                    tuple.rightVariantContext, CALL_SAMPLE,
                    Optional.of(snpCounter), Optional.of(indelCounter),
                    MIN_GQ, MIN_DP);

            if (!stateClassified) {
                final String condition = truthVariantContextType + " " + callVariantContextType;
                final Integer count = unClassifiedStatesMap.getOrDefault(condition, 0) + 1;
                unClassifiedStatesMap.put(condition, count);
            }

            // write to the output VCF
            writer.ifPresent(w -> writeVcfTuple(tuple, w, scheme));

            //final VariantContext variantContextForLogging = tuple.leftVariantContext.orElseGet(tuple.rightVariantContext::get); // FIXME
            final VariantContext variantContextForLogging = tuple.leftVariantContext.isPresent() ? tuple.leftVariantContext.get() : tuple.rightVariantContext.get();
            progress.record(variantContextForLogging.getContig(), variantContextForLogging.getStart());
        }

        //snp counter add in X number of missing-missing hom ref's (truth and call state)
        //missing missing is total interval size minus number of iterations in while loop
        if (MISSING_SITES_HOM_REF) {
            // need to know size of region called over (intervals or whole genome) to add missing-missing sites for NIST schema.
            final long baseCount = (intervals != null) ? intervals.getBaseCount() : truthReader.getFileHeader().getSequenceDictionary().getReferenceLength();
            addMissingTruthAndMissingCallStates(snpCounter.getCounterSize(), baseCount, snpCounter);
            addMissingTruthAndMissingCallStates(indelCounter.getCounterSize(), baseCount, indelCounter);
        }

        // Calculate and store the summary-level metrics
        final MetricsFile<GenotypeConcordanceSummaryMetrics,?> genotypeConcordanceSummaryMetricsFile = getMetricsFile();
        GenotypeConcordanceSummaryMetrics summaryMetrics = new GenotypeConcordanceSummaryMetrics(SNP, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE, MISSING_SITES_HOM_REF);
        genotypeConcordanceSummaryMetricsFile.addMetric(summaryMetrics);
        summaryMetrics = new GenotypeConcordanceSummaryMetrics(INDEL, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE, MISSING_SITES_HOM_REF);
        genotypeConcordanceSummaryMetricsFile.addMetric(summaryMetrics);
        genotypeConcordanceSummaryMetricsFile.write(summaryMetricsFile);

        // Calculate and store the detailed metrics for both SNP and indels
        final MetricsFile<GenotypeConcordanceDetailMetrics,?> genotypeConcordanceDetailMetrics = getMetricsFile();
        outputDetailMetricsFile(SNP, genotypeConcordanceDetailMetrics, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE, MISSING_SITES_HOM_REF, OUTPUT_ALL_ROWS);
        outputDetailMetricsFile(INDEL, genotypeConcordanceDetailMetrics, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE, MISSING_SITES_HOM_REF, OUTPUT_ALL_ROWS);
        genotypeConcordanceDetailMetrics.write(detailedMetricsFile);

        // Calculate and score the contingency metrics
        final MetricsFile<GenotypeConcordanceContingencyMetrics,?> genotypeConcordanceContingencyMetricsFile = getMetricsFile();
        GenotypeConcordanceContingencyMetrics contingencyMetrics = new GenotypeConcordanceContingencyMetrics(SNP, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE, MISSING_SITES_HOM_REF);
        genotypeConcordanceContingencyMetricsFile.addMetric(contingencyMetrics);
        contingencyMetrics = new GenotypeConcordanceContingencyMetrics(INDEL, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE, MISSING_SITES_HOM_REF);
        genotypeConcordanceContingencyMetricsFile.addMetric(contingencyMetrics);
        genotypeConcordanceContingencyMetricsFile.write(contingencyMetricsFile);

        for (final String condition : unClassifiedStatesMap.keySet()) {
            log.info("Uncovered truth/call Variant Context Type Counts: " + condition + " " + unClassifiedStatesMap.get(condition));
        }

        CloserUtil.close(callReader);
        CloserUtil.close(truthReader);
        writer.ifPresent(VariantContextWriter::close);

        return 0;
    }

    /** Gets the variant context writer if the output VCF is to be written, otherwise empty. */
    private Optional<VariantContextWriter> getVariantContextWriter(final VCFFileReader truthReader, final VCFFileReader callReader) {
        if (OUTPUT_VCF) {
            final File outputVcfFile = new File(OUTPUT + OUTPUT_VCF_FILE_EXTENSION);
            final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                    .setOutputFile(outputVcfFile)
                    .setReferenceDictionary(callReader.getFileHeader().getSequenceDictionary())
                    .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER)
                    .setOption(Options.INDEX_ON_THE_FLY);
            final VariantContextWriter writer = builder.build();

            // create the output header
            final List<String> sampleNames = Arrays.asList(OUTPUT_VCF_CALL_SAMPLE_NAME, OUTPUT_VCF_TRUTH_SAMPLE_NAME);
            final Set<VCFHeaderLine> headerLines = new HashSet<>();
            headerLines.addAll(callReader.getFileHeader().getMetaDataInInputOrder());
            headerLines.addAll(truthReader.getFileHeader().getMetaDataInInputOrder());
            headerLines.add(CONTINGENCY_STATE_HEADER_LINE);
            writer.writeHeader(new VCFHeader(headerLines, sampleNames));
            return Optional.of(writer);
        }
        else {
            return Optional.empty();
        }
    }

    private void writeVcfTuple(final VcfTuple tuple, final VariantContextWriter writer, final GenotypeConcordanceScheme scheme) {
        VariantContext truthContext = null, callContext = null;
        final List<Genotype> genotypes = new ArrayList<>(2);

        // get the variant contexts and initialize the output variant context builder
        if (tuple.leftVariantContext.isPresent()) {
            truthContext = tuple.leftVariantContext.get();
        }
        if (tuple.rightVariantContext.isPresent()) {
            callContext = tuple.rightVariantContext.get();
        }

        // Get the alleles for each genotype.  No alleles will be extracted for a genotype if the genotype is
        // mixed, filtered, or missing.
        final Alleles alleles = normalizeAlleles(truthContext, TRUTH_SAMPLE, callContext, CALL_SAMPLE);

        // There will be no alleles if both genotypes are one of mixed, filtered, or missing.  Do not output any
        // variant context in this case.
        if (!alleles.allAlleles.isEmpty()) {
            if (truthContext == null && callContext == null) {
                throw new IllegalStateException("Both truth and call contexts are null!");
            }

            final VariantContextBuilder builder;
            final List<Allele> allAlleles   = alleles.asList();
            final List<Allele> truthAlleles = alleles.truthAlleles();
            final List<Allele> callAlleles  = alleles.callAlleles();

            // Get the alleles present at this site for both samples to use for the output variant context.
            final Set<Allele> siteAlleles = new HashSet<>();
            if (truthContext != null) {
                siteAlleles.addAll(truthContext.getAlleles());
            }
            if (callContext != null) {
                siteAlleles.addAll(callContext.getAlleles());
            }

            // Initialize the variant context builder
            final VariantContext initialContext = (callContext == null) ? truthContext : callContext;
            builder = new VariantContextBuilder(initialContext.getSource(), initialContext.getContig(), initialContext.getStart(), initialContext.getEnd(), siteAlleles);
            builder.computeEndFromAlleles(allAlleles, initialContext.getStart());
            builder.log10PError(initialContext.getLog10PError());

            // Add the genotypes
            addToGenotypes(genotypes, truthContext, TRUTH_SAMPLE, OUTPUT_VCF_TRUTH_SAMPLE_NAME, allAlleles, truthAlleles, MISSING_SITES_HOM_REF);
            addToGenotypes(genotypes, callContext, CALL_SAMPLE, OUTPUT_VCF_CALL_SAMPLE_NAME, allAlleles, callAlleles, false);

            // set the alleles and genotypes
            builder.genotypes(genotypes);

            // set the concordance state attribute
            final TruthAndCallStates state = GenotypeConcordance.determineState(truthContext, TRUTH_SAMPLE, callContext, CALL_SAMPLE, MIN_GQ, MIN_DP);
            final ContingencyState[] stateArray = scheme.getConcordanceStateArray(state.truthState, state.callState);
            builder.attribute(CONTINGENCY_STATE_TAG, Arrays.asList(stateArray));

            // write it
            writer.add(builder.make());
        }
    }

    /** Adds a new genotype to the provided list of genotypes.  If the given variant context is null or has no alleles
     * (ex. a no-call), then add a no-call, otherwise make a copy of the genotype for the sample with the input
     * sample name within the provided context.  In the former case, if missingSitesHomRef is true, a homozygous
     * reference genotype is created instead of a no-call.  In the latter case, the new genotype will have the output
     * sample name. */
    private void addToGenotypes(final List<Genotype> genotypes,
                                final VariantContext ctx,
                                final String inputSampleName,
                                final String outputSampleName,
                                final List<Allele> allAlleles,
                                final List<Allele> ctxAlleles,
                                final boolean missingSitesHomRef) {
        if (ctx != null && !ctxAlleles.isEmpty()) {
            final Genotype genotype = ctx.getGenotype(inputSampleName);
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype);
            genotypeBuilder.name(outputSampleName);
            genotypeBuilder.alleles(ctxAlleles);
            if (!genotype.hasAnyAttribute(VCFConstants.GENOTYPE_KEY)) {
                genotypeBuilder.attribute(VCFConstants.GENOTYPE_KEY, MISSING_VALUE_v4);
            }
            genotypes.add(genotypeBuilder.make());
        }
        else {
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(outputSampleName);
            if (missingSitesHomRef) {
                genotypeBuilder.alleles(Arrays.asList(allAlleles.get(0), allAlleles.get(0)));
            }
            else {
                genotypeBuilder.alleles(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
            }
            genotypes.add(genotypeBuilder.make());
        }
    }

    public static boolean classifyVariants(final Optional<VariantContext> truthContext,
                                           final String truthSample,
                                           final Optional<VariantContext> callContext,
                                           final String callSample,
                                           final int minGq, final int minDp) {
        return classifyVariants(truthContext, truthSample, callContext, callSample, Optional.empty(), Optional.empty(), minGq, minDp);
    }

    /**
     * Attempts to determine the concordance state given the truth and all variant context and optionally increments the genotype concordance
     * count for the given variant type (SNP or INDEL).  This will ignore cases where an indel was found in the truth and a SNP was found in
     * the call, and vice versa.  We typically fail to classify Mixed, Symbolic variants, or MNPs.
     *
     * @param truthContext A variant context representing truth
     * @param truthSample The name of the truth sample
     * @param callContext A variant context representing the call
     * @param callSample The name of the call sample
     * @param snpCounter optionally a place to increment the counts for SNP truth/call states
     * @param indelCounter optionally a place to increment the counts for INDEL truth/call states
     * @param minGq Threshold for filtering by genotype attribute GQ
     * @param minDp Threshold for filtering by genotype attribute DP
     * @return true if the concordance state could be classified.
     */
    public static boolean classifyVariants(final Optional<VariantContext> truthContext,
                                           final String truthSample,
                                           final Optional<VariantContext> callContext,
                                           final String callSample,
                                           final Optional<GenotypeConcordanceCounts> snpCounter,
                                           final Optional<GenotypeConcordanceCounts> indelCounter,
                                           final int minGq, final int minDp) {
        final VariantContext.Type truthVariantContextType = truthContext.map(VariantContext::getType).orElse(NO_VARIATION);
        final VariantContext.Type callVariantContextType  = callContext.map(VariantContext::getType).orElse(NO_VARIATION);

        // A flag to keep track of whether we have been able to successfully classify the Truth/Call States.
        // Unclassified include MIXED/MNP/Symbolic...
        final TruthAndCallStates truthAndCallStates = determineState(truthContext.orElse(null), truthSample, callContext.orElse(null), callSample, minGq, minDp);
        if (truthVariantContextType == SNP) {
            if ((callVariantContextType == SNP) || (callVariantContextType == MIXED) || (callVariantContextType == NO_VARIATION)) {
                // Note.  If truth is SNP and call is MIXED, the event will be logged in the snpCounter, with row = MIXED
                snpCounter.ifPresent(counter -> counter.increment(truthAndCallStates));
                return true;
            }
        }
        else if (truthVariantContextType == INDEL) {
            // Note.  If truth is Indel and call is MIXED, the event will be logged in the indelCounter, with row = MIXED
            if ((callVariantContextType == INDEL) || (callVariantContextType == MIXED) || (callVariantContextType == NO_VARIATION)) {
                indelCounter.ifPresent(counter -> counter.increment(truthAndCallStates));
                return true;
            }
        }
        else if (truthVariantContextType == MIXED) {
            // Note.  If truth is MIXED and call is SNP, the event will be logged in the snpCounter, with column = MIXED
            if (callVariantContextType == SNP) {
                snpCounter.ifPresent(counter -> counter.increment(truthAndCallStates));
                return true;
            }
            // Note.  If truth is MIXED and call is INDEL, the event will be logged in the indelCounter, with column = MIXED
            else if (callVariantContextType == INDEL) {
                indelCounter.ifPresent(counter -> counter.increment(truthAndCallStates));
                return true;
            }
        }
        else if (truthVariantContextType == NO_VARIATION) {
            if (callVariantContextType == SNP) {
                snpCounter.ifPresent(counter -> counter.increment(truthAndCallStates));
                return true;
            }
            else if (callVariantContextType == INDEL) {
                indelCounter.ifPresent(counter -> counter.increment(truthAndCallStates));
                return true;
            }
        }
        return false;
    }


    /**
     * Method to add missing sites that are KNOWN to be HOM_REF in the case of the NIST truth data set.
     */
    public static void addMissingTruthAndMissingCallStates(final double numVariants, final long intervalBaseCount, final GenotypeConcordanceCounts counter) {
        final double countMissingMissing = intervalBaseCount-numVariants;
        final TruthAndCallStates missingMissing = new TruthAndCallStates(TruthState.MISSING, CallState.MISSING);
        counter.increment(missingMissing, countMissingMissing);
    }

    /**
    * Outputs the detailed statistics tables for SNP and Indel match categories.
    **/
    public static void outputDetailMetricsFile(final VariantContext.Type variantType, final MetricsFile<GenotypeConcordanceDetailMetrics,?> genotypeConcordanceDetailMetricsFile,
                                               final GenotypeConcordanceCounts counter, final String truthSampleName, final String callSampleName,
                                               final boolean missingSitesHomRef, final boolean outputAllRows) {
        final GenotypeConcordanceSchemeFactory schemeFactory = new GenotypeConcordanceSchemeFactory();
        final GenotypeConcordanceScheme scheme = schemeFactory.getScheme(missingSitesHomRef);
        scheme.validateScheme();
        for (final TruthState truthState : TruthState.values()) {
            for (final CallState callState : CallState.values()) {
                final long count = counter.getCount(truthState, callState);
                final String contingencyValues = scheme.getContingencyStateString(truthState, callState);
                if (count > 0 || outputAllRows) {
                    final GenotypeConcordanceDetailMetrics detailMetrics = new GenotypeConcordanceDetailMetrics();
                    detailMetrics.VARIANT_TYPE = variantType;
                    detailMetrics.TRUTH_SAMPLE = truthSampleName;
                    detailMetrics.CALL_SAMPLE = callSampleName;
                    detailMetrics.TRUTH_STATE = truthState;
                    detailMetrics.CALL_STATE = callState;
                    detailMetrics.COUNT = count;
                    detailMetrics.CONTINGENCY_VALUES = contingencyValues;
                    genotypeConcordanceDetailMetricsFile.addMetric(detailMetrics);
                }
            }
        }
    }

    /** A simple structure to return the results of getAlleles.  NB: truthAllele1/2 and callAllele1/2 may be null if
     * no first or second allele was present respectively. */
    protected static class Alleles {
        public final OrderedSet<String> allAlleles;
        public final String truthAllele1;
        public final String truthAllele2;
        public final String callAllele1;
        public final String callAllele2;

        public Alleles(final OrderedSet<String> allAlleles,
                       final String truthAllele1,
                       final String truthAllele2,
                       final String callAllele1,
                       final String callAllele2) {

            if (truthAllele1 == null && truthAllele2 != null) {
                throw new IllegalStateException("truthAllele2 should be null if truthAllele1 is null.");
            }
            if (callAllele1 == null && callAllele2 != null) {
                throw new IllegalStateException("callAllele2 should be null if callAllele1 is null.");
            }

            this.allAlleles   = allAlleles;
            this.truthAllele1 = (truthAllele1 == null) ? null : this.allAlleles.get(allAlleles.indexOf(truthAllele1));
            this.truthAllele2 = (truthAllele2 == null) ? null : this.allAlleles.get(allAlleles.indexOf(truthAllele2));
            this.callAllele1  = (callAllele1 == null) ? null : this.allAlleles.get(allAlleles.indexOf(callAllele1));
            this.callAllele2  = (callAllele2 == null) ? null : this.allAlleles.get(allAlleles.indexOf(callAllele2));
        }

        public List<Allele> asList() {
            if (allAlleles.isEmpty()) {
                return Collections.emptyList();
            }
            else {
                final List<Allele> alleles = new ArrayList<>(this.allAlleles.size());
                for (int idx = 0; idx < this.allAlleles.size(); idx++) {
                    alleles.add(Allele.create(this.allAlleles.get(idx), idx == 0));
                }
                return alleles;
            }
        }

        public List<Allele> truthAlleles() {
            if (allAlleles.isEmpty() || this.truthAllele1 == null) {
                return Collections.emptyList();
            }
            else {
                final Allele truthAllele1 = Allele.create(this.truthAllele1, this.allAlleles.indexOf(this.truthAllele1) == 0);
                final Allele truthAllele2 = Allele.create(this.truthAllele2, this.allAlleles.indexOf(this.truthAllele2) == 0);
                return Arrays.asList(truthAllele1, truthAllele2);
            }
        }

        public List<Allele> callAlleles() {
            if (allAlleles.isEmpty() || this.callAllele1 == null) {
                return Collections.emptyList();
            }
            else {
                final Allele callAllele1 = Allele.create(this.callAllele1, this.allAlleles.indexOf(this.callAllele1) == 0);
                final Allele callAllele2 = Allele.create(this.callAllele2, this.allAlleles.indexOf(this.callAllele2) == 0);
                return Arrays.asList(callAllele1, callAllele2);
            }
        }
    }

    /** Inserts the given string into the destination string at the given index.  If the index is past the end of the
     * destination string, the given string is appended to the destination.
     */
    static String spliceOrAppendString(final String destination, final String toInsert, final int insertIdx) {
        if (insertIdx <= destination.length()) {
            return destination.substring(0, insertIdx) + toInsert + destination.substring(insertIdx);
        }
        else {
            return destination + toInsert;
        }
    }

    /** Gets the alleles for the truth and call genotypes.  In particular, this handles the case where indels can have different
     * reference alleles. */
    final protected static Alleles normalizeAlleles(final VariantContext truthContext,
                                                    final String truthSample,
                                                    final VariantContext callContext,
                                                    final String callSample) {

        final Genotype truthGenotype, callGenotype;

        if (truthContext == null || truthContext.isMixed() || truthContext.isFiltered()) truthGenotype = null;
        else truthGenotype = truthContext.getGenotype(truthSample);

        if (callContext == null || callContext.isMixed() || callContext.isFiltered()) callGenotype = null;
        else callGenotype = callContext.getGenotype(callSample);

        // initialize the reference
        String truthRef = (truthGenotype != null) ? truthContext.getReference().getBaseString() : null;
        String callRef  = (callGenotype != null) ? callContext.getReference().getBaseString() : null;

        String truthAllele1 = null;
        String truthAllele2 = null;
        if (null != truthGenotype) {
            if (truthGenotype.getAlleles().size() != 2) {
                throw new IllegalStateException("Genotype for Variant Context: " + truthContext + " does not have exactly 2 alleles");
            }
            truthAllele1 = truthGenotype.getAllele(0).getBaseString();
            truthAllele2 = truthGenotype.getAllele(1).getBaseString();
        }

        String callAllele1 = null;
        String callAllele2 = null;
        if (null != callGenotype) {
            if (callGenotype.getAlleles().size() != 2) {
                throw new IllegalStateException("Genotype for Variant Context: " + callContext + " does not have exactly 2 alleles");
            }
            callAllele1 = callGenotype.getAllele(0).getBaseString();
            callAllele2 = callGenotype.getAllele(1).getBaseString();
        }

        if ((truthRef != null && callRef != null) && (!truthRef.equals(callRef))) {
            // We must handle different representations for indels based on their reference alleles.  For example:
            // - Deletion:  TCAA*/T versus TCAACAA*/TCAA
            // - Insertion: TCAA*/TCAAGG versus TCAACAA*/TCAACAAGG
            // We do the following:
            // 1. Verify that the shorter reference allele is a prefix of the longer reference allele.
            // 2. Add the extra reference bases to the variant allele.
            // 3. Update the shorter reference allele to be the longer reference allele.
            if (truthRef.length() < callRef.length()) {
                // Truth reference is shorter than call reference
                final String suffix = getStringSuffix(callRef, truthRef, "Ref alleles mismatch between: " + truthContext + " and " + callContext);
                final int insertIdx = truthRef.length();
                truthAllele1 = spliceOrAppendString(truthAllele1, suffix, insertIdx);
                truthAllele2 = spliceOrAppendString(truthAllele2, suffix, insertIdx);
                truthRef = truthRef + suffix;

            }
            else if (truthRef.length() > callRef.length()) {
                // call reference is shorter than truth:
                final String suffix = getStringSuffix(truthRef, callRef, "Ref alleles mismatch between: " + truthContext + " and " + callContext);
                final int insertIdx = callRef.length();
                callAllele1 = spliceOrAppendString(callAllele1, suffix, insertIdx);
                callAllele2 = spliceOrAppendString(callAllele2, suffix, insertIdx);
                callRef = callRef + suffix;
            }
            else {
                // Same length - so they must just disagree...
                throw new IllegalStateException("Ref alleles mismatch between: " + truthContext + " and " + callContext);
            }
        }

        final OrderedSet<String> allAlleles = new OrderedSet<String>();

        if (truthGenotype != null || callGenotype != null) {
            // Store the refAllele as the first (0th index) allele in allAlleles (only can do if at least one genotype is non-null)
            allAlleles.smartAdd(truthGenotype == null ? callRef : truthRef); // zeroth allele;
        }

        if (null != truthGenotype) {
            allAlleles.smartAdd(truthAllele1);
            allAlleles.smartAdd(truthAllele2);
        }

        /**
         *  if either of the call alleles is in allAlleles, with index > 1, we need to make sure that allele has index 1.
         *  this is because of the following situations:
         *
         *      REF TRUTH   CALL-GT TRUTH-STATE     CALL-STATE
         *      A   C/G     C/A     HET_VAR1_VAR2   HET_REF_VAR1
         *      A   G/C     C/A     HET_VAR1_VAR2   HET_REF_VAR1
         *      A   G/C     G/A     HET_VAR1_VAR2   HET_REF_VAR1
         *      A   G/C     G/A     HET_VAR1_VAR2   HET_REF_VAR1
         *
         *  so, in effect, the order of the alleles in the TRUTH doesn't determine the assignment of allele to Var1 and Var2,
         *  only once the call is known can this assignment be made.
         */

        if (null != callGenotype) {
            if (allAlleles.indexOf(callAllele1) > 1 || allAlleles.indexOf(callAllele2) > 1) {
                allAlleles.remove(2);
                allAlleles.remove(1);
                allAlleles.smartAdd(truthAllele2);
                allAlleles.smartAdd(truthAllele1);
            }

            allAlleles.smartAdd(callAllele1);
            allAlleles.smartAdd(callAllele2);
        }

        return new Alleles(allAlleles, truthAllele1, truthAllele2, callAllele1, callAllele2);
    }

    private static GenotypeConcordanceStateCodes getStateCode(final VariantContext ctx, final String sample, final int minGq, final int minDp) {
        // Site level checks
        // Added potential to include missing sites as hom ref.
        if (ctx == null) return MISSING_CODE;
        else if (ctx.isMixed()) return IS_MIXED_CODE;
        else if (ctx.isFiltered()) return VC_FILTERED_CODE;
        else {
            // Genotype level checks
            final Genotype genotype = ctx.getGenotype(sample);
            if (genotype.isNoCall())           return NO_CALL_CODE;
            else if (genotype.isFiltered())    return GT_FILTERED_CODE;
            else if ((genotype.getGQ() != -1) && (genotype.getGQ() < minGq)) return LOW_GQ_CODE;
            else if ((genotype.getDP() != -1) && (genotype.getDP() < minDp)) return LOW_DP_CODE;
                // Note.  Genotype.isMixed means that it is called on one chromosome and NOT on the other
            else if ((genotype.isMixed())) return NO_CALL_CODE;
        }
        return null;
    }

    /**
     * A method to determine the truth and call states for a pair of variant contexts representing truth and call.
     * A variety of variant and genotype-level checks are first used to determine if either of the the variant contexts
     * are filtered and after that a comparison of the called genotype alleles to determine appropriate truth and call state
     *
     * Note that this method does NOT check for SNP versus Indel.  It is assumed that that check is done by the caller and the results
     * of this method are stored by SNP/Indel.
     * Note that if a variant context has BOTH GQ and DP less than the specified threshold, then it will be of Truth/Call State LOW_GQ
     *
     * @param truthContext A variant context representing truth
     * @param truthSample The name of the truth sample
     * @param callContext A variant context representing the call
     * @param callSample The name of the call sample
     * @param minGq Threshold for filtering by genotype attribute GQ
     * @param minDp Threshold for filtering by genotype attribute DP
     * @return TruthAndCallStates object containing the TruthState and CallState determined here.
     */
    final public static TruthAndCallStates determineState(final VariantContext truthContext, final String truthSample, final VariantContext callContext, final String callSample, final int minGq, final int minDp) {
        TruthState truthState = null;
        CallState callState = null;

        // TODO: what about getPloidy()

        // Get truth and call states if they are filtered or are not going to be compared (ex. depth is less than minDP).
        final GenotypeConcordanceStateCodes truthStateCode = getStateCode(truthContext, truthSample, minGq, minDp);
        if (null != truthStateCode) {
            truthState = GenotypeConcordanceStates.truthMap.get(truthStateCode.ordinal());
        }
        final GenotypeConcordanceStateCodes callStateCode = getStateCode(callContext, callSample, minGq, minDp);
        if (null != callStateCode) {
            callState = GenotypeConcordanceStates.callMap.get(callStateCode.ordinal());
        }

        final Alleles alleles = normalizeAlleles(
                truthState == null ? truthContext : null,
                truthSample,
                callState == null ? callContext : null,
                callSample);
        final OrderedSet<String> allAlleles = alleles.allAlleles;
        final String truthAllele1           = alleles.truthAllele1;
        final String truthAllele2           = alleles.truthAllele2;
        final String callAllele1            = alleles.callAllele1;
        final String callAllele2            = alleles.callAllele2;

        // Truth
        if (null == truthState) {
            final int allele0idx = allAlleles.indexOf(truthAllele1);
            final int allele1idx = allAlleles.indexOf(truthAllele2);

            if (allele0idx == allele1idx) { //HOM
                truthState = TruthState.getHom(allele0idx);
            } else { //HET
                truthState = TruthState.getVar(allele0idx, allele1idx);
            }
        }

        // Call
        if (null == callState) {
            final int allele0idx = allAlleles.indexOf(callAllele1);
            final int allele1idx = allAlleles.indexOf(callAllele2);

            if (allele0idx == allele1idx) { //HOM
                callState = CallState.getHom(allele0idx);
            } else { //HET
                callState = CallState.getHet(allele0idx, allele1idx);
            }

            if (null == callState) {
                throw new IllegalStateException("This should never happen...  Could not classify the call variant: " + callContext.getGenotype(callSample));
            }
        }

        return new TruthAndCallStates(truthState, callState);
    }

    final static String getStringSuffix(final String longerString, final String shorterString, final String errorMsg) {
        // Truth reference is shorter than call reference
        if (!longerString.startsWith(shorterString)) {
            throw new IllegalStateException(errorMsg);
        }
        return longerString.substring(shorterString.length());
    }
}

/** like a list, but if you ask for an index of an item, it will first add that item.
also, same item cannot be added more than once (like a set)
 */
class OrderedSet<T> extends ArrayList<T> {

    public int smartIndexOf(final T o) {
        smartAdd(o);
        return super.indexOf(o);
    }

    public boolean smartAdd(final T o) {
        if (!this.contains(o)) {
            return add(o);
        }
        return false;
    }
}