SAMUtils.java example

Explorer

picard-tools-master
- htsjdk
  - src
    - java
      - htsjdk
        samtools
        AbstractBAMFileIndex.java
        AbstractSAMHeaderRecord.java
        AlignmentBlock.java
        AsyncSAMFileWriter.java
        BAMFileConstants.java
        BAMFileReader.java
        BAMFileWriter.java
        BAMIndex.java
        BAMIndexContent.java
        BAMIndexMetaData.java
        BAMIndexWriter.java
        BAMIndexer.java
        BAMRecord.java
        BAMRecordCodec.java
        BamFileIoUtils.java
        BamIndexValidator.java
        Bin.java
        BinList.java
        BinaryBAMIndexWriter.java
        BinaryCigarCodec.java
        BinaryTagCodec.java
        BinningIndexBuilder.java
        BinningIndexContent.java
        BrowseableBAMIndex.java
        CachingBAMFileIndex.java
        Chunk.java
        Cigar.java
        CigarElement.java
        CigarOperator.java
        ComparableSamRecordIterator.java
        CoordinateSortedPairInfoMap.java
        CustomReaderFactory.java
        DefaultSAMRecordFactory.java
        Defaults.java
        DiskBasedBAMFileIndex.java
        DuplicateScoringStrategy.java
        FileTruncatedException.java
        FixBAMFile.java
        GenomicIndexUtil.java
        LinearIndex.java
        MergingSamRecordIterator.java
        NotPrimarySkippingIterator.java
        QueryInterval.java
        ReservedTagConstants.java
        SAMBinaryTagAndUnsignedArrayValue.java
        SAMBinaryTagAndValue.java
        SAMException.java
        SAMFileHeader.java
        SAMFileReader.java
        SAMFileSource.java
        SAMFileSpan.java
        SAMFileTruncatedReader.java
        SAMFileWriter.java
        SAMFileWriterFactory.java
        SAMFileWriterImpl.java
        SAMFormatException.java
        SAMHeaderRecordComparator.java
        SAMLineParser.java
        SAMProgramRecord.java
        SAMReadGroupRecord.java
        SAMRecord.java
        SAMRecordComparator.java
        SAMRecordCoordinateComparator.java
        SAMRecordFactory.java
        SAMRecordIterator.java
        SAMRecordQueryNameComparator.java
        SAMRecordSetBuilder.java
        SAMRecordUtil.java
        SAMSequenceDictionary.java
        SAMSequenceRecord.java
        SAMSortOrderChecker.java
        SAMTag.java
        SAMTagUtil.java
        SAMTestUtil.java
        SAMTextHeaderCodec.java
        SAMTextReader.java
        SAMTextWriter.java
        SAMTools.java
        SAMUtils.java
        SAMValidationError.java
        SQTagUtil.java
        SamFileHeaderMerger.java
        SamFileValidator.java
        SamFiles.java
        SamInputResource.java
        SamPairUtil.java
        SamReader.java
        SamReaderFactory.java
        SamStreams.java
        SecondaryOrSupplementarySkippingIterator.java
        StreamInflatingIndexingOutputStream.java
        TagValueAndUnsignedArrayFlag.java
        TextCigarCodec.java
        TextTagCodec.java
        TextualBAMIndexWriter.java
        ValidationStringency.java
        apps
        TimeChannel.java
        TimeRandomAccessFile.java
        example
        ExampleSamUsage.java
        fastq
        AsyncFastqWriter.java
        BasicFastqWriter.java
        FastqConstants.java
        FastqReader.java
        FastqRecord.java
        FastqWriter.java
        FastqWriterFactory.java
        filter
        AggregateFilter.java
        AlignedFilter.java
        DuplicateReadFilter.java
        FailsVendorReadQualityFilter.java
        FilteringIterator.java
        IntervalFilter.java
        NotPrimaryAlignmentFilter.java
        ReadNameFilter.java
        SamRecordFilter.java
        SecondaryAlignmentFilter.java
        SecondaryOrSupplementaryFilter.java
        SolexaNoiseFilter.java
        TagFilter.java
        WholeReadClippedFilter.java
        liftover
        Chain.java
        LiftOver.java
        metrics
        Header.java
        MetricBase.java
        MetricsFile.java
        StringHeader.java
        VersionHeader.java
        reference
        AbstractFastaSequenceFile.java
        FastaSequenceFile.java
        FastaSequenceIndex.java
        IndexedFastaSequenceFile.java
        ReferenceSequence.java
        ReferenceSequenceFile.java
        ReferenceSequenceFileFactory.java
        ReferenceSequenceFileWalker.java
        seekablestream
        ISeekableStreamFactory.java
        SeekableBufferedStream.java
        SeekableFTPStream.java
        SeekableFTPStreamHelper.java
        SeekableFileStream.java
        SeekableHTTPStream.java
        SeekableStream.java
        SeekableStreamFactory.java
        UserPasswordInput.java
        util
        AbstractAsyncWriter.java
        AbstractIterator.java
        AsciiWriter.java
        BinaryCodec.java
        BlockCompressedFilePointerUtil.java
        BlockCompressedInputStream.java
        BlockCompressedOutputStream.java
        BlockCompressedStreamConstants.java
        BlockGunzipper.java
        BufferedLineReader.java
        CigarUtil.java
        CloseableIterator.java
        CloserUtil.java
        CollectionUtil.java
        CoordMath.java
        DateParser.java
        DelegatingIterator.java
        DiskBackedQueue.java
        FastLineReader.java
        FastqQualityFormat.java
        FileAppendStreamLRUCache.java
        FormatUtil.java
        Histogram.java
        HttpUtils.java
        IOUtil.java
        Interval.java
        IntervalList.java
        IntervalListReferenceSequenceMask.java
        IntervalTree.java
        IntervalTreeMap.java
        IntervalUtil.java
        Iso8601Date.java
        IterableAdapter.java
        IterableOnceIterator.java
        Iterables.java
        Lazy.java
        LineReader.java
        ListMap.java
        LocationAware.java
        Locus.java
        LocusComparator.java
        LocusImpl.java
        Log.java
        Md5CalculatingInputStream.java
        Md5CalculatingOutputStream.java
        MergingIterator.java
        Objects.java
        OverlapDetector.java
        PeekIterator.java
        PeekableIterator.java
        ProcessExecutor.java
        ProgressLogger.java
        ProgressLoggerInterface.java
        QualityEncodingDetector.java
        QualityUtil.java
        ReferenceSequenceMask.java
        RelativeIso8601Date.java
        ResourceLimitedMap.java
        ResourceLimitedMapFunctor.java
        RuntimeEOFException.java
        RuntimeIOException.java
        SamLocusIterator.java
        SamRecordIntervalIteratorFactory.java
        SamRecordTrackingBuffer.java
        SamRecordWithOrdinal.java
        SequenceUtil.java
        SnappyLoader.java
        SolexaQualityConverter.java
        SortingCollection.java
        SortingLongCollection.java
        StopWatch.java
        StringLineReader.java
        StringUtil.java
        TempStreamFactory.java
        TestUtil.java
        Tuple.java
        WholeGenomeReferenceSequenceMask.java
        ftp
        FTPClient.java
        FTPReply.java
        FTPStream.java
        FTPUtils.java
        zip
        DeflaterFactory.java
        IntelDeflater.java
        ZStreamRef.java
        tribble
        AbstractFeatureCodec.java
        AbstractFeatureReader.java
        AsciiFeatureCodec.java
        BasicFeature.java
        BinaryFeatureCodec.java
        CloseableTribbleIterator.java
        Feature.java
        FeatureCodec.java
        FeatureCodecHeader.java
        FeatureReader.java
        NameAwareCodec.java
        TabixFeatureReader.java
        Tribble.java
        TribbleException.java
        TribbleIndexedFeatureReader.java
        annotation
        Strand.java
        bed
        BEDCodec.java
        BEDFeature.java
        FullBEDFeature.java
        SimpleBEDFeature.java
        dbsnp
        OldDbSNPCodec.java
        OldDbSNPFeature.java
        example
        CountRecords.java
        ExampleBinaryCodec.java
        IndexToTable.java
        IndicesAreEqual.java
        ProfileIndexReading.java
        exception
        CodecLineParsingException.java
        UnsortedFileException.java
        gelitext
        DiploidGenotype.java
        GeliTextCodec.java
        GeliTextFeature.java
        index
        AbstractIndex.java
        Block.java
        ChrIndex.java
        DynamicIndexCreator.java
        Index.java
        IndexCreator.java
        IndexFactory.java
        MutableIndex.java
        TribbleIndexCreator.java
        interval
        Interval.java
        IntervalIndexCreator.java
        IntervalTree.java
        IntervalTreeIndex.java
        linear
        LinearIndex.java
        LinearIndexCreator.java
        tabix
        TabixFormat.java
        TabixIndex.java
        TabixIndexCreator.java
        readers
        AsciiLineReader.java
        AsciiLineReaderIterator.java
        AsynchronousLineReader.java
        LineIterator.java
        LineIteratorImpl.java
        LineReader.java
        LineReaderUtil.java
        LongLineBufferedReader.java
        Positional.java
        PositionalBufferedStream.java
        TabixIteratorLineReader.java
        TabixReader.java
        util
        FTPHelper.java
        HTTPHelper.java
        LittleEndianInputStream.java
        LittleEndianOutputStream.java
        MathUtils.java
        ParsingUtils.java
        RemoteURLHelper.java
        TabixUtils.java
        URLHelper.java
        popgen
        HardyWeinbergCalculation.java
        variant
        bcf2
        BCF2Codec.java
        BCF2Decoder.java
        BCF2GenotypeFieldDecoders.java
        BCF2LazyGenotypesDecoder.java
        BCF2Type.java
        BCF2Utils.java
        BCFVersion.java
        utils
        GeneralUtils.java
        SAMSequenceDictionaryExtractor.java
        variantcontext
        Allele.java
        CommonInfo.java
        FastGenotype.java
        Genotype.java
        GenotypeBuilder.java
        GenotypeLikelihoods.java
        GenotypeType.java
        GenotypesContext.java
        JEXLMap.java
        LazyGenotypesContext.java
        VariantContext.java
        VariantContextBuilder.java
        VariantContextComparator.java
        VariantContextUtils.java
        VariantJEXLContext.java
        writer
        AsyncVariantContextWriter.java
        BCF2Encoder.java
        BCF2FieldEncoder.java
        BCF2FieldWriter.java
        BCF2FieldWriterManager.java
        BCF2Writer.java
        IndexingVariantContextWriter.java
        IntGenotypeFieldAccessors.java
        Options.java
        SortingVariantContextWriter.java
        SortingVariantContextWriterBase.java
        VCFWriter.java
        VariantContextWriter.java
        VariantContextWriterBuilder.java
        VariantContextWriterFactory.java
        vcf
        AbstractVCFCodec.java
        VCF3Codec.java
        VCFCodec.java
        VCFCompoundHeaderLine.java
        VCFConstants.java
        VCFContigHeaderLine.java
        VCFEncoder.java
        VCFFileReader.java
        VCFFilterHeaderLine.java
        VCFFormatHeaderLine.java
        VCFHeader.java
        VCFHeaderLine.java
        VCFHeaderLineCount.java
        VCFHeaderLineTranslator.java
        VCFHeaderLineType.java
        VCFHeaderVersion.java
        VCFIDHeaderLine.java
        VCFInfoHeaderLine.java
        VCFRecordCodec.java
        VCFSimpleHeaderLine.java
        VCFStandardHeaderLines.java
        VCFUtils.java
    - tests
      - java
        htsjdk
        samtools
        AbstractBAMFileIndexTest.java
        BAMFileIndexTest.java
        BAMFileWriterTest.java
        BAMIndexWriterTest.java
        BAMIteratorTest.java
        BAMRemoteFileTest.java
        BinTest.java
        ChunkTest.java
        CigarCodecTest.java
        CigarTest.java
        MergingSamRecordIteratorGroupCollisionTest.java
        MergingSamRecordIteratorTest.java
        ProgramRecordChainingTest.java
        SAMCloneTest.java
        SAMFileReaderTest.java
        SAMFileWriterFactoryTest.java
        SAMIntegerTagTest.java
        SAMTextReaderTest.java
        SAMTextWriterTest.java
        SAMUtilsTest.java
        SamFileHeaderMergerTest.java
        SamHeaderRecordComparatorTest.java
        SamPairUtilTest.java
        SamReaderFactoryTest.java
        SamReaderSortTest.java
        SequenceNameTruncationAndValidationTest.java
        ValidateSamFileTest.java
        filter
        FailsVendorReadQualityFilterTest.java
        SolexaNoiseFilterTest.java
        TagFilterTest.java
        liftover
        LiftOverTest.java
        metrics
        MetricsFileTest.java
        reference
        FastaSequenceFileTest.java
        FastaSequenceIndexTest.java
        IndexedFastaSequenceFileTest.java
        ReferenceSequenceFileWalkerTest.java
        ReferenceSequenceTests.java
        seekablestream
        SeekableBufferedStreamTest.java
        SeekableFTPStreamTest.java
        SeekableFileStreamTest.java
        SeekableStreamFactoryTest.java
        util
        BinaryCodecTest.java
        BlockCompressedFilePointerUtilTest.java
        BlockCompressedOutputStreamTest.java
        BlockCompressedTerminatorTest.java
        CigarUtilTest.java
        DiskBackedQueueTest.java
        HistogramTest.java
        IntervalListTest.java
        IntervalTreeMapTest.java
        IntervalTreeTest.java
        IoUtilTest.java
        Iso8601DateTest.java
        IupacTest.java
        MergingIteratorTest.java
        QualityEncodingDetectorTest.java
        RelativeIso8601DateTest.java
        SamLocusIteratorTest.java
        SequenceUtilTest.java
        SolexaQualityConverterTest.java
        SortingCollectionTest.java
        SortingLongCollectionTest.java
        StringLineReaderTest.java
        StringUtilTest.java
        tribble
        AbstractFeatureReaderTest.java
        BinaryFeaturesTest.java
        FeatureReaderTest.java
        TestUtils.java
        bed
        BEDCodecTest.java
        dbsnp
        OldDbSNPCodecTest.java
        gelitext
        GeliTextTest.java
        index
        IndexFactoryTest.java
        IndexTest.java
        interval
        IntervalTreeTest.java
        linear
        LinearIndexTest.java
        tabix
        TabixIndexTest.java
        readers
        AsciiLineReaderTest.java
        AsynchronousLineReaderTest.java
        LineReaderUtilTest.java
        LongLineBufferedReaderTest.java
        PositionalBufferedStreamTest.java
        ReaderTest.java
        TabixReaderTest.java
        util
        ParsingUtilsTest.java
        ftp
        FTPClientTest.java
        FTPUtilsTest.java
        popgen
        HardyWeinbergCalculationTest.java
        variant
        VariantBaseTest.java
        bcf2
        BCF2EncoderDecoderUnitTest.java
        BCF2UtilsUnitTest.java
        utils
        SAMSequenceDictionaryExtractorTest.java
        variantcontext
        AlleleUnitTest.java
        GenotypeLikelihoodsUnitTest.java
        GenotypeUnitTest.java
        GenotypesContextUnitTest.java
        VariantContextTestProvider.java
        VariantContextUnitTest.java
        VariantJEXLContextUnitTest.java
        writer
        TabixOnTheFlyIndexCreationTest.java
        VCFWriterUnitTest.java
        VariantContextWriterBuilderUnitTest.java
        VariantContextWritersUnitTest.java
        vcf
        AbstractVCFCodecTest.java
        IndexFactoryUnitTest.java
        VCFCompoundHeaderLineUnitTest.java
        VCFEncoderTest.java
        VCFHeaderUnitTest.java
        VCFStandardHeaderLinesUnitTest.java
- src
  - java
    - picard
  - tests
    - java
      - picard
        analysis
        CollectAlignmentSummaryMetricsTest.java
        CollectInsertSizeMetricsTest.java
        CollectRnaSeqMetricsTest.java
        MultiLevelCollectorTest.java
        cmdline
        CommandLineParserTest.java
        CommandLineProgramTest.java
        illumina
        CheckIlluminaDirectoryTest.java
        CollectIlluminaBasecallingMetricsTest.java
        CollectIlluminaSummaryMetricsTest.java
        ExtractIlluminaBarcodesTest.java
        IlluminaBasecallsToFastqTest.java
        IlluminaBasecallsToSamAdapterClippingTest.java
        IlluminaBasecallsToSamTest.java
        IlluminaLaneMetricsCollectorTest.java
        ReadStructureTest.java
        parser
        BclParserTest.java
        BinTdUtil.java
        CycleIlluminaFileMapTest.java
        FilterParserTest.java
        IlluminaDataProviderFactoryTest.java
        IlluminaDataProviderTest.java
        IlluminaFileUtilTest.java
        PerTileParserTest.java
        PerTilePerCycleParserTest.java
        PosParserTest.java
        fakers
        BclFileFakerTest.java
        readers
        AbstractIlluminaPositionFileReaderTest.java
        BclReaderTest.java
        ClocsFileReaderTest.java
        FilterFileReaderTest.java
        IlluminaFileUtilTest.java
        LocsFileReaderTest.java
        MMapBackedIteratorFactoryTest.java
        PosFileReaderTest.java
        metrics
        CollectRrbsMetricsTest.java
        pedigree
        PedFileTest.java
        sam
        AddCommentsToBamTest.java
        CleanSamTest.java
        CompareSAMsTest.java
        CreateSequenceDictionaryTest.java
        FastqToSamTest.java
        GatherBamFilesTest.java
        MergeBamAlignmentTest.java
        MergeSamFilesTest.java
        RevertSamTest.java
        SamToFastqTest.java
        SplitSamByLibraryTest.java
        ViewSamTest.java
        markduplicates
        AbstractMarkDuplicatesCommandLineProgramTest.java
        AbstractMarkDuplicatesCommandLineProgramTester.java
        MarkDuplicatesTest.java
        MarkDuplicatesTester.java
        MarkDuplicatesWithMateCigarTest.java
        MarkDuplicatesWithMateCigarTester.java
        util
        OpticalDuplicateFinderTest.java
        testers
        CleanSamTester.java
        SamFileTester.java
        ValidateSamTester.java
        util
        BedToIntervalListTest.java
        ClippingUtilityTest.java
        DelimitedTextFileWithHeaderIteratorTest.java
        FifoBufferTest.java
        FileChannelJDKBugWorkAroundTest.java
        IlluminaUtilTest.java
        IntervalListScattererTest.java
        MathUtilTest.java
        MergingIteratorTest.java
        RExecutorTest.java
        ScatterIntervalsByNsTest.java
        TabbedTextFileWithHeaderParserTest.java
        TextFileParsersTest.java
        UnsignedTypeUtilTest.java
        vcf
        AbstractVcfMergingClpTester.java
        GenotypeConcordanceTest.java
        MergeVcfsTest.java
        SortVcfsTest.java
        SplitVcfsTest.java
        UpdateVcfSequenceDictionaryTest.java
        VariantContextComparatorTest.java
        VcfFormatConverterTest.java

/*
 * The MIT License
 *
 * Copyright (c) 2009 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package htsjdk.samtools;

import htsjdk.samtools.util.CoordMath;
import htsjdk.samtools.util.RuntimeEOFException;
import htsjdk.samtools.util.StringUtil;

import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;


/**
 * Utilty methods.
 */
public final class SAMUtils
{
    // Representation of bases, one for when in low-order nybble, one for when in high-order nybble.
    private static final byte COMPRESSED_EQUAL_LOW = 0;
    private static final byte COMPRESSED_A_LOW = 1;
    private static final byte COMPRESSED_C_LOW = 2;
    private static final byte COMPRESSED_M_LOW = 3;
    private static final byte COMPRESSED_G_LOW = 4;
    private static final byte COMPRESSED_R_LOW = 5;
    private static final byte COMPRESSED_S_LOW = 6;
    private static final byte COMPRESSED_V_LOW = 7;
    private static final byte COMPRESSED_T_LOW = 8;
    private static final byte COMPRESSED_W_LOW = 9;
    private static final byte COMPRESSED_Y_LOW = 10;
    private static final byte COMPRESSED_H_LOW = 11;
    private static final byte COMPRESSED_K_LOW = 12;
    private static final byte COMPRESSED_D_LOW = 13;
    private static final byte COMPRESSED_B_LOW = 14;
    private static final byte COMPRESSED_N_LOW = 15;
    private static final byte COMPRESSED_EQUAL_HIGH = COMPRESSED_EQUAL_LOW << 4;
    private static final byte COMPRESSED_A_HIGH = COMPRESSED_A_LOW << 4;
    private static final byte COMPRESSED_C_HIGH = COMPRESSED_C_LOW << 4;
    private static final byte COMPRESSED_G_HIGH = COMPRESSED_G_LOW << 4;
    private static final byte COMPRESSED_T_HIGH = (byte)(COMPRESSED_T_LOW << 4);
    private static final byte COMPRESSED_N_HIGH = (byte)(COMPRESSED_N_LOW << 4);

    private static final byte COMPRESSED_M_HIGH = (byte)(COMPRESSED_M_LOW << 4);
    private static final byte COMPRESSED_R_HIGH = (byte)(COMPRESSED_R_LOW << 4);
    private static final byte COMPRESSED_S_HIGH = (byte)(COMPRESSED_S_LOW << 4);
    private static final byte COMPRESSED_V_HIGH = (byte)(COMPRESSED_V_LOW << 4);
    private static final byte COMPRESSED_W_HIGH = (byte)(COMPRESSED_W_LOW << 4);
    private static final byte COMPRESSED_Y_HIGH = (byte)(COMPRESSED_Y_LOW << 4);
    private static final byte COMPRESSED_H_HIGH = (byte)(COMPRESSED_H_LOW << 4);
    private static final byte COMPRESSED_K_HIGH = (byte)(COMPRESSED_K_LOW << 4);
    private static final byte COMPRESSED_D_HIGH = (byte)(COMPRESSED_D_LOW << 4);
    private static final byte COMPRESSED_B_HIGH = (byte)(COMPRESSED_B_LOW << 4);


    public static final int MAX_PHRED_SCORE = 93;

    /**
     * Convert from a byte array containing =AaCcGgTtNn represented as ASCII, to a byte array half as long,
     * with =, A, C, G, T converted to 0, 1, 2, 4, 8, 15.
     * @param readBases Bases as ASCII bytes.
     * @return New byte array with bases represented as nybbles, in BAM binary format.
     */
    static byte[] bytesToCompressedBases(final byte[] readBases) {
        final byte[] compressedBases = new byte[(readBases.length + 1)/2];
        int i;
        for (i = 1; i < readBases.length; i+=2) {
            compressedBases[i/2] = (byte)(charToCompressedBaseHigh(readBases[i-1]) |
                                    charToCompressedBaseLow(readBases[i]));
        }
        // Last nybble
        if (i == readBases.length) {
            compressedBases[i/2] = charToCompressedBaseHigh((char)readBases[i-1]);
        }
        return compressedBases;
    }

    /**
     * Convert from a byte array with basese stored in nybbles, with =, A, C, G, T represented as 0, 1, 2, 4, 8, 15,
     * to a a byte array containing =AaCcGgTtNn represented as ASCII.
     * @param length Number of bases (not bytes) to convert.
     * @param compressedBases Bases represented as nybbles, in BAM binary format.
     * @param compressedOffset Byte offset in compressedBases to start.
     * @return New byte array with bases as ASCII bytes.
     */
    public static byte[] compressedBasesToBytes(final int length, final byte[] compressedBases, final int compressedOffset) {
        final byte[] ret = new byte[length];
        int i;
        for (i = 1; i < length; i+=2) {
            final int compressedIndex = i / 2 + compressedOffset;
            ret[i-1] = compressedBaseToByteHigh(compressedBases[compressedIndex]);
            ret[i] = compressedBaseToByteLow(compressedBases[compressedIndex]);
        }
        // Last nybble
        if (i == length) {
            ret[i-1] = compressedBaseToByteHigh(compressedBases[i/2 + compressedOffset]);
        }
        return ret;
    }

    /**
     * Convert from ASCII byte to BAM nybble representation of a base in low-order nybble.
     * @param base One of =AaCcGgTtNn.
     * @return Low-order nybble-encoded equivalent.
     */
    private static byte charToCompressedBaseLow(final int base) {
        switch (base) {
            case '=':
                return COMPRESSED_EQUAL_LOW;
            case 'a':
            case 'A':
                return COMPRESSED_A_LOW;
            case 'c':
            case 'C':
                return COMPRESSED_C_LOW;
            case 'g':
            case 'G':
                return COMPRESSED_G_LOW;
            case 't':
            case 'T':
                return COMPRESSED_T_LOW;
            case 'n':
            case 'N':
            case '.':
                return COMPRESSED_N_LOW;

            // IUPAC ambiguity codes
            case 'M':
            case 'm':
                return COMPRESSED_M_LOW;
            case 'R':
            case 'r':
                return COMPRESSED_R_LOW;
            case 'S':
            case 's':
                return COMPRESSED_S_LOW;
            case 'V':
            case 'v':
                return COMPRESSED_V_LOW;
            case 'W':
            case 'w':
                return COMPRESSED_W_LOW;
            case 'Y':
            case 'y':
                return COMPRESSED_Y_LOW;
            case 'H':
            case 'h':
                return COMPRESSED_H_LOW;
            case 'K':
            case 'k':
                return COMPRESSED_K_LOW;
            case 'D':
            case 'd':
                return COMPRESSED_D_LOW;
            case 'B':
            case 'b':
                return COMPRESSED_B_LOW;
            default:
                throw new IllegalArgumentException("Bad  byte passed to charToCompressedBase: " + base);
        }
    }

    /**
     * Convert from ASCII byte to BAM nybble representation of a base in high-order nybble.
     * @param base One of =AaCcGgTtNn.
     * @return High-order nybble-encoded equivalent.
     */
    private static byte charToCompressedBaseHigh(final int base) {
        switch (base) {
            case '=':
                return COMPRESSED_EQUAL_HIGH;
            case 'a':
            case 'A':
                return COMPRESSED_A_HIGH;
            case 'c':
            case 'C':
                return COMPRESSED_C_HIGH;
            case 'g':
            case 'G':
                return COMPRESSED_G_HIGH;
            case 't':
            case 'T':
                return COMPRESSED_T_HIGH;
            case 'n':
            case 'N':
            case '.':
                return COMPRESSED_N_HIGH;

            // IUPAC ambiguity codes
            case 'M':
            case 'm':
                return COMPRESSED_M_HIGH;
            case 'R':
            case 'r':
                return COMPRESSED_R_HIGH;
            case 'S':
            case 's':
                return COMPRESSED_S_HIGH;
            case 'V':
            case 'v':
                return COMPRESSED_V_HIGH;
            case 'W':
            case 'w':
                return COMPRESSED_W_HIGH;
            case 'Y':
            case 'y':
                return COMPRESSED_Y_HIGH;
            case 'H':
            case 'h':
                return COMPRESSED_H_HIGH;
            case 'K':
            case 'k':
                return COMPRESSED_K_HIGH;
            case 'D':
            case 'd':
                return COMPRESSED_D_HIGH;
            case 'B':
            case 'b':
                return COMPRESSED_B_HIGH;
            default:
                throw new IllegalArgumentException("Bad  byte passed to charToCompressedBase: " + base);
        }
    }

    /**
     * Convert from BAM nybble representation of a base in low-order nybble to ASCII byte.
     * @param base One of COMPRESSED_*_LOW, a low-order nybble encoded base.
     * @return ASCII base, one of ACGTN=.
     */
    private static byte compressedBaseToByteLow(final int base) {
        switch (base & 0xf) {
            case COMPRESSED_EQUAL_LOW:
                return '=';
            case COMPRESSED_A_LOW:
                return 'A';
            case COMPRESSED_C_LOW:
                return 'C';
            case COMPRESSED_G_LOW:
                return 'G';
            case COMPRESSED_T_LOW:
                return 'T';
            case COMPRESSED_N_LOW:
                return 'N';

            // IUPAC ambiguity codes
            case COMPRESSED_M_LOW: return 'M';
            case COMPRESSED_R_LOW: return 'R';
            case COMPRESSED_S_LOW: return 'S';
            case COMPRESSED_V_LOW: return 'V';
            case COMPRESSED_W_LOW: return 'W';
            case COMPRESSED_Y_LOW: return 'Y';
            case COMPRESSED_H_LOW: return 'H';
            case COMPRESSED_K_LOW: return 'K';
            case COMPRESSED_D_LOW: return 'D';
            case COMPRESSED_B_LOW: return 'B';


            default:
                throw new IllegalArgumentException("Bad  byte passed to charToCompressedBase: " + base);
        }
    }

    /**
     * Convert from BAM nybble representation of a base in high-order nybble to ASCII byte.
     * @param base One of COMPRESSED_*_HIGH, a high-order nybble encoded base.
     * @return ASCII base, one of ACGTN=.
     */
    private static byte compressedBaseToByteHigh(final int base) {
        switch ((byte)(base & 0xf0)) {
            case COMPRESSED_EQUAL_HIGH:
                return '=';
            case COMPRESSED_A_HIGH:
                return 'A';
            case COMPRESSED_C_HIGH:
                return 'C';
            case COMPRESSED_G_HIGH:
                return 'G';
            case COMPRESSED_T_HIGH:
                return 'T';
            case COMPRESSED_N_HIGH:
                return 'N';

            // IUPAC ambiguity codes
            case COMPRESSED_M_HIGH: return 'M';
            case COMPRESSED_R_HIGH: return 'R';
            case COMPRESSED_S_HIGH: return 'S';
            case COMPRESSED_V_HIGH: return 'V';
            case COMPRESSED_W_HIGH: return 'W';
            case COMPRESSED_Y_HIGH: return 'Y';
            case COMPRESSED_H_HIGH: return 'H';
            case COMPRESSED_K_HIGH: return 'K';
            case COMPRESSED_D_HIGH: return 'D';
            case COMPRESSED_B_HIGH: return 'B';

            default:
                throw new IllegalArgumentException("Bad  byte passed to charToCompressedBase: " + base);
        }
    }

    /**
     * Convert bases in place into canonical form, upper case, and with no-call represented as N.
     * @param bases
     */
    static void normalizeBases(final byte[] bases) {
        for (int i = 0; i < bases.length; ++i) {
            bases[i] = StringUtil.toUpperCase(bases[i]);
            if (bases[i] == '.') {
                bases[i] = 'N';
            }
        }
    }

    /**
     * Convert an array of bytes, in which each byte is a binary phred quality score, to
     * printable ASCII representation of the quality scores, ala FASTQ format.
     *
     * Equivalent to phredToFastq(data, 0, data.length)
     *
     * @param data Array of bytes in which each byte is a binar phred score.
     * @return String with ASCII representation of those quality scores.
     */
    public static String phredToFastq(final byte[] data) {
        if (data == null) {
            return null;
        }
        return phredToFastq(data, 0, data.length);
    }

    /**
     * Convert an array of bytes, in which each byte is a binary phred quality score, to
     * printable ASCII representation of the quality scores, ala FASTQ format.
     * @param buffer Array of bytes in which each byte is a binar phred score.
     * @param offset Where in buffer to start conversion.
     * @param length How many bytes of buffer to convert.
     * @return String with ASCII representation of those quality scores.
     */
    public static String phredToFastq(final byte[] buffer, final int offset, final int length) {
        final char[] chars = new char[length];
        for (int i = 0; i < length; i++) {
            chars[i] = phredToFastq(buffer[offset+i] & 0xFF);
        }
        return new String(chars);
    }

    /**
     * Convert a single binary phred score to printable ASCII representation, ala FASTQ format.
     * @param phredScore binary phred score.
     * @return Printable ASCII representation of phred score.
     */
    public static char phredToFastq(final int phredScore) {
        if (phredScore < 0 || phredScore > MAX_PHRED_SCORE) {
            throw new IllegalArgumentException("Cannot encode phred score: " + phredScore);
        }
        return (char) (33 + phredScore);
    }

    /**
     * Convert a string with phred scores in printable ASCII FASTQ format to an array
     * of binary phred scores.
     * @param fastq Phred scores in FASTQ printable ASCII format.
     * @return byte array of binary phred scores in which each byte corresponds to a character in the input string.
     */
    public static byte[] fastqToPhred(final String fastq) {
        if (fastq == null) {
            return null;
        }
        final int length = fastq.length();
        final byte[] scores = new byte[length];
        for (int i = 0; i < length; i++) {
            scores[i] = (byte) fastqToPhred(fastq.charAt(i));
        }
        return scores;
    }

    /**
     * Converts printable qualities in Sanger fastq format to binary phred scores.
     */
    public static void fastqToPhred(final byte[] fastq) {
        for (int i = 0; i < fastq.length; ++i) {
            fastq[i] = (byte)fastqToPhred((char)(fastq[i] & 0xff));
        }
    }

    /**
     * Convert a single printable ASCII FASTQ format phred score to binary phred score.
     * @param ch Printable ASCII FASTQ format phred score.
     * @return Binary phred score.
     */
    public static int fastqToPhred(final char ch) {
        if (ch < 33 || ch > 126) {
            throw new IllegalArgumentException("Invalid fastq character: " + ch);
        }
        return (ch - 33);
    }

    /**
     * calculate the bin given an alignment in [beg,end)
     * Copied from SAM spec.
     * @param beg 0-based start of read (inclusive)
     * @param end 0-based end of read (exclusive)
     * @deprecated Use GenomicIndexUtil.reg2bin
     */
    static int reg2bin(final int beg, final int end)
    {
        return GenomicIndexUtil.reg2bin(beg, end);
    }

    /**
     * Handle a list of validation errors according to the validation stringency.
     * @param validationErrors List of errors to report, or null if there are no errors.
     * @param samRecordIndex Record number of the SAMRecord corresponding to the validation errors, or -1 if
     * the record number is not known.
     * @param validationStringency If STRICT, throw a SAMFormatException.  If LENIENT, print the validation
     * errors to stderr.  If SILENT, do nothing.
     */
    static void processValidationErrors(final List<SAMValidationError> validationErrors,
                                        final long samRecordIndex,
                                        final ValidationStringency validationStringency) {
        if (validationErrors != null && validationErrors.size() > 0) {
            for (final SAMValidationError validationError : validationErrors) {
                validationError.setRecordNumber(samRecordIndex);
            }
            if (validationStringency == ValidationStringency.STRICT) {
                throw new SAMFormatException("SAM validation error: " + validationErrors.get(0));
            }
            else if (validationStringency == ValidationStringency.LENIENT) {
                for (final SAMValidationError error : validationErrors) {
                    System.err.println("Ignoring SAM validation error: " + error);
                }
            }
        }
    }

    public static void processValidationError(final SAMValidationError validationError,
                                       final ValidationStringency validationStringency) {
        if (validationStringency == ValidationStringency.STRICT) {
            throw new SAMFormatException("SAM validation error: " + validationError);
        }
        else if (validationStringency == ValidationStringency.LENIENT) {
            System.err.println("Ignoring SAM validation error: " + validationError);
        }
        
    }

	private static final SAMHeaderRecordComparator<SAMReadGroupRecord> HEADER_RECORD_COMPARATOR =
			new SAMHeaderRecordComparator<SAMReadGroupRecord>(
					SAMReadGroupRecord.PLATFORM_UNIT_TAG,
					SAMReadGroupRecord.LIBRARY_TAG,
					SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG,
					SAMReadGroupRecord.READ_GROUP_SAMPLE_TAG,
					SAMReadGroupRecord.SEQUENCING_CENTER_TAG,
					SAMReadGroupRecord.PLATFORM_TAG,
					SAMReadGroupRecord.DESCRIPTION_TAG,
					SAMReadGroupRecord.READ_GROUP_ID_TAG    // We don't actually want to compare with ID but it's suitable
					// "just in case" since it's the only one that's actually required
			);

	/**
	 * Calculate a hash code from identifying information in the RG (read group) records in a SAM file's
	 * header. This hash code changes any time read groups are added or removed. Comparing one file's
	 * hash code to another's tells you if the read groups in the BAM files are different.
	 */
	public static String calculateReadGroupRecordChecksum(final File input) {
		final String ENCODING = "UTF-8";

		final MessageDigest digest;
		try {
			digest = MessageDigest.getInstance("MD5");
		} catch (final NoSuchAlgorithmException nsae) {
			throw new Error("No MD5 algorithm was available in a Java JDK? Unheard-of!");
		}

		// Sort the read group records by their first
		final SAMFileReader reader = new SAMFileReader(input);
		final List<SAMReadGroupRecord> sortedRecords = new ArrayList<SAMReadGroupRecord>(reader.getFileHeader().getReadGroups());
		Collections.sort(sortedRecords, HEADER_RECORD_COMPARATOR);

		for (final SAMReadGroupRecord rgRecord : sortedRecords) {
			final TreeMap<String, String> sortedAttributes = new TreeMap<String, String>();
			for (final Map.Entry<String, String> attributeEntry : rgRecord.getAttributes()) {
				sortedAttributes.put(attributeEntry.getKey(), attributeEntry.getValue());
			}

			try {
				for (final Map.Entry<String, String> sortedEntry : sortedAttributes.entrySet()) {
					if ( ! sortedEntry.getKey().equals(SAMReadGroupRecord.READ_GROUP_ID_TAG)) { // Redundant check, safety first
						digest.update(sortedEntry.getKey().getBytes(ENCODING));
						digest.update(sortedEntry.getValue().getBytes(ENCODING));
					}
				}
			} catch (final UnsupportedEncodingException uee) {
				throw new Error("No " + ENCODING + "!? WTH?");
			}
		}

		// Convert to a String and pad to get the full 32 chars.
		final StringBuilder hashText = new StringBuilder((new BigInteger(1, digest.digest())).toString(16));
		while (hashText.length() < 32 ) hashText.insert(0, "0");

		return hashText.toString();
	}

    /**
     * Chains <code>program</code> in front of the first "head" item in the list of
     * SAMProgramRecords in <code>header</code>.  This method should not be used
     * when there are multiple chains of program groups in a header, only when
     * it can safely be assumed that there is only one chain.  It correctly handles
     * the case where <code>program</code> has already been added to the header, so
     * it can be used whether creating a SAMProgramRecord with a constructor or when
     * calling SAMFileHeader.createProgramRecord().
     */
    public static void chainSAMProgramRecord(final SAMFileHeader header, final SAMProgramRecord program) {

        final List<SAMProgramRecord> pgs = header.getProgramRecords();
        if (pgs.size() > 0) {
            final List<String> referencedIds = new ArrayList<String>();
            for (final SAMProgramRecord pg : pgs) {
                if (pg.getPreviousProgramGroupId() != null) {
                    referencedIds.add(pg.getPreviousProgramGroupId());
                }
            }
            for (final SAMProgramRecord pg : pgs) {
                // if record being chained has already been added, ignore it
                if (pg.getProgramGroupId().equals(program.getProgramGroupId())) {
                    continue;
                }
                if (!referencedIds.contains(pg.getProgramGroupId())) {
                    program.setPreviousProgramGroupId(pg.getProgramGroupId());
                    break;
                }
            }
        }
    }

    public static void makeReadUnmapped(final SAMRecord rec) {
        if (rec.getReadNegativeStrandFlag()) {
            SAMRecordUtil.reverseComplement(rec);
            rec.setReadNegativeStrandFlag(false);
        }
        rec.setDuplicateReadFlag(false);
        rec.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
        rec.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
        rec.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
        rec.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
        rec.setInferredInsertSize(0);
        rec.setNotPrimaryAlignmentFlag(false);
        rec.setProperPairFlag(false);
        rec.setReadUnmappedFlag(true);
    }


    /**
     * Determines if a cigar has any element that both consumes read bases and consumes reference bases
     * (e.g. is not all soft-clipped)
     */
    public static boolean cigarMapsNoBasesToRef(final Cigar cigar) {
        for (final CigarElement el : cigar.getCigarElements()) {
            if (el.getOperator().consumesReadBases() && el.getOperator().consumesReferenceBases()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests if the provided record is mapped entirely beyond the end of the reference (i.e., the alignment start is greater than the
     * length of the sequence to which the record is mapped).
     */
    public static boolean recordMapsEntirelyBeyondEndOfReference(final SAMRecord record) {
        return record.getHeader().getSequence(record.getReferenceIndex()).getSequenceLength() < record.getAlignmentStart();
    }
    
    /**
     *
     * @return negative if mapq1 < mapq2, etc.
     * Note that MAPQ(0) < MAPQ(255) < MAPQ(1)
     */
    public static int compareMapqs(final int mapq1, final int mapq2) {
        if (mapq1 == mapq2) return 0;
        if (mapq1 == 0)  return -1;
        else if (mapq2 == 0) return 1;
        else if (mapq1 == 255) return -1;
        else if (mapq2 == 255) return 1;
        else return mapq1 - mapq2;
    }


    /**
     * Hokey algorithm for combining two MAPQs into values that are comparable, being cognizant of the fact
     * that in MAPQ world, 1 > 255 > 0. In this algorithm, 255 is treated as if it were 0.01, so that
     * CombinedMapq(1,0) > CombinedMapq(255, 255) > CombinedMapq(0, 0).
     * The return value should not be used for anything other than comparing to the return value of other
     * invocations of this method.
     */
    public static int combineMapqs(int m1, int m2) {
        if (m1 == 255) m1 = 1;
        else m1 *= 100;

        if (m2 == 255) m2 = 1;
        else m2 *= 100;

        return m1 + m2;

    }

    /**
     * Returns the virtual file offset of the first record in a BAM file - i.e. the virtual file
     * offset after skipping over the text header and the sequence records.
     */
    public static long findVirtualOffsetOfFirstRecordInBam(final File bamFile) {
        try { return BAMFileReader.findVirtualOffsetOfFirstRecord(bamFile); }
        catch (final IOException ioe) { throw new RuntimeEOFException(ioe); }
    }

    /**
     * Given a Cigar, Returns blocks of the sequence that have been aligned directly to the
     * reference sequence. Note that clipped portions, and inserted and deleted bases (vs. the reference)
     * are not represented in the alignment blocks.
     *
     * @param cigar The cigar containing the alignment information
     * @param alignmentStart The start (1-based) of the alignment
     * @param cigarTypeName The type of cigar passed - for error logging.
     * @return List of alignment blocks
     */
    public static List<AlignmentBlock> getAlignmentBlocks(final Cigar cigar, final int alignmentStart, final String cigarTypeName) {
        if (cigar == null) return Collections.emptyList();

        final List<AlignmentBlock> alignmentBlocks = new ArrayList<AlignmentBlock>();
        int readBase = 1;
        int refBase  = alignmentStart;

        for (final CigarElement e : cigar.getCigarElements()) {
            switch (e.getOperator()) {
                case H : break; // ignore hard clips
                case P : break; // ignore pads
                case S : readBase += e.getLength(); break; // soft clip read bases
                case N : refBase += e.getLength(); break;  // reference skip
                case D : refBase += e.getLength(); break;
                case I : readBase += e.getLength(); break;
                case M :
                case EQ :
                case X :
                    final int length = e.getLength();
                    alignmentBlocks.add(new AlignmentBlock(readBase, refBase, length));
                    readBase += length;
                    refBase  += length;
                    break;
                default : throw new IllegalStateException("Case statement didn't deal with " + cigarTypeName + " op: " + e.getOperator());
            }
        }
        return Collections.unmodifiableList(alignmentBlocks);
    }

    /**
     * @param alignmentStart The start (1-based) of the alignment
     * @param cigar The cigar containing the alignment information
     * @return the alignment start (1-based, inclusive) adjusted for clipped bases.  For example if the read
     * has an alignment start of 100 but the first 4 bases were clipped (hard or soft clipped)
     * then this method will return 96.
     *
     * Invalid to call on an unmapped read.
     * Invalid to call with cigar = null
     */
    public static int getUnclippedStart(final int alignmentStart, final Cigar cigar) {
        int unClippedStart = alignmentStart;
        for (final CigarElement cig : cigar.getCigarElements()) {
            final CigarOperator op = cig.getOperator();
            if (op == CigarOperator.SOFT_CLIP || op == CigarOperator.HARD_CLIP) {
                unClippedStart -= cig.getLength();
            }
            else {
                break;
            }
        }

        return unClippedStart;
    }

    /**
     * @param alignmentEnd The end (1-based) of the alignment
     * @param cigar The cigar containing the alignment information
     * @return the alignment end (1-based, inclusive) adjusted for clipped bases.  For example if the read
     * has an alignment end of 100 but the last 7 bases were clipped (hard or soft clipped)
     * then this method will return 107.
     *
     * Invalid to call on an unmapped read.
     * Invalid to call with cigar = null
     */
    public static int getUnclippedEnd(final int alignmentEnd, final Cigar cigar) {
        int unClippedEnd = alignmentEnd;
        final List<CigarElement> cigs = cigar.getCigarElements();
        for (int i=cigs.size() - 1; i>=0; --i) {
            final CigarElement cig = cigs.get(i);
            final CigarOperator op = cig.getOperator();

            if (op == CigarOperator.SOFT_CLIP || op == CigarOperator.HARD_CLIP) {
                unClippedEnd += cig.getLength();
            }
            else {
                break;
            }
        }

        return unClippedEnd;
    }

    /**
     * Returns the Mate Cigar String as stored in the attribute 'MC'.
     * @param rec the SAM record
     * @return Mate Cigar String, or null if there is none.
     */
    public static String getMateCigarString(final SAMRecord rec) {
        return rec.getStringAttribute(SAMTag.MC.name());
    }

    /**
     * Returns the Mate Cigar or null if there is none.
     * @param rec the SAM record
     * @param withValidation true if we are to validate the mate cigar before returning, false otherwise.
     * @return Cigar object for the read's mate, or null if there is none.
     */
    public static Cigar getMateCigar(final SAMRecord rec, final boolean withValidation) {
        final String mateCigarString = getMateCigarString(rec);
        Cigar mateCigar = null;
        if (mateCigarString != null) {
            mateCigar = TextCigarCodec.getSingleton().decode(mateCigarString);
            if (withValidation && rec.getValidationStringency() != ValidationStringency.SILENT) {
                final List<AlignmentBlock> alignmentBlocks = getAlignmentBlocks(mateCigar, rec.getMateAlignmentStart(), "mate cigar");
                SAMUtils.processValidationErrors(validateCigar(rec, mateCigar, rec.getMateReferenceIndex(), alignmentBlocks, -1, "Mate CIGAR"), -1L, rec.getValidationStringency());
            }
        }
        return mateCigar;
    }

    /**
     * Returns the Mate Cigar or null if there is none.  No validation is done on the returned cigar.
     * @param rec the SAM record
     * @return Cigar object for the read's mate, or null if there is none.
     */
    public static Cigar getMateCigar(final SAMRecord rec) {
        return getMateCigar(rec, false);
    }

    /**
     * @param rec the SAM record
     * @return number of cigar elements (number + operator) in the mate cigar string.
     */
    public static int getMateCigarLength(final SAMRecord rec) {
        final Cigar mateCigar = getMateCigar(rec);
        return (mateCigar != null) ? mateCigar.numCigarElements() : 0;
    }

    /**
     * This method uses the MateCigar value as determined from the attribute MC.  It must be non-null.
     * @param rec the SAM record
     * @return 1-based inclusive rightmost position of the clipped mate sequence, or 0 read if unmapped.
     */
    public static int getMateAlignmentEnd(final SAMRecord rec) {
        if (rec.getMateUnmappedFlag()) {
            throw new RuntimeException("getMateAlignmentEnd called on an unmapped mate.");
        }
        final Cigar mateCigar = SAMUtils.getMateCigar(rec);
        if (mateCigar == null) {
            throw new SAMException("Mate CIGAR (Tag MC) not found.");
        }
        return CoordMath.getEnd(rec.getMateAlignmentStart(), mateCigar.getReferenceLength());
    }

    /**
     * @param rec the SAM record
     * @return the mate alignment start (1-based, inclusive) adjusted for clipped bases.  For example if the mate
     * has an alignment start of 100 but the first 4 bases were clipped (hard or soft clipped)
     * then this method will return 96.
     *
     * Invalid to call on an unmapped read.
     */
    public static int getMateUnclippedStart(final SAMRecord rec) {
        if (rec.getMateUnmappedFlag())
            throw new RuntimeException("getMateUnclippedStart called on an unmapped mate.");
        final Cigar mateCigar = getMateCigar(rec);
        if (mateCigar == null) {
            throw new SAMException("Mate CIGAR (Tag MC) not found.");
        }
        return SAMUtils.getUnclippedStart(rec.getMateAlignmentStart(), mateCigar);
    }


    /**
     * @param rec the SAM record
     * @return the mate alignment end (1-based, inclusive) adjusted for clipped bases.  For example if the mate
     * has an alignment end of 100 but the last 7 bases were clipped (hard or soft clipped)
     * then this method will return 107.
     *
     * Invalid to call on an unmapped read.
     */
    public static int getMateUnclippedEnd(final SAMRecord rec) {
        if (rec.getMateUnmappedFlag()) {
            throw new RuntimeException("getMateUnclippedEnd called on an unmapped mate.");
        }
        final Cigar mateCigar = SAMUtils.getMateCigar(rec);
        if (mateCigar == null) {
            throw new SAMException("Mate CIGAR (Tag MC) not found.");
        }
        return SAMUtils.getUnclippedEnd(getMateAlignmentEnd(rec), mateCigar);
    }

    /**
     * @param rec the SAM record
     * Returns blocks of the mate sequence that have been aligned directly to the
     * reference sequence. Note that clipped portions of the mate and inserted and
     * deleted bases (vs. the reference) are not represented in the alignment blocks.
     */
    public static List<AlignmentBlock> getMateAlignmentBlocks(final SAMRecord rec) {
        return getAlignmentBlocks(getMateCigar(rec), rec.getMateAlignmentStart(), "mate cigar");
    }

    /**
     * Run all validations of the mate's CIGAR.  These include validation that the CIGAR makes sense independent of
     * placement, plus validation that CIGAR + placement yields all bases with M operator within the range of the reference.
     * @param rec the SAM record
     * @param cigar The cigar containing the alignment information
     * @param referenceIndex The reference index
     * @param alignmentBlocks The alignment blocks (parsed from the cigar)
     * @param recordNumber For error reporting.  -1 if not known.
     * @param cigarTypeName For error reporting.  "Read CIGAR" or "Mate Cigar"
     * @return List of errors, or null if no errors.
     */

    public static List<SAMValidationError> validateCigar(final SAMRecord rec,
                                                  final Cigar cigar,
                                                   final Integer referenceIndex,
                                                   final List<AlignmentBlock> alignmentBlocks,
                                                   final long recordNumber,
                                                   final String cigarTypeName) {
        // Don't know line number, and don't want to force read name to be decoded.
        List<SAMValidationError> ret = cigar.isValid(rec.getReadName(), recordNumber);
        if (referenceIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
            final SAMSequenceRecord sequence = rec.getHeader().getSequence(referenceIndex);
            final int referenceSequenceLength = sequence.getSequenceLength();
            for (final AlignmentBlock alignmentBlock : alignmentBlocks) {
                if (alignmentBlock.getReferenceStart() + alignmentBlock.getLength() - 1 > referenceSequenceLength) {
                    if (ret == null) ret = new ArrayList<SAMValidationError>();
                    ret.add(new SAMValidationError(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE,
                            cigarTypeName + " M operator maps off end of reference", rec.getReadName(), recordNumber));
                    break;
                }
            }
        }
        return ret;
    }

    /**
     * Run all validations of the mate's CIGAR.  These include validation that the CIGAR makes sense independent of
     * placement, plus validation that CIGAR + placement yields all bases with M operator within the range of the reference.
     * @param rec the SAM record
     * @param recordNumber For error reporting.  -1 if not known.
     * @return List of errors, or null if no errors.
     */
    public static List<SAMValidationError> validateMateCigar(final SAMRecord rec, final long recordNumber) {
        List<SAMValidationError> ret = null;

        if (rec.getValidationStringency() != ValidationStringency.SILENT) {
            if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {      // The mateCigar will be defined if the mate is mapped
                if (getMateCigarString(rec) != null) {
                    ret = SAMUtils.validateCigar(rec, getMateCigar(rec), rec.getMateReferenceIndex(), getMateAlignmentBlocks(rec), recordNumber, "Mate CIGAR");
                }
            } else {
                if (getMateCigarString(rec) != null) {
                    ret = new ArrayList<SAMValidationError>();
                    if (rec.getMateUnmappedFlag()) {
                        // If the Mate is unmapped, and the Mate Cigar String (MC Attribute) exists, that is a validation error.
                        ret.add(new SAMValidationError(SAMValidationError.Type.MATE_CIGAR_STRING_INVALID_PRESENCE,
                                "Mate CIGAR String (MC Attribute) present for a read whose mate is unmapped", rec.getReadName(), recordNumber));
                    }
                    else {
                        // If the Mate is not paired, and the Mate Cigar String (MC Attribute) exists, that is a validation error.
                        ret.add(new SAMValidationError(SAMValidationError.Type.MATE_CIGAR_STRING_INVALID_PRESENCE,
                                "Mate CIGAR String (MC Attribute) present for a read that is not paired", rec.getReadName(), recordNumber));
                    }
                }
            }
        }

        return ret;
    }

    /**
     * Checks to see if it is valid for this record to have a mate CIGAR (MC) and then if there is a mate CIGAR available.  This is done by
     * checking that this record is paired, its mate is mapped, and that it returns a non-null mate CIGAR.
     * @param rec
     * @return
     */
    public static boolean hasMateCigar(SAMRecord rec) {
        // NB: use getMateCigarString rather than getMateCigar to avoid validation.
        return (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag() && null != SAMUtils.getMateCigarString(rec));
    }

    /**
     * Returns a string that is the the read group ID and read name separated by a colon.  This is meant to cannonically
     * identify a given record within a set of records.
     * @param record
     * @return
     */
    public static String getCanonicalRecordName(final SAMRecord record) {
        String name = record.getStringAttribute(ReservedTagConstants.READ_GROUP_ID);
        if (null == name) name = record.getReadName();
        else name = name + ":" + record.getReadName();
        return name;
    }
}