/* * The MIT License * * Copyright (c) 2009 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package htsjdk.samtools.util; import htsjdk.samtools.AlignmentBlock; import htsjdk.samtools.Cigar; import htsjdk.samtools.CigarElement; import htsjdk.samtools.CigarOperator; import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.SAMTag; import java.io.File; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class SequenceUtil { /** Byte typed variables for all normal bases. */ public static final byte a='a', c='c', g='g', t='t', n='n', A='A', C='C', G='G', T='T', N='N'; /** * Calculate the reverse complement of the specified sequence * (Stolen from Reseq) * * @param sequenceData * @return reverse complement */ public static String reverseComplement(final String sequenceData) { final byte[] bases = htsjdk.samtools.util.StringUtil.stringToBytes(sequenceData); reverseComplement(bases); return htsjdk.samtools.util.StringUtil.bytesToString(bases); } /** Attempts to efficiently compare two bases stored as bytes for equality. */ public static boolean basesEqual(byte lhs, byte rhs) { if (lhs == rhs) return true; else { if (lhs > 90) lhs -= 32; if (rhs > 90) rhs -= 32; } return lhs == rhs; } /** * returns true if the value of base represents a no call */ public static boolean isNoCall(final byte base) { return base == 'N' || base == 'n' || base == '.'; } /** Returns true if the byte is in [acgtACGT]. */ public static boolean isValidBase(final byte b){ return b == a || b == A || b == c || b == C || b == g || b == G || b == t || b == T; } /** Calculates the fraction of bases that are G/C in the sequence. */ public static double calculateGc(final byte[] bases) { int gcs = 0; for (int i=0; i<bases.length; ++i) { final byte b = bases[i]; if (b == 'C' || b == 'G' || b == 'c' || b == 'g') ++gcs; } return gcs / (double) bases.length; } /** * Throws an exception only if both parameters are not null * @param s1 a list of sequence headers * @param s2 a second list of sequence headers */ public static void assertSequenceListsEqual(final List<SAMSequenceRecord> s1, final List<SAMSequenceRecord> s2) { if (s1 != null && s2 != null) { if (s1.size() != s2.size()) { throw new SequenceListsDifferException( "Sequence dictionaries are not the same size (" + s1.size() + ", " + s2.size() + ")"); } for (int i = 0; i < s1.size(); ++i) { if (!s1.get(i).isSameSequence(s2.get(i))) { String s1Attrs = ""; for (final java.util.Map.Entry<String, String> entry : s1.get(i) .getAttributes()) { s1Attrs += "/" + entry.getKey() + "=" + entry.getValue(); } String s2Attrs = ""; for (final java.util.Map.Entry<String, String> entry : s2.get(i) .getAttributes()) { s2Attrs += "/" + entry.getKey() + "=" + entry.getValue(); } throw new SequenceListsDifferException( "Sequences at index " + i + " don't match: " + s1.get(i).getSequenceIndex() + "/" + s1.get(i).getSequenceLength() + "/" + s1.get(i).getSequenceName() + s1Attrs + " " + s2.get(i).getSequenceIndex() + "/" + s2.get(i).getSequenceLength() + "/" + s2.get(i).getSequenceName() + s2Attrs); } } } } public static class SequenceListsDifferException extends SAMException { public SequenceListsDifferException() { } public SequenceListsDifferException(final String s) { super(s); } public SequenceListsDifferException(final String s, final Throwable throwable) { super(s, throwable); } public SequenceListsDifferException(final Throwable throwable) { super(throwable); } } /** * Returns true if both parameters are null or equal, otherwise returns false */ public static boolean areSequenceDictionariesEqual(final SAMSequenceDictionary s1, final SAMSequenceDictionary s2) { if (s1 == null && s2 == null) return true; if (s1 == null || s2 == null) return false; try { assertSequenceListsEqual(s1.getSequences(), s2.getSequences()); return true; } catch (SequenceListsDifferException e) { return false; } } /** * Throws an exception if both parameters are non-null and unequal. */ public static void assertSequenceDictionariesEqual(final SAMSequenceDictionary s1, final SAMSequenceDictionary s2) { if (s1 == null || s2 == null) return; assertSequenceListsEqual(s1.getSequences(), s2.getSequences()); } /** * Throws an exception if both parameters are non-null and unequal, including the filenames. */ public static void assertSequenceDictionariesEqual(final SAMSequenceDictionary s1, final SAMSequenceDictionary s2, final File f1, final File f2) { try { assertSequenceDictionariesEqual(s1, s2); } catch (SequenceListsDifferException e) { throw new SequenceListsDifferException("In files " + f1.getAbsolutePath() + " and " + f2.getAbsolutePath(), e); } } /** * Create a simple ungapped cigar string, which might have soft clipping at either end * @param alignmentStart raw aligment start, which may result in read hanging off beginning or end of read * @return cigar string that may have S operator at beginning or end, and has M operator for the rest of the read */ public static String makeCigarStringWithPossibleClipping(final int alignmentStart, final int readLength, final int referenceSequenceLength) { int start = alignmentStart; int leftSoftClip = 0; if (start < 1) { leftSoftClip = 1 - start; start = 1; } int rightSoftClip = 0; if (alignmentStart + readLength > referenceSequenceLength + 1) { rightSoftClip = alignmentStart + readLength - referenceSequenceLength - 1; } // CIGAR is trivial because there are no indels or clipping in Gerald final int matchLength = readLength - leftSoftClip - rightSoftClip; if (matchLength < 1) { throw new SAMException("Unexpected cigar string with no M op for read."); } return makeSoftClipCigar(leftSoftClip) + Integer.toString(matchLength) + "M" + makeSoftClipCigar(rightSoftClip); } /** * Create a cigar string for a gapped alignment, which may have soft clipping at either end * @param alignmentStart raw alignment start, which may result in read hanging off beginning or end of read * @param readLength * @param referenceSequenceLength * @param indelPosition number of matching bases before indel. Must be > 0 * @param indelLength length of indel. Positive for insertion, negative for deletion. * @return cigar string that may have S operator at beginning or end, has one or two M operators, and an I or a D. */ public static String makeCigarStringWithIndelPossibleClipping(final int alignmentStart, final int readLength, final int referenceSequenceLength, final int indelPosition, final int indelLength) { int start = alignmentStart; int leftSoftClip = 0; if (start < 1) { leftSoftClip = 1 - start; start = 1; } int rightSoftClip = 0; final int alignmentEnd = alignmentStart + readLength - indelLength; if (alignmentEnd > referenceSequenceLength + 1) { rightSoftClip = alignmentEnd - referenceSequenceLength - 1; } if (leftSoftClip >= indelPosition) { throw new IllegalStateException("Soft clipping entire pre-indel match. leftSoftClip: " + leftSoftClip + "; indelPosition: " + indelPosition); } // CIGAR is trivial because there are no indels or clipping in Gerald final int firstMatchLength = indelPosition - leftSoftClip; final int secondMatchLength = readLength - indelPosition - (indelLength > 0? indelLength: 0) - rightSoftClip; if (secondMatchLength < 1) { throw new SAMException("Unexpected cigar string with no M op for read."); } return makeSoftClipCigar(leftSoftClip) + Integer.toString(firstMatchLength) + "M" + Math.abs(indelLength) + (indelLength > 0? "I": "D") + Integer.toString(secondMatchLength) + "M" + makeSoftClipCigar(rightSoftClip); } public static String makeSoftClipCigar(final int clipLength) { if (clipLength == 0) { return ""; } return Integer.toString(clipLength) + "S"; } /** Calculates the number of mismatches between the read and the reference sequence provided. */ public static int countMismatches(final SAMRecord read, final byte[] referenceBases) { return countMismatches(read, referenceBases, 0, false); } /** Calculates the number of mismatches between the read and the reference sequence provided. */ public static int countMismatches(final SAMRecord read, final byte[] referenceBases, final int referenceOffset) { return countMismatches(read, referenceBases, referenceOffset, false); } /** * Calculates the number of mismatches between the read and the reference sequence provided. * * @param referenceBases Array of ASCII bytes that covers at least the the portion of the reference sequence * to which read is aligned from getReferenceStart to getReferenceEnd. * @param referenceOffset 0-based offset of the first element of referenceBases relative to the start * of that reference sequence. * @param bisulfiteSequence If this is true, it is assumed that the reads were bisulfite treated * and C->T on the positive strand and G->A on the negative strand will not be counted * as mismatches. */ public static int countMismatches(final SAMRecord read, final byte[] referenceBases, final int referenceOffset, final boolean bisulfiteSequence) { try { int mismatches = 0; final byte[] readBases = read.getReadBases(); for (final AlignmentBlock block : read.getAlignmentBlocks()) { final int readBlockStart = block.getReadStart() - 1; final int referenceBlockStart = block.getReferenceStart() - 1 - referenceOffset; final int length = block.getLength(); for (int i=0; i<length; ++i) { if (!bisulfiteSequence) { if (!basesEqual(readBases[readBlockStart+i], referenceBases[referenceBlockStart+i])) { ++mismatches; } } else { if (!bisulfiteBasesEqual(read.getReadNegativeStrandFlag(), readBases[readBlockStart+i], referenceBases[referenceBlockStart+i])) { ++mismatches; } } } } return mismatches; } catch (Exception e) { throw new SAMException("Exception counting mismatches for read " + read, e); } } /** * Calculates the number of mismatches between the read and the reference sequence provided. * * @param referenceBases Array of ASCII bytes that covers at least the the portion of the reference sequence * to which read is aligned from getReferenceStart to getReferenceEnd. * @param bisulfiteSequence If this is true, it is assumed that the reads were bisulfite treated * and C->T on the positive strand and G->A on the negative strand will not be counted * as mismatches. */ public static int countMismatches(final SAMRecord read, final byte[] referenceBases, final boolean bisulfiteSequence) { return countMismatches(read, referenceBases, 0, bisulfiteSequence); } /** * Sadly, this is a duplicate of the method above, except that it takes char[] for referenceBases rather * than byte[]. This is because GATK needs it this way. * * TODO: Remove this method when GATK map method is changed to take refseq as byte[]. */ private static int countMismatches(final SAMRecord read, final char[] referenceBases, final int referenceOffset) { int mismatches = 0; final byte[] readBases = read.getReadBases(); for (final AlignmentBlock block : read.getAlignmentBlocks()) { final int readBlockStart = block.getReadStart() - 1; final int referenceBlockStart = block.getReferenceStart() - 1 - referenceOffset; final int length = block.getLength(); for (int i=0; i<length; ++i) { if (!basesEqual(readBases[readBlockStart+i], StringUtil.charToByte(referenceBases[referenceBlockStart+i]))) { ++mismatches; } } } return mismatches; } /** * Calculates the sum of qualities for mismatched bases in the read. * @param referenceBases Array of ASCII bytes in which the 0th position in the array corresponds * to the first element of the reference sequence to which read is aligned. */ public static int sumQualitiesOfMismatches(final SAMRecord read, final byte[] referenceBases) { return sumQualitiesOfMismatches(read, referenceBases, 0, false); } /** * Calculates the sum of qualities for mismatched bases in the read. * @param referenceBases Array of ASCII bytes that covers at least the the portion of the reference sequence * to which read is aligned from getReferenceStart to getReferenceEnd. * @param referenceOffset 0-based offset of the first element of referenceBases relative to the start * of that reference sequence. */ public static int sumQualitiesOfMismatches(final SAMRecord read, final byte[] referenceBases, final int referenceOffset) { return sumQualitiesOfMismatches(read, referenceBases, referenceOffset, false); } /** * Calculates the sum of qualities for mismatched bases in the read. * @param referenceBases Array of ASCII bytes that covers at least the the portion of the reference sequence * to which read is aligned from getReferenceStart to getReferenceEnd. * @param referenceOffset 0-based offset of the first element of referenceBases relative to the start * of that reference sequence. * @param bisulfiteSequence If this is true, it is assumed that the reads were bisulfite treated * and C->T on the positive strand and G->A on the negative strand will not be counted * as mismatches. */ public static int sumQualitiesOfMismatches(final SAMRecord read, final byte[] referenceBases, final int referenceOffset, final boolean bisulfiteSequence) { int qualities = 0; final byte[] readBases = read.getReadBases(); final byte[] readQualities = read.getBaseQualities(); if (read.getAlignmentStart() <= referenceOffset) { throw new IllegalArgumentException("read.getAlignmentStart(" + read.getAlignmentStart() + ") <= referenceOffset(" + referenceOffset + ")"); } for (final AlignmentBlock block : read.getAlignmentBlocks()) { final int readBlockStart = block.getReadStart() - 1; final int referenceBlockStart = block.getReferenceStart() - 1 - referenceOffset; final int length = block.getLength(); for (int i=0; i<length; ++i) { if (!bisulfiteSequence) { if (!basesEqual(readBases[readBlockStart+i], referenceBases[referenceBlockStart+i])) { qualities += readQualities[readBlockStart+i]; } } else { if (!bisulfiteBasesEqual(read.getReadNegativeStrandFlag(), readBases[readBlockStart+i], referenceBases[referenceBlockStart+i])) { qualities += readQualities[readBlockStart+i]; } } } } return qualities; } /** * Sadly, this is a duplicate of the method above, except that it takes char[] for referenceBases rather * than byte[]. This is because GATK needs it this way. * * TODO: Remove this method when GATK map method is changed to take refseq as byte[]. */ public static int sumQualitiesOfMismatches(final SAMRecord read, final char[] referenceBases, final int referenceOffset) { int qualities = 0; final byte[] readBases = read.getReadBases(); final byte[] readQualities = read.getBaseQualities(); if (read.getAlignmentStart() <= referenceOffset) { throw new IllegalArgumentException("read.getAlignmentStart(" + read.getAlignmentStart() + ") <= referenceOffset(" + referenceOffset + ")"); } for (final AlignmentBlock block : read.getAlignmentBlocks()) { final int readBlockStart = block.getReadStart() - 1; final int referenceBlockStart = block.getReferenceStart() - 1 - referenceOffset; final int length = block.getLength(); for (int i=0; i<length; ++i) { if (!basesEqual(readBases[readBlockStart+i], StringUtil.charToByte(referenceBases[referenceBlockStart+i]))) { qualities += readQualities[readBlockStart+i]; } } } return qualities; } public static int countInsertedBases(final Cigar cigar) { int ret = 0; for (final CigarElement element : cigar.getCigarElements()) { if (element.getOperator() == CigarOperator.INSERTION) ret += element.getLength(); } return ret; } public static int countDeletedBases(final Cigar cigar) { int ret = 0; for (final CigarElement element : cigar.getCigarElements()) { if (element.getOperator() == CigarOperator.DELETION) ret += element.getLength(); } return ret; } public static int countInsertedBases(final SAMRecord read) { return countInsertedBases(read.getCigar()); } public static int countDeletedBases(final SAMRecord read) { return countDeletedBases(read.getCigar()); } /** * Calculates the for the predefined NM tag from the SAM spec. To the result of * countMismatches() it adds 1 for each indel. */ public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases) { return calculateSamNmTag(read, referenceBases, 0, false); } /** * Calculates the for the predefined NM tag from the SAM spec. To the result of * countMismatches() it adds 1 for each indel. * @param referenceOffset 0-based offset of the first element of referenceBases relative to the start * of that reference sequence. */ public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases, final int referenceOffset) { return calculateSamNmTag(read, referenceBases, referenceOffset, false); } /** * Calculates the for the predefined NM tag from the SAM spec. To the result of * countMismatches() it adds 1 for each indel. * @param referenceOffset 0-based offset of the first element of referenceBases relative to the start * of that reference sequence. * @param bisulfiteSequence If this is true, it is assumed that the reads were bisulfite treated * and C->T on the positive strand and G->A on the negative strand will not be counted * as mismatches. */ public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases, final int referenceOffset, final boolean bisulfiteSequence) { int samNm = countMismatches(read, referenceBases, referenceOffset, bisulfiteSequence); for (final CigarElement el : read.getCigar().getCigarElements()) { if (el.getOperator() == CigarOperator.INSERTION || el.getOperator() == CigarOperator.DELETION) { samNm += el.getLength(); } } return samNm; } /** * Sadly, this is a duplicate of the method above, except that it takes char[] for referenceBases rather * than byte[]. This is because GATK needs it this way. * * TODO: Remove this method when GATK map method is changed to take refseq as byte[]. */ public static int calculateSamNmTag(final SAMRecord read, final char[] referenceBases, final int referenceOffset) { int samNm = countMismatches(read, referenceBases, referenceOffset); for (final CigarElement el : read.getCigar().getCigarElements()) { if (el.getOperator() == CigarOperator.INSERTION || el.getOperator() == CigarOperator.DELETION) { samNm += el.getLength(); } } return samNm; } /** Returns the complement of a single byte. */ public static byte complement(final byte b) { switch (b) { case a: return t; case c: return g; case g: return c; case t: return a; case A: return T; case C: return G; case G: return C; case T: return A; default: return b; } } /** Reverses and complements the bases in place. */ public static void reverseComplement(final byte[] bases) { final int lastIndex = bases.length - 1; int i, j; for (i=0, j=lastIndex; i<j; ++i, --j) { final byte tmp = complement(bases[i]); bases[i] = complement(bases[j]); bases[j] = tmp; } if (bases.length % 2 == 1) { bases[i] = complement(bases[i]); } } /** Reverses the quals in place. */ public static void reverseQualities(final byte[] quals) { final int lastIndex = quals.length - 1; int i, j; for (i=0, j=lastIndex; i<j; ++i, --j) { final byte tmp = quals[i]; quals[i] = quals[j]; quals[j] = tmp; } } /** Returns true if the bases are equal OR if the mismatch cannot be accounted for by * bisfulite treatment. C->T on the positive strand and G->A on the negative strand * do not count as mismatches */ public static boolean bisulfiteBasesEqual(final boolean negativeStrand, final byte read, final byte reference) { return (basesEqual(read, reference)) || (isBisulfiteConverted(read, reference, negativeStrand)); } public static boolean bisulfiteBasesEqual(final byte read, final byte reference) { return bisulfiteBasesEqual(false, read, reference); } /** * Checks for bisulfite conversion, C->T on the positive strand and G-> on the negative strand. */ public static boolean isBisulfiteConverted(final byte read, final byte reference, final boolean negativeStrand) { if (negativeStrand) { if (basesEqual(reference, G) && basesEqual(read, A)) { return true; } } else { if (basesEqual(reference, C) && basesEqual(read, T)) { return true; } } return false; } public static boolean isBisulfiteConverted(final byte read, final byte reference) { return isBisulfiteConverted(read, reference, false); } /* * Regexp for MD string. * * \G = end of previous match. * (?:[0-9]+) non-capturing (why non-capturing?) group of digits. For this number of bases read matches reference. * - or - * Single reference base for case in which reference differs from read. * - or - * ^one or more reference bases that are deleted in read. * */ static final Pattern mdPat = Pattern.compile("\\G(?:([0-9]+)|([ACTGNactgn])|(\\^[ACTGNactgn]+))"); /** * Produce reference bases from an aligned SAMRecord with MD string and Cigar. * @param rec Must contain non-empty CIGAR and MD attribute. * @param includeReferenceBasesForDeletions If true, include reference bases that are deleted in the read. * This will make the returned array not line up with the read if there are deletions. * @return References bases corresponding to the read. If there is an insertion in the read, reference contains * '-'. If the read is soft-clipped, reference contains '0'. If there is a skipped region and * includeReferenceBasesForDeletions==true, reference will have Ns for the skipped region. */ public static byte[] makeReferenceFromAlignment(final SAMRecord rec, final boolean includeReferenceBasesForDeletions) { final String md = rec.getStringAttribute(SAMTag.MD.name()); if (md == null) { throw new SAMException("Cannot create reference from SAMRecord with no MD tag, read: " + rec.getReadName()); } // Not sure how long output will be, but it will be no longer than this. int maxOutputLength = 0; final Cigar cigar = rec.getCigar(); if (cigar == null) { throw new SAMException("Cannot create reference from SAMRecord with no CIGAR, read: " + rec.getReadName()); } for (final CigarElement cigarElement : cigar.getCigarElements()) { maxOutputLength += cigarElement.getLength(); } final byte[] ret = new byte[maxOutputLength]; int outIndex = 0; final Matcher match = mdPat.matcher(md); int curSeqPos = 0; int savedBases = 0; final byte[] seq = rec.getReadBases(); for (final CigarElement cigEl : cigar.getCigarElements()) { final int cigElLen = cigEl.getLength(); final CigarOperator cigElOp = cigEl.getOperator(); if (cigElOp == CigarOperator.SKIPPED_REGION) { // We've decided that MD tag will not contain bases for skipped regions, as they // could be megabases long, so just put N in there if caller wants reference bases, // otherwise ignore skipped regions. if (includeReferenceBasesForDeletions) { for (int i = 0; i < cigElLen; ++i) { ret[outIndex++] = N; } } } // If it consumes reference bases, it's either a match or a deletion in the sequence // read. Either way, we're going to need to parse through the MD. else if (cigElOp.consumesReferenceBases()) { // We have a match region, go through the MD int basesMatched = 0; // Do we have any saved matched bases? while ((savedBases>0) && (basesMatched < cigElLen)) { ret[outIndex++] = seq[curSeqPos++]; savedBases--; basesMatched++; } while (basesMatched < cigElLen) { boolean matched = match.find(); if (matched) { String mg; if ( ((mg = match.group(1)) !=null) && (mg.length() > 0) ) { // It's a number , meaning a series of matches final int num = Integer.parseInt(mg); for (int i = 0; i < num; i++) { if (basesMatched<cigElLen) { ret[outIndex++] = seq[curSeqPos++]; } else { savedBases++; } basesMatched++; } } else if ( ((mg = match.group(2)) !=null) && (mg.length() > 0) ) { // It's a single nucleotide, meaning a mismatch if (basesMatched<cigElLen) { ret[outIndex++] = StringUtil.charToByte(mg.charAt(0)); curSeqPos++; } else { throw new IllegalStateException("Should never happen."); } basesMatched++; } else if ( ((mg = match.group(3)) !=null) && (mg.length() > 0) ) { // It's a deletion, starting with a caret // don't include caret if (includeReferenceBasesForDeletions) { final byte[] deletedBases = StringUtil.stringToBytes(mg); System.arraycopy(deletedBases, 1, ret, outIndex, deletedBases.length - 1); outIndex += deletedBases.length - 1; } basesMatched += mg.length() - 1; // Check just to make sure. if (basesMatched != cigElLen) { throw new SAMException("Got a deletion in CIGAR (" + cigar + ", deletion " + cigElLen + " length) with an unequal ref insertion in MD (" + md + ", md " + basesMatched + " length"); } if (cigElOp != CigarOperator.DELETION) { throw new SAMException ("Got an insertion in MD ("+md+") without a corresponding deletion in cigar ("+cigar+")"); } } else { matched = false; } } if (!matched) { throw new SAMException("Illegal MD pattern: " + md + " for read " + rec.getReadName() + " with CIGAR " + rec.getCigarString()); } } } else if (cigElOp.consumesReadBases()) { // We have an insertion in read for (int i = 0; i < cigElLen; i++) { final char c = (cigElOp == CigarOperator.SOFT_CLIP) ? '0' : '-'; ret[outIndex++] = StringUtil.charToByte(c); curSeqPos++; } } else { // It's an op that consumes neither read nor reference bases. Do we just ignore?? } } if (outIndex < ret.length) { final byte[] shorter = new byte[outIndex]; System.arraycopy(ret, 0, shorter, 0, outIndex); return shorter; } return ret; } }