/* * The MIT License (MIT) * * Copyright (c) 2007-2015 Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package org.broad.igv.sam; /** * @author Jim Robinson * @date 12/6/11 */ public class AlignmentUtils { public static final byte a = 'a'; public static final byte c = 'c'; public static final byte g = 'g'; public static final byte t = 't'; public static final byte A = 'A'; public static final byte C = 'C'; public static final byte G = 'G'; public static final byte T = 'T'; /** * Return true if the two bases can be considered a match. The comparison is case-insensitive, and * considers ambiguity codes in the reference. * * @param refbase * @param readbase * @return */ public static boolean compareBases(byte refbase, byte readbase) { if(readbase == 61) { return true; // By definition, 61 is "equals" } // Force both bases to upper case if (refbase > 90) { refbase = (byte) (refbase - 32); } if (readbase > 90) { readbase = (byte) (readbase - 32); } if (refbase == readbase) { return true; } switch (refbase) { case 'N': return true; // Everything matches 'N' case 'U': return readbase == 'T'; case 'M': return readbase == 'A' || readbase == 'C'; case 'R': return readbase == 'A' || readbase == 'G'; case 'W': return readbase == 'A' || readbase == 'T'; case 'S': return readbase == 'C' || readbase == 'G'; case 'Y': return readbase == 'C' || readbase == 'T'; case 'K': return readbase == 'G' || readbase == 'T'; case 'V': return readbase == 'A' || readbase == 'C' || readbase == 'G'; case 'H': return readbase == 'A' || readbase == 'C' || readbase == 'T'; case 'D': return readbase == 'A' || readbase == 'G' || readbase == 'T'; case 'B': return readbase == 'C' || readbase == 'G' || readbase == 'T'; default: return false; } } /** * Check whether there is a mismatch between {@code reference[idx]} and {@code read[idx]}, * guarding against {@code reference} being too short. * If we do not have a valid reference we assume a match, that is, NOT a misMatch. * * Note '=' means indicates a match by definition * @param reference * @param read * @param isSoftClipped * @param idx * @return */ static boolean isMisMatch(byte[] reference, byte[] read, boolean isSoftClipped, int idx){ if(reference == null) return false; boolean misMatch = false; if (isSoftClipped) { // Goby will return '=' characters when the soft-clip happens to match the reference. // It could actually be useful to see which part of the soft clipped bases match, to help detect // cases when an aligner clipped too much. final byte readbase = read[idx]; misMatch = readbase != '='; // mismatch, except when the soft-clip has an '=' base. } else { final int referenceLength = reference.length; final byte refbase = idx < referenceLength ? reference[idx] : 0; final byte readbase = read[idx]; misMatch = readbase != '=' && idx < referenceLength && refbase != 0 && !AlignmentUtils.compareBases(refbase, readbase); } return misMatch; } /** * Reverses and complements a copy of the original array */ public static byte[] reverseComplementCopy(final byte[] bases) { final int lastIndex = bases.length - 1; byte[] out = new byte[bases.length]; int i; for (i=0; i <= lastIndex; i++) { out[lastIndex-i] = complement(bases[i]); } return out; } /** * Reverses and complements the bases in place. */ public static void reverseComplement(final byte[] bases) { final int lastIndex = bases.length - 1; int i, j; for (i = 0, j = lastIndex; i < j; ++i, --j) { final byte tmp = complement(bases[i]); bases[i] = complement(bases[j]); bases[j] = tmp; } if (bases.length % 2 == 1) { bases[i] = complement(bases[i]); } } /** * Returns the complement of a single byte. */ public static final byte complement(final byte b) { switch (b) { case a: return t; case c: return g; case g: return c; case t: return a; case A: return T; case C: return G; case G: return C; case T: return A; default: return b; } } /** * Calculate the reverse complement of the specified sequence * (Stolen from Reseq) * * @param sequenceData * @return reverse complement */ public static String reverseComplement(final String sequenceData) { final byte[] bases = htsjdk.samtools.util.StringUtil.stringToBytes(sequenceData); reverseComplement(bases); return htsjdk.samtools.util.StringUtil.bytesToString(bases); } }