/*
* The MIT License (MIT)
*
* Copyright (c) 2007-2015 Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broad.igv.sam;
import org.broad.igv.feature.genome.Genome;
import java.util.HashMap;
import java.util.Map;
/**
* @author Jim Robinson
* @date 2/6/12
*/
public class BisulfiteCounts {
public static final AlignmentTrack.BisulfiteContext[] NOMESEQ_CONTEXTS =
{AlignmentTrack.BisulfiteContext.HCG, AlignmentTrack.BisulfiteContext.GCH};
Genome genome;
AlignmentTrack.BisulfiteContext bisulfiteContext;
Map<Integer, Count> counts = new HashMap<Integer, Count>();
public BisulfiteCounts(AlignmentTrack.BisulfiteContext bisulfiteContext, Genome genome) {
this.bisulfiteContext = bisulfiteContext;
this.genome = genome;
}
public void incrementCounts(Alignment baseAlignment) {
// Only works with block formats
if(baseAlignment.getAlignmentBlocks() == null) return;
String chrname = genome.getCanonicalChrName(baseAlignment.getChr());
boolean flipRead;
// We will only need reverse complement if the strand and paired end status don't match (2nd ends are G->A)
if (baseAlignment.isPaired()) {
flipRead = (baseAlignment.isPaired() && (baseAlignment.isNegativeStrand() ^ baseAlignment.isSecondOfPair()));
} else {
flipRead = baseAlignment.isNegativeStrand();
}
for (AlignmentBlock block : baseAlignment.getAlignmentBlocks()) {
if (block.isSoftClipped() || block.getBases() == null) continue;
int start = block.getStart();
int end = block.getEnd();
byte[] inReference = genome.getSequence(chrname, start, end);
if (inReference == null) continue;
byte[] reference = (flipRead ?
AlignmentUtils.reverseComplementCopy(inReference) :
inReference);
byte[] inRead = block.getBases();
byte[] read = (flipRead) ? AlignmentUtils.reverseComplementCopy(inRead) : inRead;
int alignmentLen = inRead.length;
final int idxEnd = alignmentLen - 1;
for (int idxFw = 0; idxFw < alignmentLen; idxFw++) {
//// Everything comes in relative to the positive strand.
int idx = flipRead ? (idxEnd - idxFw) : idxFw;
// Since we allow soft-clipping, the reference sequence can actually be shorter than the read. Not sure
// what to do in this case, just skip?
if (idx < 0 || idx >= reference.length) continue;
// The read base can be an equals sign, so change that to the actual ref base
byte refbase = reference[idx];
if (refbase > 90) {
refbase = (byte) (refbase - 32); // Uppercase
}
// Strand has already been accounted for
if (refbase == 'C') {
byte readbase = read[idx];
if (readbase == '=') {
readbase = refbase;
}
// Force both bases to upper case
if (readbase > 90) {
readbase = (byte) (readbase - 32);
}
if (AlignmentUtils.compareBases((byte) 'C', readbase) || AlignmentUtils.compareBases((byte) 'T', readbase)) {
AlignmentTrack.BisulfiteContext matchingContext = contextIsMatching(reference, read, idx, bisulfiteContext);
boolean matchesContext = (matchingContext != null);
if (matchesContext) {
//int extension = this.getBisulfiteSymmetricCytosineExtension(bisulfiteContext);
int extension = 0; // Don't extend
int step = flipRead ? -1 : 1;
for (int i = 0; i <= extension; i++) {
int position = start + idxFw + step*i;
Count count = counts.get(position);
if (count == null) {
count = new Count();
counts.put(position, count);
}
if (AlignmentUtils.compareBases((byte) 'T', readbase)) {
count.increment(false);
} else if (AlignmentUtils.compareBases((byte) 'C', readbase)) {
count.increment(true);
}
}
}
}
}
else {
// Non-informative
}
}
}
}
public Count getCount(int position) {
Count c = counts.get(position);
if (c == null) {
c = new Count();
counts.put(position, c);
}
return c;
}
/**
* @param reference
* @param read
* @param idx
* @param bisulfiteContext
* @return Returns the context that matched (in the case of the base class, this is always the same
* as the context passed in, derived classes might return a different context).
* If we don't match, return null.
*/
protected AlignmentTrack.BisulfiteContext contextIsMatching(byte[] reference, byte[] read, int idx,
AlignmentTrack.BisulfiteContext bisulfiteContext) {
// TODO -- NOMESEQ
// for (AlignmentTrack.BisulfiteContext context : NOMESEQ_CONTEXTS) {
// if (super.contextIsMatching(reference, read, idx, context) != null) return context;
// }
// return null;
// Get the context and see if it matches our desired context.
byte[] preContext = AlignmentTrack.getBisulfiteContextPreContext(bisulfiteContext);
byte[] postContext = AlignmentTrack.getBisulfiteContextPostContext(bisulfiteContext);
boolean matchesContext = true;
// First do the "post" context
int minLen = Math.min(reference.length, read.length);
if ((idx + postContext.length) >= minLen) {
matchesContext = false;
} else {
// Cut short whenever we don't match
for (int posti = 0; matchesContext && (posti < postContext.length); posti++) {
byte contextb = postContext[posti];
int offsetidx = idx + 1 + posti;
matchesContext &= positionMatchesContext(contextb, reference[offsetidx], read[offsetidx]);
}
}
// Now do the pre context
if ((idx - preContext.length) < 0) {
matchesContext = false;
} else {
// Cut short whenever we don't match
for (int prei = 0; matchesContext && (prei < preContext.length); prei++) {
byte contextb = preContext[prei];
int offsetidx = idx - (preContext.length - prei);
matchesContext &= positionMatchesContext(contextb, reference[offsetidx], read[offsetidx]);
}
}
return (matchesContext) ? bisulfiteContext : null;
}
/**
* @param contextb The residue in the context string (IUPAC)
* @param referenceBase The reference sequence (already checked that offsetidx is within bounds)
* @param readBase The read sequence (already checked that offsetidx is within bounds)
* @return
*/
protected boolean positionMatchesContext(byte contextb, final byte referenceBase, final byte readBase) {
boolean matchesContext = AlignmentUtils.compareBases(contextb, referenceBase);
if (!matchesContext) {
return false; // Don't need to check any further
}
// For the read, we have to handle C separately
boolean matchesReadContext = AlignmentUtils.compareBases(contextb, readBase);
if (AlignmentUtils.compareBases((byte) 'T', readBase)) {
matchesReadContext |= AlignmentUtils.compareBases(contextb, (byte) 'C');
}
return matchesReadContext;
}
/**
* caller must be careful to shift relative to the strand the cytosine is on).
*/
protected int getBisulfiteSymmetricCytosineExtension(AlignmentTrack.BisulfiteContext item) {
int out;
switch (item) {
case CG:
case HCG:
case WCG:
out = 1;
break;
case CHG:
out = 2;
break;
case GCH: // Added by JTR, confirm?
out = -1;
break;
default:
out = 0;
break;
}
return out;
}
public static class Count {
int methylatedCount;
int unmethylatedCount;
public void increment(boolean methylated) {
if (methylated) methylatedCount++;
else unmethylatedCount++;
}
}
}