/*
* The MIT License (MIT)
*
* Copyright (c) 2007-2015 Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.broad.igv.sam;
import org.apache.log4j.Logger;
import org.broad.igv.Globals;
import org.broad.igv.feature.Strand;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.feature.genome.GenomeManager;
import org.broad.igv.prefs.Constants;
import org.broad.igv.prefs.PreferencesManager;
import org.broad.igv.track.WindowFunction;
import java.awt.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author jrobinso
*/
public abstract class SAMAlignment implements Alignment {
private static Logger log = Logger.getLogger(SAMAlignment.class);
public static final char DELETE_CHAR = '-';
public static final char SKIP_CHAR = '=';
public static final char MATCH = 'M';
public static final char PERFECT_MATCH = '=';
public static final char MISMATCH = 'X';
public static final char INSERTION = 'I';
public static final char DELETION = 'D';
public static final char SKIPPED_REGION = 'N';
public static final char SOFT_CLIP = 'S';
public static final char HARD_CLIP = 'H';
public static final char PADDING = 'P';
public static final char ZERO_GAP = 'O';
public static final char UNKNOWN = 0;
public static final String REDUCE_READS_TAG = "RR";
/**
* Converts a DNA integer value to its reverse compliment integer value.
*/
protected static final char[] NT2COMP = {
'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'T', 'N', 'G', 'N', 'N', 'N', 'C', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'N', 'N', 'N', 'A', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'T', 'N', 'G', 'N', 'N', 'N', 'C', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'N', 'N', 'N', 'A', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N'
};
private static final String FLOW_SIGNAL_TAG = "ZF";
protected int alignmentStart;
protected int alignmentEnd;
String chr;
protected int start; // <= Might differ from alignment start if soft clipping is considered
protected int end; // ditto
protected Color color = null;
ReadMate mate;
public AlignmentBlockImpl[] alignmentBlocks;
public AlignmentBlockImpl[] insertions;
List<Gap> gaps;
char[] gapTypes;
protected String mateSequence = null;
protected String pairOrientation = "";
private Strand firstOfPairStrand;
private Strand secondOfPairStrand;
public SAMAlignment() {
}
public String getChr() {
return chr;
}
@Override
public String getContig() {
return chr;
}
public String getDescription() {
return getReadName();
}
public ReadMate getMate() {
return mate;
}
public Color getColor() {
return color;
}
abstract public String getReadName();
abstract public int getMappingQuality();
abstract public int getInferredInsertSize();
abstract public String getCigarString();
abstract public int getReadLength();
abstract public String getReadSequence();
public AlignmentBlock[] getAlignmentBlocks() {
return alignmentBlocks;
}
public AlignmentBlockImpl[] getInsertions() {
return insertions;
}
public abstract boolean isNegativeStrand();
public boolean contains(double location) {
return location >= getStart() && location < getEnd();
}
public byte getBase(double position) {
int basePosition = (int) position;
for (AlignmentBlock block : this.alignmentBlocks) {
if (block.contains(basePosition)) {
int offset = basePosition - block.getStart();
byte base = block.getBase(offset);
return base;
}
}
return 0;
}
public byte getPhred(double position) {
int basePosition = (int) position;
for (AlignmentBlock block : this.alignmentBlocks) {
if (block.contains(basePosition)) {
int offset = basePosition - block.getStart();
byte qual = block.getQuality(offset);
return qual;
}
}
return 0;
}
/**
* Set pair strands. Used for strand specific libraries to recover strand of
* originating fragment.
*/
protected void setPairStrands() {
if (isPaired()) {
if (isFirstOfPair()) {
firstOfPairStrand = getReadStrand();
} else {
// If we have a mate, the mate must be the firstOfPair
ReadMate mate = getMate();
if (mate != null && mate.isMapped()) {
firstOfPairStrand = mate.getStrand();
} else {
// No Mate, or mate is not mapped, FOP strand is not defined
firstOfPairStrand = Strand.NONE;
}
}
if (isSecondOfPair()) {
secondOfPairStrand = isNegativeStrand() ? Strand.NEGATIVE : Strand.POSITIVE;
} else {
ReadMate mate = getMate();
if (mate != null && mate.isMapped() && isProperPair()) {
secondOfPairStrand = mate.isNegativeStrand() ? Strand.NEGATIVE : Strand.POSITIVE;
} else {
// No Mate, or mate is not mapped, FOP strand is not defined
secondOfPairStrand = Strand.NONE;
}
}
} else {
// This alignment is not paired -- by definition "firstOfPair" is this alignment
firstOfPairStrand = getReadStrand();
secondOfPairStrand = Strand.NONE;
}
}
private static boolean operatorIsMatch(boolean showSoftClipped, char operator) {
return operator == MATCH || operator == PERFECT_MATCH || operator == MISMATCH
|| (showSoftClipped && operator == SOFT_CLIP);
}
/**
* Create the alignment blocks from the read bases and alignment information in the CIGAR
* string. The CIGAR string encodes insertions, deletions, skipped regions, and padding.
*
* @param cigarString
* @param readBases
* @param readBaseQualities
*/
protected void createAlignmentBlocks(String cigarString, byte[] readBases, byte[] readBaseQualities) {
if (cigarString.equals("*")) {
alignmentBlocks = new AlignmentBlockImpl[1];
alignmentBlocks[0] = new AlignmentBlockImpl(getStart(), readBases, readBaseQualities);
return;
}
// Create list of cigar operators
java.util.List<CigarOperator> operators = buildOperators(cigarString);
boolean showSoftClipped = PreferencesManager.getPreferences().getAsBoolean(Constants.SAM_SHOW_SOFT_CLIPPED);
int nInsertions = 0;
int nBlocks = 0;
boolean firstOperator = true;
int softClippedBaseCount = 0;
int nGaps = 0;
int nRealGaps = 0;
char prevOp = 0;
for (CigarOperator operator : operators) {
char op = operator.operator;
if (op == HARD_CLIP) {
continue; // Just skip hardclips
}
int nBases = operator.nBases;
if (operatorIsMatch(showSoftClipped, op)) {
nBlocks++;
if (operatorIsMatch(showSoftClipped, prevOp)) {
nGaps++;
}
} else if (op == DELETION || op == SKIPPED_REGION) {
nGaps++;
nRealGaps++;
} else if (op == INSERTION) {
nInsertions++;
nGaps++; // "virtual" gap, account for artificial block split @ insertion
}
if (firstOperator && op == SOFT_CLIP) {
softClippedBaseCount += nBases;
}
if (op != SOFT_CLIP) {
firstOperator = false;
}
prevOp = op;
}
alignmentBlocks = new AlignmentBlockImpl[nBlocks];
insertions = new AlignmentBlockImpl[nInsertions];
if (nGaps > 0) {
gapTypes = new char[nGaps];
}
if (nRealGaps > 0) {
gaps = new ArrayList<Gap>();
}
// Adjust start to include soft clipped bases a
if (showSoftClipped) {
start -= softClippedBaseCount;
}
int fromIdx = showSoftClipped ? 0 : softClippedBaseCount;
int blockStart = start;
// Create blocks
int blockIdx = 0;
int insertionIdx = 0;
int gapIdx = 0;
int padding = 0;
prevOp = 0;
for (int i = 0; i < operators.size(); i++) {
CigarOperator op = operators.get(i);
try {
if (op.operator == HARD_CLIP) {
continue;
}
if (operatorIsMatch(showSoftClipped, op.operator)) {
AlignmentBlockImpl block = buildAlignmentBlock(readBases, readBaseQualities,
blockStart, fromIdx, op.nBases, true);
if (op.operator == SOFT_CLIP) {
block.setSoftClipped(true);
}
alignmentBlocks[blockIdx++] = block;
fromIdx += op.nBases;
blockStart += op.nBases;
if (operatorIsMatch(showSoftClipped, prevOp)) {
gapTypes[gapIdx++] = ZERO_GAP;
}
} else if (op.operator == DELETION) {
gaps.add(new Gap(blockStart, op.nBases, op.operator));
blockStart += op.nBases;
gapTypes[gapIdx++] = op.operator;
} else if (op.operator == SKIPPED_REGION) {
// Need the "flanking" regions, i.e. size of blocks either side of splice
int flankingLeft = 0;
int flankingRight = 0;
if (i > 0) {
flankingLeft = operators.get(i - 1).nBases;
}
if (i < operators.size() - 1) {
flankingRight = operators.get(i + 1).nBases;
}
gaps.add(new SpliceGap(blockStart, op.nBases, op.operator, flankingLeft, flankingRight));
blockStart += op.nBases;
gapTypes[gapIdx++] = op.operator;
} else if (op.operator == INSERTION) {
// This gap is between blocks split by insertion. It is a zero
// length gap but must be accounted for.
gapTypes[gapIdx++] = ZERO_GAP;
AlignmentBlockImpl block = buildAlignmentBlock(readBases, readBaseQualities,
blockStart, fromIdx, op.nBases, false);
block.setPadding(padding);
insertions[insertionIdx++] = block;
fromIdx += op.nBases;
padding = 0;
} else if (op.operator == PADDING) {
// Padding for insertion start, which should be the next operator
padding += op.nBases;
}
} catch (Exception e) {
log.error("Error processing CIGAR string", e);
}
prevOp = op.operator;
}
// Check for soft clipping at end
if (showSoftClipped && operators.size() > 0) {
CigarOperator last = operators.get(operators.size() - 1);
if (last.operator == SOFT_CLIP) {
end += last.nBases;
}
}
}
/**
* Build a list of cigar operators from a cigarString. Removes padding operators and concatenates consecutive
* operators of the same type
*
* @param cigarString
* @return
*/
public static List<CigarOperator> buildOperators(String cigarString) {
java.util.List<CigarOperator> operators = new ArrayList();
StringBuilder buffer = new StringBuilder(4);
// Create list of cigar operators
CigarOperator prevOp = null;
for (int i = 0; i < cigarString.length(); i++) {
char next = cigarString.charAt(i);
if (Character.isDigit(next)) {
buffer.append(next);
} else {
char op = next;
int nBases = Integer.parseInt(buffer.toString());
buffer.setLength(0);
if (prevOp != null && prevOp.operator == op) {
prevOp.nBases += nBases;
} else {
prevOp = new CigarOperator(nBases, op);
operators.add(prevOp);
}
}
}
return operators;
}
private static AlignmentBlockImpl buildAlignmentBlock(byte[] readBases, byte[] readBaseQualities, int blockStart,
int fromIdx, int nBases, boolean checkNBasesAvailable) {
byte[] blockBases = new byte[nBases];
byte[] blockQualities = new byte[nBases];
// TODO -- represent missing sequence ("*") explicitly for efficiency.
int nBasesAvailable = nBases;
if (checkNBasesAvailable) {
nBasesAvailable = readBases.length - fromIdx;
}
if (readBases == null || readBases.length == 0) {
Arrays.fill(blockBases, (byte) '=');
} else if (nBasesAvailable < nBases) {
Arrays.fill(blockBases, (byte) '?');
} else {
System.arraycopy(readBases, fromIdx, blockBases, 0, nBases);
}
nBasesAvailable = nBases;
if (checkNBasesAvailable) {
nBasesAvailable = readBaseQualities.length - fromIdx;
}
if (readBaseQualities == null || readBaseQualities.length == 0 || nBasesAvailable < nBases) {
Arrays.fill(blockQualities, (byte) 126);
} else {
System.arraycopy(readBaseQualities, fromIdx, blockQualities, 0, nBases);
}
AlignmentBlockImpl block = new AlignmentBlockImpl(blockStart, blockBases, blockQualities);
return block;
}
public String getClipboardString(double location, int mouseX) {
return getValueStringImpl(location, mouseX, false);
}
public String getValueString(double position, int mouseX, WindowFunction windowFunction) {
return getValueStringImpl(position, mouseX, true);
}
private String getValueStringImpl(double position, int mouseX, boolean truncate) {
int basePosition = (int) position;
StringBuffer buf = new StringBuffer();
// First check insertions. Position is zero based, block coords 1 based
if (this.insertions != null) {
for (AlignmentBlock block : this.insertions) {
if (block.containsPixel(mouseX)) {
byte[] bases = block.getBases();
if (bases == null) {
buf.append("Insertion: " + block.getLength() + " bases");
} else {
if (bases.length < 50) {
buf.append("Insertion (" + bases.length + " bases): " + new String(bases));
} else {
int len = bases.length;
buf.append("Insertion (" + bases.length + " bases): " + new String(Arrays.copyOfRange(bases, 0, 25)) + "..." +
new String(Arrays.copyOfRange(bases, len - 25, len)));
}
}
return buf.toString();
}
}
}
// Not over an insertion
buf.append("Read name = " + getReadName() + "<br>");
String sample = getSample();
if (sample != null) {
buf.append("Sample = " + sample + "<br>");
}
String library = getLibrary();
if (library != null) {
buf.append("Library = " + library + "<br>");
}
String readGroup = getReadGroup();
if (readGroup != null) {
buf.append("Read group = " + readGroup + "<br>");
}
buf.append("Read length = " + Globals.DECIMAL_FORMAT.format(getReadLength()) + "bp<br>");
String cigarString = getCigarString();
// Abbreviate long CIGAR strings. Retain the start and end of the CIGAR, which show
// clipping; trim the middle.
int maxCigarStringLength = 60;
if (cigarString.length() > maxCigarStringLength) {
// Match only full <length><operator> pairs at the beginning and end of the string.
Matcher lMatcher = Pattern.compile("^(.{1," + Integer.toString(maxCigarStringLength / 2 - 1) + "}[A-Z])").matcher(cigarString);
Matcher rMatcher = Pattern.compile("[A-Z](.{1," + Integer.toString(maxCigarStringLength / 2) + "})$").matcher(cigarString);
cigarString = (lMatcher.find() ? lMatcher.group(1) : "") + "..." + (rMatcher.find() ? rMatcher.group(1) : "");
}
buf.append("----------------------" + "<br>");
buf.append("Mapping = " + (isPrimary() ? (isSupplementary() ? "Supplementary" : "Primary") : "Secondary") +
(isDuplicate() ? " Duplicate" : "") + (isVendorFailedRead() ? " Failed QC" : "") +
" @ MAPQ " + Globals.DECIMAL_FORMAT.format(getMappingQuality()) + "<br>");
buf.append("Reference span = " + getChr() + ":" + Globals.DECIMAL_FORMAT.format(getAlignmentStart() + 1) + "-" +
Globals.DECIMAL_FORMAT.format(getAlignmentEnd()) + " (" + (isNegativeStrand() ? "-" : "+") + ")" +
" = " + Globals.DECIMAL_FORMAT.format(getAlignmentEnd() - getAlignmentStart()) + "bp<br>");
buf.append("Cigar = " + cigarString + "<br>");
buf.append("Clipping = ");
// Identify the number of hard and soft clipped bases.
Matcher lclipMatcher = Pattern.compile("^(([0-9]+)H)?(([0-9]+)S)?").matcher(cigarString);
Matcher rclipMatcher = Pattern.compile("(([0-9]+)S)?(([0-9]+)H)?$").matcher(cigarString);
int lclipHard = 0, lclipSoft = 0, rclipHard = 0, rclipSoft = 0;
if (lclipMatcher.find()) {
lclipHard = lclipMatcher.group(2) == null ? 0 : Integer.parseInt(lclipMatcher.group(2), 10);
lclipSoft = lclipMatcher.group(4) == null ? 0 : Integer.parseInt(lclipMatcher.group(4), 10);
}
if (rclipMatcher.find()) {
rclipHard = rclipMatcher.group(4) == null ? 0 : Integer.parseInt(rclipMatcher.group(4), 10);
rclipSoft = rclipMatcher.group(2) == null ? 0 : Integer.parseInt(rclipMatcher.group(2), 10);
}
if (lclipHard + lclipSoft + rclipHard + rclipSoft == 0) {
buf.append("None");
} else {
if (lclipHard + lclipSoft > 0) {
buf.append("Left");
if (lclipHard > 0) {
buf.append(" " + Globals.DECIMAL_FORMAT.format(lclipHard) + " hard");
}
if (lclipSoft > 0) {
buf.append(" " + Globals.DECIMAL_FORMAT.format(lclipSoft) + " soft");
}
}
if (rclipHard + rclipSoft > 0) {
buf.append((lclipHard + lclipSoft > 0 ? "; " : "") + "Right");
if (rclipHard > 0) {
buf.append(" " + Globals.DECIMAL_FORMAT.format(rclipHard) + " hard");
}
if (rclipSoft > 0) {
buf.append(" " + Globals.DECIMAL_FORMAT.format(rclipSoft) + " soft");
}
}
}
buf.append("<br>");
Genome genome = GenomeManager.getInstance().getCurrentGenome();
if (this.isPaired()) {
buf.append("----------------------<br>");
buf.append("Mate is mapped = " + (getMate().isMapped() ? "yes" : "no") + "<br>");
if (getMate().isMapped()) {
buf.append("Mate start = " + getMate().positionString() + "<br>");
//buf.append("Pair is proper = " + (getProperPairFlag() ? "yes" : "no") + "<br>");
if (getChr().equals(getMate().getChr())) {
buf.append("Insert size = " + getInferredInsertSize() + "<br>");
}
}
if (isFirstOfPair()) {
buf.append("First in pair<br>");
}
if (isSecondOfPair()) {
buf.append("Second in pair<br>");
}
if (getPairOrientation().length() > 0) {
buf.append("Pair orientation = " + getPairOrientation() + "<br>");
}
}
Object suppAlignment = this.getAttribute("SA");
if (suppAlignment != null) {
buf.append("----------------------<br>");
buf.append(getSupplAlignmentString(suppAlignment.toString()));
buf.append("<br>");
}
String attributeString = getAttributeString(truncate);
if (attributeString != null && attributeString.length() > 0) {
buf.append("----------------------");
buf.append(getAttributeString(truncate));
}
if (mateSequence != null) {
buf.append("----------------------<br>");
buf.append("Mate sequence: " + mateSequence);
}
// Specific base
for (AlignmentBlock block : this.alignmentBlocks) {
if (block.contains(basePosition)) {
buf.append("<hr>");
int offset = basePosition - block.getStart();
byte base = block.getBase(offset);
if (base == 0 && this.getReadSequence().equals("=") && !block.isSoftClipped() && genome != null) {
base = genome.getReference(chr, basePosition);
}
byte quality = block.getQuality(offset);
buf.append("Location = " + getChr() + ":" + Globals.DECIMAL_FORMAT.format(1 + (long) position) + "<br>");
buf.append("Base = " + (char) base + " @ QV " + Globals.DECIMAL_FORMAT.format(quality) + "<br>");
break;
}
}
return buf.toString();
}
// chr21,26002386,-,11785S1115M,60,0;chr21,26001844,+,1115S111M1D41M1D394M11239S,60,4;
private String getSupplAlignmentString(String sa) {
StringBuffer buf = new StringBuffer();
buf.append("SupplementaryAlignments");
String[] records = Globals.semicolonPattern.split(sa);
for (String rec : records) {
SupplementaryAlignment a = new SupplementaryAlignment(rec);
buf.append("<br>" + a.printString());
}
return buf.toString();
}
protected abstract String getAttributeString(boolean truncate);
public abstract boolean isFirstOfPair();
public abstract boolean isSecondOfPair();
public abstract boolean isDuplicate();
public abstract boolean isMapped();
public abstract boolean isPaired();
public abstract boolean isProperPair();
public abstract boolean isSupplementary();
public abstract boolean isVendorFailedRead();
public abstract boolean isPrimary();
/**
* @return the unclippedStart
*/
abstract public int getAlignmentStart();
abstract public int getAlignmentEnd();
public float getScore() {
return getMappingQuality();
}
/**
* @param mate the mate to set
*/
public void setMate(ReadMate mate) {
this.mate = mate;
}
public int getStart() {
return start;
}
public void setStart(int start) {
this.start = start;
}
public int getEnd() {
return end;
}
public void setEnd(int end) {
this.end = end;
}
public abstract Object getAttribute(String key);
public java.util.List<Gap> getGaps() {
return gaps;
}
@Override
public void finish() {
}
@Override
public AlignmentBlock getInsertionAt(int position) {
for (AlignmentBlock block : insertions) {
if (block.getStart() == position) return block;
if (block.getStart() > position) return null; // Blocks increase lineraly
}
return null;
}
public Strand getReadStrand() {
return isNegativeStrand() ? Strand.NEGATIVE : Strand.POSITIVE;
}
@Override
public String getPairOrientation() {
return pairOrientation;
}
@Override
public void setMateSequence(String sequence) {
this.mateSequence = sequence;
}
/**
* Return the strand of the read marked "first-in-pair" for a paired alignment. This method can return
* Strand.NONE if the end marked first is unmapped.
*
* @return strand of first-of-pair
*/
public Strand getFirstOfPairStrand() {
return firstOfPairStrand;
}
/**
* Return the strand of the read marked "second-in-pair" for a paired alignment. The strand is
* undefined (Strand.NONE) for non-paired alignments
*
* @return strand of second-of-pair
*/
public Strand getSecondOfPairStrand() {
return secondOfPairStrand;
}
/**
* @return start index in the flow signal as specified by the ZF tag, or -1 if not present
* or non-numeric
*/
public int getFlowSignalsStart() {
Object attribute = getAttribute(FLOW_SIGNAL_TAG); // NB: from a TMAP optional tag
int toRet = -1;
if (attribute != null && attribute instanceof Integer) {
toRet = (Integer) attribute;
}
return toRet;
}
protected void setPairOrientation() {
/*
if (record.getReadPairedFlag() &&
!record.getReadUnmappedFlag() &&
!record.getMateUnmappedFlag() &&
record.getReferenceName().equals(record.getMateReferenceName())) {
*/
if (isPaired() && isMapped() && mate != null && mate.isMapped() && getChr().equals(mate.getChr())) { // && name === mate.name
char s1 = isNegativeStrand() ? 'R' : 'F';
char s2 = mate.isNegativeStrand() ? 'R' : 'F';
char o1 = ' ';
char o2 = ' ';
if (isFirstOfPair()) {
o1 = '1';
o2 = '2';
} else if (isSecondOfPair()) {
o1 = '2';
o2 = '1';
}
final char[] tmp = new char[4];
int isize = getInferredInsertSize();
int estReadLen = getAlignmentEnd() - getAlignmentStart();
if (isize == 0) {
//isize not recorded. Need to estimate. This calculation was validated against an Illumina
// -> <- library bam.
int estMateEnd = getAlignmentStart() < mate.getStart() ?
getMate().getStart() + estReadLen : mate.getStart() - estReadLen;
isize = estMateEnd - getAlignmentStart();
}
//if (isize > estReadLen) {
if (isize > 0) {
tmp[0] = s1;
tmp[1] = o1;
tmp[2] = s2;
tmp[3] = o2;
} else {
tmp[2] = s1;
tmp[3] = o1;
tmp[0] = s2;
tmp[1] = o2;
}
// }
pairOrientation = new String(tmp);
}
}
public void setChr(String chr) {
this.chr = chr;
}
static class CigarOperator {
int nBases;
char operator;
/**
* Constructs ...
*
* @param nBases
* @param operator
*/
public CigarOperator(int nBases, char operator) {
this.nBases = nBases;
this.operator = operator;
}
}
public static class SupplementaryAlignment {
public String chr;
public int start;
public char strand;
public int mapQ;
public int numMismatches;
public int lenOnRef;
public SupplementaryAlignment(String rec) {
String[] tokens = Globals.commaPattern.split(rec);
chr = tokens[0];
start = Integer.parseInt(tokens[1]);
strand = tokens[2].charAt(0);
mapQ = Integer.parseInt(tokens[4]);
numMismatches = Integer.parseInt(tokens[5]);
lenOnRef = computeLengthOnReference(tokens[3]);
}
public String printString() {
// chr6:43,143,415-43,149,942 (-) @ MAPQ 60 NM 763
return chr + ":" + Globals.DECIMAL_FORMAT.format(start) + "-" + Globals.DECIMAL_FORMAT.format(start + lenOnRef)
+ " (" + strand + ") = " + Globals.DECIMAL_FORMAT.format(lenOnRef) + "bp @MAPQ " + mapQ + " NM" + numMismatches;
}
int computeLengthOnReference(String cigarString) {
int len = 0;
StringBuffer buf = new StringBuffer();
for (char c : cigarString.toCharArray()) {
if (c > 47 && c < 58) {
buf.append(c);
} else {
switch (c) {
case 'N':
case 'D':
case 'M':
case '=':
case 'X':
len += Integer.parseInt(buf.toString());
}
buf.setLength(0);
}
}
return len;
}
}
public String getSynopsisString() {
char st = isNegativeStrand() ? '-' : '+';
Object nm = getAttribute("NM");
String numMismatches = nm == null ? "?" : nm.toString();
int lenOnRef = getAlignmentEnd() - getAlignmentStart();
int[] clipping = getClipping(getCigarString());
String clippingString = "";
if (clipping[0] + clipping[1] + clipping[2] + clipping[3] > 0) {
if (clipping[0] > 0) clippingString += clipping[0] + "H";
if (clipping[1] > 0) clippingString += clipping[1] + "S";
clippingString += " ... ";
if (clipping[3] > 0) clippingString += clipping[3] + "S";
if (clipping[2] > 0) clippingString += clipping[2] + "H";
}
return chr + ":" + Globals.DECIMAL_FORMAT.format(getAlignmentStart()) + "-" +
Globals.DECIMAL_FORMAT.format(getAlignmentEnd())
+ " (" + st + ") = " + Globals.DECIMAL_FORMAT.format(lenOnRef) + "BP @MAPQ=" + getMappingQuality() +
" NM=" + numMismatches + " CLIPPING=" + clippingString;
}
public static int[] getClipping(String cigarString) {
// Identify the number of hard and soft clipped bases.
Matcher lclipMatcher = Pattern.compile("^(([0-9]+)H)?(([0-9]+)S)?").matcher(cigarString);
Matcher rclipMatcher = Pattern.compile("(([0-9]+)S)?(([0-9]+)H)?$").matcher(cigarString);
int lclipHard = 0, lclipSoft = 0, rclipHard = 0, rclipSoft = 0;
if (lclipMatcher.find()) {
lclipHard = lclipMatcher.group(2) == null ? 0 : Integer.parseInt(lclipMatcher.group(2), 10);
lclipSoft = lclipMatcher.group(4) == null ? 0 : Integer.parseInt(lclipMatcher.group(4), 10);
}
if (rclipMatcher.find()) {
rclipHard = rclipMatcher.group(4) == null ? 0 : Integer.parseInt(rclipMatcher.group(4), 10);
rclipSoft = rclipMatcher.group(2) == null ? 0 : Integer.parseInt(rclipMatcher.group(2), 10);
}
return new int[]{lclipHard, lclipSoft, rclipHard, rclipSoft};
}
}