/*******************************************************************************
* Copyright 2013 EMBL-EBI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package htsjdk.samtools.cram.encoding.reader;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
/**
 * A cursor over a byte array that serialises BAM alignment records in place.
 *
 * <p>
 * All multi-byte integers are written little-endian. The fixed-width fields
 * live at the constant offsets declared below (relative to {@link #start});
 * the variable-length sections (read name, CIGAR, bases, quality scores,
 * tags) follow each other, so they must be written in exactly that order:
 * each setter records the offset at which the next section begins and the
 * following setter refuses to run until that offset is known.
 *
 * <p>
 * Typical write sequence for one record: fixed-field setters in any order,
 * then {@code setReadName}, {@code setCigar}, {@code setBases},
 * {@code setQualityScores}, optional {@code addTag}/{@code setTagData}, and
 * finally {@link #finish()}, which writes the block size and moves the cursor
 * to the first byte after the record.
 *
 * <p>
 * No bounds checking is performed beyond what array access itself provides;
 * the caller supplies a buffer that is large enough.
 */
public class BAMRecordView {
    // Offsets of the fixed-width fields, relative to the record start.
    public static final int BLOCK_SIZE = 0;
    public static final int REF_ID = 4;
    public static final int POS = 8;
    public static final int READ_NAME_LEN = 12;
    public static final int MAP_Q = 13;
    public static final int INDEX_BIN = 14;
    public static final int CIGAR_LEN = 16;
    public static final int FLAGS = 18;
    public static final int READ_LEN = 20;
    public static final int MATE_REF_ID = 24;
    public static final int MATE_AL_START = 28;
    public static final int INS_SIZE = 32;
    public static final int READ_NAME = 36;

    /** Largest value representable as an unsigned 32-bit integer. */
    private static final long MAX_UINT = 0xFFFFFFFFL;

    // Representation of CigarOperator in BAM file
    private static final byte OP_M = 0;
    private static final byte OP_I = 1;
    private static final byte OP_D = 2;
    private static final byte OP_N = 3;
    private static final byte OP_S = 4;
    private static final byte OP_H = 5;
    private static final byte OP_P = 6;
    private static final byte OP_EQ = 7;
    private static final byte OP_X = 8;

    // 4-bit base codes, positioned in the low nybble of a byte.
    private static final byte COMPRESSED_EQUAL_LOW = 0;
    private static final byte COMPRESSED_A_LOW = 1;
    private static final byte COMPRESSED_C_LOW = 2;
    private static final byte COMPRESSED_M_LOW = 3;
    private static final byte COMPRESSED_G_LOW = 4;
    private static final byte COMPRESSED_R_LOW = 5;
    private static final byte COMPRESSED_S_LOW = 6;
    private static final byte COMPRESSED_V_LOW = 7;
    private static final byte COMPRESSED_T_LOW = 8;
    private static final byte COMPRESSED_W_LOW = 9;
    private static final byte COMPRESSED_Y_LOW = 10;
    private static final byte COMPRESSED_H_LOW = 11;
    private static final byte COMPRESSED_K_LOW = 12;
    private static final byte COMPRESSED_D_LOW = 13;
    private static final byte COMPRESSED_B_LOW = 14;
    private static final byte COMPRESSED_N_LOW = 15;

    // The same codes shifted into the high nybble of a byte.
    private static final byte COMPRESSED_EQUAL_HIGH = (byte) (COMPRESSED_EQUAL_LOW << 4);
    private static final byte COMPRESSED_A_HIGH = (byte) (COMPRESSED_A_LOW << 4);
    private static final byte COMPRESSED_C_HIGH = (byte) (COMPRESSED_C_LOW << 4);
    private static final byte COMPRESSED_G_HIGH = (byte) (COMPRESSED_G_LOW << 4);
    private static final byte COMPRESSED_T_HIGH = (byte) (COMPRESSED_T_LOW << 4);
    private static final byte COMPRESSED_N_HIGH = (byte) (COMPRESSED_N_LOW << 4);
    private static final byte COMPRESSED_M_HIGH = (byte) (COMPRESSED_M_LOW << 4);
    private static final byte COMPRESSED_R_HIGH = (byte) (COMPRESSED_R_LOW << 4);
    private static final byte COMPRESSED_S_HIGH = (byte) (COMPRESSED_S_LOW << 4);
    private static final byte COMPRESSED_V_HIGH = (byte) (COMPRESSED_V_LOW << 4);
    private static final byte COMPRESSED_W_HIGH = (byte) (COMPRESSED_W_LOW << 4);
    private static final byte COMPRESSED_Y_HIGH = (byte) (COMPRESSED_Y_LOW << 4);
    private static final byte COMPRESSED_H_HIGH = (byte) (COMPRESSED_H_LOW << 4);
    private static final byte COMPRESSED_K_HIGH = (byte) (COMPRESSED_K_LOW << 4);
    private static final byte COMPRESSED_D_HIGH = (byte) (COMPRESSED_D_LOW << 4);
    private static final byte COMPRESSED_B_HIGH = (byte) (COMPRESSED_B_LOW << 4);

    // Offsets (relative to start) of the variable-length sections of the
    // current record; -1 means "not known yet".
    private int CIGAR = -1;
    private int BASES = -1;
    private int SCORES = -1;
    private int TAGS = -1;
    /** Offset one past the last tag byte written so far, or -1 if no tag data has been written. */
    public int END = -1;

    byte[] buf;
    /**
     * Pointer to the beginning of the currently written record.
     */
    int start;

    /**
     * Creates a view positioned at the beginning of the given buffer.
     *
     * @param buf
     *            the array records are written to and read from
     */
    public BAMRecordView(byte[] buf) {
        this.buf = buf;
    }

    /**
     * Replaces the backing buffer and rewinds the view to its beginning.
     *
     * @param buf
     *            the new backing array
     */
    public void setData(byte[] buf) {
        this.buf = buf;
        reset();
    }

    /** Rewinds to offset 0 and forgets all variable-length section offsets. */
    public void reset() {
        position(0);
    }

    /** Writes the reference sequence id as a 32-bit integer. */
    public void setRefID(int id) {
        writeInt(id, REF_ID);
    }

    /** @return the stored reference sequence id */
    public int getRefID() {
        return getInt(REF_ID);
    }

    /**
     * Stores the alignment start.
     *
     * @param alignmenStart
     *            the alignment start; the value written is one less than
     *            this argument
     */
    public void setAlignmentStart(int alignmenStart) {
        writeInt(alignmenStart - 1, POS);
    }

    /**
     * Writes the read name as a NUL-terminated string and records where the
     * CIGAR section starts. Only the low-order byte of each character is
     * stored.
     *
     * <p>
     * NOTE(review): the length byte holds {@code readName.length() + 1}
     * truncated to 8 bits; names longer than 254 characters are not rejected
     * here.
     *
     * @param readName
     *            the read name
     */
    public void setReadName(String readName) {
        writeUByte((short) (readName.length() + 1), READ_NAME_LEN);
        CIGAR = READ_NAME + writeZString(readName, READ_NAME);
    }

    /** @return {@code true} once the CIGAR section offset is known for the current record */
    public boolean isReadNameSet() {
        return CIGAR > -1;
    }

    /**
     * Writes the read name from raw bytes, appends the NUL terminator and
     * records where the CIGAR section starts.
     *
     * @param readName
     *            the read name bytes, without terminator
     */
    public void setReadName(byte[] readName) {
        final int nameLength = readName.length;
        writeUByte((short) (nameLength + 1), READ_NAME_LEN);
        System.arraycopy(readName, 0, buf, start + READ_NAME, nameLength);
        buf[start + READ_NAME + nameLength] = 0;
        CIGAR = READ_NAME + nameLength + 1;
    }

    /** Writes the mapping quality as a single unsigned byte. */
    public void setMappingScore(int score) {
        writeUByte((short) score, MAP_Q);
    }

    /** Writes the index bin as an unsigned 16-bit value. */
    public void setIndexBin(int bin) {
        writeUShort(bin, INDEX_BIN);
    }

    /**
     * Writes the CIGAR element count and the binary-encoded CIGAR elements,
     * then records where the bases section starts.
     *
     * @param cigar
     *            the CIGAR to encode
     * @throws RuntimeException
     *             if the read name has not been written yet
     */
    public void setCigar(Cigar cigar) {
        if (CIGAR < 0) {
            throw new RuntimeException("Premature setting of cigar.");
        }
        writeUShort(cigar.numCigarElements(), CIGAR_LEN);

        int at = CIGAR;
        for (final int cigarElement : encodeBinaryCigar(cigar)) {
            // Assumption that this will fit into an integer, despite the fact
            // that it is specced as a uint.
            writeInt(cigarElement, at);
            at += 4;
        }
        BASES = at;
    }

    /** Writes the flags as an unsigned 16-bit value. */
    public void setFlags(int flags) {
        writeUShort(flags, FLAGS);
    }

    /** Writes the read length as a 32-bit integer. */
    public void setReadLength(int readLength) {
        writeInt(readLength, READ_LEN);
    }

    /** @return the stored read length */
    public int getReadLength() {
        return getInt(READ_LEN);
    }

    /** Writes the mate reference sequence id as a 32-bit integer. */
    public void setMateRefID(int mateRefID) {
        writeInt(mateRefID, MATE_REF_ID);
    }

    /** @return the stored mate reference sequence id */
    public int getMateRefID() {
        return getInt(MATE_REF_ID);
    }

    /**
     * Stores the mate alignment start.
     *
     * @param mateAlStart
     *            the mate alignment start; the value written is one less than
     *            this argument
     */
    public void setMateAlStart(int mateAlStart) {
        writeInt(mateAlStart - 1, MATE_AL_START);
    }

    /** Writes the insert size as a 32-bit integer. */
    public void setInsertSize(int insertSize) {
        writeInt(insertSize, INS_SIZE);
    }

    /**
     * Packs ASCII bases two per byte (first base in the high nybble), stores
     * {@code length} as the read length and records where the quality scores
     * section starts.
     *
     * @param bases
     *            array holding ASCII bases
     * @param offset
     *            index of the first base in {@code bases}
     * @param length
     *            number of bases to write
     * @throws RuntimeException
     *             if the CIGAR has not been written yet
     * @throws IllegalArgumentException
     *             if a base is not one of the recognised characters
     */
    public void setBases(byte[] bases, int offset, int length) {
        if (BASES < 0) {
            throw new RuntimeException("Premature setting of bases.");
        }
        final int dest = start + BASES;
        int i;
        for (i = 1; i < length; i += 2) {
            final byte high = charToCompressedBaseHigh(bases[offset + i - 1]);
            final byte low = charToCompressedBaseLow(bases[offset + i]);
            buf[dest + i / 2] = (byte) (high | low);
        }
        // Odd number of bases: the last one occupies a high nybble on its own.
        if (i == length) {
            buf[dest + i / 2] = charToCompressedBaseHigh(bases[offset + i - 1]);
        }
        setReadLength(length);
        SCORES = BASES + length / 2 + length % 2;
    }

    /** Writes all of {@code bases}; see {@link #setBases(byte[], int, int)}. */
    public void setBases(byte[] bases) {
        setBases(bases, 0, bases.length);
    }

    /**
     * Writes quality scores and records where the tags section starts. When
     * {@code length} is zero, one {@code 0xFF} byte per base of the stored
     * read length is written instead and {@code qualities} is not accessed.
     *
     * @param qualities
     *            array holding the scores
     * @param offset
     *            index of the first score in {@code qualities}
     * @param length
     *            number of scores to copy, or 0 for "scores absent"
     * @throws RuntimeException
     *             if the bases have not been written yet
     */
    public void setQualityScores(byte[] qualities, int offset, int length) {
        if (SCORES < 0) {
            throw new RuntimeException("Premature setting of scores.");
        }
        if (length != 0) {
            System.arraycopy(qualities, offset, buf, start + SCORES, length);
            TAGS = SCORES + length;
            return;
        }
        final int readLength = getReadLength();
        final int dest = start + SCORES;
        for (int i = 0; i < readLength; i++) {
            buf[dest + i] = (byte) 0xFF;
        }
        TAGS = SCORES + readLength;
    }

    /** Writes all of {@code qualities}; see {@link #setQualityScores(byte[], int, int)}. */
    public void setQualityScores(byte[] qualities) {
        setQualityScores(qualities, 0, qualities.length);
    }

    /**
     * Appends one tag: the three low-order bytes of {@code id} (least
     * significant first) followed by {@code len} bytes of value data.
     *
     * @param id
     *            the packed three-byte tag prefix
     * @param data
     *            array holding the tag value bytes
     * @param offset
     *            index of the first value byte in {@code data}
     * @param len
     *            number of value bytes
     * @throws RuntimeException
     *             if the quality scores have not been written yet
     */
    public void addTag(int id, byte[] data, int offset, int len) {
        if (TAGS < 0) {
            throw new RuntimeException("Premature addition of tag.");
        }
        if (END < 0) {
            END = TAGS;
        }
        buf[start + END++] = (byte) (id & 0xFF);
        buf[start + END++] = (byte) ((id >> 8) & 0xFF);
        buf[start + END++] = (byte) ((id >> 16) & 0xFF);
        System.arraycopy(data, offset, buf, start + END, len);
        END += len;
    }

    /**
     * Appends already-serialised tag bytes to the tags section.
     *
     * <p>
     * The bytes are written at the current end of the tags section, so the
     * position of the data always agrees with {@link #END}; on the first call
     * for a record that is the start of the tags section.
     *
     * @param data
     *            array holding the serialised tags
     * @param offset
     *            index of the first byte in {@code data}
     * @param length
     *            number of bytes to copy
     * @throws RuntimeException
     *             if the quality scores have not been written yet
     */
    public void setTagData(byte[] data, int offset, int length) {
        if (TAGS < 0) {
            throw new RuntimeException("Premature addition of tag.");
        }
        if (END < 0) {
            END = TAGS;
        }
        System.arraycopy(data, offset, buf, start + END, length);
        END += length;
    }

    /**
     * Completes the current record: writes its block size (total record size
     * minus the four bytes of the size field itself) and moves the view to
     * the first byte after the record.
     *
     * @return the total number of bytes occupied by the record
     * @throws RuntimeException
     *             if the quality scores have not been written yet
     */
    public int finish() {
        // A record without tags ends where the tags section would begin.
        final int blockSize = END < 0 ? TAGS : END;
        if (blockSize < 0) {
            throw new RuntimeException("Incomplete record.");
        }
        writeInt(blockSize - 4, BLOCK_SIZE);
        // Advance by the record size, which is valid whether or not tags were written.
        position(start + blockSize);
        return blockSize;
    }

    /**
     * Moves the view to a new record start and forgets all variable-length
     * section offsets.
     *
     * @param start
     *            absolute index in the buffer of the record's first byte
     */
    public void position(int start) {
        this.start = start;
        CIGAR = -1;
        BASES = -1;
        SCORES = -1;
        TAGS = -1;
        END = -1;
    }

    /** @return absolute index in the buffer of the current record's first byte */
    public int position() {
        return start;
    }

    /** @return the stored reference sequence id (same field as {@link #getRefID()}) */
    public int getSequenceId() {
        return getInt(REF_ID);
    }

    /**
     * @return the raw stored position field, i.e. one less than the value
     *         passed to {@link #setAlignmentStart(int)}
     */
    public int getAlignmentStart() {
        return getInt(POS);
    }

    /** @return the stored flags as an unsigned 16-bit value */
    public int getFlags() {
        return getUShort(FLAGS);
    }

    /**
     * Walks the stored CIGAR and returns
     * {@code getAlignmentStart() - 1 + (sum of M, D, N, = and X lengths)}.
     *
     * <p>
     * As a side effect the CIGAR section offset is derived from the stored
     * read name length, so {@link #isReadNameSet()} returns {@code true}
     * afterwards.
     *
     * @return the computed alignment end, in the same coordinate system as
     *         {@link #getAlignmentStart()}
     */
    public int calculateAlignmentEnd() {
        int alignmentEnd = getAlignmentStart() - 1;
        final int readNameLength = getUByte(READ_NAME_LEN);
        final int cigarLength = getUShort(CIGAR_LEN);
        CIGAR = READ_NAME + readNameLength;

        final int cigarEnd = CIGAR + 4 * cigarLength;
        for (int at = CIGAR; at < cigarEnd; at += 4) {
            final int element = getInt(at);
            final byte op = (byte) (element & 15);
            final int opLength = element >>> 4;
            switch (op) {
            case OP_M:
            case OP_D:
            case OP_N:
            case OP_EQ:
            case OP_X:
                alignmentEnd += opLength;
                break;
            case OP_I:
            case OP_S:
            case OP_H:
            case OP_P:
            default:
                // These operators do not advance the alignment end.
                break;
            }
        }
        return alignmentEnd;
    }

    /**
     * Encodes each CIGAR element as {@code length << 4 | operatorCode}.
     *
     * @param cigar
     *            the CIGAR to encode
     * @return one int per CIGAR element, in order; empty for an empty CIGAR
     */
    static int[] encodeBinaryCigar(final Cigar cigar) {
        final int elementCount = cigar.numCigarElements();
        final int[] binaryCigar = new int[elementCount];
        for (int i = 0; i < elementCount; ++i) {
            final CigarElement cigarElement = cigar.getCigarElement(i);
            final int op = CigarOperator.enumToBinary(cigarElement.getOperator());
            binaryCigar[i] = cigarElement.getLength() << 4 | op;
        }
        return binaryCigar;
    }

    /** Writes a 32-bit little-endian integer at the given record-relative offset. */
    private void writeInt(final int value, final int at) {
        final int i = start + at;
        buf[i] = (byte) (value & 0xFF);
        buf[i + 1] = (byte) ((value >> 8) & 0xFF);
        buf[i + 2] = (byte) ((value >> 16) & 0xFF);
        buf[i + 3] = (byte) ((value >> 24) & 0xFF);
    }

    /** Reads a 32-bit little-endian integer from the given record-relative offset. */
    private int getInt(final int at) {
        final int i = start + at;
        return (0xFF & buf[i])
                | ((0xFF & buf[i + 1]) << 8)
                | ((0xFF & buf[i + 2]) << 16)
                | ((0xFF & buf[i + 3]) << 24);
    }

    /**
     * Writes an unsigned 32-bit little-endian integer.
     *
     * @return the number of bytes written (always 4)
     * @throws IllegalArgumentException
     *             if the value is negative or exceeds the unsigned 32-bit range
     */
    private int writeUInt(final long value, final int at) {
        if (value < 0) {
            throw new IllegalArgumentException("Negative value (" + value + ") passed to unsigned writing method.");
        }
        if (value > MAX_UINT) {
            throw new IllegalArgumentException("Value (" + value + ") too large to be written as uint.");
        }
        final int i = start + at;
        buf[i] = (byte) (value & 0xFF);
        buf[i + 1] = (byte) ((value >> 8) & 0xFF);
        buf[i + 2] = (byte) ((value >> 16) & 0xFF);
        buf[i + 3] = (byte) ((value >> 24) & 0xFF);
        return 4;
    }

    /** Reads an unsigned 32-bit little-endian integer from the given record-relative offset. */
    private long getUInt(final int at) {
        // Mask away the sign extension so the result is in [0, 2^32).
        return getInt(at) & MAX_UINT;
    }

    /**
     * Writes the low-order byte of every character followed by a NUL byte.
     *
     * @return the number of bytes written, terminator included
     */
    private int writeZString(final String value, final int at) {
        final int length = value.length();
        final int dest = start + at;
        for (int i = 0; i < length; i++) {
            buf[dest + i] = (byte) value.charAt(i);
        }
        buf[dest + length] = 0;
        return length + 1;
    }

    /** Writes the low-order 8 bits of {@code value}. */
    private void writeUByte(short value, int at) {
        buf[start + at] = (byte) (value & 0xFF);
    }

    /** Reads one byte as an unsigned value. */
    private int getUByte(int at) {
        return buf[start + at] & 0xFF;
    }

    /**
     * Writes the low-order 8 bits of {@code value}.
     *
     * @return the number of bytes written (always 1)
     */
    private byte writeByte(int value, int at) {
        buf[start + at] = (byte) value;
        return 1;
    }

    /** Writes the low-order 16 bits of {@code value}, little-endian. */
    private void writeUShort(final int value, final int at) {
        buf[start + at] = (byte) (value & 0xFF);
        buf[start + at + 1] = (byte) ((value >> 8) & 0xFF);
    }

    /** Reads an unsigned 16-bit little-endian value. */
    private int getUShort(int at) {
        final int low = buf[start + at] & 0xFF;
        final int high = buf[start + at + 1] & 0xFF;
        return (high << 8) | low;
    }

    /**
     * Writes a 16-bit little-endian value.
     *
     * @return the number of bytes written (always 2)
     */
    private int writeShort(final short value, final int at) {
        buf[start + at] = (byte) (value & 0xFF);
        buf[start + at + 1] = (byte) ((value >> 8) & 0xFF);
        return 2;
    }

    /**
     * Convert from a byte array containing bases represented as ASCII, to a
     * byte array half as long (rounded up), with each base converted to its
     * 4-bit code and the first base of every pair in the high-order nybble.
     *
     * @param readBases
     *            Bases as ASCII bytes.
     * @return New byte array with bases represented as nybbles, in BAM binary
     *         format.
     */
    private static byte[] bytesToCompressedBases(final byte[] readBases) {
        final byte[] compressedBases = new byte[(readBases.length + 1) / 2];
        int i;
        for (i = 1; i < readBases.length; i += 2) {
            final byte high = charToCompressedBaseHigh(readBases[i - 1]);
            final byte low = charToCompressedBaseLow(readBases[i]);
            compressedBases[i / 2] = (byte) (high | low);
        }
        // Last nybble
        if (i == readBases.length) {
            compressedBases[i / 2] = charToCompressedBaseHigh(readBases[i - 1]);
        }
        return compressedBases;
    }

    /**
     * Convert from ASCII byte to BAM nybble representation of a base in
     * high-order nybble.
     *
     * @param base
     *            One of the characters accepted by
     *            {@link #charToCompressedBaseLow(int)}.
     * @return High-order nybble-encoded equivalent.
     * @throws IllegalArgumentException
     *             if the character is not recognised
     */
    private static byte charToCompressedBaseHigh(final int base) {
        // Every *_HIGH code is by definition the matching *_LOW code shifted by 4.
        return (byte) (charToCompressedBaseLow(base) << 4);
    }

    /**
     * Convert from BAM nybble representation of a base in low-order nybble to
     * ASCII byte. Only the four low-order bits of the argument are examined.
     *
     * @param base
     *            One of COMPRESSED_*_LOW, a low-order nybble encoded base.
     * @return Upper-case ASCII base.
     */
    private static byte compressedBaseToByteLow(final int base) {
        switch (base & 0xf) {
        case COMPRESSED_EQUAL_LOW:
            return '=';
        case COMPRESSED_A_LOW:
            return 'A';
        case COMPRESSED_C_LOW:
            return 'C';
        case COMPRESSED_G_LOW:
            return 'G';
        case COMPRESSED_T_LOW:
            return 'T';
        case COMPRESSED_N_LOW:
            return 'N';
        // IUPAC ambiguity codes
        case COMPRESSED_M_LOW:
            return 'M';
        case COMPRESSED_R_LOW:
            return 'R';
        case COMPRESSED_S_LOW:
            return 'S';
        case COMPRESSED_V_LOW:
            return 'V';
        case COMPRESSED_W_LOW:
            return 'W';
        case COMPRESSED_Y_LOW:
            return 'Y';
        case COMPRESSED_H_LOW:
            return 'H';
        case COMPRESSED_K_LOW:
            return 'K';
        case COMPRESSED_D_LOW:
            return 'D';
        case COMPRESSED_B_LOW:
            return 'B';
        default:
            // Not reachable: all sixteen nybble values are handled above.
            throw new IllegalArgumentException("Bad byte passed to compressedBaseToByteLow: " + base);
        }
    }

    /**
     * Convert from ASCII byte to BAM nybble representation of a base in
     * low-order nybble.
     *
     * @param base
     *            One of {@code =ACGTN.} or an IUPAC ambiguity code
     *            ({@code MRSVWYHKDB}); letters are accepted in either case.
     * @return Low-order nybble-encoded equivalent.
     * @throws IllegalArgumentException
     *             if the character is not recognised
     */
    private static byte charToCompressedBaseLow(final int base) {
        switch (base) {
        case '=':
            return COMPRESSED_EQUAL_LOW;
        case 'a':
        case 'A':
            return COMPRESSED_A_LOW;
        case 'c':
        case 'C':
            return COMPRESSED_C_LOW;
        case 'g':
        case 'G':
            return COMPRESSED_G_LOW;
        case 't':
        case 'T':
            return COMPRESSED_T_LOW;
        case 'n':
        case 'N':
        case '.':
            return COMPRESSED_N_LOW;
        // IUPAC ambiguity codes
        case 'M':
        case 'm':
            return COMPRESSED_M_LOW;
        case 'R':
        case 'r':
            return COMPRESSED_R_LOW;
        case 'S':
        case 's':
            return COMPRESSED_S_LOW;
        case 'V':
        case 'v':
            return COMPRESSED_V_LOW;
        case 'W':
        case 'w':
            return COMPRESSED_W_LOW;
        case 'Y':
        case 'y':
            return COMPRESSED_Y_LOW;
        case 'H':
        case 'h':
            return COMPRESSED_H_LOW;
        case 'K':
        case 'k':
            return COMPRESSED_K_LOW;
        case 'D':
        case 'd':
            return COMPRESSED_D_LOW;
        case 'B':
        case 'b':
            return COMPRESSED_B_LOW;
        default:
            throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base);
        }
    }
}