/*******************************************************************************
* Copyright 2013 EMBL-EBI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package htsjdk.samtools.cram.encoding.reader;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.TextCigarCodec;
import htsjdk.samtools.cram.structure.CramCompressionRecord;
import htsjdk.samtools.cram.structure.ReadTag;
import htsjdk.samtools.cram.structure.SubstitutionMatrix;
import java.io.IOException;
public class ReaderToBAM extends AbstractReader {
protected static int detachedCount = 0;
protected int recordCounter = 0;
protected CramCompressionRecord prevRecord;
public int refId;
public SubstitutionMatrix substitutionMatrix;
public boolean AP_delta = true;
public byte[] buf = new byte[1024 * 1024 * 100];
public int[] index = new int[4 * 100000];
public int[] distances = new int[4 * 100000];
private int[] names = new int[4 * 100000];
private int[] next = new int[distances.length];
private int[] prev = new int[distances.length];
private BAMRecordView view = new BAMRecordView(buf);
private int flags;
private int compressionFlags;
private int readLength;
public int prevAlStart = 1;
private int readGroupID;
private byte mateFlags;
private int tagDataLen;
private byte[] tagData = new byte[1024 * 1024];
public byte[] ref;
public static final int maxReadBufferLength = 1024 * 1024;
public byte[] bases = new byte[maxReadBufferLength];
public byte[] scores = new byte[maxReadBufferLength];
private ReadFeatureBuffer rfBuf = new ReadFeatureBuffer();
private byte[][] readGroups = null;
public void read() throws IOException {
index[recordCounter] = view.position();
try {
flags = bitFlagsCodec.readData();
compressionFlags = compressionBitFlagsCodec.readData();
if (refId == -2)
view.setRefID(refIdCodec.readData());
else
view.setRefID(refId);
readLength = readLengthCodec.readData();
if (AP_delta)
prevAlStart += alignmentStartCodec.readData();
else
prevAlStart = alignmentStartCodec.readData();
view.setAlignmentStart(prevAlStart);
readGroupID = readGroupCodec.readData();
if (captureReadNames)
view.setReadName(readNameCodec.readData());
// mate record:
if ((compressionFlags & CramFlags.DETACHED_FLAG) != 0) {
mateFlags = mateBitFlagCodec.readData();
if (!captureReadNames)
view.setReadName(readNameCodec.readData());
view.setMateRefID(mateReferenceIdCodec.readData());
view.setMateAlStart(mateAlignmentStartCodec.readData());
view.setInsertSize(insertSizeCodec.readData());
detachedCount++;
distances[recordCounter] = 0;
next[recordCounter] = -1;
prev[recordCounter] = -1;
flags |= (mateFlags & CramFlags.MATE_NEG_STRAND_FLAG) != 0 ? BamFlags.MATE_STRAND_FLAG : 0;
flags |= (mateFlags & CramFlags.MATE_UNMAPPED_FLAG) != 0 ? BamFlags.MATE_UNMAPPED_FLAG : 0;
} else if ((compressionFlags & CramFlags.HAS_MATE_DOWNSTREAM_FLAG) != 0) {
mateFlags = 0;
distances[recordCounter] = distanceToNextFragmentCodec.readData();
next[recordCounter] = recordCounter + distances[recordCounter] + 1;
prev[next[recordCounter]] = recordCounter;
names[recordCounter + distances[recordCounter]] = recordCounter;
}
view.setFlags(flags);
if (!view.isReadNameSet()) {
if (names[recordCounter] == 0)
view.setReadName(String.valueOf(recordCounter).getBytes());
else
view.setReadName(String.valueOf(names[recordCounter]).getBytes());
}
tagDataLen = 0;
if (readGroupID >= 0) {
tagData[tagDataLen++] = (byte) 'R';
tagData[tagDataLen++] = (byte) 'G';
tagData[tagDataLen++] = (byte) 'Z';
System.arraycopy(readGroups[readGroupID], 0, tagData, tagDataLen, readGroups[readGroupID].length);
tagDataLen += readGroups[readGroupID].length;
tagData[tagDataLen++] = (byte) 0;
}
Integer tagIdList = tagIdListCodec.readData();
byte[][] ids = tagIdDictionary[tagIdList];
if (ids.length > 0) {
for (int i = 0; i < ids.length; i++) {
int id = ReadTag.name3BytesToInt(ids[i]);
DataReader<byte[]> dataReader = tagValueCodecs.get(id);
byte[] data = dataReader.readData();
tagData[tagDataLen++] = (byte) ((id >> 16) & 0xFF);
tagData[tagDataLen++] = (byte) ((id >> 8) & 0xFF);
tagData[tagDataLen++] = (byte) (id & 0xFF);
System.arraycopy(data, 0, tagData, tagDataLen, data.length);
tagDataLen += data.length;
}
}
if ((flags & CramFlags.SEGMENT_UNMAPPED_FLAG) == 0) {
rfBuf.readReadFeatures(this);
rfBuf.restoreReadBases(readLength, prevAlStart, ref, substitutionMatrix, bases);
// mapping quality:
view.setMappingScore(mappingScoreCodec.readData());
if ((compressionFlags & CramFlags.FORCE_PRESERVE_QS_FLAG) != 0)
for (int i=0; i<readLength; i++)
scores[i]=qualityScoreCodec.readData();
} else {
view.setMappingScore(SAMRecord.NO_MAPPING_QUALITY);
for (int i = 0; i < readLength; i++)
bases[i] = baseCodec.readData();
if ((compressionFlags & CramFlags.FORCE_PRESERVE_QS_FLAG) != 0)
for (int i=0; i<readLength; i++)
scores[i]=qualityScoreCodec.readData();
}
if ((flags & CramFlags.SEGMENT_UNMAPPED_FLAG) != 0) {
Cigar noCigar = TextCigarCodec.decode(SAMRecord.NO_ALIGNMENT_CIGAR);
view.setCigar(noCigar);
} else
view.setCigar(rfBuf.getCigar(readLength));
view.setBases(bases, 0, readLength);
view.setQualityScores(scores, 0, readLength);
view.setTagData(tagData, 0, tagDataLen);
recordCounter++;
} catch (Exception e) {
System.err.printf("Failed at record %d, read len=%d\n.", recordCounter, readLength);
throw new RuntimeException(e);
}
}
private int getFlagsForRecord(int record) {
view.start = index[record];
return view.getFlags();
}
private void setFlagsForRecord(int record, int flags) {
view.start = index[record];
view.setFlags(flags);
}
private int getAlignmentStartForRecord(int record) {
view.start = index[record];
return view.getAlignmentStart();
}
/**
* @param record
* @return inclusive 0-based coordinate of the last base alignment
*/
private int getAlignmentEndForRecord(int record) {
view.start = index[record];
return view.calculateAlignmentEnd();
}
private int getRefIDForRecord(int record) {
view.start = index[record];
return view.getRefID();
}
private void setInsertSizeForRecord(int record, int insertSize) {
view.start = index[record];
view.setInsertSize(insertSize);
}
private void setMateRefIDForRecord(int record, int refId) {
view.start = index[record];
view.setMateRefID(refId);
}
private void setMateAlignmentStartForRecord(int record, int astart) {
view.start = index[record];
view.setMateAlStart(astart);
}
private boolean recordHasMoreMates(int record) {
return next[record] > 0;
}
private int nextRecord(int record) {
if (next[record] > 0)
return next[record];
return -1;
}
private boolean isFirstInPairBitOnForRecord(int record) {
return (getFlagsForRecord(record) & CramFlags.FIRST_SEGMENT_FLAG) != 0;
}
public void fixMateInfo() {
int fixes = 0;
for (int record = 0; record < recordCounter; record++) {
if (prev[record] >= 0)
// skip non-first reads
continue;
if (recordHasMoreMates(record)) {
int id1 = record;
int id2 = record;
int refid = getRefIDForRecord(record);
do {
id1 = id2;
id2 = nextRecord(id1);
int flags1 = getFlagsForRecord(id1);
int flags2 = getFlagsForRecord(id2);
if (refid != getRefIDForRecord(id2))
refid = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
if ((flags2 & BamFlags.READ_UNMAPPED_FLAG) != 0) {
flags1 |= BamFlags.MATE_UNMAPPED_FLAG;
refid = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
}
if ((flags1 & BamFlags.READ_UNMAPPED_FLAG) != 0) {
flags2 |= BamFlags.MATE_UNMAPPED_FLAG;
refid = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
}
if ((flags2 & BamFlags.READ_STRAND_FLAG) != 0)
flags1 |= BamFlags.MATE_STRAND_FLAG;
if ((flags1 & BamFlags.READ_STRAND_FLAG) != 0)
flags2 |= BamFlags.MATE_STRAND_FLAG;
setFlagsForRecord(id1, flags1);
setFlagsForRecord(id2, flags2);
setMateRefIDForRecord(id1, getRefIDForRecord(id2));
setMateAlignmentStartForRecord(id1, 1 + getAlignmentStartForRecord(id2));
fixes++;
} while (recordHasMoreMates(id2));
id1 = record;
int flags1 = getFlagsForRecord(id1);
int flags2 = getFlagsForRecord(id2);
if (refid != getRefIDForRecord(id1))
refid = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
if ((flags2 & BamFlags.READ_UNMAPPED_FLAG) != 0) {
flags1 |= BamFlags.MATE_UNMAPPED_FLAG;
refid = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
}
if ((flags1 & BamFlags.READ_UNMAPPED_FLAG) != 0) {
flags2 |= BamFlags.MATE_UNMAPPED_FLAG;
refid = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
}
if ((flags2 & BamFlags.READ_STRAND_FLAG) != 0)
flags1 |= BamFlags.MATE_STRAND_FLAG;
if ((flags1 & BamFlags.READ_STRAND_FLAG) != 0)
flags2 |= BamFlags.MATE_STRAND_FLAG;
setFlagsForRecord(id1, flags1);
setFlagsForRecord(id2, flags2);
setMateRefIDForRecord(id2, getRefIDForRecord(record));
setMateAlignmentStartForRecord(id2, 1 + getAlignmentStartForRecord(record));
fixes++;
if (refid == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
id1 = record;
id2 = record;
do {
id1 = id2;
id2 = nextRecord(id1);
setInsertSizeForRecord(id1, 0);
setInsertSizeForRecord(id2, 0);
fixes++;
} while (recordHasMoreMates(id2));
} else
calculateTemplateSize(record);
}
}
}
private int calculateTemplateSize(int record) {
int aleft = getAlignmentStartForRecord(record);
int aright = getAlignmentEndForRecord(record);
int id1 = record, id2 = record;
int tlen = 0;
int ref = getRefIDForRecord(id1);
// System.out.println("Calculating template size for record " + record);
while (recordHasMoreMates(id2)) {
id2 = nextRecord(id2);
// System.out.println("\tnext=" + id2);
if (aleft > getAlignmentStartForRecord(id2))
aleft = getAlignmentStartForRecord(id2);
if (aright < getAlignmentEndForRecord(id2))
aright = getAlignmentEndForRecord(id2);
if (ref != getRefIDForRecord(id2))
ref = -1;
}
if (ref == -1) {
tlen = 0;
id1 = id2 = record;
setInsertSizeForRecord(id1, tlen);
while (recordHasMoreMates(id1)) {
id1 = nextRecord(id1);
setInsertSizeForRecord(id1, tlen);
}
} else {
tlen = aright - aleft + 1;
id1 = id2 = record;
if (getAlignmentStartForRecord(id2) == aleft) {
if (getAlignmentEndForRecord(id2) != aright)
setInsertSizeForRecord(id2, tlen);
else if (isFirstInPairBitOnForRecord(id2))
setInsertSizeForRecord(id2, tlen);
else
setInsertSizeForRecord(id2, -tlen);
} else
setInsertSizeForRecord(id2, -tlen);
while (recordHasMoreMates(id2)) {
id2 = nextRecord(id2);
if (getAlignmentStartForRecord(id2) == aleft) {
if (getAlignmentEndForRecord(id2) != aright)
setInsertSizeForRecord(id2, tlen);
else if (isFirstInPairBitOnForRecord(id2))
setInsertSizeForRecord(id2, tlen);
else
setInsertSizeForRecord(id2, -tlen);
} else
setInsertSizeForRecord(id2, -tlen);
}
}
return tlen;
}
}