package picard.illumina.parser.readers;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.RuntimeIOException;
import picard.illumina.parser.BclData;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;
/**
* ------------------------------------- CBCL Header -----------------------------------
* Bytes 0 - 1 Version number, current version is 1 unsigned 16 bits little endian integer
* Bytes 2 - 5 Header size unsigned 32 bits little endian integer
* Byte 6 Number of bits per basecall unsigned
* Byte 7 Number of bits per q-score unsigned
* <p>
* q-val mapping info
* Bytes 0-3 Number of bins (B), zero indicates no mapping
* B pairs of 4 byte values (if B > 0) {from, to}, {from, to}, {from, to} …from: quality score bin to: quality score
* <p>
* Number of tile records unsigned 32bits little endian integer
* <p>
* gzip virtual file offsets, one record per tile
* Bytes 0-3: tile number
* Bytes 4-7 Number of clusters that were written into the current block (required due to bit-packed q-scores)
* unsigned 32 bit integer
* <p>
* Bytes 8-11 Uncompressed block size of the tile data (useful for sanity check whenexcluding non-PF clusters)
* unsigned 32 bit integer
* <p>
* Bytes 12-15 Compressed block size of the tile data unsigned 32 bit integer
* <p>
* non-PF clusters excluded flag 1: non-PF clusters are excluded 0: non-PF clusters are included
* <p>
* ------------------------------------- CBCL File Content -----------------------------------
* <p>
* N blocks of gzip files, where N is the number of tiles.
* <p>
* Each block consists of C number of basecall, quality score pairs where C is the number of clusters for the given tile.
* <p>
* Each basecall, quality score pair has the following format (assuming 2 bits are used for the basecalls):
* Bits 0-1: Basecalls (respectively [A, C, G, T] for [00, 01, 10, 11])
* Bits 2 and up: Quality score (unsigned Q bit little endian integer where Q is the number of bits per q-score).
* For a two bit quality score, this is two clusters per byte where the bottom 4 bits are the first cluster and the
* higher 4 bits are the second cluster.
**/
public class CbclReader extends BaseBclReader implements CloseableIterator<BclData> {
private static Log log = Log.getInstance(CbclReader.class);
private ByteBuffer[] cachedTile;
private int[] currentTile;
private List<BclData> queue = new ArrayList<>();
private final CycleData[] cycleData;
private Map<Integer, File> filters;
private Map<Integer, boolean[]> cachedFilter = new HashMap<>();
void addFilters(Map<Integer, File> filters) {
this.filters = filters;
}
private static final int INITIAL_HEADER_SIZE = 6;
public CbclReader(final List<File> cbcls, final int[] outputLengths) {
super(outputLengths);
cycleData = new CycleData[cycles];
cachedTile = new ByteBuffer[cycles];
currentTile = new int[cycles];
try {
final ByteBuffer byteBuffer = ByteBuffer.allocate(INITIAL_HEADER_SIZE);
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
for (int i = 0; i < cycles; i++) {
currentTile[i] = 0;
final File bclFile = cbcls.get(i);
final InputStream stream = open(bclFile, false, false, false);
int read = stream.read(byteBuffer.array());
//we need to read the first 6 bytes to determine the header size
if (read != INITIAL_HEADER_SIZE) {
close();
throw new RuntimeIOException(String.format("BCL %s has invalid header structure.", bclFile.getAbsoluteFile()));
}
short version = byteBuffer.getShort();
int headerSize = byteBuffer.getInt();
final ByteBuffer headerBuffer = ByteBuffer.allocate(headerSize - INITIAL_HEADER_SIZE);
headerBuffer.order(ByteOrder.LITTLE_ENDIAN);
read = stream.read(headerBuffer.array());
if (read != headerSize - INITIAL_HEADER_SIZE) {
close();
throw new RuntimeIOException(String.format("BCL %s has invalid header structure.", bclFile.getAbsoluteFile()));
}
byte bitsPerBasecall = headerBuffer.get();
byte bitsPerQualityScore = headerBuffer.get();
if (bitsPerBasecall != 2 && bitsPerBasecall != bitsPerQualityScore) {
close();
throw new RuntimeIOException("CBCL data not encoded in nibbles. (not currently supported)");
}
int numberOfBins = headerBuffer.getInt();
byte[] qualityBins = new byte[numberOfBins];
//each bin has a pair of 4 byte mappings
for (int j = 0; j < numberOfBins; j++) {
headerBuffer.getInt(); // first int is "from" value, which we don't need
int to = headerBuffer.getInt();
qualityBins[j] = (byte) to;
}
int numTiles = headerBuffer.getInt();
TileData[] tileInfo = new TileData[numTiles];
for (int j = 0; j < numTiles; j++) {
int tileNum = headerBuffer.getInt();
int numClustersInTile = headerBuffer.getInt();
int uncompressedBlockSize = headerBuffer.getInt();
int compressedBlockSize = headerBuffer.getInt();
tileInfo[j] = new TileData(tileNum, numClustersInTile, uncompressedBlockSize, compressedBlockSize);
}
boolean pfExcluded = headerBuffer.get() == 1;
cycleData[i] = new CycleData(version, headerSize, bitsPerBasecall, bitsPerQualityScore, numberOfBins, qualityBins, numTiles, tileInfo, pfExcluded);
this.streams[i] = stream;
this.streamFiles[i] = bclFile;
byteBuffer.clear();
headerBuffer.clear();
}
} catch (final IOException ioe) {
throw new RuntimeIOException(ioe);
}
}
@Override
public boolean hasNext() {
if (queue.isEmpty()) {
advance();
}
return !queue.isEmpty();
}
public BclData next() {
if (queue.isEmpty()) {
advance();
}
final BclData data = queue.get(0);
queue.remove(0);
return data;
}
@Override
public void close() {
for (final InputStream stream : this.streams) {
CloserUtil.close(stream);
}
}
private void advance() {
int totalCycleCount = 0;
BclData data = new BclData(outputLengths);
for (int read = 0; read < outputLengths.length; read++) {
for (int cycle = 0; cycle < outputLengths[read]; cycle++) {
try {
CycleData currentCycleData = cycleData[totalCycleCount];
TileData currentTileData = currentCycleData.tileInfo[currentTile[totalCycleCount]];
try {
if (cachedTile[totalCycleCount] == null) {
if (!cachedFilter.containsKey(currentTileData.tileNum)) {
cacheFilter(currentTileData);
}
cacheTile(totalCycleCount, currentTileData, currentCycleData);
}
} catch (IOException e) {
// when logging the error, increment cycle by 1, since totalCycleCount is zero-indexed but Illumina directories are 1-indexed.
throw new IOException(String.format("Error while reading from BCL file for cycle %d. Offending file on disk is %s",
(totalCycleCount + 1), this.streamFiles[totalCycleCount].getAbsolutePath()), e);
}
if (!cachedTile[totalCycleCount].hasRemaining()) {
//on to the next tile
currentTile[totalCycleCount]++;
//if past the last tile then return
if (currentTile[totalCycleCount] > currentCycleData.tileInfo.length - 1) {
return;
}
currentTileData = currentCycleData.tileInfo[currentTile[totalCycleCount]];
if (!cachedFilter.containsKey(currentTileData.tileNum)) {
cacheFilter(currentTileData);
}
cacheTile(totalCycleCount, currentTileData, currentCycleData);
}
int singleByte = cachedTile[totalCycleCount].get();
decodeQualityBinnedBasecall(data, read, cycle, singleByte, currentCycleData);
totalCycleCount++;
} catch (final IOException ioe) {
throw new RuntimeIOException(ioe);
}
}
}
this.queue.add(data);
}
private void cacheFilter(TileData currentTileData) {
boolean[] filterValues = new boolean[currentTileData.numClustersInTile];
FilterFileReader reader = new FilterFileReader(filters.get(currentTileData.tileNum));
int count = 0;
while (reader.hasNext()) {
filterValues[count] = reader.next();
count++;
}
cachedFilter.put(currentTileData.tileNum, filterValues);
}
private void cacheTile(int totalCycleCount, TileData tileData, CycleData currentCycleData) throws IOException {
ByteBuffer tileByteBuffer = ByteBuffer.allocate(tileData.compressedBlockSize);
//we are going to explode the nibbles in to bytes to make PF filtering easier
ByteBuffer tempUncompressedByteBuffer = ByteBuffer.allocate(tileData.uncompressedBlockSize);
ByteBuffer uncompressedByteBuffer = ByteBuffer.allocate(tileData.uncompressedBlockSize);
ByteBuffer unNibbledByteBuffer = ByteBuffer.allocate(tileData.uncompressedBlockSize * 2);
tileByteBuffer.order(ByteOrder.LITTLE_ENDIAN);
uncompressedByteBuffer.order(ByteOrder.LITTLE_ENDIAN);
// Read the whole compressed block into a buffer, then sanity check the length
int readBytes = this.streams[totalCycleCount].read(tileByteBuffer.array());
if (readBytes != tileData.compressedBlockSize) {
throw new IOException(String.format("Error while reading from BCL file for cycle %d. Offending file on disk is %s",
(totalCycleCount + 1), this.streamFiles[totalCycleCount].getAbsolutePath()));
}
// Uncompress the data from the buffer we just wrote - use gzip input stream to write to uncompressed buffer
ByteArrayInputStream byteInputStream = new ByteArrayInputStream(tileByteBuffer.array());
if (cachedTile[totalCycleCount] != null) {
//clear the old cache
cachedTile[totalCycleCount] = null;
}
GZIPInputStream gzipInputStream = new GZIPInputStream(byteInputStream, uncompressedByteBuffer.capacity());
int read = 0;
int totalRead = 0;
while ((read = gzipInputStream.read(tempUncompressedByteBuffer.array(), 0, tempUncompressedByteBuffer.capacity())) != -1) {
uncompressedByteBuffer.put(tempUncompressedByteBuffer.array(), 0, read);
totalRead += read;
}
if (totalRead != tileData.uncompressedBlockSize) {
throw new IOException(String.format("Error while decompressing from BCL file for cycle %d. Offending file on disk is %s",
(totalCycleCount + 1), this.streamFiles[totalCycleCount].getAbsolutePath()));
}
// Read uncompressed data from the buffer and expand each nibble into a full byte for ease of use
uncompressedByteBuffer.flip();
while (uncompressedByteBuffer.hasRemaining()) {
byte singleByte = uncompressedByteBuffer.get();
unNibbledByteBuffer.put((byte) (singleByte & 0x0f));
unNibbledByteBuffer.put((byte) ((singleByte >> 4) & 0x0f));
}
gzipInputStream.close();
// Write buffer contents to cached tile array
unNibbledByteBuffer.flip();
//if nonPF reads are included we need to strip them out
if (!currentCycleData.pfExcluded) {
ByteBuffer uncompressedFilteredByteBuffer = ByteBuffer.allocate(tileData.uncompressedBlockSize * 2);
FilterFileReader reader = new FilterFileReader(filters.get(tileData.tileNum));
while (reader.hasNext()) {
byte readByte = unNibbledByteBuffer.get();
if (reader.next()) {
uncompressedFilteredByteBuffer.put(readByte);
}
}
uncompressedFilteredByteBuffer.flip();
cachedTile[totalCycleCount] = uncompressedFilteredByteBuffer.slice();
} else {
cachedTile[totalCycleCount] = unNibbledByteBuffer;
}
}
}