/*
 * The MIT License
 *
 * Copyright (c) 2014 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

package htsjdk.samtools.util;

import htsjdk.samtools.BAMRecordCodec;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;

import java.io.File;
import java.util.ArrayDeque;
import java.util.BitSet;
import java.util.Deque;
import java.util.List;
import java.util.NoSuchElementException;

/**
 * This class stores SAMRecords for return.  The purpose of this class is to buffer records that need to be modified or processed in some
 * fashion, and only return (or emit) them when they have been recorded as being fully examined.  If we have too many records in RAM,
 * we can spill over to disk.  The order in which they are given (via SamRecordWithOrdinal) determines their order of being returned.  It is the
 * responsibility of the user of this class to make sure all records have a unique index and are added in order.
 *
 * When a record is examined, we also store a result state.  This is currently a boolean to reduce memory and disk footprint.
 *
 * We store groups of records in blocks and the size of these blocks can be controlled.  If we have too many records in RAM, we start
 * spilling blocks to disk.
 *
 * Users should check isEmpty() to see if any records are still being tracked.  If so, they should check canEmit() to see if the
 * next record can be returned.  If so, they can call next() to get that record.
 *
 * When users are done with this structure, call close().
 *
 * @author bradtaylor
 */
public class SamRecordTrackingBuffer<T extends SamRecordWithOrdinal> {
    private int availableRecordsInMemory; // how many more records can we store in memory
    private final int blockSize; // the size of each block
    private final List<File> tmpDirs; // the list of temporary directories to use
    private long queueHeadRecordIndex; // the index of the head of the buffer
    private long queueTailRecordIndex; // the index of the tail of the buffer
    private final Deque<BufferBlock> blocks; // the queue of blocks, in which records are contained
    private final SAMFileHeader header;

    private final Class<T> clazz; // the class to create

    /**
     * @param maxRecordsInRam how many records to buffer before spilling to disk
     * @param blockSize the number of records in a given block
     * @param tmpDirs the temporary directories to use when spilling to disk
     * @param header the header
     * @param clazz the class that extends SamRecordWithOrdinal
     */
    public SamRecordTrackingBuffer(final int maxRecordsInRam, final int blockSize, final List<File> tmpDirs, final SAMFileHeader header, final Class<T> clazz) {
        this.availableRecordsInMemory = maxRecordsInRam;
        this.blockSize = blockSize;
        this.tmpDirs = tmpDirs;
        // Empty-buffer invariant: head == tail + 1, so that size() reports 0 before the first add().
        this.queueHeadRecordIndex = 0;
        this.queueTailRecordIndex = -1;
        this.blocks = new ArrayDeque<BufferBlock>();
        this.header = header;
        this.clazz = clazz;
    }

    /** Returns true if we are tracking no records, false otherwise */
    public boolean isEmpty() {
        return (this.blocks.isEmpty() || this.blocks.getFirst().isEmpty());
    }

    /** Returns true if we can return the next record (it has been examined). */
    public boolean canEmit() {
        return (!this.blocks.isEmpty() && this.blocks.getFirst().canEmit());
    }

    /**
     * Add the given SamRecordWithOrdinal to the buffer.  The records must be added in order.
     *
     * @param samRecordWithOrdinal The samRecordWithOrdinal to be added
     * @throws SAMException if the record's ordinal is not exactly one past the current tail of a non-empty buffer
     */
    public void add(final SamRecordWithOrdinal samRecordWithOrdinal) {
        final long ordinal = samRecordWithOrdinal.getRecordOrdinal();

        // An empty buffer restarts its numbering at whatever ordinal arrives first
        if (this.isEmpty()) {
            this.queueHeadRecordIndex = ordinal;
            this.queueTailRecordIndex = ordinal - 1;
        }

        // Validate before touching the tail index so that a rejected record leaves size() untouched
        if (ordinal != this.queueTailRecordIndex + 1) {
            throw new SAMException("The records were added out of order");
        }
        this.queueTailRecordIndex = ordinal;

        // If necessary, create a new block, using as much ram as available up to its total size
        if (this.blocks.isEmpty() || !this.blocks.getLast().canAdd()) {
            // once ram is given to a block, we can't give it to another block (until some is recovered from the head of the queue)
            final int blockRam = Math.min(this.blockSize, this.availableRecordsInMemory);
            this.availableRecordsInMemory = this.availableRecordsInMemory - blockRam;
            final BufferBlock block = new BufferBlock(this.blockSize, blockRam, this.tmpDirs, this.header, ordinal);
            this.blocks.addLast(block);
        }
        this.blocks.getLast().add(samRecordWithOrdinal);
    }

    /**
     * Removes and returns the record at the head of the buffer.
     *
     * @return The next record, wrapped together with its ordinal and result state.
     * @throws java.util.NoSuchElementException if the buffer is empty.
     * @throws SAMException if the head record has not been marked as examined (canEmit returns false)
     */
    public SamRecordWithOrdinal next() {
        if (this.isEmpty()) {
            throw new NoSuchElementException("Attempting to remove an element from an empty SamRecordTrackingBuffer");
        }
        final BufferBlock headBlock = this.blocks.getFirst();
        if (!headBlock.canEmit()) {
            throw new SAMException("Attempting to get a samRecordWithOrdinal from the SamRecordTrackingBuffer that has not been through " +
                    "marked as examined. canEmit() must return true in order to call next()");
        }

        // If the samRecordWithOrdinal was stored in memory, reclaim its ram for use in additional blocks at tail of queue
        // NB: this must be checked before calling next(), as that method updates the block-head
        if (!headBlock.headRecordIsFromDisk()) {
            this.availableRecordsInMemory++;
        }
        final SamRecordWithOrdinal samRecordWithOrdinal = headBlock.next();
        if (headBlock.hasBeenDrained()) {
            this.blocks.poll(); // remove the block as it is now empty
            headBlock.clear(); // free any disk io resources associated with empty block
        }
        this.queueHeadRecordIndex++;
        return samRecordWithOrdinal;
    }

    /** Removes the next record from this buffer */
    public void remove() {
        this.next();
    }

    /**
     * Return the total number of elements in the queue, both in memory and on disk
     */
    public long size() {
        return this.queueTailRecordIndex - this.queueHeadRecordIndex + 1;
    }

    /** Returns the block that holds the sam record at the given index, null if no such block exists */
    private BufferBlock getBlock(final SamRecordWithOrdinal samRecordWithOrdinal) {
        final long ordinal = samRecordWithOrdinal.getRecordOrdinal();
        for (final BufferBlock block : this.blocks) {
            if (block.getStartIndex() <= ordinal && ordinal <= block.getEndIndex()) {
                return block;
            }
        }
        return null;
    }

    /** Returns true if this buffer contains the record at the given index, false otherwise */
    public boolean contains(final SamRecordWithOrdinal samRecordWithOrdinal) {
        return (null != getBlock(samRecordWithOrdinal));
    }

    /**
     * Mark the current samRecordWithOrdinal as having been examined.
     *
     * @param samRecordWithOrdinal The samRecordWithOrdinal to be marked
     * @param resultState Boolean flag indicating the result of the examination of this record.
     * @throws SAMException if the provided recordIndex is not found within the SamRecordTrackingBuffer
     */
    public void setResultState(final SamRecordWithOrdinal samRecordWithOrdinal, final boolean resultState) {
        final BufferBlock block = getBlock(samRecordWithOrdinal);
        if (null == block) {
            throw new SAMException("Attempted to set examined information on a samRecordWithOrdinal whose index is not found " +
                    "in the SamRecordTrackingBuffer. recordIndex: " + samRecordWithOrdinal.getRecordOrdinal());
        }
        block.setResultState(samRecordWithOrdinal, resultState);
    }

    /**
     * Close IO resources associated with each underlying BufferBlock.  Any records still held are discarded.
     */
    public void close() {
        while (!this.blocks.isEmpty()) {
            final BufferBlock block = this.blocks.pollFirst();
            block.clear();
        }
        // Everything that was still tracked is gone; restore the empty invariant so size() reports 0
        this.queueHeadRecordIndex = this.queueTailRecordIndex + 1;
    }

    /**
     * This stores blocks of records, either in memory or on disk, or both!
     */
    private class BufferBlock {
        private final DiskBackedQueue<SAMRecord> recordsQueue;
        private final int maxBlockSize;
        private long currentStartIndex;
        private final long originalStartIndex;
        private long endIndex;

        // Both bit sets are indexed by (record ordinal - originalStartIndex)
        private final BitSet wasExaminedIndexes;
        private final BitSet resultStateIndexes;

        /** Creates an empty block buffer, with an allowable # of records in RAM */
        public BufferBlock(final int maxBlockSize, final int maxBlockRecordsInMemory, final List<File> tmpDirs,
                           final SAMFileHeader header,
                           final long originalStartIndex) {
            this.recordsQueue = DiskBackedQueue.newInstance(new BAMRecordCodec(header), maxBlockRecordsInMemory, tmpDirs);
            this.maxBlockSize = maxBlockSize;
            this.originalStartIndex = originalStartIndex;
            // An empty block spans [originalStartIndex, originalStartIndex - 1]; add() re-derives these on the first record
            this.currentStartIndex = originalStartIndex;
            this.endIndex = originalStartIndex - 1;
            this.wasExaminedIndexes = new BitSet(maxBlockSize);
            this.resultStateIndexes = new BitSet(maxBlockSize);
        }

        /**
         * Check that the tail of the block has not grown past the maximum block size (even if records were popped) and that the underlying queue can be added to.
         * TODO - reimplement with a circular byte array buffer PROVIDED RECORDS ARE IN MEMORY
         * @return true if another record may be added to this block
         */
        public boolean canAdd() {
            return (this.endIndex - this.originalStartIndex + 1) < this.maxBlockSize && this.recordsQueue.canAdd();
        }

        /** Returns true if the record at the front of the buffer is on disk */
        public boolean headRecordIsFromDisk() {
            return this.recordsQueue.headRecordIsFromDisk();
        }

        /**
         * Check whether we have read all possible records from this block (and it is available to be destroyed)
         * @return true if we have read the last /possible/ record (ie the block size, or if !canAdd the end index)
         */
        public boolean hasBeenDrained() {
            final long maximalIndex = (this.canAdd()) ? (this.originalStartIndex + this.maxBlockSize) : this.endIndex;
            return this.currentStartIndex > maximalIndex;       //NB: watch out for an off by one here
        }

        /** Gets the index of the first record in this block */
        public long getStartIndex() {
            return this.currentStartIndex;
        }

        /** Gets the index of the last record in this block */
        public long getEndIndex() {
            return this.endIndex;
        }

        /**
         * Add a record to this block
         *
         * @throws IllegalStateException if the underlying queue reports that it cannot be added to
         */
        public void add(final SamRecordWithOrdinal samRecordWithOrdinal) {
            if (!this.recordsQueue.canAdd()) {
                throw new IllegalStateException("Cannot add to DiskBackedQueue whose canAdd() method returns false");
            }
            if (this.recordsQueue.isEmpty()) {
                // first record (or first after the queue emptied): re-anchor the live range on this ordinal
                this.currentStartIndex = samRecordWithOrdinal.getRecordOrdinal();
                this.endIndex = samRecordWithOrdinal.getRecordOrdinal() - 1;
            }
            this.recordsQueue.add(samRecordWithOrdinal.getRecord());
            this.endIndex++;
        }

        private int ensureIndexFitsInAnInt(final long value) {
            if (value < Integer.MIN_VALUE || Integer.MAX_VALUE < value) throw new SAMException("Error: index out of range: " + value);
            return (int)value;
        }

        /** Translates a record ordinal into its position within this block's bit sets */
        private int bitIndexOf(final long recordOrdinal) {
            return ensureIndexFitsInAnInt(recordOrdinal - this.originalStartIndex);
        }

        /**
         * Mark the current samRecordWithOrdinal as having been examined with a given result state.
         *
         * @param samRecordWithOrdinal The samRecordWithOrdinal to be marked
         * @param resultState Boolean flag indicating the result of the examination of this record.
         *
         * This assumes that this record index does not fall out of range.
         */
        public void setResultState(final SamRecordWithOrdinal samRecordWithOrdinal, final boolean resultState) {
            // find the correct bit index and update both metadata bit sets
            final int bitIndex = bitIndexOf(samRecordWithOrdinal.getRecordOrdinal());
            this.wasExaminedIndexes.set(bitIndex, true);
            this.resultStateIndexes.set(bitIndex, resultState);
        }

        /** Returns true if this block currently holds no records */
        public boolean isEmpty() {
            return (this.recordsQueue.isEmpty());
        }

        /** Returns true if this block holds a head record and that record has been marked as examined */
        public boolean canEmit() {
            // An empty block has nothing to emit; answering here also avoids probing the bit set with an index
            // that does not correspond to any stored record.
            if (this.isEmpty()) {
                return false;
            }
            return this.wasExaminedIndexes.get(bitIndexOf(this.currentStartIndex));
        }

        /**
         * Removes the head record of this block and returns it wrapped in a new instance of the buffer's record class.
         *
         * @throws IllegalStateException if canEmit() is false
         * @throws RuntimeException wrapping any failure to instantiate or populate the wrapper
         */
        public SamRecordWithOrdinal next() throws IllegalStateException {
            if (!this.canEmit()) {
                throw new IllegalStateException("Cannot call next() on a buffer block where canEmit() is false!");
            }
            try {
                // create a wrapped record for the head of the queue, and set the underlying record's examined information appropriately
                // (the wrapper is instantiated before polling, so a reflection failure does not consume a record)
                final SamRecordWithOrdinal samRecordWithOrdinal = clazz.getDeclaredConstructor().newInstance();
                samRecordWithOrdinal.setRecord(this.recordsQueue.poll());
                samRecordWithOrdinal.setRecordOrdinal(this.currentStartIndex);
                samRecordWithOrdinal.setResultState(this.resultStateIndexes.get(bitIndexOf(this.currentStartIndex)));
                this.currentStartIndex++;
                return samRecordWithOrdinal;
            } catch (final Exception e) {
                throw new RuntimeException(e);
            }
        }

        /**
         * Remove, but do not return, the next samRecordWithOrdinal in the iterator
         */
        public void remove() {
            this.next();
        }

        /**
         * Return the total number of elements in the block, both in memory and on disk
         */
        public long size() {
            return this.endIndex - this.currentStartIndex + 1;
        }

        /**
         * Close disk IO resources associated with the underlying records queue.
         * This must be called when a block is no longer needed in order to prevent memory leaks.
         */
        public void clear() {
            this.recordsQueue.clear();
        }
    }
}