package htsjdk.samtools; import htsjdk.samtools.util.BlockCompressedFilePointerUtil; import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; import java.util.List; /** * A [start,stop) file pointer pairing into the BAM file, stored * as a BAM file index. A chunk is represented as a single 64-bit * value where the high-order 48 bits point to the location of the * start of a compressed BGZF block within a BGZF file and the * low-order 16 bits point to a position within the decompressed * data in the BGZF block. * * See the SAM/BAM spec for more details. */ public class Chunk implements Cloneable, Serializable,Comparable<Chunk> { private static final long serialVersionUID = 1L; /** * A pointer to the start of a region in a SAM/BAM file. The * start is inclusive: start reading from this point. */ private long mChunkStart; /** * A pointer to the end of a region in a SAM/BAM file. The end * is exclusive: this pointer points to one byte past the end * of the region of interest inside the file. */ private long mChunkEnd; public Chunk(final long start, final long end) { mChunkStart = start; mChunkEnd = end; } public Chunk clone() { return new Chunk(mChunkStart,mChunkEnd); } public long getChunkStart() { return mChunkStart; } protected void setChunkStart(final long value) { mChunkStart = value; } public long getChunkEnd() { return mChunkEnd; } protected void setChunkEnd(final long value) { mChunkEnd = value; } public int compareTo(final Chunk chunk) { int result = Long.signum(mChunkStart - chunk.mChunkStart); if (result == 0) { result = Long.signum(mChunkEnd - chunk.mChunkEnd); } return result; } @Override public boolean equals(final Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; final Chunk chunk = (Chunk) o; if (mChunkEnd != chunk.mChunkEnd) return false; if (mChunkStart != chunk.mChunkStart) return false; return true; } /** * Returns whether two chunks overlap. * @param other Chunk to which this should be compared. * @return True if the chunks overlap. Returns false if the two chunks abut or are disjoint. */ public boolean overlaps(final Chunk other) { final int comparison = this.compareTo(other); if(comparison == 0) return true; // "sort" the two chunks using the comparator. final Chunk leftMost = comparison==-1 ? this : other; final Chunk rightMost = comparison==1 ? this : other; final long leftMostBlockAddress = BlockCompressedFilePointerUtil.getBlockAddress(leftMost.getChunkEnd()); final long rightMostBlockAddress = BlockCompressedFilePointerUtil.getBlockAddress(rightMost.getChunkStart()); // If the left block's address is after the right block's address, compare the two blocks. // If the two blocks are identical, compare the block offsets. // If the right block is after the left block, no overlap is possible. if(leftMostBlockAddress > rightMostBlockAddress) return true; else if(leftMostBlockAddress == rightMostBlockAddress) { final int leftMostOffset = BlockCompressedFilePointerUtil.getBlockOffset(leftMost.getChunkEnd()); final int rightMostOffset = BlockCompressedFilePointerUtil.getBlockOffset(rightMost.getChunkStart()); return leftMostOffset > rightMostOffset; } else return false; } /** * Returns whether two chunks overlap. * @param other Chunk to which this should be compared. * @return True if the two chunks are adjacent. Returns false if the chunks overlap or are discontinuous. */ public boolean isAdjacentTo(final Chunk other) { // Simpler implementation would be to == the chunk end of one to the chunk start of the other. Chose this implementation to ensure that all chunk // comparisons point directly to the return (BlockCompressedFilePointerUtil.getBlockAddress(this.getChunkEnd()) == BlockCompressedFilePointerUtil.getBlockAddress(other.getChunkStart()) && BlockCompressedFilePointerUtil.getBlockOffset(this.getChunkEnd()) == BlockCompressedFilePointerUtil.getBlockOffset(other.getChunkStart())) || (BlockCompressedFilePointerUtil.getBlockAddress(this.getChunkStart()) == BlockCompressedFilePointerUtil.getBlockAddress(other.getChunkEnd()) && BlockCompressedFilePointerUtil.getBlockOffset(this.getChunkStart()) == BlockCompressedFilePointerUtil.getBlockOffset(other.getChunkEnd())); } @Override public int hashCode() { int result = (int) (mChunkStart ^ (mChunkStart >>> 32)); result = 31 * result + (int) (mChunkEnd ^ (mChunkEnd >>> 32)); return result; } @Override public String toString() { return String.format("%d:%d-%d:%d",mChunkStart >> 16,mChunkStart & 0xFFFF,mChunkEnd >> 16,mChunkEnd & 0xFFFF); } /** * @param minimumOffset Discard chunks that end before this file offset. * @return sorted list of chunks in which adjacent chunks are coalesced. */ public static List<Chunk> optimizeChunkList(final List<Chunk> chunks, final long minimumOffset) { Chunk lastChunk = null; Collections.sort(chunks); final List<Chunk> result = new ArrayList<Chunk>(); for (final Chunk chunk : chunks) { if (chunk.getChunkEnd() <= minimumOffset) { continue; // linear index optimization } if (result.isEmpty()) { result.add(chunk); lastChunk = chunk; continue; } // Coalesce chunks that are in adjacent file blocks. // This is a performance optimization. if (!lastChunk.overlaps(chunk) && !lastChunk.isAdjacentTo(chunk)) { result.add(chunk); lastChunk = chunk; } else { if (chunk.getChunkEnd() > lastChunk.getChunkEnd()) { lastChunk.setChunkEnd(chunk.getChunkEnd()); } } } return result; } }