/*
* The MIT License
*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools;
import htsjdk.samtools.util.BlockCompressedFilePointerUtil;
import java.io.File;
import java.io.IOException;
import java.util.List;
/**
 * Metadata about the bam index contained within the bam index.
 *
 * An instance holds one index-wide value (the count of records without
 * coordinates) plus four per-reference values: the first and last virtual file
 * offsets and the aligned / unaligned record counts. When writing, the
 * per-reference values are reset through {@link #newReference()}; when reading,
 * {@link #getIndexStats(BAMFileReader)} returns one instance per reference.
 */
public class BAMIndexMetaData {

    // information for the entire index.
    // stored at the end of the index
    private long noCoordinateRecords = 0;

    // information for each reference.
    // stored in two chunks in bin # MAX_BINS
    private long firstOffset = -1; // -1 means "no offset recorded yet"
    private long lastOffset = 0;
    private int alignedRecords = 0;
    private int unAlignedRecords = 0; // unmapped, but associated with this reference

    /**
     * Constructor used when writing an index.
     * Starts with all counts at zero and no offsets recorded.
     */
    BAMIndexMetaData() {
        noCoordinateRecords = 0;
        newReference();
    }

    /**
     * Constructor used when reading an index.
     *
     * @param chunkList the metadata chunks of one reference. May be null or empty,
     *                  in which case all values keep their defaults. Otherwise it must
     *                  hold exactly two chunks: the first carries the first/last offsets
     *                  in its start/end, the second carries the aligned/unaligned record
     *                  counts in its start/end.
     * @throws SAMException if the list is non-empty and does not hold exactly two chunks
     */
    BAMIndexMetaData(List<Chunk> chunkList) {
        noCoordinateRecords = 0;
        if (chunkList == null || chunkList.isEmpty()) {
            // No metadata chunks: keep the field defaults.
            return;
        }
        if (chunkList.size() != 2) {
            throw new SAMException("Unexpected number of metadata chunks " + (chunkList.size()));
        }

        final Chunk offsets = chunkList.get(0);
        firstOffset = offsets.getChunkStart();
        lastOffset = offsets.getChunkEnd();

        // The counts are stored as longs in the chunk; the narrowing casts keep only
        // the low 32 bits, matching the int-typed public getters.
        final Chunk counts = chunkList.get(1);
        alignedRecords = (int) counts.getChunkStart();
        unAlignedRecords = (int) counts.getChunkEnd();
    }

    /**
     * @return the count of aligned records associated with this reference
     */
    public int getAlignedRecordCount() {
        return alignedRecords;
    }

    /**
     * @return the count of unaligned records associated with this reference
     */
    public int getUnalignedRecordCount() {
        return unAlignedRecords;
    }

    /**
     * Call for each new reference sequence encountered.
     * Resets the per-reference offsets and counts; the no-coordinate count is left untouched.
     */
    void newReference() {
        firstOffset = -1;
        lastOffset = 0;
        alignedRecords = 0;
        unAlignedRecords = 0;
    }

    /**
     * Extract relevant metaData from the record and its filePointer.
     * Call only once per record in the file being indexed.
     *
     * A record without an alignment start only increments the no-coordinate count.
     * Any other record increments the aligned or unaligned count (depending on its
     * unmapped flag) and widens the first/last offset range to include its chunk.
     *
     * @param rec the record to account for
     * @throws SAMException if the record has an alignment start but no file source
     */
    void recordMetaData(final SAMRecord rec) {
        if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
            incrementNoCoordinateRecordCount();
            return;
        }

        if (rec.getFileSource() == null) {
            throw new SAMException("BAM cannot be indexed without setting a fileSource for record " + rec);
        }

        final Chunk newChunk = ((BAMFileSpan) rec.getFileSource().getFilePointer()).getSingleChunk();
        final long start = newChunk.getChunkStart();
        final long end = newChunk.getChunkEnd();

        if (rec.getReadUnmappedFlag()) {
            unAlignedRecords++;
        } else {
            alignedRecords++;
        }

        // Take the chunk start if nothing has been recorded yet or it is not after the current first offset.
        if (firstOffset == -1 || BlockCompressedFilePointerUtil.compare(start, firstOffset) < 1) {
            this.firstOffset = start;
        }
        // Take the chunk end if it is not before the current last offset.
        if (BlockCompressedFilePointerUtil.compare(lastOffset, end) < 1) {
            this.lastOffset = end;
        }
    }

    /**
     * Call whenever a record with no coordinate information is encountered in the bam file
     */
    void incrementNoCoordinateRecordCount() {
        noCoordinateRecords++;
    }

    /**
     * Set local variable. Normally noCoordinateRecord count accessed from AbstractBAMFileIndex when reading
     */
    private void setNoCoordinateRecordCount(long count) {
        noCoordinateRecords = count;
    }

    /**
     * @return the count of records with no coordinate information in the bam file.
     * Not public, since only used by BAMIndexer when writing bam index.
     * Readers of bam index should use AbstractBAMFileIndex.getNoCoordinateRecordCount.
     */
    long getNoCoordinateRecordCount() {
        return noCoordinateRecords;
    }

    /**
     * @return the first virtual file offset used by this reference
     */
    long getFirstOffset() {
        return firstOffset;
    }

    /**
     * @return the last virtual file offset used by this reference
     */
    long getLastOffset() {
        return lastOffset;
    }

    /**
     * Prints meta-data statistics from BAM index (.bai) file to standard output.
     * Statistics include count of aligned and unaligned reads for each reference sequence
     * and a count of all records with no start coordinate.
     *
     * @param inputBamFile the BAM file whose index is reported
     * @throws SAMException if the BAM file has no index, or if opening it fails with an IOException
     */
    public static void printIndexStats(final File inputBamFile) {
        try {
            final BAMFileReader bam = new BAMFileReader(inputBamFile, null, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory());
            try {
                if (!bam.hasIndex()) {
                    throw new SAMException("No index for bam file " + inputBamFile);
                }

                BAMIndexMetaData[] data = getIndexStats(bam);

                // one output line per sequence in the header's dictionary
                int nRefs = bam.getFileHeader().getSequenceDictionary().size();
                for (int i = 0; i < nRefs; i++) {
                    final SAMSequenceRecord seq = bam.getFileHeader().getSequence(i);
                    if (seq == null) {
                        continue;
                    }
                    final String sequenceName = seq.getSequenceName();
                    final int sequenceLength = seq.getSequenceLength();
                    System.out.print(sequenceName + ' ' + "length=\t" + sequenceLength);

                    // data is sized from the index, which may list fewer references than the
                    // header; a sequence beyond the end is treated like one with no metadata.
                    if (i >= data.length || data[i] == null) {
                        System.out.println();
                        continue;
                    }
                    System.out.println("\tAligned= " + data[i].getAlignedRecordCount() +
                            "\tUnaligned= " + data[i].getUnalignedRecordCount());
                }
                // getIndexStats always populates element 0, which carries the index-wide count
                System.out.println("NoCoordinateCount= " + data[0].getNoCoordinateRecordCount());
            } finally {
                // release the reader on every path, including the exceptional ones
                bam.close();
            }
        } catch (IOException e) {
            throw new SAMException("Exception in getting index statistics", e);
        }
    }

    /**
     * Collects the meta-data statistics stored in the index of the given BAM reader.
     * Nothing is printed; see {@link #printIndexStats(File)} for a printing variant.
     *
     * @param bam a reader whose index is an AbstractBAMFileIndex
     * @return an array with one element per reference reported by the index (length 1 when
     *         the index reports no references). Element 0 is never null and carries the
     *         no-coordinate record count when the index provides one; other elements hold
     *         whatever the index returns for that reference and are null-checked by
     *         {@link #printIndexStats(File)}.
     */
    public static BAMIndexMetaData[] getIndexStats(final BAMFileReader bam) {
        AbstractBAMFileIndex index = (AbstractBAMFileIndex) bam.getIndex();

        // read the metadata of every reference.
        int nRefs = index.getNumberOfReferences();
        BAMIndexMetaData[] result = new BAMIndexMetaData[nRefs == 0 ? 1 : nRefs];
        for (int i = 0; i < nRefs; i++) {
            result[i] = index.getMetaData(i);
        }

        // element 0 doubles as the holder of the index-wide no-coordinate count
        if (result[0] == null) {
            result[0] = new BAMIndexMetaData();
        }
        final Long noCoordCount = index.getNoCoordinateCount();
        if (noCoordCount != null) { // null in old index files without metadata
            result[0].setNoCoordinateRecordCount(noCoordCount);
        }
        return result;
    }
}