/*
* The MIT License
*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.StopWatch;
import htsjdk.samtools.util.StringUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;
import static org.testng.Assert.*;
/**
* Test BAM file indexing.
*/
public class BAMFileIndexTest
{
private final File BAM_FILE = new File("testdata/htsjdk/samtools/BAMFileIndexTest/index_test.bam");
private final boolean mVerbose = false;
@Test
public void testGetSearchBins()
throws Exception {
final DiskBasedBAMFileIndex bfi = new DiskBasedBAMFileIndex(new File(BAM_FILE.getPath() + ".bai"),
null); // todo can null be replaced with a Sequence dictionary for the BAM_FILE?
final long[] bins = bfi.getSpanOverlapping(1, 0, 0).toCoordinateArray();
/***
if (bins == null) {
System.out.println("Search bins: " + bins);
return;
}
System.out.println("Search bins:");
for (int i = 0; i < bins.length; i++) {
System.out.println(" " + Long.toHexString(bins[i]));
}
***/
assertNotNull(bins);
assertEquals(bins.length, 2);
}
@Test
public void testSpecificQueries()
throws Exception {
assertEquals(runQueryTest(BAM_FILE, "chrM", 10400, 10600, true), 1);
assertEquals(runQueryTest(BAM_FILE, "chrM", 10400, 10600, false), 2);
}
@Test(groups = {"slow"})
public void testRandomQueries()
throws Exception {
runRandomTest(BAM_FILE, 1000, new Random());
}
@Test
public void testWholeChromosomes() {
checkChromosome("chrM", 23);
checkChromosome("chr1", 885);
checkChromosome("chr2", 837);
/***
checkChromosome("chr3", 683);
checkChromosome("chr4", 633);
checkChromosome("chr5", 611);
checkChromosome("chr6", 585);
checkChromosome("chr7", 521);
checkChromosome("chr8", 507);
checkChromosome("chr9", 388);
checkChromosome("chr10", 477);
checkChromosome("chr11", 467);
checkChromosome("chr12", 459);
checkChromosome("chr13", 327);
checkChromosome("chr14", 310);
checkChromosome("chr15", 280);
checkChromosome("chr16", 278);
checkChromosome("chr17", 269);
checkChromosome("chr18", 265);
checkChromosome("chr19", 178);
checkChromosome("chr20", 228);
checkChromosome("chr21", 123);
checkChromosome("chr22", 121);
checkChromosome("chrX", 237);
checkChromosome("chrY", 29);
***/
}
@Test
public void testQueryUnmapped() {
final StopWatch linearScan = new StopWatch();
final StopWatch queryUnmapped = new StopWatch();
int unmappedCountFromLinearScan = 0;
final File bamFile = BAM_FILE;
final SAMFileReader reader = new SAMFileReader(bamFile);
linearScan.start();
CloseableIterator<SAMRecord> it = reader.iterator();
int mappedCount = 0;
while (it.hasNext()) {
final SAMRecord rec = it.next();
if (rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
unmappedCountFromLinearScan = 1;
break;
}
++mappedCount;
}
linearScan.stop();
System.out.println("Found start of unmapped reads. Num mapped reads: " + mappedCount);
System.out.println("Time so far: " + linearScan.getElapsedTimeSecs());
linearScan.start();
while (it.hasNext()) {
final SAMRecord rec = it.next();
Assert.assertEquals(rec.getReferenceIndex().intValue(), SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
++unmappedCountFromLinearScan;
}
it.close();
linearScan.stop();
queryUnmapped.start();
it = reader.queryUnmapped();
int unmappedCountFromQueryUnmapped = 0;
while (it.hasNext()) {
final SAMRecord rec = it.next();
Assert.assertEquals(rec.getReferenceIndex().intValue(), SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
++unmappedCountFromQueryUnmapped;
}
it.close();
queryUnmapped.stop();
System.out.println("Linear scan total time: " + linearScan.getElapsedTimeSecs());
System.out.println("queryUnmapped time: " + queryUnmapped.getElapsedTimeSecs());
System.out.println("Number of unmapped reads:" + unmappedCountFromQueryUnmapped);
Assert.assertEquals(unmappedCountFromQueryUnmapped, unmappedCountFromLinearScan);
reader.close();
}
@Test
public void testQueryAlignmentStart() {
final SAMFileReader reader = new SAMFileReader(BAM_FILE);
CloseableIterator<SAMRecord> it = reader.queryAlignmentStart("chr1", 202160268);
Assert.assertEquals(countElements(it), 2);
it.close();
it = reader.queryAlignmentStart("chr1", 201595153);
Assert.assertEquals(countElements(it), 1);
it.close();
// There are records that overlap this position, but none that start here
it = reader.queryAlignmentStart("chrM", 10400);
Assert.assertEquals(countElements(it), 0);
it.close();
// One past the last chr1 record
it = reader.queryAlignmentStart("chr1", 246817509);
Assert.assertEquals(countElements(it), 0);
it.close();
}
@Test
public void testQueryMate() {
final SAMFileReader reader = new SAMFileReader(BAM_FILE);
// Both ends mapped
SAMRecord rec = getSingleRecordStartingAt(reader, "chrM", 1687);
SAMRecord mate = reader.queryMate(rec);
assertMate(rec, mate);
SAMRecord originalRec = reader.queryMate(mate);
Assert.assertEquals(originalRec, rec);
// One end mapped
rec = getSingleRecordStartingAt(reader, "chr11", 48720338);
mate = reader.queryMate(rec);
assertMate(rec, mate);
originalRec = reader.queryMate(mate);
Assert.assertEquals(originalRec, rec);
// Both ends mapped
final CloseableIterator<SAMRecord> it = reader.queryUnmapped();
rec = null;
while (it.hasNext()) {
final SAMRecord next = it.next();
if (next.getReadName().equals("2615")) {
rec = next;
break;
}
}
it.close();
Assert.assertNotNull(rec);
mate = reader.queryMate(rec);
assertMate(rec, mate);
originalRec = reader.queryMate(mate);
Assert.assertEquals(originalRec, rec);
}
private void assertMate(final SAMRecord rec, final SAMRecord mate) {
Assert.assertNotNull(mate);
Assert.assertEquals(mate.getReadName(), rec.getReadName());
Assert.assertEquals(mate.getReferenceIndex(), rec.getMateReferenceIndex());
if (SAMUtils.getMateCigarString(rec) != null) {
Assert.assertEquals(mate.getCigarString(), SAMUtils.getMateCigarString(rec));
}
Assert.assertEquals(mate.getAlignmentStart(), rec.getMateAlignmentStart());
Assert.assertFalse(mate.getFirstOfPairFlag() == rec.getFirstOfPairFlag());
}
/**
* Compare the results of a multi-interval query versus the union of the results from each interval done
* separately.
*/
@Test(dataProvider = "testMultiIntervalQueryDataProvider")
public void testMultiIntervalQuery(final boolean contained) {
final List<String> referenceNames = getReferenceNames(BAM_FILE);
final QueryInterval[] intervals = generateRandomIntervals(referenceNames.size(), 1000, new Random());
final Set<SAMRecord> multiIntervalRecords = new HashSet<SAMRecord>();
final Set<SAMRecord> singleIntervalRecords = new HashSet<SAMRecord>();
final SAMFileReader reader = new SAMFileReader(BAM_FILE);
for (final QueryInterval interval : intervals) {
consumeAll(singleIntervalRecords, reader.query(referenceNames.get(interval.referenceIndex), interval.start, interval.end, contained));
}
final QueryInterval[] optimizedIntervals = QueryInterval.optimizeIntervals(intervals);
consumeAll(multiIntervalRecords, reader.query(optimizedIntervals, contained));
final Iterator<SAMRecord> singleIntervalRecordIterator = singleIntervalRecords.iterator();
boolean failed = false;
while (singleIntervalRecordIterator.hasNext()) {
final SAMRecord record = singleIntervalRecordIterator.next();
if (!multiIntervalRecords.remove(record)) {
System.out.println("SingleIntervalQuery found " + record + " but MultiIntervalQuery did not");
failed = true;
}
}
for (final SAMRecord record : multiIntervalRecords) {
System.out.println("MultiIntervalQuery found " + record + " but SingleIntervalQuery did not");
failed = true;
}
Assert.assertFalse(failed);
}
@DataProvider(name = "testMultiIntervalQueryDataProvider")
private Object[][] testMultiIntervalQueryDataProvider() {
return new Object[][]{{true}, {false}};
}
@Test
public void testUnmappedMateWithCoordinate() throws Exception {
// TODO: Use SAMRecordSetBuilder when it is able to create a pair with one end unmapped
final String samText = "@HD\tVN:1.0\tSO:coordinate\n" +
"@SQ\tSN:chr1\tLN:101\n" +
"@SQ\tSN:chr2\tLN:101\n" +
"@SQ\tSN:chr3\tLN:101\n" +
"@SQ\tSN:chr4\tLN:101\n" +
"@SQ\tSN:chr5\tLN:101\n" +
"@SQ\tSN:chr6\tLN:101\n" +
"@SQ\tSN:chr7\tLN:404\n" +
"@SQ\tSN:chr8\tLN:202\n" +
"@RG\tID:0\tSM:Hi,Mom!\n" +
"@PG\tID:1\tPN:Hey!\tVN:2.0\n" +
"one_end_mapped\t73\tchr7\t100\t255\t101M\t*\t0\t0\tCAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN\t)'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/&\tRG:Z:0\n" +
"one_end_mapped\t133\tchr7\t100\t0\t*\t=\t100\t0\tNCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA\t&/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1\tRG:Z:0\n";
final ByteArrayInputStream bis = new ByteArrayInputStream(StringUtil.stringToBytes(samText));
final File bamFile = File.createTempFile("BAMFileIndexTest.", BamFileIoUtils.BAM_FILE_EXTENSION);
bamFile.deleteOnExit();
final SAMFileReader textReader = new SAMFileReader(bis);
SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
samFileWriterFactory.setCreateIndex(true);
final SAMFileWriter writer = samFileWriterFactory.makeBAMWriter(textReader.getFileHeader(), true, bamFile);
for (final SAMRecord rec : textReader) {
writer.addAlignment(rec);
}
writer.close();
final SAMFileReader bamReader = new SAMFileReader(bamFile);
SamFiles.findIndex(bamFile).deleteOnExit();
Assert.assertEquals(countElements(bamReader.queryContained("chr7", 100, 100)), 1);
Assert.assertEquals(countElements(bamReader.queryOverlapping("chr7", 100, 100)), 2);
}
private <E> void consumeAll(final Collection<E> collection, final CloseableIterator<E> iterator) {
while (iterator.hasNext()) {
collection.add(iterator.next());
}
iterator.close();
}
private SAMRecord getSingleRecordStartingAt(final SAMFileReader reader, final String sequence, final int alignmentStart) {
final CloseableIterator<SAMRecord> it = reader.queryAlignmentStart(sequence, alignmentStart);
Assert.assertTrue(it.hasNext());
final SAMRecord rec = it.next();
Assert.assertNotNull(rec);
Assert.assertFalse(it.hasNext());
it.close();
return rec;
}
private int countElements(final CloseableIterator<SAMRecord> it) {
int num;
for (num = 0; it.hasNext(); ++num, it.next()) {
}
it.close();
return num;
}
private void checkChromosome(final String name, final int expectedCount) {
int count = runQueryTest(BAM_FILE, name, 0, 0, true);
assertEquals(count, expectedCount);
count = runQueryTest(BAM_FILE, name, 0, 0, false);
assertEquals(count, expectedCount);
}
private void runRandomTest(final File bamFile, final int count, final Random generator) {
final List<String> referenceNames = getReferenceNames(bamFile);
final QueryInterval[] intervals = generateRandomIntervals(referenceNames.size(), count, generator);
for (final QueryInterval interval : intervals) {
final String refName = referenceNames.get(interval.referenceIndex);
final int startPos = interval.start;
final int endPos = interval.end;
System.out.println("Testing query " + refName + ":" + startPos + "-" + endPos + " ...");
try {
runQueryTest(bamFile, refName, startPos, endPos, true);
runQueryTest(bamFile, refName, startPos, endPos, false);
} catch (final Throwable exc) {
String message = "Query test failed: " + refName + ":" + startPos + "-" + endPos;
message += ": " + exc.getMessage();
throw new RuntimeException(message, exc);
}
}
}
private QueryInterval[] generateRandomIntervals(final int numReferences, final int count, final Random generator) {
final QueryInterval[] intervals = new QueryInterval[count];
final int maxCoordinate = 10000000;
for (int i = 0; i < count; i++) {
final int referenceIndex = generator.nextInt(numReferences);
final int coord1 = generator.nextInt(maxCoordinate+1);
final int coord2 = generator.nextInt(maxCoordinate+1);
final int startPos = Math.min(coord1, coord2);
final int endPos = Math.max(coord1, coord2);
intervals[i] = new QueryInterval(referenceIndex, startPos, endPos);
}
return intervals;
}
private List<String> getReferenceNames(final File bamFile) {
final SAMFileReader reader = new SAMFileReader(bamFile);
final List<String> result = new ArrayList<String>();
final List<SAMSequenceRecord> seqRecords = reader.getFileHeader().getSequenceDictionary().getSequences();
for (final SAMSequenceRecord seqRecord : seqRecords) {
if (seqRecord.getSequenceName() != null) {
result.add(seqRecord.getSequenceName());
}
}
reader.close();
return result;
}
private int runQueryTest(final File bamFile, final String sequence, final int startPos, final int endPos, final boolean contained) {
verbose("Testing query " + sequence + ":" + startPos + "-" + endPos + " ...");
final SAMFileReader reader1 = new SAMFileReader(bamFile);
final SAMFileReader reader2 = new SAMFileReader(bamFile);
final Iterator<SAMRecord> iter1 = reader1.query(sequence, startPos, endPos, contained);
final Iterator<SAMRecord> iter2 = reader2.iterator();
// Compare ordered iterators.
// Confirm that iter1 is a subset of iter2 that properly filters.
SAMRecord record1 = null;
SAMRecord record2 = null;
int count1 = 0;
int count2 = 0;
int beforeCount = 0;
int afterCount = 0;
while (true) {
if (record1 == null && iter1.hasNext()) {
record1 = iter1.next();
count1++;
}
if (record2 == null && iter2.hasNext()) {
record2 = iter2.next();
count2++;
}
// System.out.println("Iteration:");
// System.out.println(" Record1 = " + ((record1 == null) ? "null" : record1.format()));
// System.out.println(" Record2 = " + ((record2 == null) ? "null" : record2.format()));
if (record1 == null && record2 == null) {
break;
}
if (record1 == null) {
checkPassesFilter(false, record2, sequence, startPos, endPos, contained);
record2 = null;
afterCount++;
continue;
}
assertNotNull(record2);
final int ordering = compareCoordinates(record1, record2);
if (ordering > 0) {
checkPassesFilter(false, record2, sequence, startPos, endPos, contained);
record2 = null;
beforeCount++;
continue;
}
assertTrue(ordering == 0);
checkPassesFilter(true, record1, sequence, startPos, endPos, contained);
checkPassesFilter(true, record2, sequence, startPos, endPos, contained);
assertEquals(record1.getReadName(), record2.getReadName());
assertEquals(record1.getReadString(), record2.getReadString());
record1 = null;
record2 = null;
}
reader1.close();
reader2.close();
verbose("Checked " + count1 + " records against " + count2 + " records.");
verbose("Found " + (count2 - beforeCount - afterCount) + " records matching.");
verbose("Found " + beforeCount + " records before.");
verbose("Found " + afterCount + " records after.");
return count1;
}
private void checkPassesFilter(final boolean expected, final SAMRecord record, final String sequence, final int startPos, final int endPos, final boolean contained) {
final boolean passes = passesFilter(record, sequence, startPos, endPos, contained);
if (passes != expected) {
System.out.println("Error: Record erroneously " +
(passes ? "passed" : "failed") +
" filter.");
System.out.println(" Record: " + record.format());
System.out.println(" Filter: " + sequence + ":" +
startPos + "-" + endPos +
" (" + (contained ? "contained" : "overlapping") + ")");
assertEquals(passes, expected);
}
}
private boolean passesFilter(final SAMRecord record, final String sequence, final int startPos, final int endPos, final boolean contained) {
if (record == null) {
return false;
}
if (!safeEquals(record.getReferenceName(), sequence)) {
return false;
}
final int alignmentStart = record.getAlignmentStart();
int alignmentEnd = record.getAlignmentEnd();
if (alignmentStart <= 0) {
assertTrue(record.getReadUnmappedFlag());
return false;
}
if (alignmentEnd <= 0) {
// For indexing-only records, treat as single base alignment.
assertTrue(record.getReadUnmappedFlag());
alignmentEnd = alignmentStart;
}
if (contained) {
if (startPos != 0 && alignmentStart < startPos) {
return false;
}
if (endPos != 0 && alignmentEnd > endPos) {
return false;
}
} else {
if (startPos != 0 && alignmentEnd < startPos) {
return false;
}
if (endPos != 0 && alignmentStart > endPos) {
return false;
}
}
return true;
}
private int compareCoordinates(final SAMRecord record1, final SAMRecord record2) {
final int seqIndex1 = record1.getReferenceIndex();
final int seqIndex2 = record2.getReferenceIndex();
if (seqIndex1 == -1) {
return ((seqIndex2 == -1) ? 0 : -1);
} else if (seqIndex2 == -1) {
return 1;
}
int result = seqIndex1 - seqIndex2;
if (result != 0) {
return result;
}
result = record1.getAlignmentStart() - record2.getAlignmentStart();
return result;
}
private boolean safeEquals(final Object o1, final Object o2) {
if (o1 == o2) {
return true;
} else if (o1 == null || o2 == null) {
return false;
} else {
return o1.equals(o2);
}
}
private void verbose(final String text) {
if (mVerbose) {
System.out.println("# " + text);
}
}
}