package htsjdk.tribble;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.LocationAware;
import htsjdk.tribble.bed.BEDCodec;
import htsjdk.tribble.example.ExampleBinaryCodec;
import htsjdk.tribble.index.Block;
import htsjdk.tribble.index.Index;
import htsjdk.tribble.index.IndexFactory;
import htsjdk.tribble.util.ParsingUtils;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
/**
 * Tests of {@link AbstractFeatureReader} index-backed interval queries against BED data in
 * three on-disk forms (plain ASCII, tabix-compressed, and the example binary format), each
 * paired with a compatible index type and codec via a TestNG data provider.
 */
public class FeatureReaderTest {
    private final static File asciiBedFile = new File(TestUtils.DATA_DIR + "test.bed");
    // Created fresh in setup() by converting asciiBedFile; shared by all binary-codec rows.
    private File binaryBedFile;
    private final static File tabixBedFile = new File(TestUtils.DATA_DIR + "test.tabix.bed.gz");

    @BeforeClass
    public void setup() throws IOException {
        // Convert the ASCII BED fixture to the example binary format once for the whole class.
        binaryBedFile = File.createTempFile("htsjdk-test.featurereader", ".bed");
        binaryBedFile.deleteOnExit();
        ExampleBinaryCodec.convertToBinaryTest(asciiBedFile, binaryBedFile, new BEDCodec());
    }

    @AfterClass
    public void tearDown() throws Exception {
        binaryBedFile.delete();
    }

    /**
     * Rows are {feature file, index type to build (tabix index is expected to pre-exist),
     * codec able to decode that file}.
     */
    @DataProvider(name = "indexProvider")
    public Object[][] createData1() {
        return new Object[][]{
                {asciiBedFile, IndexFactory.IndexType.LINEAR, new BEDCodec()},
                {asciiBedFile, IndexFactory.IndexType.INTERVAL_TREE, new BEDCodec()},
                {tabixBedFile, IndexFactory.IndexType.TABIX, new BEDCodec()},
                {binaryBedFile, IndexFactory.IndexType.LINEAR, new ExampleBinaryCodec()},
                {binaryBedFile, IndexFactory.IndexType.INTERVAL_TREE, new ExampleBinaryCodec()},
        };
    }

    /**
     * Checks that overlap queries at and around feature boundaries return the expected
     * number of records for each file/index/codec combination.
     */
    @Test(dataProvider = "indexProvider")
    public void testBedQuery(final File featureFile, final IndexFactory.IndexType indexType, final FeatureCodec<Feature, LocationAware> codec) throws IOException {
        // try-with-resources: the original closed the reader only on the success path,
        // leaking it whenever an assertion inside testQuery failed.
        try (final AbstractFeatureReader<Feature, ?> reader = getReader(featureFile, indexType, codec)) {
            testQuery(reader, "chr1", 1, 500, 3);
            testQuery(reader, "chr1", 1, 200, 1);
            testQuery(reader, "chr1", 1, 201, 2);
            testQuery(reader, "chr1", 500, 600, 0);
            testQuery(reader, "chr1", 100000, 100010, 1);
            testQuery(reader, "chr1", 100000, 100000, 0);
            testQuery(reader, "chr1", 100001, 100001, 1);
            testQuery(reader, "chr1", 100005, 100006, 1);
            testQuery(reader, "chr1", 100009, 100011, 1);
            testQuery(reader, "chr1", 100010, 100010, 1);
            testQuery(reader, "chr1", 100011, 100011, 0);
            testQuery(reader, "chr2", 1, 100, 2);
            testQuery(reader, "chr2", 1, 10, 1);
            testQuery(reader, "chr2", 15, 16, 0);
            testQuery(reader, "chr3", 1, 201, 0);
        }
    }

    /**
     * Stress test: issue many repeated queries against one reader to verify that per-query
     * resources (iterators, underlying streams) are released and do not accumulate.
     */
    @Test(dataProvider = "indexProvider")
    public void testLargeNumberOfQueries(final File featureFile, final IndexFactory.IndexType indexType, final FeatureCodec<Feature, LocationAware> codec) throws IOException {
        // try-with-resources: the original leaked the reader if any assertion failed mid-loop.
        try (final AbstractFeatureReader<Feature, LocationAware> reader = getReader(featureFile, indexType, codec)) {
            for (int i = 0; i < 2000; i++) {
                for (final int start : Arrays.asList(500, 200, 201, 600, 100000)) {
                    for (final String chr : Arrays.asList("chr1", "chr2", "chr3")) {
                        CloseableTribbleIterator<Feature> iter = null;
                        try {
                            iter = reader.query(chr, start, start + 1);
                            Assert.assertNotNull(iter, "Failed to create non-null iterator");
                        } finally {
                            // Each query iterator must be closed even when the assertion throws.
                            CloserUtil.close(iter);
                        }
                    }
                }
            }
        }
    }

    /**
     * Runs a single overlap query and asserts both that every returned feature actually
     * overlaps [start, stop] and that the total record count matches the expectation.
     */
    private void testQuery(final AbstractFeatureReader<Feature, ?> reader, final String chr, final int start, final int stop, final int expectedNumRecords) throws IOException {
        final Iterator<Feature> iter = reader.query(chr, start, stop);
        int count = 0;
        while (iter.hasNext()) {
            final Feature f = iter.next();
            // Overlap condition for 1-based closed intervals.
            Assert.assertTrue(f.getEnd() >= start && f.getStart() <= stop);
            count++;
        }
        Assert.assertEquals(count, expectedNumRecords);
    }

    /** Verifies the reader reports exactly the sequence names present in the test data. */
    @Test(dataProvider = "indexProvider")
    public void testBedNames(final File featureFile, final IndexFactory.IndexType indexType, final FeatureCodec<Feature, LocationAware> codec) throws IOException {
        // try-with-resources: the original never closed this reader at all (resource leak).
        try (final AbstractFeatureReader<Feature, ?> reader = getReader(featureFile, indexType, codec)) {
            final String[] expectedSequences = {"chr1", "chr2"};
            final List<String> seqNames = reader.getSequenceNames();
            Assert.assertEquals(seqNames.size(), expectedSequences.length,
                    "Expected sequences " + ParsingUtils.join(",", expectedSequences) + " but saw " + ParsingUtils.join(",", seqNames));
            for (final String s : expectedSequences) {
                Assert.assertTrue(seqNames.contains(s));
            }
        }
    }

    /**
     * Opens an indexed reader over {@code featureFile}, building a fresh index on disk for
     * index types that support creation; for other types (e.g. tabix) the index must
     * already exist next to the file.
     */
    private static <FEATURE extends Feature, SOURCE extends LocationAware> AbstractFeatureReader<FEATURE, SOURCE> getReader(final File featureFile,
                                                                                                                            final IndexFactory.IndexType indexType,
                                                                                                                            final FeatureCodec<FEATURE, SOURCE> codec)
            throws IOException {
        if (indexType.canCreate()) {
            // For creatable types, rebuild the index each time so tests never reuse a stale one.
            final File idxFile = Tribble.indexFile(featureFile);
            if (idxFile.exists()) {
                idxFile.delete();
            }
            final Index idx = IndexFactory.createIndex(featureFile, codec, indexType);
            IndexFactory.writeIndex(idx, idxFile);
            idxFile.deleteOnExit();
        } // else let's just hope the index exists, and if so use it
        return AbstractFeatureReader.getFeatureReader(featureFile.getAbsolutePath(), codec);
    }

    /**
     * Regression test for reading past a 2 GB (int-sized) boundary through
     * {@link TribbleIndexedFeatureReader.BlockStreamWrapper}, using a block whose size
     * exceeds {@code Integer.MAX_VALUE}.
     *
     * NOTE(review): relies on /dev/zero, so this test only runs on Unix-like systems.
     */
    @Test
    public void testReadingBeyondIntSizedBlock() throws IOException {
        final Block block = new Block(0, ((long) Integer.MAX_VALUE) * 2);
        final SeekableFileStream stream = new SeekableFileStream(new File("/dev/zero"));
        try {
            final TribbleIndexedFeatureReader.BlockStreamWrapper blockStreamWrapper = new TribbleIndexedFeatureReader.BlockStreamWrapper(stream, block);
            final int chunkSize = 100000; // read in ~100 KB chunks (original comment said "10 Mb", which was wrong)
            final int chunksToRead = (int) Math.ceil(block.getSize() / (chunkSize * 1.0));
            final byte[] bytes = new byte[chunkSize];
            long totalRead = 0;
            for (int chunk = 0; chunk < chunksToRead; chunk++) {
                final int nRead = blockStreamWrapper.read(bytes);
                Assert.assertTrue(nRead != -1, "Prematurely got EOF after " + totalRead + " bytes");
                totalRead += nRead;
            }
            // /dev/zero doesn't advance file stream on Linux, so reading never terminates
            // Therefore, we only require a minimum number of bytes
            Assert.assertTrue(totalRead >= block.getSize(), "Failed to read all bytes from a block with size > 2B = " + block.getSize());
        } finally {
            // The original never closed this stream; release the file handle unconditionally.
            CloserUtil.close(stream);
        }
    }
}