/*
* Copyright (c) 2009-2010 by The Broad Institute, Inc.
* All Rights Reserved.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL), Version 2.1 which
* is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*
* THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR WARRANTIES OF
* ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT
* OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR
* RESPECTIVE TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES OF
* ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES, ECONOMIC
* DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER THE BROAD OR MIT SHALL
* BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE
* FOREGOING.
*/
package htsjdk.tribble.index.linear;
import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.CloseableTribbleIterator;
import htsjdk.tribble.FeatureReader;
import htsjdk.tribble.TestUtils;
import htsjdk.tribble.bed.BEDCodec;
import htsjdk.tribble.bed.BEDFeature;
import htsjdk.tribble.index.Block;
import htsjdk.tribble.index.Index;
import htsjdk.tribble.index.IndexFactory;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Tests for {@link LinearIndex}: accessor and equality checks against a small hand-built index,
 * plus indexing and querying of BED files located under {@link TestUtils#DATA_DIR}.
 */
public class LinearIndexTest {
    private static final File RANDOM_FILE = new File("notMeaningful");

    private final static Block CHR1_B1 = new Block(1, 10);
    private final static Block CHR1_B2 = new Block(10, 20);
    private final static Block CHR1_B3 = new Block(20, 30);
    private final static Block CHR2_B1 = new Block(1, 100);
    private final static Block CHR2_B2 = new Block(100, 200);

    /** Hand-built index shared by the tests; assigned in {@link #setup()}. */
    private LinearIndex idx;

    /** BED file indexed and read back by {@link #oneEntryFirstChr()}. */
    File fakeBed = new File(TestUtils.DATA_DIR + "fakeBed.bed");

    /** Builds the shared hand-made index via {@link #createTestIndex()}. */
    @BeforeTest
    public void setup() {
        idx = createTestIndex();
    }

    /**
     * Creates a two-chromosome index: three blocks on "chr1" and two blocks on "chr2", associated
     * with {@link #RANDOM_FILE}.
     *
     * Intended layout (per the original author's notes):
     * <pre>
     * chr1 (0, 10]
     * chr1 (10, 20]
     * chr1 (20, 30]
     * chr2 (0, 100]
     * chr2 (100, 200]
     * </pre>
     *
     * @return a freshly constructed index; each call returns a new instance
     */
    private static LinearIndex createTestIndex() {
        final LinearIndex.ChrIndex chr1 = new LinearIndex.ChrIndex("chr1", 10);
        chr1.addBlock(CHR1_B1);
        chr1.addBlock(CHR1_B2);
        chr1.addBlock(CHR1_B3);
        chr1.updateLongestFeature(1);

        final LinearIndex.ChrIndex chr2 = new LinearIndex.ChrIndex("chr2", 100);
        chr2.addBlock(CHR2_B1);
        chr2.addBlock(CHR2_B2);
        chr2.updateLongestFeature(50);

        final List<LinearIndex.ChrIndex> indices = Arrays.asList(chr1, chr2);
        return new LinearIndex(indices, RANDOM_FILE);
    }

    /** Checks type, flags, sequence names, indexed file and per-chromosome block counts of the hand-built index. */
    @Test()
    public void testBasicFeatures() {
        Assert.assertEquals(idx.getChrIndexClass(), LinearIndex.ChrIndex.class);
        Assert.assertEquals(idx.getType(), LinearIndex.INDEX_TYPE);
        Assert.assertFalse(idx.hasFileSize());
        Assert.assertFalse(idx.hasTimestamp());
        Assert.assertFalse(idx.hasMD5());
        Assert.assertTrue(idx.isCurrentVersion());

        Assert.assertNotNull(idx.getSequenceNames());
        Assert.assertEquals(idx.getSequenceNames().size(), 2);
        Assert.assertTrue(idx.getSequenceNames().contains("chr1"));
        Assert.assertTrue(idx.getSequenceNames().contains("chr2"));

        Assert.assertTrue(idx.containsChromosome("chr1"));
        Assert.assertTrue(idx.containsChromosome("chr2"));
        Assert.assertFalse(idx.containsChromosome("chr3"));

        Assert.assertEquals(idx.getIndexedFile(), new File(RANDOM_FILE.getAbsolutePath()));

        Assert.assertNotNull(idx.getBlocks("chr1"));
        Assert.assertEquals(idx.getBlocks("chr1").size(), 3);
        Assert.assertNotNull(idx.getBlocks("chr2"));
        Assert.assertEquals(idx.getBlocks("chr2").size(), 2);
    }

    /** Checks {@code equals} and {@code equalsIgnoreProperties} on identical and identically-built indices. */
    @Test()
    public void testEquals() {
        final LinearIndex idx2 = createTestIndex();

        Assert.assertEquals(idx, idx, "Identical indices are equal");
        Assert.assertTrue(idx.equalsIgnoreProperties(idx), "Identical indices are equalIgnoreTimeStamp");
        Assert.assertTrue(idx.equalsIgnoreProperties(idx2), "Indices constructed the same are equalIgnoreTimeStamp");

        idx2.setTS(123456789);
        // NOTE(review): assertNotSame only checks reference identity, so it holds for any two distinct
        // instances regardless of the timestamp. If equals() is meant to consider the timestamp this
        // should probably be assertNotEquals -- confirm against the index's equals() before changing.
        Assert.assertNotSame(idx, idx2, "Indices with different timestamps are not the same");
        Assert.assertTrue(idx.equalsIgnoreProperties(idx2), "Indices with different timestamps are equalIgnoreTimeStamp");
    }

    // chr1 (0, 10]
    // chr1 (10, 20]
    // chr1 (20, 30]
    // chr2 (0, 100]
    // chr2 (100, 200]
    // TODO -- this is not a useful test as written -- the linear index always returns a single block since by
    // TODO -- definition they are contiguous and can be collapsed to a single block.
    /** Disabled: block-level query expectations against the hand-built index (see TODO above). */
    @Test(enabled = false)
    public void testBasicQuery() {
        testQuery("chr1", 1, 1, CHR1_B1);
        testQuery("chr1", 1, 2, CHR1_B1);
        testQuery("chr1", 1, 9, CHR1_B1);
        testQuery("chr1", 10, 10, CHR1_B1);

        testQuery("chr1", 10, 11, CHR1_B1, CHR1_B2);
        testQuery("chr1", 11, 11, CHR1_B2);
        testQuery("chr1", 11, 12, CHR1_B2);
        testQuery("chr1", 11, 19, CHR1_B2);

        testQuery("chr1", 10, 19, CHR1_B1, CHR1_B2);
        testQuery("chr1", 10, 21, CHR1_B1, CHR1_B2, CHR1_B3);
        testQuery("chr1", 25, 30, CHR1_B3);
        testQuery("chr1", 35, 40);

        testQuery("chr2", 1, 1, CHR2_B1);
        testQuery("chr2", 100, 100, CHR2_B1);
        testQuery("chr2", 125, 125, CHR2_B1, CHR2_B2); // because of the 50 bp events
        testQuery("chr2", 151, 151, CHR2_B2); // because of the 50 bp events
        testQuery("chr2", 249, 249, CHR2_B2); // because of the 50 bp events
        testQuery("chr2", 251, 251); // just escaping the 50 bp longest event
    }

    /**
     * Queries the shared index and asserts that the returned blocks match the expected ones, in order.
     *
     * @param chr                 chromosome name to query
     * @param start               query start passed to {@code getBlocks}
     * @param stop                query end passed to {@code getBlocks}
     * @param expectedBlocksArray blocks expected back, in order; none means an empty result is expected
     */
    private void testQuery(final String chr, final int start, final int stop, final Block... expectedBlocksArray) {
        final List<Block> qBlocks = idx.getBlocks(chr, start, stop);
        final List<Block> eBlocks = Arrays.asList(expectedBlocksArray);

        Assert.assertEquals(qBlocks.size(), eBlocks.size(),
                String.format("Query %s:%d-%d returned %d blocks but we only expected %d.", chr, start, stop, qBlocks.size(), eBlocks.size()));
        for (int i = 0; i < qBlocks.size(); i++) {
            Assert.assertEquals(qBlocks.get(i), eBlocks.get(i));
        }
    }

    /**
     * Builds a linear index for {@link #fakeBed}, iterates the whole file through a reader that uses
     * that index, and checks that 51 features are returned.
     */
    @Test
    public void oneEntryFirstChr() {
        final BEDCodec codec = new BEDCodec();
        final Index index = IndexFactory.createLinearIndex(fakeBed, codec);
        final FeatureReader<BEDFeature> reader = AbstractFeatureReader.getFeatureReader(fakeBed.getAbsolutePath(), codec, index);

        try {
            try {
                final CloseableTribbleIterator<BEDFeature> it = reader.iterator();
                try {
                    int count = 0;
                    while (it.hasNext()) {
                        it.next();
                        count++;
                    }
                    // TestNG order is (actual, expected).
                    Assert.assertEquals(count, 51);
                } finally {
                    it.close();
                }
            } finally {
                reader.close();
            }
        } catch (final IOException e) {
            // Pass the exception along so the original stack trace is reported with the failure.
            Assert.fail("Unable to get iterator due to " + e.getMessage(), e);
        }
    }

    /**
     * Queries a chr2 interval of the Unigene sample BED file and checks that exactly the expected
     * overlapping features come back, first with adaptive indexing disabled and then enabled.
     *
     * Relevant rows of the file:
     * <pre>
     * chr2 1 200000000 LONG_FEATURE
     * ...
     * chr2 179098961 179380395 Hs.134602
     * chr2 179209546 179287210 Hs.620337
     * chr2 179266309 179266748 Hs.609465
     * chr2 179296428 179300012 Hs.623987
     * chr2 179302952 179303488 Hs.594545
     * </pre>
     */
    @Test
    public void testOverlappingFeatures() throws Exception {
        //chr2:179,222,066-179,262,059<- CONTAINS TTN
        final Set<String> names = new HashSet<String>(Arrays.asList("Hs.134602", "Hs.620337", "Hs.609465", "Hs.623987",
                "Hs.594545", "LONG_FEATURE"));

        final String bedFile = TestUtils.DATA_DIR + "bed/Unigene.sample.bed";
        final String chr = "chr2";
        final int start = 179266309;
        final int end = 179303488;
        final int expectedCount = 6;
        final int binSize = 1000;

        // enableAdaptiveIndexing is a static switch; remember it so a failure part-way through
        // cannot leave it in a different state for tests that run afterwards.
        final boolean adaptiveBefore = LinearIndex.enableAdaptiveIndexing;
        try {
            // Linear binned index
            LinearIndex.enableAdaptiveIndexing = false;
            assertOverlapQuery(bedFile, binSize, chr, start, end, names, expectedCount);

            //Repeat with adaptive indexing
            LinearIndex.enableAdaptiveIndexing = true;
            assertOverlapQuery(bedFile, binSize, chr, start, end, names, expectedCount);
        } finally {
            LinearIndex.enableAdaptiveIndexing = adaptiveBefore;
        }
    }

    /**
     * Indexes {@code bedFile} with a linear index, runs one interval query through a reader using
     * that index, and asserts that every returned feature overlaps the interval, has one of the
     * allowed names, and that the total matches {@code expectedCount}. The iterator and reader are
     * closed before returning.
     *
     * @param bedFile       path of the BED file to index and query
     * @param binSize       bin size passed to {@code IndexFactory.createLinearIndex}
     * @param chr           chromosome to query
     * @param start         query start
     * @param end           query end
     * @param names         feature names allowed in the result
     * @param expectedCount number of features the query must return
     * @throws IOException if querying or closing the reader fails
     */
    private static void assertOverlapQuery(final String bedFile, final int binSize, final String chr,
                                           final int start, final int end, final Set<String> names,
                                           final int expectedCount) throws IOException {
        final Index index = IndexFactory.createLinearIndex(new File(bedFile), new BEDCodec(), binSize);
        final FeatureReader<BEDFeature> reader = AbstractFeatureReader.getFeatureReader(bedFile, new BEDCodec(), index);
        try {
            final CloseableTribbleIterator<BEDFeature> iter = reader.query(chr, start, end);
            try {
                int countInterval = 0;
                while (iter.hasNext()) {
                    final BEDFeature feature = iter.next();
                    Assert.assertTrue(feature.getEnd() >= start && feature.getStart() <= end);
                    Assert.assertTrue(names.contains(feature.getName()));
                    countInterval++;
                }
                Assert.assertEquals(countInterval, expectedCount);
            } finally {
                iter.close();
            }
        } finally {
            reader.close();
        }
    }
}