/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package htsjdk.tribble.bed;
import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.Feature;
import htsjdk.tribble.TestUtils;
import htsjdk.tribble.annotation.Strand;
import htsjdk.tribble.bed.FullBEDFeature.Exon;
import htsjdk.tribble.index.IndexFactory;
import htsjdk.tribble.index.linear.LinearIndex;
import htsjdk.tribble.util.LittleEndianOutputStream;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.awt.*;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
/**
 * Tests for {@link BEDCodec}: decoding of individual BED lines (minimal and
 * full 12-column records) and of whole BED files through an
 * {@link AbstractFeatureReader}.
 *
 * Throughout these tests the decoded start is the BED start column plus one,
 * while the end is the BED end column unchanged.
 */
public class BEDCodecTest {

    /**
     * Decodes lines carrying only the contig and coordinates and checks the
     * resulting contig, start and end.
     */
    @Test
    public void testSimpleDecode() {
        BEDCodec codec = new BEDCodec();
        BEDFeature feature;

        // No end column: the end is expected to equal the (incremented) start.
        feature = codec.decode("chr1 1");
        Assert.assertEquals(feature.getChr(), "chr1");
        Assert.assertEquals(feature.getStart(), 2);
        Assert.assertEquals(feature.getEnd(), 2);

        feature = codec.decode("chr1 1 2");
        Assert.assertEquals(feature.getChr(), "chr1");
        Assert.assertEquals(feature.getStart(), 2);
        Assert.assertEquals(feature.getEnd(), 2);

        feature = codec.decode("chr1 1 3");
        Assert.assertEquals(feature.getChr(), "chr1");
        Assert.assertEquals(feature.getStart(), 2);
        Assert.assertEquals(feature.getEnd(), 3);
    }

    /**
     * Decodes two full BED records (one per strand) and checks every decoded
     * field, including the exon list.
     */
    @Test
    public void testFullDecode() {
        BEDCodec codec = new BEDCodec();
        FullBEDFeature feature;
        List<Exon> exons;

        // Borrowed samples from Example: on http://genome.ucsc.edu/FAQ/FAQformat#format1
        feature = (FullBEDFeature) codec.decode("chr22 1000 5000 cloneA 960 + 1000 5000 0 2 567,488, 0,3512");
        Assert.assertEquals(feature.getChr(), "chr22");
        Assert.assertEquals(feature.getStart(), 1001);
        Assert.assertEquals(feature.getEnd(), 5000);
        Assert.assertEquals(feature.getName(), "cloneA");
        Assert.assertEquals(feature.getScore(), 960f);
        Assert.assertEquals(feature.getStrand(), Strand.POSITIVE);
        Assert.assertEquals(feature.getColor(), new Color(0));

        exons = feature.getExons();
        Assert.assertEquals(exons.size(), 2);
        assertExon(exons.get(0), 1, 1001, 1567, 567);
        assertExon(exons.get(1), 2, 4513, 5000, 488);

        feature = (FullBEDFeature) codec.decode("chr22 2000 6000 cloneB 900 - 2000 6000 0 2 433,399, 0,3601");
        Assert.assertEquals(feature.getChr(), "chr22");
        Assert.assertEquals(feature.getStart(), 2001);
        Assert.assertEquals(feature.getEnd(), 6000);
        Assert.assertEquals(feature.getName(), "cloneB");
        Assert.assertEquals(feature.getScore(), 900f);
        Assert.assertEquals(feature.getStrand(), Strand.NEGATIVE);
        Assert.assertEquals(feature.getColor(), new Color(0));

        // On the negative strand the exon numbers are expected in reverse
        // order relative to their position in the list.
        exons = feature.getExons();
        Assert.assertEquals(exons.size(), 2);
        assertExon(exons.get(0), 2, 2001, 2433, 433);
        assertExon(exons.get(1), 1, 5602, 6000, 399);
    }

    /**
     * Asserts the number, coordinates and coding length of a single exon.
     * The coding start/end are checked against the same values as the exon
     * start/end, which is what both records in {@link #testFullDecode()} expect.
     *
     * @param exon         the exon under test
     * @param number       expected exon number
     * @param start        expected exon start and coding start
     * @param end          expected exon end and coding end
     * @param codingLength expected coding length
     */
    private static void assertExon(Exon exon, int number, int start, int end, int codingLength) {
        Assert.assertEquals(exon.getNumber(), number);
        Assert.assertEquals(exon.start, start);
        Assert.assertEquals(exon.end, end);
        Assert.assertEquals(exon.getCdStart(), start);
        Assert.assertEquals(exon.getCdEnd(), end);
        Assert.assertEquals(exon.getCodingLength(), codingLength);
    }

    /**
     * Reads a well-formed BED file end to end, spot-checking three records
     * and the total number of features.
     *
     * @throws Exception if the reader cannot be opened, iterated or closed
     */
    @Test
    public void testDecodeBEDFile_good() throws Exception {
        String filepath = TestUtils.DATA_DIR + "bed/NA12878.deletions.10kbp.het.gq99.hand_curated.hg19_fixed.bed";
        int expected_lines = 34;
        /*
        Line 0:
        1 25592413 25657872
        Line 3:
        1 152555536 152587611
        Line 28:
        14 73996607 74025282
        Remember tribble increments numbers by 1
        */
        BEDCodec codec = new BEDCodec();
        AbstractFeatureReader reader = AbstractFeatureReader.getFeatureReader(filepath, codec, false);
        // try/finally so the reader is released even when an assertion fails.
        try {
            Iterable<Feature> iter = reader.iterator();
            int count = 0;
            for (Feature feat : iter) {
                Assert.assertTrue(feat.getChr().length() > 0);
                Assert.assertTrue(feat.getEnd() >= feat.getStart());

                // TestNG's argument order is (actual, expected).
                if (count == 0) {
                    Assert.assertEquals(feat.getChr(), "1");
                    Assert.assertEquals(feat.getStart(), 25592413 + 1);
                    Assert.assertEquals(feat.getEnd(), 25657872);
                }
                if (count == 3) {
                    Assert.assertEquals(feat.getChr(), "1");
                    Assert.assertEquals(feat.getStart(), 152555536 + 1);
                    Assert.assertEquals(feat.getEnd(), 152587611);
                }
                if (count == 28) {
                    Assert.assertEquals(feat.getChr(), "14");
                    Assert.assertEquals(feat.getStart(), 73996607 + 1);
                    Assert.assertEquals(feat.getEnd(), 74025282);
                }
                count += 1;
            }
            Assert.assertEquals(count, expected_lines);
        } finally {
            reader.close();
        }
    }

    /**
     * Test reading a BED file which is malformed. Iterating over the file is
     * expected to fail with a {@link RuntimeException}.
     *
     * @throws Exception if the reader cannot be opened or closed
     */
    @Test(expectedExceptions = RuntimeException.class)
    public void testDecodeBEDFile_bad() throws Exception {
        //This file has an extra tab in the second to last line
        String filepath = TestUtils.DATA_DIR + "bed/NA12878.deletions.10kbp.het.gq99.hand_curated.hg19.bed";
        //The iterator implementation next() actually performs a get / read_next. The bad line is number 32,
        //so only 31 features should be returned before that line is read (this count is not asserted here).
        BEDCodec codec = new BEDCodec();
        AbstractFeatureReader reader = AbstractFeatureReader.getFeatureReader(filepath, codec, false);
        // The expected exception escapes the loop, so close in finally;
        // otherwise the reader would never be released on the expected path.
        try {
            Iterable<Feature> iter = reader.iterator();
            for (Feature feat : iter) {
                // Nothing to check per feature: draining the iterator is what
                // triggers the failure on the malformed line.
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Builds a linear index for {@code testFile} and writes it to
     * {@code idxFile}, replacing any index file already present.
     * Not referenced by the tests in this class at present.
     *
     * @param testFile the BED file to index
     * @param idxFile  destination of the serialized index
     * @throws IOException if the index cannot be written
     */
    private void createIndex(File testFile, File idxFile) throws IOException {
        // Always rebuild: discard any previous index before writing a new one.
        if (idxFile.exists()) {
            idxFile.delete();
        }
        LinearIndex idx = (LinearIndex) IndexFactory.createLinearIndex(testFile, new BEDCodec());
        LittleEndianOutputStream stream = null;
        try {
            stream = new LittleEndianOutputStream(new BufferedOutputStream(new FileOutputStream(idxFile)));
            idx.write(stream);
        } finally {
            if (stream != null) {
                stream.close();
            }
        }
    }
}