/*
* The MIT License
*
* Copyright (c) 2013 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.tribble;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.tribble.readers.LineReader;
import htsjdk.tribble.readers.LineReaderUtil;
import htsjdk.tribble.readers.PositionalBufferedStream;
import htsjdk.tribble.readers.TabixIteratorLineReader;
import htsjdk.tribble.readers.TabixReader;
import htsjdk.tribble.util.ParsingUtils;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
* @author Jim Robinson
* @since 2/11/12
*/
public class TabixFeatureReader<T extends Feature, SOURCE> extends AbstractFeatureReader<T, SOURCE> {
TabixReader tabixReader;
List<String> sequenceNames;
/**
*
* @param featureFile - path to a feature file. Can be a local file, http url, or ftp url
* @param codec
* @throws IOException
*/
public TabixFeatureReader(final String featureFile, final AsciiFeatureCodec codec) throws IOException {
super(featureFile, codec);
tabixReader = new TabixReader(featureFile);
sequenceNames = new ArrayList<String>(tabixReader.getChromosomes());
readHeader();
}
/**
*
* @param featureFile - path to a feature file. Can be a local file, http url, or ftp url
* @param indexFile - path to the index file.
* @param codec
* @throws IOException
*/
public TabixFeatureReader(final String featureFile, final String indexFile, final AsciiFeatureCodec codec) throws IOException {
super(featureFile, codec);
tabixReader = new TabixReader(featureFile, indexFile);
sequenceNames = new ArrayList<String>(tabixReader.getChromosomes());
readHeader();
}
/**
* read the header
*
* @return a Object, representing the file header, if available
* @throws IOException throws an IOException if we can't open the file
*/
private void readHeader() throws IOException {
SOURCE source = null;
try {
source = codec.makeSourceFromStream(new PositionalBufferedStream(new BlockCompressedInputStream(ParsingUtils.openInputStream(path))));
header = codec.readHeader(source);
} catch (Exception e) {
throw new TribbleException.MalformedFeatureFile("Unable to parse header with error: " + e.getMessage(), path, e);
} finally {
if (source != null) {
codec.close(source);
}
}
}
@Override
public boolean hasIndex(){
return true;
}
public List<String> getSequenceNames() {
return sequenceNames;
}
/**
* Return iterator over all features overlapping the given interval
*
* @param chr
* @param start
* @param end
* @return
* @throws IOException
*/
public CloseableTribbleIterator<T> query(final String chr, final int start, final int end) throws IOException {
final List<String> mp = getSequenceNames();
if (mp == null) throw new TribbleException.TabixReaderFailure("Unable to find sequence named " + chr +
" in the tabix index. ", path);
if (!mp.contains(chr)) {
return new EmptyIterator<T>();
}
final TabixIteratorLineReader lineReader = new TabixIteratorLineReader(tabixReader.query(tabixReader.chr2tid(chr), start - 1, end));
return new FeatureIterator<T>(lineReader, start - 1, end);
}
public CloseableTribbleIterator<T> iterator() throws IOException {
final InputStream is = new BlockCompressedInputStream(ParsingUtils.openInputStream(path));
final PositionalBufferedStream stream = new PositionalBufferedStream(is);
final LineReader reader = LineReaderUtil.fromBufferedStream(stream, LineReaderUtil.LineReaderOption.SYNCHRONOUS);
return new FeatureIterator<T>(reader, 0, Integer.MAX_VALUE);
}
public void close() throws IOException {
tabixReader.close();
}
class FeatureIterator<T extends Feature> implements CloseableTribbleIterator<T> {
private T currentRecord;
private LineReader lineReader;
private int start;
private int end;
public FeatureIterator(final LineReader lineReader, final int start, final int end) throws IOException {
this.lineReader = lineReader;
this.start = start;
this.end = end;
readNextRecord();
}
/**
* Advance to the next record in the query interval.
*
* @throws IOException
*/
protected void readNextRecord() throws IOException {
currentRecord = null;
String nextLine;
while (currentRecord == null && (nextLine = lineReader.readLine()) != null) {
final Feature f;
try {
f = ((AsciiFeatureCodec)codec).decode(nextLine);
if (f == null) {
continue; // Skip
}
if (f.getStart() > end) {
return; // Done
}
if (f.getEnd() <= start) {
continue; // Skip
}
currentRecord = (T) f;
} catch (TribbleException e) {
e.setSource(path);
throw e;
} catch (NumberFormatException e) {
String error = "Error parsing line: " + nextLine;
throw new TribbleException.MalformedFeatureFile(error, path, e);
}
}
}
public boolean hasNext() {
return currentRecord != null;
}
public T next() {
T ret = currentRecord;
try {
readNextRecord();
} catch (IOException e) {
throw new RuntimeException("Unable to read the next record, the last record was at " +
ret.getChr() + ":" + ret.getStart() + "-" + ret.getEnd(), e);
}
return ret;
}
public void remove() {
throw new UnsupportedOperationException("Remove is not supported in Iterators");
}
public void close() {
lineReader.close();
}
public Iterator<T> iterator() {
return this;
}
}
}