package com.illumina.basespace.igv.vcf;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import net.sf.samtools.seekablestream.SeekableStream;
import net.sf.samtools.util.BlockCompressedInputStream;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.CloseableTribbleIterator;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.TribbleException;
import org.broad.tribble.index.Block;
import org.broad.tribble.index.Index;
import org.broad.tribble.readers.PositionalBufferedStream;
import com.illumina.basespace.igv.BaseSpaceMain;
import com.illumina.basespace.igv.BaseSpaceUtil;
import com.illumina.basespace.igv.io.BaseSpaceSeekableFileStream;
import com.illumina.basespace.igv.vcf.VCFLocatorFactory.VCFTrackLoader;
/**
*
* A reader for text feature files (i.e. not tabix files). This includes
* tribble-indexed and non-indexed files. If an index is available, both the
* iterator() and query() methods are supported; without an index only
* iterator() can be used.
*
* <p/>
*
* Note: Non-indexed files can be gzipped, but not bgzipped.
*
*
*
* @author Jim Robinson
*
* @since 2/11/12
*/
public class BaseSpaceTribbleFeatureReader<T extends Feature> extends AbstractFeatureReader
{
private Index index;
private VCFTrackLoader locator;
/**
*
* Creates a reader for the file described by the given locator and
* immediately reads the file header with the supplied codec.
*
* NOTE(review): nothing in this class assigns the index field, so after
* construction query() will report a missing index - confirm whether index
* loading is expected to happen here.
*
* @param locator
* - describes the file to read; its client id, file and path are
* used to open streams and to label errors
*
* @param codec
* - codec used to read the header and to decode the features
*
* @throws IOException
* declared for callers; header failures are reported by
* readHeader() as TribbleException.MalformedFeatureFile
*/
public BaseSpaceTribbleFeatureReader(VCFTrackLoader locator, FeatureCodec codec) throws IOException
{
// null is passed as the first superclass argument (presumably the feature
// path - TODO confirm); the file is addressed through the locator instead.
super(null, codec);
this.locator = locator;
// The header is read eagerly so a bad file fails at construction time.
readHeader();
}
/**
*
* Closes this reader. This is a no-op: the reader keeps no stream open
* between calls. The header stream is disposed inside readHeader() and each
* iterator owns the stream it reads from, so callers must close the
* iterators they obtain.
*
* @throws IOException
* never thrown by this implementation
*/
public void close() throws IOException
{
// Nothing to do -- streams are opened and closed in the iterator
// classes
}
/**
 * Lists the sequence (chromosome/contig) names recorded in the index.
 *
 * @return a new, caller-owned list of the index's sequence names; empty
 *         when no index is available
 */
public List<String> getSequenceNames()
{
    final List<String> names = new ArrayList<String>();
    if (index != null)
    {
        // Copy so callers never see (or modify) the index's own collection.
        names.addAll(index.getSequenceNames());
    }
    return names;
}
/**
 * Opens the file through the BaseSpace API client, lets the codec parse the
 * header and stores the result in the inherited header field. The stream
 * used for parsing is always disposed before returning.
 *
 * @throws IOException
 *             declared for callers; every failure raised while opening or
 *             parsing is rethrown as TribbleException.MalformedFeatureFile
 */
private void readHeader() throws IOException
{
    PositionalBufferedStream headerStream = null;
    try
    {
        final InputStream raw = BaseSpaceMain.instance()
                .getApiClient(locator.getClientId())
                .getFileInputStream(locator.getFile());
        headerStream = new PositionalBufferedStream(raw);
        header = codec.readHeader(headerStream);
    }
    catch (Exception cause)
    {
        // Any failure here is reported as a malformed file, keeping the cause.
        final String message = "Unable to parse header with error: " + cause.getMessage();
        throw new TribbleException.MalformedFeatureFile(message, locator.getPath(), cause);
    }
    finally
    {
        BaseSpaceUtil.dispose(headerStream);
    }
}
/**
 * Returns an iterator over the features that overlap the given interval.
 *
 * @param chr
 *            contig to query
 * @param start
 *            start position of the interval
 * @param end
 *            end position of the interval
 * @return an iterator over the matching records; an empty iterator when the
 *         index does not contain {@code chr}
 * @throws TribbleException
 *             if no index is available for this file
 * @throws IOException
 *             if the query iterator cannot be opened
 */
public CloseableTribbleIterator query(String chr, int start, int end) throws IOException
{
    if (index == null)
    {
        throw new TribbleException("Index not found for: " + locator.getPath());
    }
    if (!index.containsChromosome(chr))
    {
        return new EmptyIterator();
    }
    // The index is asked for start - 1 (presumably a 1-based to 0-based
    // conversion - confirm); the iterator filters with the original start.
    final List<Block> overlapping = index.getBlocks(chr, start - 1, end);
    return new QueryIterator(chr, start, end, overlapping);
}
/**
 * Returns an iterator over every feature in the file. Each call opens a new
 * stream, so the caller is responsible for closing the returned iterator.
 *
 * @return an iterator positioned on the first record of the file
 * @throws IOException
 *             if the file cannot be opened or the first record cannot be read
 */
public CloseableTribbleIterator iterator() throws IOException
{
    final CloseableTribbleIterator wholeFile = new WFIterator();
    return wholeFile;
}
/**
 * Iterator over every feature in the file, read sequentially from a single
 * stream that is opened when the iterator is constructed. The next record is
 * always decoded one step ahead so that hasNext() is a simple null check.
 *
 * @param <T>
 *            feature type produced by the codec
 */
class WFIterator<T extends Feature> implements CloseableTribbleIterator
{
    private T currentRecord;
    private PositionalBufferedStream stream;

    /**
     * Opens the file, skips the header bytes when the header asks for it and
     * pre-loads the first record.
     *
     * @throws IOException
     *             if the file cannot be opened or the first record cannot be
     *             read; the stream is closed before the exception propagates
     */
    public WFIterator() throws IOException
    {
        final InputStream is = BaseSpaceMain.instance()
                .getApiClient(locator.getClientId()).getFileInputStream(locator.getFile());
        stream = new PositionalBufferedStream(is, 512000);
        boolean ready = false;
        try
        {
            if (header.skipHeaderBytes())
            {
                stream.skip(header.getHeaderEnd());
            }
            readNextRecord();
            ready = true;
        }
        finally
        {
            // If construction fails the caller never gets a reference to this
            // iterator, so nobody else could close the stream.
            if (!ready)
            {
                stream.close();
            }
        }
    }

    /** @return true while a pre-loaded record is waiting to be returned */
    public boolean hasNext()
    {
        return currentRecord != null;
    }

    /**
     * Returns the pre-loaded record and decodes the following one.
     *
     * @return the current record, or null once the file is exhausted
     * @throws RuntimeException
     *             wrapping the IOException if the following record cannot be read
     */
    public T next()
    {
        T ret = currentRecord;
        try
        {
            readNextRecord();
        }
        catch (IOException e)
        {
            throw new RuntimeException("Unable to read the next record, the last record was at " +
                    ret.getChr() + ":" + ret.getStart() + "-" + ret.getEnd(), e);
        }
        return ret;
    }

    /**
     * Decodes forward until the codec yields a feature or the stream ends,
     * leaving the result (or null at end of file) in currentRecord.
     *
     * @throws IOException
     *             if reading from the stream fails
     */
    private void readNextRecord() throws IOException
    {
        currentRecord = null;
        while (!stream.isDone())
        {
            Feature f = null;
            try
            {
                f = codec.decode(stream);
                if (f == null)
                {
                    // The codec produced no feature for this input; keep reading.
                    continue;
                }
                currentRecord = (T) f;
                return;
            }
            catch (TribbleException e)
            {
                // Attach the file path so the error identifies its source.
                e.setSource(locator.getPath());
                throw e;
            }
            catch (NumberFormatException e)
            {
                String error = "Error parsing line at byte position: " + stream.getPosition();
                throw new TribbleException.MalformedFeatureFile(error, locator.getPath(), e);
            }
        }
    }

    /** Always throws; removal is not supported. */
    public void remove()
    {
        throw new UnsupportedOperationException("Remove is not supported in Iterators");
    }

    /** Closes the underlying stream. */
    public void close()
    {
        stream.close();
    }

    /** @return this iterator, so it can be used in a for-each loop */
    public WFIterator<T> iterator()
    {
        return this;
    }
}
/**
 * Iterator over the features that overlap a query interval. It walks the
 * index blocks handed to the constructor, reading each non-empty block
 * through a seekable stream, and keeps one record decoded ahead so that
 * hasNext() is a simple null check.
 *
 * @param <T>
 *            feature type produced by the codec
 */
class QueryIterator<T extends Feature> implements CloseableTribbleIterator
{
    private String chr;
    private String chrAlias;
    int start;
    int end;
    private T currentRecord;
    private PositionalBufferedStream stream;
    private Iterator<Block> blockIterator;
    private SeekableStream seekableStream;

    /**
     * Opens a seekable stream on the file, positions it on the first
     * non-empty block and pre-loads the first matching record.
     *
     * @param chr
     *            contig being queried
     * @param start
     *            start of the query interval
     * @param end
     *            end of the query interval
     * @param blocks
     *            index blocks to scan, in order
     * @throws IOException
     *             if the stream cannot be opened or the first record cannot
     *             be read; the seekable stream is closed before the exception
     *             propagates
     */
    public QueryIterator(String chr, int start, int end, List<Block> blocks) throws IOException
    {
        this.chr = chr;
        this.start = start;
        this.end = end;
        blockIterator = blocks.iterator();
        // Only the seekable stream is opened here: all reads go through it, so
        // no additional file stream is created that would be left unclosed.
        seekableStream = new BaseSpaceSeekableFileStream(locator, locator.getFile());
        // seekableStream = SeekableStreamFactory.getStreamFor(path);
        boolean ready = false;
        try
        {
            advanceBlock();
            readNextRecord();
            ready = true;
        }
        finally
        {
            if (!ready)
            {
                // The caller never receives this iterator on failure, so the
                // stream has to be released here.
                try
                {
                    seekableStream.close();
                }
                catch (IOException ignored)
                {
                    // Deliberately ignored so the original failure is the one reported.
                }
            }
        }
        // The feature chromosome might not be the query chromosome, due to
        // alias definitions. We assume the chromosome of the first record is
        // correct and record it here. This is not pretty.
        chrAlias = (currentRecord == null ? chr : currentRecord.getChr());
    }

    /** @return true while a pre-loaded record is waiting to be returned */
    public boolean hasNext()
    {
        return currentRecord != null;
    }

    /**
     * Returns the pre-loaded record and decodes the following one.
     *
     * @return the current record, or null once the query is exhausted
     * @throws RuntimeException
     *             wrapping the IOException if the following record cannot be read
     */
    public T next()
    {
        T ret = currentRecord;
        try
        {
            readNextRecord();
        }
        catch (IOException e)
        {
            throw new RuntimeException("Unable to read the next record, the last record was at " +
                    ret.getChr() + ":" + ret.getStart() + "-" + ret.getEnd(), e);
        }
        return ret;
    }

    /**
     * Moves to the next block that contains data and opens a buffered,
     * block-limited stream on it. When no such block remains, the current
     * stream is closed and set to null.
     *
     * @throws IOException
     *             if seeking to the block fails
     */
    private void advanceBlock() throws IOException
    {
        while (blockIterator != null && blockIterator.hasNext())
        {
            Block block = blockIterator.next();
            if (block.getSize() > 0)
            {
                seekableStream.seek(block.getStartPosition());
                // Buffer at most 2,000,000 bytes; smaller blocks get a buffer of their own size.
                int bufferSize = Math.min(2000000, block.getSize() > 100000000 ? 10000000 : (int) block.getSize());
                stream = new PositionalBufferedStream(new BlockStreamWrapper(seekableStream, block), bufferSize);
                // note we don't have to skip the header here as the block
                // should never start in the header
                return;
            }
        }
        // If we get here the blocks are exhausted, set reader to null
        if (stream != null)
        {
            stream.close();
            stream = null;
        }
    }

    /**
     * Advances to the next record in the query interval, leaving it (or null
     * when nothing is left) in currentRecord.
     *
     * @throws IOException
     *             if reading from the stream fails
     */
    private void readNextRecord() throws IOException
    {
        currentRecord = null;
        if (stream == null)
        {
            return; // <= no more features to read
        }
        while (true)
        { // Loop through blocks
            while (!stream.isDone())
            { // Loop through current block
                try
                {
                    final Feature f = codec.decode(stream);
                    if (f == null)
                    {
                        continue; // Skip
                    }
                    final boolean otherContig = chrAlias != null && !f.getChr().equals(chrAlias);
                    if (otherContig || f.getStart() > end)
                    {
                        // Past the interval in this block; try the next one.
                        if (!blockIterator.hasNext())
                        {
                            return; // Done
                        }
                        advanceBlock();
                        if (stream == null)
                        {
                            // Every remaining block was empty; without this
                            // check the loop condition would dereference null.
                            return;
                        }
                        continue;
                    }
                    if (f.getEnd() < start)
                    {
                        continue; // Skip, feature lies before the interval
                    }
                    currentRecord = (T) f; // Success
                    return;
                }
                catch (TribbleException e)
                {
                    // Attach the file path so the error identifies its source.
                    e.setSource(locator.getPath());
                    throw e;
                }
                catch (NumberFormatException e)
                {
                    String error = "Error parsing line: " + stream.getPosition();
                    throw new TribbleException.MalformedFeatureFile(error, locator.getPath(), e);
                }
            }
            if (blockIterator == null || !blockIterator.hasNext())
            {
                return; // No blocks left, we're done.
            }
            advanceBlock(); // Advance to next block
            if (stream == null)
            {
                return; // Remaining blocks were all empty.
            }
        }
    }

    /** Always throws; removal is not supported. */
    public void remove()
    {
        throw new UnsupportedOperationException("Remove is not supported.");
    }

    /**
     * Closes the current block stream, if any, and the underlying seekable
     * stream.
     *
     * @throws TribbleException
     *             if the seekable stream cannot be closed
     */
    public void close()
    {
        if (stream != null) stream.close();
        try
        {
            seekableStream.close();
        }
        catch (IOException e)
        {
            throw new TribbleException("Couldn't close seekable stream", e);
        }
    }

    /** @return this iterator, so it can be used in a for-each loop */
    public Iterator<T> iterator()
    {
        return this;
    }
}
/**
 * Wrapper around a SeekableStream that limits reading to the specified
 * "block" of bytes. Reads at or beyond the block's end position return -1
 * (EOF); both read methods apply the same boundary.
 */
static class BlockStreamWrapper extends InputStream
{
    SeekableStream seekableStream;
    long maxPosition;

    /**
     * Positions the stream at the start of the block and records where the
     * block ends.
     *
     * @param seekableStream
     *            stream to read from; it is shared, not owned, by this wrapper
     * @param block
     *            block whose start and end positions bound the reads
     * @throws IOException
     *             if seeking to the block start fails
     */
    BlockStreamWrapper(SeekableStream seekableStream, Block block) throws IOException
    {
        this.seekableStream = seekableStream;
        seekableStream.seek(block.getStartPosition());
        maxPosition = block.getEndPosition();
    }

    /**
     * Reads a single byte, or returns -1 once the end of the block is reached.
     */
    @Override
    public int read() throws IOException
    {
        // Use >= so the boundary matches read(byte[], int, int), which
        // reports EOF as soon as no bytes remain before maxPosition.
        return (seekableStream.position() >= maxPosition) ? -1 : seekableStream.read();
    }

    /**
     * Reads up to len bytes without crossing the end of the block, or returns
     * -1 once the end of the block is reached.
     */
    @Override
    public int read(byte[] bytes, int off, int len) throws IOException
    {
        // note the careful treatment here to ensure we can continue to
        // read very long > Integer sized blocks
        long maxBytes = maxPosition - seekableStream.position();
        if (maxBytes <= 0)
        {
            return -1;
        }
        int bytesToRead = (int) Math.min(len, Math.min(maxBytes, Integer.MAX_VALUE));
        return seekableStream.read(bytes, off, bytesToRead);
    }
}
/**
 * Iterator that never yields a feature. query() returns it when the index
 * does not contain the requested contig.
 *
 * @param <T>
 *            feature type (unused, no feature is ever produced)
 */
static class EmptyIterator<T extends Feature> implements CloseableTribbleIterator
{
    /** @return always false, there is nothing to iterate over */
    public boolean hasNext()
    {
        return false;
    }

    /** @return always null */
    public Object next()
    {
        return null;
    }

    /** Does nothing. */
    public void remove()
    {
    }

    /** @return this iterator, so it can be used in a for-each loop */
    public Iterator iterator()
    {
        return this;
    }

    /** Does nothing, no resources are held. */
    @Override
    public void close()
    {
    }
}
}