package htsjdk.samtools;
import htsjdk.samtools.util.CloseableIterator;
import java.io.Closeable;
import java.text.MessageFormat;
/**
* Describes functionality for objects that produce {@link SAMRecord}s and associated information.
*
* @author mccowan
*/
public interface SamReader extends Iterable<SAMRecord>, Closeable {
/** Describes a type of SAM file. */
public abstract class Type {
/** A string representation of this type. */
abstract String name();
/** The recommended file extension for SAMs of this type, without a period. */
abstract String fileExtension();
/** The recommended file extension for SAM indexes of this type, without a period, or null if this type is not associated with indexes. */
abstract String indexExtension();
static class TypeImpl extends Type {
final String name, fileExtension, indexExtension;
TypeImpl(final String name, final String fileExtension, final String indexExtension) {
this.name = name;
this.fileExtension = fileExtension;
this.indexExtension = indexExtension;
}
@Override
String name() {
return name;
}
@Override
String fileExtension() {
return fileExtension;
}
@Override
String indexExtension() {
return indexExtension;
}
@Override
public String toString() {
return String.format("TypeImpl{name='%s', fileExtension='%s', indexExtension='%s'}", name, fileExtension, indexExtension);
}
}
public static Type BAM_TYPE = new TypeImpl("BAM", "bam", "bai");
public static Type SAM_TYPE = new TypeImpl("SAM", "sam", null);
}
/**
* Facet for index-related operations.
*/
public interface Indexing {
/**
* Retrieves the index for the given file type. Ensure that the index is of the specified type.
*
* @return An index of the given type.
*/
public BAMIndex getIndex();
/**
* Returns true if the supported index is browseable, meaning the bins in it can be traversed
* and chunk data inspected and retrieved.
*
* @return True if the index supports the BrowseableBAMIndex interface. False otherwise.
*/
public boolean hasBrowseableIndex();
/**
* Gets an index tagged with the BrowseableBAMIndex interface. Throws an exception if no such
* index is available.
*
* @return An index with a browseable interface, if possible.
* @throws SAMException if no such index is available.
*/
public BrowseableBAMIndex getBrowseableIndex();
/**
* Iterate through the given chunks in the file.
*
* @param chunks List of chunks for which to retrieve data.
* @return An iterator over the given chunks.
*/
public SAMRecordIterator iterator(final SAMFileSpan chunks);
/**
* Gets a pointer spanning all reads in the BAM file.
*
* @return Unbounded pointer to the first record, in chunk format.
*/
public SAMFileSpan getFilePointerSpanningReads();
}
public SAMFileHeader getFileHeader();
/**
* @return Answers {@code true} if this is a BAM reader.
*/
public Type type();
/**
* @return true if ths is a BAM file, and has an index
*/
public boolean hasIndex();
/**
* Exposes the {@link SamReader.Indexing} facet of this {@link SamReader}.
*
* @throws java.lang.UnsupportedOperationException If {@link #hasIndex()} returns false.
*/
public Indexing indexing();
/**
* Iterate through file in order. For a SAMFileReader constructed from an InputStream, and for any SAM file,
* a 2nd iteration starts where the 1st one left off. For a BAM constructed from a SeekableStream or File, each new iteration
* starts at the first record.
* <p/>
* Only a single open iterator on a SAM or BAM file may be extant at any one time. If you want to start
* a second iteration, the first one must be closed first.
*/
public SAMRecordIterator iterator();
/**
* Iterate over records that match the given interval. Only valid to call this if hasIndex() == true.
* <p/>
* Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
* a second iteration, the first one must be closed first. You can use a second SAMFileReader to iterate
* in parallel over the same underlying file.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
* and then discarded because they do not match the interval of interest.
* <p/>
* Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
* is in the query region.
*
* @param sequence Reference sequence of interest.
* @param start 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
* @param end 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
* @param contained If true, each SAMRecord returned is will have its alignment completely contained in the
* interval of interest. If false, the alignment of the returned SAMRecords need only overlap the interval of interest.
* @return Iterator over the SAMRecords matching the interval.
*/
public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained);
/**
* Iterate over records that overlap the given interval. Only valid to call this if hasIndex() == true.
* <p/>
* Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
* a second iteration, the first one must be closed first.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
* and then discarded because they do not match the interval of interest.
* <p/>
* Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
* is in the query region.
*
* @param sequence Reference sequence of interest.
* @param start 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
* @param end 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
* @return Iterator over the SAMRecords overlapping the interval.
*/
public SAMRecordIterator queryOverlapping(final String sequence, final int start, final int end);
/**
* Iterate over records that are contained in the given interval. Only valid to call this if hasIndex() == true.
* <p/>
* Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
* a second iteration, the first one must be closed first.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
* and then discarded because they do not match the interval of interest.
* <p/>
* Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
* is in the query region.
*
* @param sequence Reference sequence of interest.
* @param start 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
* @param end 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
* @return Iterator over the SAMRecords contained in the interval.
*/
public SAMRecordIterator queryContained(final String sequence, final int start, final int end);
/**
* Iterate over records that match one of the given intervals. This may be more efficient than querying
* each interval separately, because multiple reads of the same SAMRecords is avoided.
* <p/>
* Only valid to call this if hasIndex() == true.
* <p/>
* Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
* a second iteration, the first one must be closed first. You can use a second SAMFileReader to iterate
* in parallel over the same underlying file.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
* and then discarded because they do not match an interval of interest.
* <p/>
* Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
* is in the query region.
*
* @param intervals Intervals to be queried. The intervals must be optimized, i.e. in order, with overlapping
* and abutting intervals merged. This can be done with
* htsjdk.samtools.SAMFileReader.QueryInterval#optimizeIntervals(htsjdk.samtools.SAMFileReader.QueryInterval[])
* @param contained If true, each SAMRecord returned is will have its alignment completely contained in one of the
* intervals of interest. If false, the alignment of the returned SAMRecords need only overlap one of
* the intervals of interest.
* @return Iterator over the SAMRecords matching the interval.
*/
public SAMRecordIterator query(final QueryInterval[] intervals, final boolean contained);
/**
* Iterate over records that overlap any of the given intervals. This may be more efficient than querying
* each interval separately, because multiple reads of the same SAMRecords is avoided.
* <p/>
* Only valid to call this if hasIndex() == true.
* <p/>
* Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
* a second iteration, the first one must be closed first.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
* and then discarded because they do not match the interval of interest.
* <p/>
* Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
* is in the query region.
*
* @param intervals Intervals to be queried. The intervals must be optimized, i.e. in order, with overlapping
* and abutting intervals merged. This can be done with
* htsjdk.samtools.SAMFileReader.QueryInterval#optimizeIntervals(htsjdk.samtools.SAMFileReader.QueryInterval[])
* @return Iterator over the SAMRecords overlapping any of the intervals.
*/
public SAMRecordIterator queryOverlapping(final QueryInterval[] intervals);
/**
* Iterate over records that are contained in the given interval. This may be more efficient than querying
* each interval separately, because multiple reads of the same SAMRecords is avoided.
* <p/>
* Only valid to call this if hasIndex() == true.
* <p/>
* Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
* a second iteration, the first one must be closed first.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
* and then discarded because they do not match the interval of interest.
* <p/>
* Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
* is in the query region.
*
* @param intervals Intervals to be queried. The intervals must be optimized, i.e. in order, with overlapping
* and abutting intervals merged. This can be done with
* htsjdk.samtools.SAMFileReader.QueryInterval#optimizeIntervals(htsjdk.samtools.SAMFileReader.QueryInterval[])
* @return Iterator over the SAMRecords contained in any of the intervals.
*/
public SAMRecordIterator queryContained(final QueryInterval[] intervals);
public SAMRecordIterator queryUnmapped();
/**
* Iterate over records that map to the given sequence and start at the given position. Only valid to call this if hasIndex() == true.
* <p/>
* Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
* a second iteration, the first one must be closed first.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
* and then discarded because they do not match the interval of interest.
* <p/>
* Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
* matches the arguments.
*
* @param sequence Reference sequence of interest.
* @param start Alignment start of interest.
* @return Iterator over the SAMRecords with the given alignment start.
*/
public SAMRecordIterator queryAlignmentStart(final String sequence, final int start);
/**
* Fetch the mate for the given read. Only valid to call this if hasIndex() == true.
* This will work whether the mate has a coordinate or not, so long as the given read has correct
* mate information. This method iterates over the SAM file, so there may not be an unclosed
* iterator on the SAM file when this method is called.
* <p/>
* Note that it is not possible to call queryMate when iterating over the SAMFileReader, because queryMate
* requires its own iteration, and there cannot be two simultaneous iterations on the same SAMFileReader. The
* work-around is to open a second SAMFileReader on the same input file, and call queryMate on the second
* reader.
*
* @param rec Record for which mate is sought. Must be a paired read.
* @return rec's mate, or null if it cannot be found.
*/
public SAMRecord queryMate(final SAMRecord rec);
/**
* The minimal subset of functionality to implement to conform with the requirements of
* {@link SamReader.PrimitiveSamReaderToSamReaderAdapter}.
*/
public interface PrimitiveSamReader {
Type type();
boolean hasIndex();
BAMIndex getIndex();
SAMFileHeader getFileHeader();
CloseableIterator<SAMRecord> getIterator();
CloseableIterator<SAMRecord> getIterator(SAMFileSpan fileSpan);
SAMFileSpan getFilePointerSpanningReads();
CloseableIterator<SAMRecord> query(QueryInterval[] intervals, boolean contained);
CloseableIterator<SAMRecord> queryAlignmentStart(String sequence, int start);
CloseableIterator<SAMRecord> queryUnmapped();
void close();
ValidationStringency getValidationStringency();
}
/** Decorator for a {@link SamReader.PrimitiveSamReader} that expands its functionality into a {@link SamReader}. */
class PrimitiveSamReaderToSamReaderAdapter implements SamReader, Indexing {
final PrimitiveSamReader p;
public PrimitiveSamReaderToSamReaderAdapter(final PrimitiveSamReader p) {
this.p = p;
}
PrimitiveSamReader underlyingReader() {
return p;
}
@Override
public SAMRecordIterator queryOverlapping(final String sequence, final int start, final int end) {
return query(sequence, start, end, false);
}
@Override
public SAMRecordIterator queryOverlapping(final QueryInterval[] intervals) {
return query(intervals, false);
}
@Override
public SAMRecordIterator queryContained(final String sequence, final int start, final int end) {
return query(sequence, start, end, true);
}
@Override
public SAMRecordIterator queryContained(final QueryInterval[] intervals) {
return query(intervals, true);
}
@Override
public SAMRecord queryMate(final SAMRecord rec) {
if (!rec.getReadPairedFlag()) {
throw new IllegalArgumentException("queryMate called for unpaired read.");
}
if (rec.getFirstOfPairFlag() == rec.getSecondOfPairFlag()) {
throw new IllegalArgumentException("SAMRecord must be either first and second of pair, but not both.");
}
final boolean firstOfPair = rec.getFirstOfPairFlag();
final CloseableIterator<SAMRecord> it;
if (rec.getMateReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
it = queryUnmapped();
} else {
it = queryAlignmentStart(rec.getMateReferenceName(), rec.getMateAlignmentStart());
}
try {
SAMRecord mateRec = null;
while (it.hasNext()) {
final SAMRecord next = it.next();
if (!next.getReadPairedFlag()) {
if (rec.getReadName().equals(next.getReadName())) {
throw new SAMFormatException("Paired and unpaired reads with same name: " + rec.getReadName());
}
continue;
}
if (firstOfPair) {
if (next.getFirstOfPairFlag()) continue;
} else {
if (next.getSecondOfPairFlag()) continue;
}
if (rec.getReadName().equals(next.getReadName())) {
if (mateRec != null) {
throw new SAMFormatException("Multiple SAMRecord with read name " + rec.getReadName() +
" for " + (firstOfPair ? "second" : "first") + " end.");
}
mateRec = next;
}
}
return mateRec;
} finally {
it.close();
}
}
@Override
public boolean hasBrowseableIndex() {
return hasIndex() && getIndex() instanceof BrowseableBAMIndex;
}
@Override
public BrowseableBAMIndex getBrowseableIndex() {
final BAMIndex index = getIndex();
if (!(index instanceof BrowseableBAMIndex))
throw new SAMException("Cannot return index: index created by BAM is not browseable.");
return BrowseableBAMIndex.class.cast(index);
}
@Override
public SAMRecordIterator iterator() {
return new AssertingIterator(p.getIterator());
}
@Override
public SAMRecordIterator iterator(final SAMFileSpan chunks) {
return new AssertingIterator(p.getIterator(chunks));
}
@Override
public void close() {
p.close();
}
@Override
public SAMFileSpan getFilePointerSpanningReads() {
return p.getFilePointerSpanningReads();
}
@Override
public SAMFileHeader getFileHeader() {
return p.getFileHeader();
}
@Override
public Type type() {
return p.type();
}
@Override
public boolean hasIndex() {
return p.hasIndex();
}
@Override
public Indexing indexing() {
return this;
}
@Override
public BAMIndex getIndex() {
return p.getIndex();
}
@Override
public SAMRecordIterator query(final QueryInterval[] intervals, final boolean contained) {
return AssertingIterator.of(p.query(intervals, contained));
}
@Override
public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) {
return query(new QueryInterval[]{new QueryInterval(getFileHeader().getSequenceIndex(sequence), start, end)}, contained);
}
@Override
public SAMRecordIterator queryUnmapped() {
return AssertingIterator.of(p.queryUnmapped());
}
@Override
public SAMRecordIterator queryAlignmentStart(final String sequence, final int start) {
return AssertingIterator.of(p.queryAlignmentStart(sequence, start));
}
}
static class AssertingIterator implements SAMRecordIterator {
static AssertingIterator of(final CloseableIterator<SAMRecord> iterator) {
return new AssertingIterator(iterator);
}
private final CloseableIterator<SAMRecord> wrappedIterator;
private SAMRecord previous = null;
private SAMRecordComparator comparator = null;
public AssertingIterator(final CloseableIterator<SAMRecord> iterator) {
wrappedIterator = iterator;
}
public SAMRecordIterator assertSorted(final SAMFileHeader.SortOrder sortOrder) {
if (sortOrder == null || sortOrder == SAMFileHeader.SortOrder.unsorted) {
comparator = null;
return this;
}
comparator = sortOrder.getComparatorInstance();
return this;
}
public SAMRecord next() {
final SAMRecord result = wrappedIterator.next();
if (comparator != null) {
if (previous != null) {
if (comparator.fileOrderCompare(previous, result) > 0) {
throw new IllegalStateException(MessageFormat.format(
"Records {0} ({1}:{2}) should come after {3} ({4}:{5}) when sorting with {6}",
previous.getReadName(),
previous.getReferenceName(),
previous.getAlignmentStart(),
result.getReadName(),
result.getReferenceName(),
result.getAlignmentStart(),
comparator.getClass().getName())
);
}
}
previous = result;
}
return result;
}
public void close() { wrappedIterator.close(); }
public boolean hasNext() { return wrappedIterator.hasNext(); }
public void remove() { wrappedIterator.remove(); }
}
}