package htsjdk.samtools; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.BlockCompressedStreamConstants; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.RuntimeIOException; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.Collections; import java.util.EnumSet; import java.util.zip.GZIPInputStream; /** * <p>Describes the functionality for producing {@link SamReader}, and offers a * handful of static generators.</p> * <pre> * SamReaderFactory.makeDefault().open(new File("/my/bam.bam"); * </pre> * <p>Example: Configure a factory</p> * <pre> * final {@link SamReaderFactory} factory = * SamReaderFactory.makeDefault() * .enable({@link Option#INCLUDE_SOURCE_IN_RECORDS}, {@link Option#VALIDATE_CRC_CHECKSUMS}) * .validationStringency({@link ValidationStringency#SILENT}); * * </pre> * <p>Example: Open two bam files from different sources, using different options</p> * <pre> * final {@link SamReaderFactory} factory = * SamReaderFactory.makeDefault() * .enable({@link Option#INCLUDE_SOURCE_IN_RECORDS}, {@link Option#VALIDATE_CRC_CHECKSUMS}) * .validationStringency({@link ValidationStringency#SILENT}); * * // File-based bam * final {@link SamReader} fileReader = factory.open(new File("/my/bam.bam")); * * // HTTP-hosted BAM with index from an arbitrary stream * final SeekableStream myBamIndexStream = ... * final {@link SamInputResource} resource = * {@link SamInputResource}.of(new URL("http://example.com/data.bam")).index(myBamIndexStream); * final {@link SamReader} complicatedReader = factory.open(resource); * </pre> * * @author mccowan */ public abstract class SamReaderFactory { private static ValidationStringency defaultValidationStringency = ValidationStringency.DEFAULT_STRINGENCY; abstract public SamReader open(final File file); abstract public SamReader open(final SamInputResource resource); abstract public ValidationStringency validationStringency(); /** Set this factory's {@link htsjdk.samtools.SAMRecordFactory} to the provided one, then returns itself. */ abstract public SamReaderFactory samRecordFactory(final SAMRecordFactory samRecordFactory); /** Enables the provided {@link Option}s, then returns itself. */ abstract public SamReaderFactory enable(final Option... options); /** Disables the provided {@link Option}s, then returns itself. */ abstract public SamReaderFactory disable(final Option... options); /** Set this factory's {@link ValidationStringency} to the provided one, then returns itself. */ abstract public SamReaderFactory validationStringency(final ValidationStringency validationStringency); private static SamReaderFactoryImpl DEFAULT = new SamReaderFactoryImpl(Option.DEFAULTS, defaultValidationStringency, DefaultSAMRecordFactory.getInstance()); public static void setDefaultValidationStringency(final ValidationStringency defaultValidationStringency) { SamReaderFactory.defaultValidationStringency = defaultValidationStringency; // The default may have changed, so reset the default SamReader DEFAULT = new SamReaderFactoryImpl(Option.DEFAULTS, defaultValidationStringency, DefaultSAMRecordFactory.getInstance()); } /** Creates a copy of the default {@link SamReaderFactory}. */ public static SamReaderFactory makeDefault() { return SamReaderFactoryImpl.copyOf(DEFAULT); } /** * Creates an "empty" factory with no enabled {@link Option}s, {@link ValidationStringency#DEFAULT_STRINGENCY}, and * {@link htsjdk.samtools.DefaultSAMRecordFactory}. */ public static SamReaderFactory make() { return new SamReaderFactoryImpl(EnumSet.noneOf(Option.class), ValidationStringency.DEFAULT_STRINGENCY, DefaultSAMRecordFactory.getInstance()); } private static class SamReaderFactoryImpl extends SamReaderFactory { private final static Log LOG = Log.getInstance(SamReaderFactory.class); private final EnumSet<Option> enabledOptions; private ValidationStringency validationStringency; private SAMRecordFactory samRecordFactory; private CustomReaderFactory customReaderFactory; private SamReaderFactoryImpl(final EnumSet<Option> enabledOptions, final ValidationStringency validationStringency, final SAMRecordFactory samRecordFactory) { this.enabledOptions = EnumSet.copyOf(enabledOptions); this.samRecordFactory = samRecordFactory; this.validationStringency = validationStringency; this.customReaderFactory = CustomReaderFactory.getInstance(); } @Override public SamReader open(final File file) { final SamInputResource r = SamInputResource.of(file); final File indexMaybe = SamFiles.findIndex(file); if (indexMaybe != null) r.index(indexMaybe); return open(r); } @Override public ValidationStringency validationStringency() { return validationStringency; } @Override public SamReaderFactory samRecordFactory(final SAMRecordFactory samRecordFactory) { this.samRecordFactory = samRecordFactory; return this; } @Override public SamReaderFactory enable(final Option... options) { Collections.addAll(this.enabledOptions, options); return this; } @Override public SamReaderFactory disable(final Option... options) { for (final Option option : options) { this.enabledOptions.remove(option); } return this; } @Override public SamReaderFactory validationStringency(final ValidationStringency validationStringency) { this.validationStringency = validationStringency; return this; } @Override public SamReader open(final SamInputResource resource) { final SamReader.PrimitiveSamReader primitiveSamReader; try { final InputResource data = resource.data(); final InputResource indexMaybe = resource.indexMaybe(); final boolean indexDefined = indexMaybe != null; final InputResource.Type type = data.type(); if (type == InputResource.Type.URL) { SamReader reader = customReaderFactory.maybeOpen( data.asUrl()); if (reader != null) { return reader; } } if (type == InputResource.Type.SEEKABLE_STREAM || type == InputResource.Type.URL) { if (SamStreams.sourceLikeBam(data.asUnbufferedSeekableStream())) { final SeekableStream bufferedIndexStream; if (indexDefined && indexMaybe.asUnbufferedSeekableStream() != null) { bufferedIndexStream = IOUtil.maybeBufferedSeekableStream(indexMaybe.asUnbufferedSeekableStream()); } else { // TODO: Throw an exception here? An index _may_ have been provided, but we're ignoring it bufferedIndexStream = null; } primitiveSamReader = new BAMFileReader( IOUtil.maybeBufferedSeekableStream(data.asUnbufferedSeekableStream()), bufferedIndexStream, false, validationStringency, this.samRecordFactory ); } else { throw new SAMFormatException("Unrecognized file format: " + data.asUnbufferedSeekableStream()); } } else { final InputStream bufferedStream = IOUtil.maybeBufferInputStream( data.asUnbufferedInputStream(), Math.max(Defaults.BUFFER_SIZE, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE) ); final File sourceFile = data.asFile(); final File indexFile = indexMaybe == null ? null : indexMaybe.asFile(); if (SamStreams.isBAMFile(bufferedStream)) { if (sourceFile == null || !sourceFile.isFile()) { // Handle case in which file is a named pipe, e.g. /dev/stdin or created by mkfifo primitiveSamReader = new BAMFileReader(bufferedStream, indexFile, false, validationStringency, this.samRecordFactory); } else { bufferedStream.close(); primitiveSamReader = new BAMFileReader(sourceFile, indexFile, false, validationStringency, this.samRecordFactory); } } else if (BlockCompressedInputStream.isValidFile(bufferedStream)) { primitiveSamReader = new SAMTextReader(new BlockCompressedInputStream(bufferedStream), validationStringency, this.samRecordFactory); } else if (SamStreams.isGzippedSAMFile(bufferedStream)) { primitiveSamReader = new SAMTextReader(new GZIPInputStream(bufferedStream), validationStringency, this.samRecordFactory); } else { if (indexDefined) { bufferedStream.close(); throw new RuntimeException("Cannot use index file with textual SAM file"); } primitiveSamReader = new SAMTextReader(bufferedStream, sourceFile, validationStringency, this.samRecordFactory); } } // Apply the options defined by this factory to this reader final SamReader.PrimitiveSamReaderToSamReaderAdapter reader = new SamReader.PrimitiveSamReaderToSamReaderAdapter(primitiveSamReader); for (final Option option : enabledOptions) { option.applyTo(reader); } return reader; } catch (final IOException e) { throw new RuntimeIOException(e); } } public static SamReaderFactory copyOf(final SamReaderFactoryImpl target) { return new SamReaderFactoryImpl(target.enabledOptions, target.validationStringency, target.samRecordFactory); } } /** A collection of binary {@link SamReaderFactory} options. */ public enum Option { /** * The factory's {@link SamReader}s will produce populated (non-null) values when calling {@link SAMRecord#getFileSource()}. * <p/> * This option increases memory footprint slightly per {@link htsjdk.samtools.SAMRecord}. */ INCLUDE_SOURCE_IN_RECORDS { @Override void applyTo(final BAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableFileSource(reader, true); } @Override void applyTo(final SAMTextReader underlyingReader, final SamReader reader) { underlyingReader.enableFileSource(reader, true); } }, /** * The factory's {@link SamReader}s' {@link SamReader#indexing()}'s calls to {@link SamReader.Indexing#getIndex()} will produce * {@link BAMIndex}es that do some caching in memory instead of reading the index from the disk for each query operation. * * @see SamReader#indexing() * @see htsjdk.samtools.SamReader.Indexing#getIndex() */ CACHE_FILE_BASED_INDEXES { @Override void applyTo(final BAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableIndexCaching(true); } @Override void applyTo(final SAMTextReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } }, /** * The factory's {@link SamReader}s' will not use memory mapping for accessing index files (which is used by default). This is * slower but more scalable when accessing large numbers of BAM files sequentially. * * @see SamReader#indexing() * @see htsjdk.samtools.SamReader.Indexing#getIndex() */ DONT_MEMORY_MAP_INDEX { @Override void applyTo(final BAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableIndexMemoryMapping(false); } @Override void applyTo(final SAMTextReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } }, /** * Eagerly decode {@link htsjdk.samtools.SamReader}'s {@link htsjdk.samtools.SAMRecord}s, which can reduce memory footprint if many * fields are being read per record, or if fields are going to be updated. */ EAGERLY_DECODE { @Override void applyTo(final BAMFileReader underlyingReader, final SamReader reader) { underlyingReader.setEagerDecode(true); } @Override void applyTo(final SAMTextReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } }, /** * For {@link htsjdk.samtools.SamReader}s backed by block-compressed streams, enable CRC validation of those streams. This is an * expensive operation, but serves to ensure validity of the stream. */ VALIDATE_CRC_CHECKSUMS { @Override void applyTo(final BAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableCrcChecking(true); } @Override void applyTo(final SAMTextReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } }; public static EnumSet<Option> DEFAULTS = EnumSet.noneOf(Option.class); /** Applies this option to the provided reader, if applicable. */ void applyTo(final SamReader.PrimitiveSamReaderToSamReaderAdapter reader) { final SamReader.PrimitiveSamReader underlyingReader = reader.underlyingReader(); if (underlyingReader instanceof BAMFileReader) { applyTo((BAMFileReader) underlyingReader, reader); } else if (underlyingReader instanceof SAMTextReader) { applyTo((SAMTextReader) underlyingReader, reader); } else { throw new IllegalArgumentException(String.format("Unrecognized reader type: %s.", underlyingReader.getClass())); } } private static void logDebugIgnoringOption(final SamReader r, final Option option) { LOG.debug(String.format("Ignoring %s option; does not apply to %s readers.", option, r.getClass().getSimpleName())); } private final static Log LOG = Log.getInstance(Option.class); abstract void applyTo(final BAMFileReader underlyingReader, final SamReader reader); abstract void applyTo(final SAMTextReader underlyingReader, final SamReader reader); } }