/* * Copyright (c) 2014 The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package htsjdk.variant.variantcontext.writer; import htsjdk.samtools.Defaults; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.BlockCompressedOutputStream; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Md5CalculatingOutputStream; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.tribble.AbstractFeatureReader; import htsjdk.tribble.index.IndexCreator; import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.index.tabix.TabixIndexCreator; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.OutputStream; import java.util.EnumSet; /** * Created with IntelliJ IDEA. * User: thibault * Date: 3/7/14 * Time: 2:07 PM * * Provides methods for creating VariantContextWriters using the Builder pattern. * Replaces VariantContextWriterFactory. * * The caller must choose an output file or an output stream for the VariantContextWriter to write to. * When a file is chosen, the output stream is created implicitly based on Defaults and options passed to the builder. * When a stream is chosen, it is passed unchanged to the VariantContextWriter. * * Example: Create a series of files with buffering and indexing on the fly. * Determine the appropriate file type based on filename. * * VariantContextWriterBuilder builder = new VariantContextWriterBuilder() * .setReferenceDictionary(refDict) * .setOption(Options.INDEX_ON_THE_FLY) * .setBuffer(8192); * * VariantContextWriter sample1_writer = builder * .setOutputFile("sample1.vcf") * .build(); * VariantContextWriter sample2_writer = builder * .setOutputFile("sample2.bcf") * .build(); * VariantContextWriter sample3_writer = builder * .setOutputFile("sample3.vcf.bgzf") * .build(); * * Example: Explicitly turn off buffering and explicitly set the file type * * VariantContextWriterBuilder builder = new VariantContextWriterBuilder() * .setReferenceDictionary(refDict) * .setOption(Options.INDEX_ON_THE_FLY) * .unsetBuffering(); * * VariantContextWriter sample1_writer = builder * .setOutputFile("sample1.custom_extension") * .setOutputFileType(OutputType.VCF) * .build(); * VariantContextWriter sample2_writer = builder * .setOutputFile("sample2.custom_extension") * .setOutputFileType(OutputType.BLOCK_COMPRESSED_VCF) * .build(); */ public class VariantContextWriterBuilder { public static final EnumSet<Options> DEFAULT_OPTIONS = EnumSet.of(Options.INDEX_ON_THE_FLY); public static final EnumSet<Options> NO_OPTIONS = EnumSet.noneOf(Options.class); public enum OutputType { UNSPECIFIED, VCF, BCF, BLOCK_COMPRESSED_VCF, VCF_STREAM, BCF_STREAM } public static final EnumSet<OutputType> FILE_TYPES = EnumSet.of(OutputType.VCF, OutputType.BCF, OutputType.BLOCK_COMPRESSED_VCF); public static final EnumSet<OutputType> STREAM_TYPES = EnumSet.of(OutputType.VCF_STREAM, OutputType.BCF_STREAM); private SAMSequenceDictionary refDict = null; private OutputType outType = OutputType.UNSPECIFIED; private File outFile = null; private OutputStream outStream = null; private IndexCreator idxCreator = null; private int bufferSize = Defaults.BUFFER_SIZE; private boolean createMD5 = Defaults.CREATE_MD5; private EnumSet<Options> options = DEFAULT_OPTIONS.clone(); /** * Default constructor. Adds USE_ASYNC_IO to the Options if it is present in Defaults. */ public VariantContextWriterBuilder() { if (Defaults.USE_ASYNC_IO) options.add(Options.USE_ASYNC_IO); } /** * Set the reference dictionary to be used by VariantContextWriters created by this builder * * @param refDict the reference dictionary * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder setReferenceDictionary(final SAMSequenceDictionary refDict) { this.refDict = refDict; return this; } /** * Set the output file for the next VariantContextWriter created by this builder * Determines file type implicitly from the filename * * @param outFile the file the VariantContextWriter will write to * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder setOutputFile(final File outFile) { this.outFile = outFile; this.outStream = null; determineOutputTypeFromFilename(); return this; } /** * Set the output file for the next VariantContextWriter created by this builder * Determines file type implicitly from the filename * * @param outFile the file the VariantContextWriter will write to * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder setOutputFile(final String outFile) { this.outFile = new File(outFile); this.outStream = null; determineOutputTypeFromFilename(); return this; } /** * Set the output file type for the next VariantContextWriter created by this builder * * @param outType the type of file the VariantContextWriter will write to * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder setOutputFileType(final OutputType outType) { if (!FILE_TYPES.contains(outType)) throw new IllegalArgumentException("Must choose a file type, not other output types."); if (this.outFile == null || this.outStream != null) throw new IllegalArgumentException("Cannot set a file type if the output is not to a file."); this.outType = outType; return this; } /** * Set the output VCF stream for the next VariantContextWriter created by this builder * If buffered writing is desired, caller must provide some kind of buffered OutputStream. * * @param outStream the output stream to write to * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder setOutputVCFStream(final OutputStream outStream) { this.outStream = outStream; this.outFile = null; this.outType = OutputType.VCF_STREAM; return this; } /** * Set the output BCF stream for the next VariantContextWriter created by this builder * If buffered writing is desired, caller must provide some kind of buffered OutputStream. * * @param outStream the output stream to write to * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder setOutputBCFStream(final OutputStream outStream) { this.outStream = outStream; this.outFile = null; this.outType = OutputType.BCF_STREAM; return this; } /** * Set the output stream (VCF, by default) for the next VariantContextWriter created by this builder * If buffered writing is desired, caller must provide some kind of buffered OutputStream. * * @param outStream the output stream to write to * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder setOutputStream(final OutputStream outStream) { return setOutputVCFStream(outStream); } /** * Set an IndexCreator for the next VariantContextWriter created by this builder * * @param idxCreator the IndexCreator to use * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder setIndexCreator(final IndexCreator idxCreator) { this.idxCreator = idxCreator; return this; } /** * Do not pass an IndexCreator to the next VariantContextWriter created by this builder * * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder clearIndexCreator() { this.idxCreator = null; return this; } /** * Set a buffer size for the file output stream passed to the next VariantContextWriter created by this builder * Set to 0 for no buffering * Does not affect OutputStreams passed directly to VariantContextWriterBuilder * * @param bufferSize the buffer size to use * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder setBuffer(final int bufferSize) { this.bufferSize = bufferSize; return this; } /** * Do not use buffering in the next VariantContextWriter created by this builder * Does not affect OutputStreams passed directly to VariantContextWriterBuilder * * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder unsetBuffering() { this.bufferSize = 0; return this; } /** * Choose whether to also create an MD5 digest file for the next VariantContextWriter created by this builder * * @param createMD5 boolean, true to create an MD5 digest * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder setCreateMD5(final boolean createMD5) { this.createMD5 = createMD5; return this; } /** * Create an MD5 digest file for the next VariantContextWriter created by this builder * * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder setCreateMD5() { return setCreateMD5(true); } /** * Don't create an MD5 digest file for the next VariantContextWriter created by this builder * * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder unsetCreateMD5() { return setCreateMD5(false); } /** * Replace the set of Options for the VariantContextWriterBuilder with a new set * * @param options the complete set of options to use * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder setOptions(final EnumSet<Options> options) { this.options = options; return this; } /** * Add one option to the set of Options for the VariantContextWriterBuilder, if it's not already present * * @param option the option to set * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder setOption(final Options option) { this.options.add(option); return this; } /** * Remove one option from the set of Options for the VariantContextWriterBuilder, if it's present * * @param option the option to unset * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder unsetOption(final Options option) { this.options.remove(option); return this; } /** * Remove all options from the set of Options for the VariantContextWriterBuilder * * @return this VariantContextWriterBuilder */ public VariantContextWriterBuilder clearOptions() { this.options = NO_OPTIONS; return this; } /** * Validate and build the VariantContextWriter * * @return the VariantContextWriter as specified by previous method calls */ public VariantContextWriter build() { VariantContextWriter writer = null; // don't allow FORCE_BCF to modify the outType state OutputType typeToBuild = this.outType; if (this.options.contains(Options.FORCE_BCF)) { if (FILE_TYPES.contains(this.outType)) typeToBuild = OutputType.BCF; else if (STREAM_TYPES.contains(this.outType)) typeToBuild = OutputType.BCF_STREAM; } OutputStream outStreamFromFile = this.outStream; if (FILE_TYPES.contains(this.outType)) { try { outStreamFromFile = IOUtil.maybeBufferOutputStream(new FileOutputStream(outFile), bufferSize); } catch (final FileNotFoundException e) { throw new RuntimeIOException("File not found: " + outFile, e); } if (createMD5) outStreamFromFile = new Md5CalculatingOutputStream(outStreamFromFile, new File(outFile.getAbsolutePath() + ".md5")); } switch (typeToBuild) { case UNSPECIFIED: throw new IllegalArgumentException("Must specify file or stream output type."); case VCF: if ((refDict == null) && (options.contains(Options.INDEX_ON_THE_FLY))) throw new IllegalArgumentException("A reference dictionary is required for creating Tribble indices on the fly"); writer = createVCFWriter(outFile, outStreamFromFile); break; case BLOCK_COMPRESSED_VCF: if (refDict == null) idxCreator = new TabixIndexCreator(TabixFormat.VCF); else idxCreator = new TabixIndexCreator(refDict, TabixFormat.VCF); writer = createVCFWriter(outFile, new BlockCompressedOutputStream(outStreamFromFile, outFile)); break; case BCF: if ((refDict == null) && (options.contains(Options.INDEX_ON_THE_FLY))) throw new IllegalArgumentException("A reference dictionary is required for creating Tribble indices on the fly"); writer = createBCFWriter(outFile, outStreamFromFile); break; case VCF_STREAM: if (options.contains(Options.INDEX_ON_THE_FLY)) throw new IllegalArgumentException("VCF index creation not supported for stream output."); writer = createVCFWriter(null, outStream); break; case BCF_STREAM: if (options.contains(Options.INDEX_ON_THE_FLY)) throw new IllegalArgumentException("BCF index creation not supported for stream output."); writer = createBCFWriter(null, outStream); break; } if (this.options.contains(Options.USE_ASYNC_IO)) writer = new AsyncVariantContextWriter(writer, AsyncVariantContextWriter.DEFAULT_QUEUE_SIZE); return writer; } private void determineOutputTypeFromFilename() { if (isBCF(this.outFile)) { this.outType = OutputType.BCF; } else if (isCompressedVCF(this.outFile)) { this.outType = OutputType.BLOCK_COMPRESSED_VCF; } else if (isVCF(this.outFile)) { this.outType = OutputType.VCF; } else { this.outType = OutputType.UNSPECIFIED; } } private boolean isVCF(final File outFile) { return outFile != null && outFile.getName().endsWith(".vcf"); } private boolean isBCF(final File outFile) { return outFile != null && outFile.getName().endsWith(".bcf"); } private boolean isCompressedVCF(final File outFile) { if (outFile == null) return false; return AbstractFeatureReader.hasBlockCompressedExtension(outFile); } private VariantContextWriter createVCFWriter(final File writerFile, final OutputStream writerStream) { if (idxCreator == null) { return new VCFWriter(writerFile, writerStream, refDict, options.contains(Options.INDEX_ON_THE_FLY), options.contains(Options.DO_NOT_WRITE_GENOTYPES), options.contains(Options.ALLOW_MISSING_FIELDS_IN_HEADER), options.contains(Options.WRITE_FULL_FORMAT_FIELD)); } else { return new VCFWriter(writerFile, writerStream, refDict, idxCreator, options.contains(Options.INDEX_ON_THE_FLY), options.contains(Options.DO_NOT_WRITE_GENOTYPES), options.contains(Options.ALLOW_MISSING_FIELDS_IN_HEADER), options.contains(Options.WRITE_FULL_FORMAT_FIELD)); } } private VariantContextWriter createBCFWriter(final File writerFile, final OutputStream writerStream) { if (idxCreator == null) { return new BCF2Writer(writerFile, writerStream, refDict, options.contains(Options.INDEX_ON_THE_FLY), options.contains(Options.DO_NOT_WRITE_GENOTYPES)); } else { return new BCF2Writer(writerFile, writerStream, refDict, idxCreator, options.contains(Options.INDEX_ON_THE_FLY), options.contains(Options.DO_NOT_WRITE_GENOTYPES)); } } }