/* * The MIT License * * Copyright (c) 2009 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.sam; import htsjdk.samtools.MergingSamRecordIterator; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SamFileHeaderMerger; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.ProgressLogger; import picard.cmdline.CommandLineProgram; import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.SamOrBam; import java.io.File; import java.util.ArrayList; import java.util.List; /** * Reads a SAM or BAM file and combines the output to one file * * @author Tim Fennell */ @CommandLineProgramProperties( usage = "Merges multiple SAM/BAM files into one file.", usageShort = "Merges multiple SAM or BAM files into one file", programGroup = SamOrBam.class ) public class MergeSamFiles extends CommandLineProgram { private static final Log log = Log.getInstance(MergeSamFiles.class); @Option(shortName="I", doc="SAM or BAM input file", minElements=1) public List<File> INPUT = new ArrayList<File>(); @Option(shortName="O", doc="SAM or BAM file to write merged result to") public File OUTPUT; @Option(shortName=StandardOptionDefinitions.SORT_ORDER_SHORT_NAME, doc="Sort order of output file", optional=true) public SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate; @Option(doc="If true, assume that the input files are in the same sort order as the requested output sort order, even if their headers say otherwise.", shortName = StandardOptionDefinitions.ASSUME_SORTED_SHORT_NAME) public boolean ASSUME_SORTED = false; @Option(shortName="MSD", doc="Merge the sequence dictionaries", optional=true) public boolean MERGE_SEQUENCE_DICTIONARIES = false; @Option(doc="Option to create a background thread to encode, " + "compress and write to disk the output file. The threaded version uses about 20% more CPU and decreases " + "runtime by ~20% when writing out a compressed BAM file.") public boolean USE_THREADING = false; @Option(doc="Comment(s) to include in the merged output file's header.", optional=true, shortName="CO") public List<String> COMMENT = new ArrayList<String>(); private static final int PROGRESS_INTERVAL = 1000000; /** Required main method implementation. */ public static void main(final String[] argv) { System.exit(new MergeSamFiles().instanceMain(argv)); } /** Combines multiple SAM/BAM files into one. */ @Override protected int doWork() { boolean matchedSortOrders = true; // Open the files for reading and writing final List<SAMFileReader> readers = new ArrayList<SAMFileReader>(); final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(); { SAMSequenceDictionary dict = null; // Used to try and reduce redundant SDs in memory for (final File inFile : INPUT) { IOUtil.assertFileIsReadable(inFile); final SAMFileReader in = new SAMFileReader(inFile); readers.add(in); headers.add(in.getFileHeader()); // A slightly hackish attempt to keep memory consumption down when merging multiple files with // large sequence dictionaries (10,000s of sequences). If the dictionaries are identical, then // replace the duplicate copies with a single dictionary to reduce the memory footprint. if (dict == null) { dict = in.getFileHeader().getSequenceDictionary(); } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) { in.getFileHeader().setSequenceDictionary(dict); } matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER; } } // If all the input sort orders match the output sort order then just merge them and // write on the fly, otherwise setup to merge and sort before writing out the final file IOUtil.assertFileIsWritable(OUTPUT); final boolean presorted; final SAMFileHeader.SortOrder headerMergerSortOrder; final boolean mergingSamRecordIteratorAssumeSorted; if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) { log.info("Input files are in same order as output so sorting to temp directory is not needed."); headerMergerSortOrder = SORT_ORDER; mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED; presorted = true; } else { log.info("Sorting input files using temp directory " + TMP_DIR); headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted; mergingSamRecordIteratorAssumeSorted = false; presorted = false; } final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES); final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted); final SAMFileHeader header = headerMerger.getMergedHeader(); for (final String comment : COMMENT) { header.addComment(comment); } header.setSortOrder(SORT_ORDER); final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory(); if (USE_THREADING) { samFileWriterFactory.setUseAsyncIo(true); } final SAMFileWriter out = samFileWriterFactory.makeSAMOrBAMWriter(header, presorted, OUTPUT); // Lastly loop through and write out the records final ProgressLogger progress = new ProgressLogger(log, PROGRESS_INTERVAL); while (iterator.hasNext()) { final SAMRecord record = iterator.next(); out.addAlignment(record); progress.record(record); } log.info("Finished reading inputs."); out.close(); return 0; } @Override protected String[] customCommandLineValidation() { if (CREATE_INDEX && SORT_ORDER != SAMFileHeader.SortOrder.coordinate) { return new String[]{"Can't CREATE_INDEX unless SORT_ORDER is coordinate"}; } return null; } }