/* * The MIT License * * Copyright (c) 2009 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.sam; import htsjdk.samtools.BAMRecordCodec; import htsjdk.samtools.BamFileIoUtils; import htsjdk.samtools.DuplicateScoringStrategy; import htsjdk.samtools.DuplicateScoringStrategy.ScoringStrategy; import htsjdk.samtools.MergingSamRecordIterator; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileHeader.SortOrder; import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordQueryNameComparator; import htsjdk.samtools.SamFileHeaderMerger; import htsjdk.samtools.SamPairUtil; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.PeekableIterator; import htsjdk.samtools.util.ProgressLogger; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.samtools.util.SortingCollection; import picard.PicardException; import picard.cmdline.CommandLineProgram; import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.SamOrBam; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedList; import java.util.List; /** * Class to fix mate pair information for all reads in a SAM file. Will run in fairly limited * memory unless there are lots of mate pairs that are far apart from each other in the file. * * @author Tim Fennell */ @CommandLineProgramProperties( usage = "Ensure that all mate-pair information is in sync between each read " + "and its mate pair. If no OUTPUT file is supplied then the output is written to a temporary file " + "and then copied over the INPUT file. Reads marked with the secondary alignment flag are written " + "to the output file unchanged.", usageShort = "Ensure that all mate-pair information is in sync between each read and its mate pair", programGroup = SamOrBam.class ) public class FixMateInformation extends CommandLineProgram { @Option(shortName=StandardOptionDefinitions.INPUT_SHORT_NAME, doc="The input file to fix.") public List<File> INPUT; @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, optional=true, doc="The output file to write to. If no output file is supplied, the input file is overwritten.") public File OUTPUT; @Option(shortName=StandardOptionDefinitions.SORT_ORDER_SHORT_NAME, optional=true, doc="Optional sort order if the OUTPUT file should be sorted differently than the INPUT file.") public SortOrder SORT_ORDER; @Option(doc="If true, assume that the input file is queryname sorted, even if the header says otherwise.", shortName=StandardOptionDefinitions.ASSUME_SORTED_SHORT_NAME) public boolean ASSUME_SORTED = false; @Option(shortName="MC", optional=true, doc="Adds the mate CIGAR tag (MC) if true, does not if false.") public Boolean ADD_MATE_CIGAR = true; private static final Log log = Log.getInstance(FixMateInformation.class); protected SAMFileWriter out; public static void main(final String[] args) { new FixMateInformation().instanceMainWithExit(args); } protected int doWork() { // Open up the input boolean allQueryNameSorted = true; final List<SAMFileReader> readers = new ArrayList<SAMFileReader>(); for (final File f : INPUT) { IOUtil.assertFileIsReadable(f); final SAMFileReader reader = new SAMFileReader(f); readers.add(reader); if (reader.getFileHeader().getSortOrder() != SortOrder.queryname) allQueryNameSorted = false; } // Decide where to write the fixed file - into the specified output file // or into a temporary file that will overwrite the INPUT file eventually if (OUTPUT != null) OUTPUT = OUTPUT.getAbsoluteFile(); final boolean differentOutputSpecified = OUTPUT != null; if (differentOutputSpecified) { IOUtil.assertFileIsWritable(OUTPUT); } else if (INPUT.size() != 1) { throw new PicardException("Must specify either an explicit OUTPUT file or a single INPUT file to be overridden."); } else { final File soleInput = INPUT.get(0).getAbsoluteFile(); final File dir = soleInput.getParentFile().getAbsoluteFile(); try { IOUtil.assertFileIsWritable(soleInput); IOUtil.assertDirectoryIsWritable(dir); OUTPUT = File.createTempFile(soleInput.getName() + ".being_fixed.", BamFileIoUtils.BAM_FILE_EXTENSION, dir); } catch (final IOException ioe) { throw new RuntimeIOException("Could not create tmp file in " + dir.getAbsolutePath()); } } // Get the input records merged and sorted by query name as needed final PeekableIterator<SAMRecord> iterator; final SAMFileHeader header; { // Deal with merging if necessary final Iterator<SAMRecord> tmp; if (INPUT.size() > 1) { final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size()); for (final SAMFileReader reader : readers) { headers.add(reader.getFileHeader()); } final SortOrder sortOrder = (allQueryNameSorted? SortOrder.queryname: SortOrder.unsorted); final SamFileHeaderMerger merger = new SamFileHeaderMerger(sortOrder, headers, false); tmp = new MergingSamRecordIterator(merger, readers, false); header = merger.getMergedHeader(); } else { tmp = readers.get(0).iterator(); header = readers.get(0).getFileHeader(); } // And now deal with re-sorting if necessary if (ASSUME_SORTED || allQueryNameSorted) { iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<SAMRecord>(tmp), ADD_MATE_CIGAR); } else { log.info("Sorting input into queryname order."); final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM, TMP_DIR); while (tmp.hasNext()) { sorter.add(tmp.next()); } iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<SAMRecord>(sorter.iterator()) { @Override public void close() { super.close(); sorter.cleanup(); } }, ADD_MATE_CIGAR); log.info("Sorting by queryname complete."); } // Deal with the various sorting complications final SortOrder outputSortOrder = SORT_ORDER == null ? readers.get(0).getFileHeader().getSortOrder() : SORT_ORDER; log.info("Output will be sorted by " + outputSortOrder); header.setSortOrder(outputSortOrder); } if (CREATE_INDEX && header.getSortOrder() != SortOrder.coordinate){ throw new PicardException("Can't CREATE_INDEX unless sort order is coordinate"); } createSamFileWriter(header); log.info("Traversing query name sorted records and fixing up mate pair information."); final ProgressLogger progress = new ProgressLogger(log); while (iterator.hasNext()) { final SAMRecord record = iterator.next(); out.addAlignment(record); progress.record(record); } iterator.close(); if (header.getSortOrder() == SortOrder.queryname) { log.info("Closing output file."); } else { log.info("Finished processing reads; re-sorting output file."); } closeWriter(); // Lastly if we're fixing in place, swap the files if (!differentOutputSpecified) { log.info("Replacing input file with fixed file."); final File soleInput = INPUT.get(0).getAbsoluteFile(); final File old = new File(soleInput.getParentFile(), soleInput.getName() + ".old"); if (!old.exists() && soleInput.renameTo(old)) { if (OUTPUT.renameTo(soleInput)) { if (!old.delete()) { log.warn("Could not delete old file: " + old.getAbsolutePath()); return 1; } if (CREATE_INDEX) { final File newIndex = new File(OUTPUT.getParent(), OUTPUT.getName().substring(0, OUTPUT.getName().length()-4) + ".bai"); final File oldIndex = new File(soleInput.getParent(), soleInput.getName().substring(0, soleInput.getName().length()-4) + ".bai"); if (!newIndex.renameTo(oldIndex)) { log.warn("Could not overwrite index file: " + oldIndex.getAbsolutePath()); } } } else { log.error("Could not move new file to " + soleInput.getAbsolutePath()); log.error("Input file preserved as: " + old.getAbsolutePath()); log.error("New file preserved as: " + OUTPUT.getAbsolutePath()); return 1; } } else { log.error("Could not move input file out of the way: " + soleInput.getAbsolutePath()); if (!OUTPUT.delete()) { log.error("Could not delete temporary file: " + OUTPUT.getAbsolutePath()); } return 1; } } return 0; } protected void createSamFileWriter(final SAMFileHeader header) { out = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, header.getSortOrder() == SortOrder.queryname, OUTPUT); } protected void writeAlignment(final SAMRecord sam) { out.addAlignment(sam); } protected void closeWriter() { out.close(); } }