package picard.sam; import htsjdk.samtools.BAMRecordCodec; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordQueryNameComparator; import htsjdk.samtools.SAMUtils; import htsjdk.samtools.SamPairUtil; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.ProgressLogger; import htsjdk.samtools.util.SortingCollection; import picard.cmdline.CommandLineProgram; import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.SamOrBam; import java.io.File; import java.util.Iterator; /** * This tool reverts the original base qualities (if specified) and adds the mate cigar tag to mapped BAMs. * If the file does not have OQs and already has mate cigar tags, nothing is done. * New BAM/BAI/MD5 files are created. * @author Nils Homer */ @CommandLineProgramProperties( usage = "Reverts the original base qualities and adds the mate cigar tag to read-group BAMs.", usageShort = "Reverts the original base qualities and adds the mate cigar tag to read-group BAMs", programGroup = SamOrBam.class ) public class RevertOriginalBaseQualitiesAndAddMateCigar extends CommandLineProgram { @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="The input SAM/BAM file to revert the state of.") public File INPUT; @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="The output SAM/BAM file to create.") public File OUTPUT; @Option(shortName="SO", doc="The sort order to create the reverted output file with." + "By default, the sort order will be the same as the input.", optional = true) public SAMFileHeader.SortOrder SORT_ORDER = null; @Option(shortName=StandardOptionDefinitions.USE_ORIGINAL_QUALITIES_SHORT_NAME, doc="True to restore original" + " qualities from the OQ field to the QUAL field if available.") public boolean RESTORE_ORIGINAL_QUALITIES = true; @Option(doc="The maximum number of records to examine to determine if we can exit early and not output, given that" + " there are a no original base qualities (if we are to restore) and mate cigars exist." + " Set to 0 to never skip the file.") public int MAX_RECORDS_TO_EXAMINE = 10000; private final static Log log = Log.getInstance(RevertOriginalBaseQualitiesAndAddMateCigar.class); public RevertOriginalBaseQualitiesAndAddMateCigar() { this.CREATE_INDEX = true; this.CREATE_MD5_FILE = true; } /** Default main method impl. */ public static void main(final String[] args) { new RevertOriginalBaseQualitiesAndAddMateCigar().instanceMainWithExit(args); } public int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); boolean foundPairedMappedReads = false; // Check if we can skip this file since it does not have OQ tags and the mate cigar tag is already there. final CanSkipSamFile skipSamFile = RevertOriginalBaseQualitiesAndAddMateCigar.canSkipSAMFile(INPUT, MAX_RECORDS_TO_EXAMINE, RESTORE_ORIGINAL_QUALITIES); log.info(skipSamFile.getMessage(MAX_RECORDS_TO_EXAMINE)); if (skipSamFile.canSkip()) return 0; final SAMFileReader in = new SAMFileReader(INPUT, true); final SAMFileHeader inHeader = in.getFileHeader(); // Build the output writer based on the correct sort order final SAMFileHeader outHeader = inHeader.clone(); if (null == SORT_ORDER) this.SORT_ORDER = inHeader.getSortOrder(); // same as the input outHeader.setSortOrder(SORT_ORDER); SAMFileWriterFactory.setDefaultCreateIndexWhileWriting(CREATE_INDEX); SAMFileWriterFactory.setDefaultCreateMd5File(CREATE_MD5_FILE); final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, false, OUTPUT); // Iterate over the records, revert original base qualities, and push them into a SortingCollection by queryname final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(outHeader), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM); final ProgressLogger revertingProgress = new ProgressLogger(log, 1000000, " reverted OQs"); int numOriginalQualitiesRestored = 0; for (final SAMRecord record : in) { // Clean up reads that map off the end of the reference AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(record); if (RESTORE_ORIGINAL_QUALITIES && null != record.getOriginalBaseQualities()) { // revert the original base qualities record.setBaseQualities(record.getOriginalBaseQualities()); record.setOriginalBaseQualities(null); numOriginalQualitiesRestored++; } if (!foundPairedMappedReads && record.getReadPairedFlag() && !record.getReadUnmappedFlag()) foundPairedMappedReads = true; revertingProgress.record(record); sorter.add(record); } CloserUtil.close(in); log.info("Reverted the original base qualities for " + numOriginalQualitiesRestored + " records"); /** * Iterator through sorting collection output * 1. Set mate cigar string and mate information * 2. push record into SAMFileWriter to the output */ final SamPairUtil.SetMateInfoIterator sorterIterator = new SamPairUtil.SetMateInfoIterator(sorter.iterator(), true); final ProgressLogger sorterProgress = new ProgressLogger(log, 1000000, " mate cigars added"); while (sorterIterator.hasNext()) { final SAMRecord record = sorterIterator.next(); out.addAlignment(record); sorterProgress.record(record); } sorterIterator.close(); CloserUtil.close(out); log.info("Updated " + sorterIterator.getNumMateCigarsAdded() + " records with mate cigar"); if (!foundPairedMappedReads) log.info("Did not find any paired mapped reads."); return 0; } /** * Used as a return for the canSkipSAMFile function. */ public enum CanSkipSamFile { CAN_SKIP("Can skip the BAM file", true), CANNOT_SKIP_FOUND_OQ("Cannot skip the BAM as we found a record with an OQ", false), CANNOT_SKIP_FOUND_NO_MC("Cannot skip the BAM as we found a mate with no mate cigar tag", false), FOUND_NO_EVIDENCE("Found no evidence of OQ or mate with no mate cigar in the first %d records. Will continue...", false); final private String format; final private boolean skip; private CanSkipSamFile(final String format, final boolean skip) { this.format = format; this.skip = skip; } public String getMessage(final int maxRecordsToExamine) { return String.format(this.format, maxRecordsToExamine); } public boolean canSkip() { return this.skip; } } /** * Checks if we can skip the SAM/BAM file when reverting origin base qualities and adding mate cigars. * @param inputFile the SAM/BAM input file * @param maxRecordsToExamine the maximum number of records to examine before quitting * @param revertOriginalBaseQualities true if we are to revert original base qualities, false otherwise * @return whether we can skip or not, and the explanation why. */ public static CanSkipSamFile canSkipSAMFile(final File inputFile, final int maxRecordsToExamine, final boolean revertOriginalBaseQualities) { final SAMFileReader in = new SAMFileReader(inputFile, true); final Iterator<SAMRecord> iterator = in.iterator(); int numRecordsExamined = 0; CanSkipSamFile returnType = CanSkipSamFile.FOUND_NO_EVIDENCE; while (iterator.hasNext() && numRecordsExamined < maxRecordsToExamine) { final SAMRecord record = iterator.next(); if (revertOriginalBaseQualities && null != record.getOriginalBaseQualities()) { // has OQ, break and return case #2 returnType = CanSkipSamFile.CANNOT_SKIP_FOUND_OQ; break; } // check if mate pair and its mate is mapped if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) { if (null == SAMUtils.getMateCigar(record)) { // has no MC, break and return case #2 returnType = CanSkipSamFile.CANNOT_SKIP_FOUND_NO_MC; break; } else { // has MC, previously checked that it does not have OQ, break and return case #1 returnType = CanSkipSamFile.CAN_SKIP; break; } } numRecordsExamined++; } // no more records anyhow, so we can skip if (!iterator.hasNext() && CanSkipSamFile.FOUND_NO_EVIDENCE == returnType) { returnType = CanSkipSamFile.CAN_SKIP; } in.close(); return returnType; } }