/* * The MIT License * * Copyright (c) 2009 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.sam; import htsjdk.samtools.SAMValidationError; import htsjdk.samtools.SamFileValidator; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.BamIndexValidator.IndexValidationStringency; import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.reference.ReferenceSequenceFileFactory; import htsjdk.samtools.util.IOUtil; import picard.PicardException; import picard.cmdline.CommandLineProgram; import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.SamOrBam; import java.io.File; import java.io.FileNotFoundException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.List; /** * Command line program wrapping SamFileValidator. * * @author Doug Voet */ @CommandLineProgramProperties( usage = ValidateSamFile.USAGE_SUMMARY + ValidateSamFile.USAGE_DETAILS, usageShort = ValidateSamFile.USAGE_SUMMARY, programGroup = SamOrBam.class ) public class ValidateSamFile extends CommandLineProgram { static final String USAGE_SUMMARY = "Validates a SAM or BAM file. "; static final String USAGE_DETAILS = "<p>This tool reports on the validity of a SAM or BAM file relative to the SAM format " + "specification. This is useful for troubleshooting errors encountered with other tools that may be caused by improper " + "formatting, faulty alignments, incorrect flag values, etc. </p> " + "<p>By default, the tool runs in VERBOSE mode and will exit after finding 100 errors and output them to the console (stdout). " + "Therefore, it is often more practical to run this tool initially using the MODE=SUMMARY option. This mode outputs a summary " + "table listing the numbers of all 'errors' and 'warnings'.</p> "+ "<p>When fixing errors in your file, it is often useful to prioritize the severe validation errors and ignore the " + "errors/warnings of lesser concern. This can be done using the IGNORE and/or IGNORE_WARNINGS arguments. For helpful " + "suggestions on error prioritization, please follow this link to obtain additional documentation on <a href='https://www.broadinstitute.org/gatk/guide/article?id=7571'>ValidateSamFile</a>.</p>" + "<p>After identifying and fixing your 'warnings/errors', we recommend that you rerun this tool to validate your SAM/BAM " + "file prior to proceeding with your downstream analysis. This will verify that all problems in your file have been addressed.</p>" + "<h4>Usage example:</h4>" + "<pre>" + "java -jar picard.jar ValidateSamFile \\<br />" + " I=input.bam \\<br />" + " MODE=SUMMARY" + "</pre>" + "<p>To obtain a complete list with descriptions of both 'ERROR' and 'WARNING' messages, please see our additional " + " <a href='https://www.broadinstitute.org/gatk/guide/article?id=7571'>documentation</a> for this tool.</p>" + ""+ "<hr />"; public enum Mode {VERBOSE, SUMMARY} @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Input SAM/BAM file") public File INPUT; @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Output file or standard out if missing", optional = true) public File OUTPUT; @Option(shortName = "M", doc = "Mode of output") public Mode MODE = Mode.VERBOSE; @Option(doc = "List of validation error types to ignore.") public List<SAMValidationError.Type> IGNORE = new ArrayList<SAMValidationError.Type>(); @Option(shortName = "MO", doc = "The maximum number of lines output in verbose mode") public Integer MAX_OUTPUT = 100; @Option(doc = "If true, only report errors and ignore warnings.") public boolean IGNORE_WARNINGS = false; @Option(doc = "DEPRECATED. Use INDEX_VALIDATION_STRINGENCY instead. If true and input is " + "a BAM file with an index file, also validates the index. Until this parameter is retired " + "VALIDATE INDEX and INDEX_VALIDATION_STRINGENCY must agree on whether to validate the index.") public boolean VALIDATE_INDEX = true; @Option(doc = "If set to anything other than IndexValidationStringency.NONE and input is " + "a BAM file with an index file, also validates the index at the specified stringency. " + "Until VALIDATE_INDEX is retired, VALIDATE INDEX and INDEX_VALIDATION_STRINGENCY " + "must agree on whether to validate the index.") public IndexValidationStringency INDEX_VALIDATION_STRINGENCY = IndexValidationStringency.EXHAUSTIVE; @Option(shortName = "BISULFITE", doc = "Whether the SAM or BAM file consists of bisulfite sequenced reads. " + "If so, C->T is not counted as an error in computing the value of the NM tag.") public boolean IS_BISULFITE_SEQUENCED = false; @Option(doc = "Relevant for a coordinate-sorted file containing read pairs only. " + "Maximum number of file handles to keep open when spilling mate info to disk. " + "Set this number a little lower than the per-process maximum number of file that may be open. " + "This number can be found by executing the 'ulimit -n' command on a Unix system.") public int MAX_OPEN_TEMP_FILES = 8000; public static void main(final String[] args) { System.exit(new ValidateSamFile().instanceMain(args)); } @Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); ReferenceSequenceFile reference = null; if (REFERENCE_SEQUENCE != null) { IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE); } final PrintWriter out; if (OUTPUT != null) { IOUtil.assertFileIsWritable(OUTPUT); try { out = new PrintWriter(OUTPUT); } catch (FileNotFoundException e) { // we already asserted this so we should not get here throw new PicardException("Unexpected exception", e); } } else { out = new PrintWriter(System.out); } boolean result; final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE) .validationStringency(ValidationStringency.SILENT) .enable(SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS); final SamReader samReader = factory.open(INPUT); if (samReader.type() != SamReader.Type.BAM_TYPE) VALIDATE_INDEX = false; factory.setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, VALIDATE_INDEX); factory.reapplyOptions(samReader); final SamFileValidator validator = new SamFileValidator(out, MAX_OPEN_TEMP_FILES); validator.setErrorsToIgnore(IGNORE); if (IGNORE_WARNINGS) { validator.setIgnoreWarnings(IGNORE_WARNINGS); } if (MODE == Mode.SUMMARY) { validator.setVerbose(false, 0); } else { validator.setVerbose(true, MAX_OUTPUT); } if (IS_BISULFITE_SEQUENCED) { validator.setBisulfiteSequenced(IS_BISULFITE_SEQUENCED); } if (VALIDATE_INDEX) { validator.setIndexValidationStringency(VALIDATE_INDEX ? IndexValidationStringency.EXHAUSTIVE : IndexValidationStringency.NONE); } if (IOUtil.isRegularPath(INPUT)) { // Do not check termination if reading from a stream validator.validateBamFileTermination(INPUT); } result = false; switch (MODE) { case SUMMARY: result = validator.validateSamFileSummary(samReader, reference); break; case VERBOSE: result = validator.validateSamFileVerbose(samReader, reference); break; } out.flush(); return result ? 0 : 1; } @Override protected String[] customCommandLineValidation() { if ((!VALIDATE_INDEX && INDEX_VALIDATION_STRINGENCY != IndexValidationStringency.NONE) || (VALIDATE_INDEX && INDEX_VALIDATION_STRINGENCY == IndexValidationStringency.NONE)) { return new String[]{"VALIDATE_INDEX and INDEX_VALIDATION_STRINGENCY must be consistent: " + "VALIDATE_INDEX is " + VALIDATE_INDEX + " and INDEX_VALIDATION_STRINGENCY is " + INDEX_VALIDATION_STRINGENCY}; } return super.customCommandLineValidation(); } }