package net.sf.cram;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.IllegalFormatException;

import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.cram.structure.CramHeader;
import htsjdk.samtools.util.Log;

import net.sf.cram.CramTools.LevelConverter;
import net.sf.cram.FixBAMFileHeader.MD5MismatchError;
import net.sf.cram.common.Utils;
import net.sf.cram.ref.ReferenceSource;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import com.beust.jcommander.converters.FileConverter;

/**
 * Command-line tool ({@value #COMMAND}) that fixes the header of a CRAM file
 * without re-writing the whole file.
 *
 * <p>
 * The header is read from the CRAM file, its sequence records are passed
 * through {@link FixBAMFileHeader#fixSequences}, a cramtools program record is
 * added via {@link FixBAMFileHeader#addCramtoolsPG}, and the result is written
 * back with {@link CramIO#replaceCramHeader}.
 */
public class CramFixHeader {
    private static final Log log = Log.getInstance(CramFixHeader.class);

    /** Name of this sub-command as shown in usage output. */
    public static final String COMMAND = "fixheader";

    /**
     * Parses the command line, validates the options and replaces the header
     * of the given CRAM file.
     *
     * <p>
     * The JVM is terminated with status 1 on invalid arguments or when the
     * header cannot be replaced, and with status 0 after printing usage.
     *
     * @param args
     *            command-line arguments, see {@link Params}
     * @throws IOException
     *             if the CRAM file cannot be read or written
     * @throws MD5MismatchError
     *             propagated from the header fixer
     */
    public static void main(String[] args) throws IOException, MD5MismatchError {
        Params params = new Params();
        JCommander jc = new JCommander(params);
        jc.setProgramName(COMMAND);

        try {
            jc.parse(args);
        } catch (Exception e) {
            System.out.println("Failed to parse parameters, detailed message below: ");
            System.out.println(e.getMessage());
            System.out.println();
            System.out.println("See usage: -h");
            System.exit(1);
        }

        if (args.length == 0 || params.help) {
            Utils.printUsage(jc);
            System.exit(0);
        }

        Log.setGlobalLogLevel(params.logLevel);

        if (params.cramFile == null) {
            log.error("CRAM file is required. ");
            System.exit(1);
        }

        if (params.reference == null && params.confirmMD5) {
            log.error("Reference file is required to confirm MD5s. ");
            System.exit(1);
        }

        try {
            if (!checkURIPattenIsSensible(params.sequenceUrlPattern)) {
                log.error("URI pattern is not valid.");
                System.exit(1);
            }
        } catch (URISyntaxException e) {
            log.error(e.getMessage());
            System.exit(1);
        }

        ReferenceSource referenceSource = params.reference == null ? null : new ReferenceSource(params.reference);

        // The input stream is closed before the header is replaced so that no
        // handle on the file is left open while it is being modified.
        CramHeader cramHeader = readHeader(params.cramFile);

        // NOTE(review): --inject-uri, --uri-pattern and --confirm-md5 are parsed
        // (and validated) above but are never handed to the fixer here, and MD5
        // mismatches are always ignored. Confirm whether that is intended.
        FixBAMFileHeader fixer = new FixBAMFileHeader(referenceSource);
        fixer.setIgnoreMD5Mismatch(true);
        fixer.fixSequences(cramHeader.getSamFileHeader().getSequenceDictionary().getSequences());
        fixer.addCramtoolsPG(cramHeader.getSamFileHeader());

        CramHeader newHeader = cramHeader.clone();

        if (!CramIO.replaceCramHeader(params.cramFile, newHeader)) {
            log.error("Failed to replace the header.");
            System.exit(1);
        }
    }

    /**
     * Reads the CRAM header from the given file and closes the file again.
     *
     * @param cramFile
     *            the CRAM file to read the header from
     * @return the header as returned by {@link CramIO#readCramHeader}
     * @throws IOException
     *             if the file cannot be opened, read or closed
     */
    private static CramHeader readHeader(File cramFile) throws IOException {
        FileInputStream fis = new FileInputStream(cramFile);
        // try/finally rather than try-with-resources: the file shows no use of
        // Java 7+ syntax, so the older form is kept for compatibility.
        try {
            return CramIO.readCramHeader(fis);
        } finally {
            fis.close();
        }
    }

    /**
     * Checks that a sequence URI pattern produces a usable URI.
     *
     * <p>
     * The pattern is formatted with a sample MD5 string and parsed as a
     * {@link URI}. The check passes when the ASCII form of the parsed URI
     * still contains the formatted string.
     *
     * @param pattern
     *            a {@link String#format} pattern taking the MD5 as its
     *            argument
     * @return {@code true} if the pattern passes the check; {@code false} if
     *         it is {@code null}, is not a valid format string, or does not
     *         pass the check
     * @throws URISyntaxException
     *             if the formatted string is not a syntactically valid URI
     */
    private static boolean checkURIPattenIsSensible(String pattern) throws URISyntaxException {
        if (pattern == null) {
            return false;
        }

        String uri;
        try {
            uri = String.format(pattern, "d41d8cd98f00b204e9800998ecf8427e");
        } catch (IllegalFormatException e) {
            // A malformed format string is an invalid pattern, not a crash.
            return false;
        }

        URI u = new URI(uri);
        // The URI has been parsed; make sure its ASCII form still contains the
        // formatted string.
        return u.toASCIIString().contains(uri);
    }

    /** Command-line options of the {@value #COMMAND} command, populated by JCommander. */
    @Parameters(commandDescription = "A tool to fix CRAM header without re-writing the whole file.")
    static class Params {
        @Parameter(names = { "-l", "--log-level" }, description = "Change log level: DEBUG, INFO, WARNING, ERROR.", converter = LevelConverter.class)
        Log.LogLevel logLevel = Log.LogLevel.ERROR;

        @Parameter(names = { "--input-cram-file", "-I" }, converter = FileConverter.class, description = "The path to the CRAM file.")
        File cramFile;

        @Parameter(names = { "--reference-fasta-file", "-R" }, converter = FileConverter.class, description = "Path to the reference fasta file, it must be uncompressed and indexed (use 'samtools faidx' for example). ")
        File reference;

        @Parameter(names = { "-h", "--help" })
        boolean help = false;

        @Parameter(names = { "--confirm-md5" }, description = "Calculate MD5 for sequences mentioned in the header. Requires --reference-fasta-file option.")
        boolean confirmMD5 = false;

        @Parameter(names = { "--inject-uri" }, description = "Inject URI for all reference sequences in the header.")
        boolean injectURI = false;

        @Parameter(names = { "--uri-pattern" }, description = "String formatting pattern for sequence URI to be injected.")
        String sequenceUrlPattern = "http://www.ebi.ac.uk/ena/cram/md5/%s";
    }
}