package htsjdk.samtools.cram.paralell;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.cram.CramLossyOptions;
import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.cram.common.CramVersions;
import htsjdk.samtools.cram.structure.CramHeader;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.Log.LogLevel;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.lang.Thread.UncaughtExceptionHandler;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.ThreadPoolExecutor.CallerRunsPolicy;
import java.util.concurrent.TimeUnit;

import net.sf.cram.Bam2Cram;
import net.sf.cram.CramTools;
import net.sf.cram.ref.ReferenceSource;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import com.beust.jcommander.converters.FileConverter;

/**
 * Command-line entry point that converts a BAM stream into a CRAM stream using a pool of worker
 * threads.
 *
 * <p>
 * The jobs submitted to the pool by {@link #main(String[])} are, in order: one BGZF decompression
 * pump, one BAM slicing supplier, a number of BAM-to-CRAM converter jobs and one CRAM writing job.
 * The main thread reads the SAM header, writes the CRAM header and then polls the writing job until
 * it reports completion.
 */
public class BamToCram {
	static Log log = Log.getInstance(BamToCram.class);

	/**
	 * Smallest accepted value of {@code --threads}. One thread is kept out of the pool and three pool
	 * threads are reserved for decompression, slicing and writing, so 5 leaves exactly one converter.
	 */
	private static final int MIN_THREADS = 5;

	/**
	 * Prints the implementation version followed by the JCommander usage text to standard output.
	 *
	 * @param jc
	 *            the commander holding the parameter definitions to describe
	 */
	private static void printUsage(JCommander jc) {
		StringBuilder sb = new StringBuilder();
		sb.append("\n");
		jc.usage(sb);

		System.out.println("Version " + Bam2Cram.class.getPackage().getImplementationVersion());
		System.out.println(sb.toString());
	}

	/**
	 * Parses the command line, builds the conversion pipeline and blocks until the CRAM output has
	 * been completely written.
	 *
	 * <p>
	 * The process exits with status 1 when the arguments cannot be parsed, when help is requested or
	 * no arguments are given, when fewer than {@value #MIN_THREADS} threads are requested, and when
	 * any thread dies with an uncaught exception.
	 *
	 * @param args
	 *            command line arguments, see {@link Params}
	 * @throws IOException
	 *             if opening the input/output or reading/writing the headers fails
	 * @throws InterruptedException
	 *             if the main thread is interrupted while waiting for the writer job
	 */
	public static void main(String[] args) throws IOException, InterruptedException {
		Params params = new Params();
		JCommander jc = new JCommander(params);
		try {
			jc.parse(args);
		} catch (Exception e) {
			System.out.println("Failed to parse parameteres, detailed message below: ");
			System.out.println(e.getMessage());
			System.out.println();
			System.out.println("See usage: -h");
			System.exit(1);
		}

		if (args.length == 0 || params.help) {
			printUsage(jc);
			System.exit(1);
		}

		// The level chosen with -l/--log-level stays in force for the whole run; it must not be
		// overridden later, otherwise the option is silently ignored.
		Log.setGlobalLogLevel(params.logLevel);

		if (params.referenceFasta == null)
			log.warn("No reference file specified, remote access over internet may be used to download public sequences. ");
		ReferenceSource referenceSource = new ReferenceSource(params.referenceFasta);

		// Any failure in a worker thread aborts the whole conversion instead of leaving the main
		// thread polling a writer job that will never finish.
		Thread.setDefaultUncaughtExceptionHandler(new UncaughtExceptionHandler() {

			@Override
			public void uncaughtException(Thread t, Throwable e) {
				System.err.println("Exception in thread " + t);
				e.printStackTrace();
				System.exit(1);
			}
		});

		InputStream bamInputStream = new BufferedInputStream(params.bamFile == null ? System.in : new FileInputStream(
				params.bamFile));
		OutputStream cramOutputStream = params.outputCramFile == null ? System.out : new FileOutputStream(
				params.outputCramFile);

		if (params.threads == 0) {
			// 0 means "use the available cores", but never fewer than the pipeline needs.
			params.threads = Math.max(MIN_THREADS, Runtime.getRuntime().availableProcessors());
		} else if (params.threads < MIN_THREADS) {
			System.err.println("Too few threads: minimum " + MIN_THREADS + " threads required. ");
			System.exit(1);
		}

		// One thread (this one) stays outside the pool; three pool threads are taken by the fixed
		// pipeline stages and the remainder perform the actual conversion.
		final int threadsInThePool = params.threads - 1;
		final int bgzfUncompressionThreads = 1;
		final int cramWritingThreads = 1;
		final int bamSlicingThreads = 1;
		final int conversionThreads = threadsInThePool - bgzfUncompressionThreads - cramWritingThreads
				- bamSlicingThreads;
		final int queuesCapacity = conversionThreads * 2;

		log.info(String.format("thread pool size=%d, converion threads=%d, queues capacity=%d", threadsInThePool,
				conversionThreads, queuesCapacity));

		log.info("Creating thread pool with size " + threadsInThePool);
		ThreadPoolExecutor executor = new ThreadPoolExecutor(threadsInThePool, threadsInThePool, 60L,
				TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(threadsInThePool * 2), new CallerRunsPolicy());

		// Decompressed BAM bytes travel from the pump thread to the readers through an in-memory pipe.
		int bufSize = 1024 * 1024;
		PipedOutputStream uncompressedBamOutputStream = new PipedOutputStream();
		PipedInputStream uncompressedBamInputStream = new PipedInputStream(uncompressedBamOutputStream, bufSize);

		StreamPump BGZF_uncompressionPump = new StreamPump(new BlockCompressedInputStream(bamInputStream),
				uncompressedBamOutputStream);
		BGZF_uncompressionPump.setName("BGZF_UNC_PUMP");
		executor.execute(BGZF_uncompressionPump);

		// The header is consumed on the main thread before the slicer starts reading from the same pipe.
		SAMFileHeader samFileHeader = SAMFileHeader_Utils.readHeader(new BinaryCodec(uncompressedBamInputStream),
				ValidationStringency.SILENT, null);

		// Identify the CRAM by the input file name. args[0] must not be used here: all parameters are
		// named options, so the first argument is an option flag rather than a file.
		String cramId = params.bamFile == null ? "STDIN" : params.bamFile.getName();
		CramHeader cramHeader = new CramHeader(CramVersions.CRAM_v3, cramId, samFileHeader);
		CramIO.writeCramHeader(cramHeader, cramOutputStream);

		Conveyer<OrderedByteArray> bam_OBA_conveyer = Conveyer.createWithQueueCapacity(queuesCapacity);
		SupplierJob<OrderedByteArray> bam_OBA_supplier = new SupplierJob<OrderedByteArray>(bam_OBA_conveyer,
				new Bam_OBA_Supplier(new BufferedInputStream(uncompressedBamInputStream)));
		bam_OBA_supplier.setName("BAM_SLICER_SUPPLIER");

		// NOTE(review): OrderingConveyer presumably restores the original order of the converted
		// chunks before they reach the writer - confirm against its implementation.
		Conveyer<OrderedByteArray> cram_OBA_conveyer = new OrderingConveyer<OrderedByteArray>();
		List<Job> converterJobs = new ArrayList<Job>();
		CramLossyOptions lossyOptions = CramLossyOptions.lossless();
		for (int i = 0; i < conversionThreads; i++) {
			BamToCram_OBA_Function convertFunction = new BamToCram_OBA_Function(cramHeader, referenceSource,
					lossyOptions);
			convertFunction.setCaptureTags(params.captureTags);
			convertFunction.setIgnoreTags(params.ignoreTags);
			Job job = new TransformerJob<OrderedByteArray, OrderedByteArray>(bam_OBA_conveyer, cram_OBA_conveyer,
					convertFunction);
			job.setName("BC_CONVERTER_" + i);
			converterJobs.add(job);
		}

		executor.execute(bam_OBA_supplier);
		for (Job job : converterJobs) {
			executor.execute(job);
		}

		ConsumerJob<OrderedByteArray> cram_OBA_writeJob = new ConsumerJob<OrderedByteArray>(cram_OBA_conveyer,
				new OBAWriteConsumer(cramOutputStream));
		cram_OBA_writeJob.setName("CRAM_BYTE_WRITE_JOB");
		executor.execute(cram_OBA_writeJob);

		// Poll the writer every 100 ms and report the state of both conveyers about once a second.
		long time = System.currentTimeMillis();
		while (!cram_OBA_writeJob.isDone()) {
			Thread.sleep(100);
			if (System.currentTimeMillis() - time > 1000) {
				log.info(String.format("BAM_OBA %s; CRAM_OBA %s", bam_OBA_conveyer.toString(),
						cram_OBA_conveyer.toString()));
				time = System.currentTimeMillis();
			}
		}

		executor.shutdown();
		// The EOF marker is written only after the writer job has finished with the stream.
		CramIO.issueEOF(cramHeader.getVersion(), cramOutputStream);
		cramOutputStream.close();
	}

	/**
	 * Command line options of the converter, populated by JCommander.
	 */
	@Parameters(commandDescription = "BAM to CRAM multithreaded converter. ")
	static class Params {
		/** Global log level applied at start-up. */
		@Parameter(names = { "-l", "--log-level" }, description = "Change log level: DEBUG, INFO, WARNING, ERROR.", converter = CramTools.LevelConverter.class)
		Log.LogLevel logLevel = Log.LogLevel.ERROR;

		/** Input BAM file; {@code null} means standard input is read. */
		@Parameter(names = { "--input-bam-file", "-I" }, converter = FileConverter.class, description = "Path to a BAM file to be converted to CRAM. Omit if standard input (pipe).")
		File bamFile;

		/** Reference fasta file; {@code null} triggers a warning and is passed on to the reference source. */
		@Parameter(names = { "--reference-fasta-file", "-R" }, converter = FileConverter.class, description = "The reference fasta file, uncompressed and indexed (.fai file, use 'samtools faidx'). ")
		File referenceFasta;

		/** Output CRAM file; {@code null} means standard output is written. */
		@Parameter(names = { "--output-cram-file", "-O" }, converter = FileConverter.class, description = "The path for the output CRAM file. Omit if standard output (pipe).")
		File outputCramFile = null;

		/** When set, usage is printed and the process exits. */
		@Parameter(names = { "-h", "--help" }, description = "Print help and quit")
		boolean help = false;

		/** Tag list handed to every converter via {@code setIgnoreTags}. */
		@Parameter(names = { "--ignore-tags" }, description = "Ignore the tags listed, for example 'OQ:XA:XB'")
		String ignoreTags = "";

		/** Tag list handed to every converter via {@code setCaptureTags}. */
		@Parameter(names = { "--capture-tags" }, description = "Capture the tags listed, for example 'OQ:XA:XB'")
		String captureTags = "";

		// NOTE(review): this flag is parsed but never read by main() in this file - confirm whether
		// it should be forwarded to the converter.
		@Parameter(names = { "--capture-all-tags" }, description = "Capture all tags.")
		boolean captureAllTags = false;

		/** Requested total thread count; 0 selects the number of available processors (at least 5). */
		@Parameter(names = { "--threads" }, description = "Number of threads to use (minimum 5; use 0 for number of available cores.")
		public int threads = 5;
	}
}