package htsjdk.samtools.cram.paralell;

import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.cram.structure.CramHeader;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.BlockCompressedOutputStream;
import htsjdk.samtools.util.BlockCompressedStreamConstants;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.Log.LogLevel;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.Thread.UncaughtExceptionHandler;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.ThreadPoolExecutor.CallerRunsPolicy;
import java.util.concurrent.TimeUnit;

import net.sf.cram.Bam2Cram;
import net.sf.cram.CramTools;
import net.sf.cram.ref.ReferenceSource;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import com.beust.jcommander.converters.FileConverter;

/**
 * Command-line tool that converts a CRAM stream into a BAM stream using a
 * fixed-size thread pool.
 * <p>
 * The pipeline built by {@link #main(String[])} consists of one supplier job
 * reading from the CRAM input, several transformer jobs and one consumer job
 * writing to the BAM output. The jobs are connected by two conveyers.
 */
public class CramToBam {
    static Log log = Log.getInstance(CramToBam.class);

    /** Minimum accepted value for {@code --threads}. */
    private static final int MIN_THREADS = 4;

    /**
     * Prints the implementation version followed by the JCommander usage text
     * to standard output.
     *
     * @param jc the commander whose usage text is printed
     */
    private static void printUsage(JCommander jc) {
        StringBuilder sb = new StringBuilder();
        sb.append("\n");
        jc.usage(sb);

        System.out.println("Version " + Bam2Cram.class.getPackage().getImplementationVersion());
        System.out.println(sb.toString());
    }

    /**
     * Runs the CRAM to BAM conversion.
     * <p>
     * Exits the JVM with status 1 when the arguments cannot be parsed, when
     * help is requested or no arguments are given, when the thread count is
     * too small, or when any thread dies with an uncaught exception.
     *
     * @param args command-line arguments, see {@link Params}
     * @throws IOException if reading the CRAM header or writing the BAM output fails
     * @throws InterruptedException if the main thread is interrupted while waiting for the writer job
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        Params params = parseArguments(args);

        // Honour the level requested on the command line; it is not overridden afterwards.
        Log.setGlobalLogLevel(params.logLevel);

        // Validate before any stream is opened so that a bad --threads value
        // does not create or truncate the output file.
        params.threads = resolveThreadCount(params.threads);

        if (params.referenceFasta == null) {
            log.warn("No reference file specified, remote access over internet may be used to download public sequences. ");
        }
        ReferenceSource referenceSource = new ReferenceSource(params.referenceFasta);

        // Any failure in a worker (or in main itself) terminates the whole process.
        Thread.setDefaultUncaughtExceptionHandler(new UncaughtExceptionHandler() {

            @Override
            public void uncaughtException(Thread t, Throwable e) {
                System.err.println("Exception in thread " + t);
                e.printStackTrace();
                System.exit(1);
            }
        });

        // NOTE(review): the input stream is never closed explicitly; it is released at process exit.
        InputStream cramInputStream = new BufferedInputStream(
                params.cramFile == null ? System.in : new FileInputStream(params.cramFile));
        OutputStream bamOutputStream = params.outputBamFile == null
                ? System.out
                : new FileOutputStream(params.outputBamFile);

        // One thread is left out of the pool; the main thread stays busy polling for completion.
        final int threadsInThePool = params.threads - 1;
        final int cramContainerSupplierThreads = 1;
        final int bamObaConsumerThreads = 1;
        final int conversionThreads = threadsInThePool - cramContainerSupplierThreads - bamObaConsumerThreads;
        final int queuesCapacity = conversionThreads * 2;
        log.info(String.format("thread pool size=%d, converion threads=%d, queues capacity=%d", threadsInThePool,
                conversionThreads, queuesCapacity));

        // Source stage: reads the CRAM header, then supplies containers into a bounded conveyer.
        CramHeader cramHeader = CramIO.readCramHeader(cramInputStream);
        CramContainer_OBA_Supplier container_OBA_Supplier = new CramContainer_OBA_Supplier(cramInputStream,
                cramHeader);
        Conveyer<OrderedByteArray> cramContainer_OBA_conveyer = Conveyer.createWithQueueCapacity(queuesCapacity);
        SupplierJob<OrderedByteArray> cramContainer_OBA_SupplierJob = new SupplierJob<OrderedByteArray>(
                cramContainer_OBA_conveyer, container_OBA_Supplier);

        // Conversion stage: every job gets its own function instance and shares both conveyers.
        Conveyer<OrderedByteArray> bam_OBA_conveyer = new OrderingConveyer<OrderedByteArray>();
        List<Job> converterJobs = new ArrayList<Job>();
        for (int i = 0; i < conversionThreads; i++) {
            CramToBam_OBA_Function f = new CramToBam_OBA_Function(cramHeader, referenceSource);
            TransformerJob<OrderedByteArray, OrderedByteArray> job = new TransformerJob<OrderedByteArray, OrderedByteArray>(
                    cramContainer_OBA_conveyer, bam_OBA_conveyer, f);
            converterJobs.add(job);
        }

        // The BAM header goes through a block-compressed stream that is flushed but not closed,
        // because the consumer below writes to the same underlying raw stream afterwards.
        BlockCompressedOutputStream blockOS = new BlockCompressedOutputStream(bamOutputStream, null);
        BinaryCodec outputBinaryCodec = new BinaryCodec();
        outputBinaryCodec.setOutputStream(blockOS);
        SAMFileHeader_Utils.writeHeader(outputBinaryCodec, cramHeader.getSamFileHeader());
        blockOS.flush();

        // Sink stage: writes directly to the raw output stream.
        // NOTE(review): presumably the byte arrays are already block-compressed - confirm in CramToBam_OBA_Function.
        OBAWriteConsumer bam_OBA_writeConsumer = new OBAWriteConsumer(bamOutputStream);
        ConsumerJob<OrderedByteArray> bam_OBA_writeJob = new ConsumerJob<OrderedByteArray>(bam_OBA_conveyer,
                bam_OBA_writeConsumer);

        log.info("Creating thread pool with size " + threadsInThePool);
        ThreadPoolExecutor executor = new ThreadPoolExecutor(threadsInThePool, threadsInThePool, 60L,
                TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(threadsInThePool * 2),
                new CallerRunsPolicy());
        executor.execute(cramContainer_OBA_SupplierJob);
        for (Job job : converterJobs) {
            executor.execute(job);
        }
        executor.execute(bam_OBA_writeJob);

        awaitCompletion(bam_OBA_writeJob, cramContainer_OBA_conveyer, bam_OBA_conveyer);

        executor.shutdown();

        // Terminate the output with the empty gzip block constant before closing it.
        bamOutputStream.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);
        bamOutputStream.close();
    }

    /**
     * Parses the command line into a {@link Params} instance.
     * <p>
     * Prints a diagnostic and exits with status 1 when parsing fails; prints
     * the usage and exits with status 1 when no arguments are given or help
     * is requested.
     *
     * @param args raw command-line arguments
     * @return the populated parameters
     */
    private static Params parseArguments(String[] args) {
        Params params = new Params();
        JCommander jc = new JCommander(params);
        try {
            jc.parse(args);
        } catch (Exception e) {
            System.out.println("Failed to parse parameteres, detailed message below: ");
            System.out.println(e.getMessage());
            System.out.println();
            System.out.println("See usage: -h");
            System.exit(1);
        }

        if (args.length == 0 || params.help) {
            printUsage(jc);
            System.exit(1);
        }
        return params;
    }

    /**
     * Turns the requested thread count into the effective one.
     * <p>
     * A request of 0 selects the number of available processors, but never
     * fewer than {@value #MIN_THREADS}. Any other value below
     * {@value #MIN_THREADS} prints an error and exits with status 1.
     *
     * @param requested the value of the {@code --threads} option
     * @return the number of threads to use, at least {@value #MIN_THREADS}
     */
    private static int resolveThreadCount(int requested) {
        if (requested == 0) {
            return Math.max(MIN_THREADS, Runtime.getRuntime().availableProcessors());
        }
        if (requested < MIN_THREADS) {
            System.err.println("Too few threads: minimum 4 threads required. ");
            System.exit(1);
        }
        return requested;
    }

    /**
     * Blocks until the writer job reports completion, polling every 100 ms
     * and logging the state of both conveyers roughly once per second.
     *
     * @param writeJob the job whose completion ends the wait
     * @param cramConveyer conveyer feeding the converters, used for progress logging only
     * @param bamConveyer conveyer feeding the writer, used for progress logging only
     * @throws InterruptedException if the calling thread is interrupted while sleeping
     */
    private static void awaitCompletion(ConsumerJob<OrderedByteArray> writeJob,
            Conveyer<OrderedByteArray> cramConveyer, Conveyer<OrderedByteArray> bamConveyer)
            throws InterruptedException {
        long lastReport = System.currentTimeMillis();
        while (!writeJob.isDone()) {
            Thread.sleep(100);
            if (System.currentTimeMillis() - lastReport > 1000) {
                log.info(String.format("CRAM_OBA %s; BAM_OBA %s", cramConveyer.toString(),
                        bamConveyer.toString()));
                lastReport = System.currentTimeMillis();
            }
        }
    }

    /** Command-line options of the converter, populated by JCommander. */
    @Parameters(commandDescription = "CRAM to BAM multithreaded converter. ")
    static class Params {
        @Parameter(names = { "-l", "--log-level" }, description = "Change log level: DEBUG, INFO, WARNING, ERROR.", converter = CramTools.LevelConverter.class)
        Log.LogLevel logLevel = Log.LogLevel.ERROR;

        @Parameter(names = { "--input-cram-file", "-I" }, converter = FileConverter.class, description = "Path to a CRAM file to be converted to BAM. Omit if standard input (pipe).")
        File cramFile;

        @Parameter(names = { "--reference-fasta-file", "-R" }, converter = FileConverter.class, description = "The reference fasta file, uncompressed and indexed (.fai file, use 'samtools faidx'). ")
        File referenceFasta;

        @Parameter(names = { "--output-bam-file", "-O" }, converter = FileConverter.class, description = "The path for the output BAM file. Omit if standard output (pipe).")
        File outputBamFile = null;

        @Parameter(names = { "-h", "--help" }, description = "Print help and quit")
        boolean help = false;

        @Parameter(names = { "--threads" }, description = "Number of threads to use (minimum 4; use 0 for number of available cores).")
        public int threads = 4;
    }
}