/* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */ package abra; import joptsimple.OptionParser; /** * Manages ABRA command line options * * @author Lisle E. Mose (lmose at unc dot edu) */ public class ReAlignerOptions extends Options { private static final String INPUT_SAM = "in"; private static final String OUTPUT_SAM = "out"; private static final String REFERENCE = "ref"; private static final String BWA_INDEX = "bwa-ref"; private static final String TARGET_REGIONS = "targets"; private static final String TARGET_REGIONS_WITH_KMERS = "target-kmers"; private static final String WORKING_DIR = "working"; private static final String KMER_SIZE = "kmer"; private static final String MIN_NODE_FREQUENCY = "mnf"; private static final String MIN_UNALIGNED_NODE_FREQUENCY = "umnf"; private static final String MIN_CONTIG_LENGTH = "mcl"; private static final String MAX_POTENTIAL_CONTIGS = "mpc"; private static final String MIN_CONTIG_MAPQ = "mc-mapq"; private static final String MIN_MAPQ = "mapq"; private static final String NUM_THREADS = "threads"; private static final String UNALIGNED_ASSEMBLY = "aur"; private static final String MAX_UNALIGNED_READS = "mur"; private static final String SINGLE_END = "single"; private static final String RNA = "rna"; private static final String RNA_OUTPUT = "rna-out"; private static final String MIN_BASE_QUALITY = "mbq"; private static final String MIN_READ_CANDIDATE_FRACTION = "rcf"; private static final String MAX_AVERAGE_REGION_DEPTH = "mad"; private static final String SEARCH_FOR_STRUCTURAL_VARIATION = "sv"; private static final String SEARCH_FOR_LOCAL_REPEATS = "lr"; private static final String AVERAGE_DEPTH_CEILING = "adc"; private static final String MIN_EDGE_RATIO = "mer"; private static final String USE_INTERMEDIATE_BAM = "ib"; private static final String NO_DEBUG = "no-debug"; private static final String MAX_NODES = "maxn"; private OptionParser parser; private boolean isValid; @Override protected OptionParser getOptionParser() { if (parser == null) { parser = new OptionParser(); parser.accepts(INPUT_SAM, "Required list of input sam or bam file(s) separated by comma").withRequiredArg().ofType(String.class); parser.accepts(OUTPUT_SAM, "Required list of output sam or bam file(s) separated by comma").withRequiredArg().ofType(String.class); parser.accepts(REFERENCE, "Genome reference location").withRequiredArg().ofType(String.class); parser.accepts(BWA_INDEX, "BWA index prefix. Use this only if the bwa index prefix does not match the ref option.").withRequiredArg().ofType(String.class); parser.accepts(TARGET_REGIONS, "BED file containing target regions").withRequiredArg().ofType(String.class); parser.accepts(TARGET_REGIONS_WITH_KMERS, "BED-like file containing target regions with per region kmer sizes in 4th column").withRequiredArg().ofType(String.class); parser.accepts(WORKING_DIR, "Working directory for intermediate output. Must not already exist").withRequiredArg().ofType(String.class); parser.accepts(KMER_SIZE, "Optional assembly kmer size(delimit with commas if multiple sizes specified)").withOptionalArg().ofType(String.class); parser.accepts(MIN_NODE_FREQUENCY, "Assembly minimum node frequency").withRequiredArg().ofType(Integer.class).defaultsTo(2); parser.accepts(MIN_UNALIGNED_NODE_FREQUENCY, "Assembly minimum unaligned node frequency").withOptionalArg().ofType(Integer.class).defaultsTo(2); parser.accepts(MIN_CONTIG_LENGTH, "Assembly minimum contig length").withOptionalArg().ofType(Integer.class).defaultsTo(-1); parser.accepts(MAX_POTENTIAL_CONTIGS, "Maximum number of potential contigs for a region").withOptionalArg().ofType(Integer.class).defaultsTo(5000); parser.accepts(NUM_THREADS, "Number of threads").withRequiredArg().ofType(Integer.class).defaultsTo(4); parser.accepts(MIN_CONTIG_MAPQ, "Minimum contig mapping quality").withOptionalArg().ofType(Integer.class).defaultsTo(25); parser.accepts(MIN_MAPQ, "Minimum mapping quality for a read to be used in assembly and be eligible for realignment").withOptionalArg().ofType(Integer.class).defaultsTo(20); parser.accepts(UNALIGNED_ASSEMBLY, "Assemble unaligned reads (currently disabled)."); parser.accepts(MAX_UNALIGNED_READS, "Maximum number of unaligned reads to assemble").withOptionalArg().ofType(Integer.class).defaultsTo(50000000); parser.accepts(SINGLE_END, "Input is single end"); parser.accepts(RNA, "Input RNA sam or bam file (currently disabled)").withOptionalArg().ofType(String.class); parser.accepts(RNA_OUTPUT, "Output RNA sam or bam file (required if RNA input file specified)").withRequiredArg().ofType(String.class); parser.accepts(MIN_BASE_QUALITY, "Minimum base quality for inclusion in assembly. This value is compared against the sum of base qualities per kmer position").withOptionalArg().ofType(Integer.class).defaultsTo(60); parser.accepts(MIN_READ_CANDIDATE_FRACTION, "Minimum read candidate fraction for triggering assembly").withRequiredArg().ofType(Double.class).defaultsTo(.01); parser.accepts(MAX_AVERAGE_REGION_DEPTH, "Regions with average depth exceeding this value will be downsampled").withRequiredArg().ofType(Integer.class).defaultsTo(250); parser.accepts(SEARCH_FOR_STRUCTURAL_VARIATION, "Enable Structural Variation searching (experimental, only supported for paired end)").withRequiredArg().ofType(String.class); parser.accepts(SEARCH_FOR_LOCAL_REPEATS, "Search for potential larger local repeats and output to specified file (only for multiple samples)").withRequiredArg().ofType(String.class); parser.accepts(AVERAGE_DEPTH_CEILING, "Skip regions with average depth greater than this value").withOptionalArg().ofType(Integer.class).defaultsTo(100000); parser.accepts(MIN_EDGE_RATIO, "Min edge pruning ratio. Default value is appropriate for relatively sensitive somatic cases. May be increased for improved speed in germline only cases.").withRequiredArg().ofType(Double.class).defaultsTo(.02); parser.accepts(USE_INTERMEDIATE_BAM, "If specified, write intermediate data to BAM file using the intel deflator when available. Use this to speed up processing."); parser.accepts(NO_DEBUG, "Throttle down debug logging"); parser.accepts(MAX_NODES, "Maximum pre-pruned nodes in regional assembly").withOptionalArg().ofType(Integer.class).defaultsTo(9000); } return parser; } @Override protected void validate() { isValid = true; if (!getOptions().hasArgument(INPUT_SAM)) { isValid = false; System.err.println("Missing required input SAM/BAM file"); } if (!getOptions().hasArgument(OUTPUT_SAM)) { isValid = false; System.err.println("Missing required input SAM/BAM file"); } if (getInputFiles().length != getOutputFiles().length) { System.err.println("Number of input files must equal number of output files"); } if (!getOptions().hasArgument(REFERENCE)) { isValid = false; System.err.println("Missing required reference"); } if (getOptions().hasArgument(TARGET_REGIONS) && getOptions().hasArgument(TARGET_REGIONS_WITH_KMERS)) { isValid = false; System.err.println("Please specifiy only one of: " + TARGET_REGIONS + ", " + TARGET_REGIONS_WITH_KMERS); } if (!getOptions().hasArgument(TARGET_REGIONS) && !getOptions().hasArgument(TARGET_REGIONS_WITH_KMERS)) { isValid = false; System.err.println("Missing required target regions"); } if (!getOptions().hasArgument(WORKING_DIR)) { isValid = false; System.err.println("Missing required working directory"); } if ((getOptions().hasArgument(NUM_THREADS) && (Integer) getOptions().valueOf(NUM_THREADS) < 1)) { isValid = false; System.err.println("Num threads must be greater than zero."); } if (!isValid) { printHelp(); } } public String[] getInputFiles() { String[] files = new String[0]; String sams = (String) getOptions().valueOf(INPUT_SAM); if (sams != null) { files = sams.split(","); } return files; } public String[] getOutputFiles() { String[] files = new String[0]; String sams = (String) getOptions().valueOf(OUTPUT_SAM); if (sams != null) { files = sams.split(","); } return files; } public String getReference() { return (String) getOptions().valueOf(REFERENCE); } public String getBwaIndex() { String index = null; if (getOptions().hasArgument(BWA_INDEX)) { index = (String) getOptions().valueOf(BWA_INDEX); } else { index = (String) getOptions().valueOf(REFERENCE); } return index; } public String getTargetRegionFile() { String file = null; if (getOptions().hasArgument(TARGET_REGIONS_WITH_KMERS)) { file = (String) getOptions().valueOf(TARGET_REGIONS_WITH_KMERS); } else { file = (String) getOptions().valueOf(TARGET_REGIONS); } return file; } public boolean hasPresetKmers() { return getOptions().hasArgument(TARGET_REGIONS_WITH_KMERS); } public String getWorkingDir() { return (String) getOptions().valueOf(WORKING_DIR); } public int[] getKmerSizes() { int[] kmers; if (getOptions().has(KMER_SIZE)) { String[] kmerStr = ((String) getOptions().valueOf(KMER_SIZE)).split(","); kmers = new int[kmerStr.length]; for (int i=0; i<kmerStr.length; i++) { kmers[i] = Integer.parseInt(kmerStr[i]); } } else { kmers = new int[0]; } return kmers; } public int getMinNodeFrequency() { return (Integer) getOptions().valueOf(MIN_NODE_FREQUENCY); } public int getMinUnalignedNodeFrequency() { return (Integer) getOptions().valueOf(MIN_UNALIGNED_NODE_FREQUENCY); } public int getMinContigLength() { return (Integer) getOptions().valueOf(MIN_CONTIG_LENGTH); } public int getMaxPotentialContigs() { return (Integer) getOptions().valueOf(MAX_POTENTIAL_CONTIGS); } public int getNumThreads() { return getOptions().hasArgument(NUM_THREADS) ? (Integer) getOptions().valueOf(NUM_THREADS) : 4; } public int getMinContigMapq() { return (Integer) getOptions().valueOf(MIN_CONTIG_MAPQ); } public boolean isSkipUnalignedAssembly() { return !getOptions().has(UNALIGNED_ASSEMBLY); } public boolean isDebug() { return !getOptions().has(NO_DEBUG); } public boolean useIntermediateBam() { return getOptions().has(USE_INTERMEDIATE_BAM); } public int getMaxUnalignedReads() { return (Integer) getOptions().valueOf(MAX_UNALIGNED_READS); } public boolean isPairedEnd() { return !getOptions().has(SINGLE_END); } public String getRnaSam() { return (String) getOptions().valueOf(RNA); } public String getRnaSamOutput() { return (String) getOptions().valueOf(RNA_OUTPUT); } public int getMinBaseQuality() { return (Integer) getOptions().valueOf(MIN_BASE_QUALITY); } public double getMinReadCandidateFraction() { return (Double) getOptions().valueOf(MIN_READ_CANDIDATE_FRACTION); } public double getMinEdgeRatio() { return (Double) getOptions().valueOf(MIN_EDGE_RATIO); } public int getMaxAverageRegionDepth() { return (Integer) getOptions().valueOf(MAX_AVERAGE_REGION_DEPTH); } public boolean shouldSearchForStructuralVariation() { return getOptions().has(SEARCH_FOR_STRUCTURAL_VARIATION); } public String getStructuralVariantFile() { return (String) getOptions().valueOf(SEARCH_FOR_STRUCTURAL_VARIATION); } public String getLocalRepeatFile() { return (String) getOptions().valueOf(SEARCH_FOR_LOCAL_REPEATS); } public int getAverageDepthCeiling() { return (Integer) getOptions().valueOf(AVERAGE_DEPTH_CEILING); } public int getMinimumMappingQuality() { return (Integer) getOptions().valueOf(MIN_MAPQ); } public boolean isValid() { return isValid; } public int getMaxNodes() { return (Integer) getOptions().valueOf(MAX_NODES); } }