/*
 * Copyright 2015 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.hpg.bigdata.app.cli.hadoop;

import com.beust.jcommander.*;

import java.util.HashMap;
import java.util.Map;

/**
 * Command line parser for the Hadoop tools. It builds a two-level JCommander tree
 * ({@code <command> <subcommand> [options]}) with the commands {@code sequence},
 * {@code alignment} and {@code variant}, and prints the usage messages for each level.
 *
 * Created by imedina on 03/02/15.
 */
public class CliOptionsParser {

    private final JCommander jcommander;

    private final GeneralOptions generalOptions;
    private final CommandOptions commandOptions;

    private final CommonCommandOptions commonCommandOptions;

    // NGS Sequence command and subcommands
    private final SequenceCommandOptions sequenceCommandOptions;

    // NGS Alignments command and subcommands
    private final AlignmentCommandOptions alignmentCommandOptions;

    // NGS variant command and subcommands
    private final VariantCommandOptions variantCommandOptions;

    /**
     * Creates the parser and registers every command and subcommand.
     */
    public CliOptionsParser() {
        generalOptions = new GeneralOptions();

        jcommander = new JCommander(generalOptions);

        commandOptions = new CommandOptions();
        // Must be assigned before any subcommand options object is created: their
        // 'commonOptions' field initializers read this field from the enclosing instance.
        commonCommandOptions = new CommonCommandOptions();

        sequenceCommandOptions = new SequenceCommandOptions();
        jcommander.addCommand("sequence", sequenceCommandOptions);
        JCommander sequenceSubCommands = jcommander.getCommands().get("sequence");
        sequenceSubCommands.addCommand("convert", sequenceCommandOptions.convertSequenceCommandOptions);
        sequenceSubCommands.addCommand("stats", sequenceCommandOptions.statsSequenceCommandOptions);
        //sequenceSubCommands.addCommand("align", sequenceCommandOptions.alignSequenceCommandOptions);

        // Each top-level command is registered with its OWN options object, so that its
        // help flag and @Parameters description belong to that command and not to 'sequence'.
        alignmentCommandOptions = new AlignmentCommandOptions();
        jcommander.addCommand("alignment", alignmentCommandOptions);
        JCommander alignmentSubCommands = jcommander.getCommands().get("alignment");
        alignmentSubCommands.addCommand("convert", alignmentCommandOptions.convertAlignmentCommandOptions);
        alignmentSubCommands.addCommand("stats", alignmentCommandOptions.statsAlignmentCommandOptions);
        alignmentSubCommands.addCommand("depth", alignmentCommandOptions.depthAlignmentCommandOptions);

        variantCommandOptions = new VariantCommandOptions();
        jcommander.addCommand("variant", variantCommandOptions);
        JCommander variantSubCommands = jcommander.getCommands().get("variant");
        variantSubCommands.addCommand("convert", variantCommandOptions.convertVariantCommandOptions);
        variantSubCommands.addCommand("index", variantCommandOptions.indexVariantCommandOptions);

        // convertCommandOptions = new ConvertCommandOptions();
    }

    /**
     * Parses the given command line arguments by delegating to JCommander.
     *
     * @param args command line arguments
     * @throws ParameterException propagated from {@code JCommander.parse}
     */
    public void parse(String[] args) throws ParameterException {
        jcommander.parse(args);
    }

    /**
     * @return the parsed top-level command, or an empty string when JCommander reports none
     */
    public String getCommand() {
        String parsedCommand = jcommander.getParsedCommand();
        return (parsedCommand != null) ? parsedCommand : "";
    }

    /**
     * @return the parsed subcommand of the parsed command; an empty string when the command
     * has no parsed subcommand; {@code null} when the parsed command is not one of the
     * registered commands. Note that {@link CommandOptions#getParsedSubCommand()} returns
     * an empty string in that last case instead.
     */
    public String getSubCommand() {
        String parsedCommand = jcommander.getParsedCommand();
        if (jcommander.getCommands().containsKey(parsedCommand)) {
            String subCommand = jcommander.getCommands().get(parsedCommand).getParsedCommand();
            return subCommand != null ? subCommand : "";
        } else {
            return null;
        }
    }

    /**
     * This class contains all those parameters that are intended to work without any 'command'
     */
    public class GeneralOptions {

        @Parameter(names = {"-h", "--help"}, description = "This parameter prints this help", help = true)
        public boolean help;

        @Parameter(names = {"--version"})
        public boolean version;

    }

    /**
     * This class contains all those parameters available for all 'commands'
     */
    public class CommandOptions {

        @Parameter(names = {"-h", "--help"}, description = "This parameter prints this help", help = true)
        public boolean help;

        /**
         * Looks up the JCommander of the parsed subcommand inside the JCommander of the parsed
         * command. The lookups are chained without null checks.
         *
         * @return the JCommander object registered for the parsed command and subcommand
         */
        public JCommander getSubCommand() {
            return jcommander.getCommands().get(getCommand()).getCommands().get(getSubCommand());
        }

        /**
         * @return the parsed subcommand, or an empty string when there is no parsed subcommand
         * or the parsed command is not one of the registered commands
         */
        public String getParsedSubCommand() {
            String parsedCommand = jcommander.getParsedCommand();
            if (jcommander.getCommands().containsKey(parsedCommand)) {
                String subCommand = jcommander.getCommands().get(parsedCommand).getParsedCommand();
                return subCommand != null ? subCommand : "";
            } else {
                return "";
            }
        }

    }

    /**
     * This class contains all those parameters available for all 'subcommands'
     */
    public class CommonCommandOptions {

        @Parameter(names = {"-h", "--help"}, description = "This parameter prints this help", help = true)
        public boolean help;

        @Parameter(names = {"-L", "--log-level"}, description = "Set the level log, values: debug, info, warning, error, fatal", required = false, arity = 1)
        public String logLevel = "info";

        // NOTE(review): boolean option declared with arity = 1 - confirm this is intended.
        @Deprecated
        @Parameter(names = {"-v", "--verbose"}, description = "This parameter set the level of the logging", required = false, arity = 1)
        public boolean verbose;

        @Parameter(names = {"--conf"}, description = "Set the configuration file", required = false, arity = 1)
        public String conf;

    }


    /*
     * Sequence (FASTQ) CLI options
     */
    @Parameters(commandNames = {"sequence"}, commandDescription = "Implements different tools for working with Fastq files")
    public class SequenceCommandOptions extends CommandOptions {

        ConvertSequenceCommandOptions convertSequenceCommandOptions;
        StatsSequenceCommandOptions statsSequenceCommandOptions;
        AlignSequenceCommandOptions alignSequenceCommandOptions;

        public SequenceCommandOptions() {
            this.convertSequenceCommandOptions = new ConvertSequenceCommandOptions();
            this.statsSequenceCommandOptions = new StatsSequenceCommandOptions();
            this.alignSequenceCommandOptions = new AlignSequenceCommandOptions();
        }
    }

    @Parameters(commandNames = {"convert"}, commandDescription = "Converts FastQ files to different big data formats such as Avro")
    class ConvertSequenceCommandOptions {

        @ParametersDelegate
        public CommonCommandOptions commonOptions = commonCommandOptions;

        @Parameter(names = {"-i", "--input"}, description = "HDFS input file in FastQ format", required = true, arity = 1)
        public String input = null;

        @Parameter(names = {"-o", "--output"}, description = "HDFS output file to store the FastQ sequences according to the GA4GH/Avro model", required = true, arity = 1)
        public String output = null;

        @Parameter(names = {"-x", "--compression"}, description = "Accepted values: snappy, deflate, bzip2, xz, null. Default: snappy", required = false, arity = 1)
        public String compression = "snappy";

        //@Parameter(names = {"--to-avro"}, description = "", required = false)
        //public boolean toAvro = true;

        //@Parameter(names = {"--to-fastq"}, description = "", required = false)
        //public boolean toFastq;

    }

    @Parameters(commandNames = {"stats"}, commandDescription = "Calculates different stats from sequencing data")
    class StatsSequenceCommandOptions {

        @ParametersDelegate
        public CommonCommandOptions commonOptions = commonCommandOptions;

        @Parameter(names = {"-i", "--input"}, description = "HDFS input file containing the FastQ sequences stored in GA4GH/Avro model)", required = true, arity = 1)
        public String input = null;

        @Parameter(names = {"-o", "--output"}, description = "Local output directory to save stats results in JSON format ", required = true, arity = 1)
        public String output = null;

        //@Parameter(names = {"-f", "--filter"}, description = "", required = false, arity = 1)
        //public String filter = null;

        @Parameter(names = {"-k", "--kmers"}, description = "Compute k-mers (according to the indicated length)", required = false, arity = 1)
        public Integer kmers = 0;

    }

    // Not registered as a subcommand: the corresponding addCommand call in the constructor is commented out.
    @Parameters(commandNames = {"align"}, commandDescription = "Align reads to a reference genome using HPG Aligner in MapReduce")
    public class AlignSequenceCommandOptions {

        @ParametersDelegate
        public CommonCommandOptions commonOptions = commonCommandOptions;

        @Parameter(names = {"-i", "--input"}, description = "", required = true, arity = 1)
        public String input = null;

        @Parameter(names = {"-o", "--output"}, description = "", required = false, arity = 1)
        public String output = null;

        @Parameter(names = {"--index-file"}, description = "", required = false)
        public String referenceGenomeFile;

    }


    /*
     * Alignment (BAM) CLI options
     */
    @Parameters(commandNames = {"alignment"}, commandDescription = "Implements different tools for working with BAM files")
    public class AlignmentCommandOptions extends CommandOptions {

        ConvertAlignmentCommandOptions convertAlignmentCommandOptions;
        StatsAlignmentCommandOptions statsAlignmentCommandOptions;
        DepthAlignmentCommandOptions depthAlignmentCommandOptions;

        public AlignmentCommandOptions() {
            this.convertAlignmentCommandOptions = new ConvertAlignmentCommandOptions();
            this.statsAlignmentCommandOptions = new StatsAlignmentCommandOptions();
            this.depthAlignmentCommandOptions = new DepthAlignmentCommandOptions();
        }
    }

    @Parameters(commandNames = {"convert"}, commandDescription = "Converts BAM files to different big data formats such as Avro and Parquet")
    class ConvertAlignmentCommandOptions {

        @ParametersDelegate
        public CommonCommandOptions commonOptions = commonCommandOptions;

        @Parameter(names = {"-i", "--input"}, description = "HDFS input file in BAM format", required = true, arity = 1)
        public String input = null;

        @Parameter(names = {"-o", "--output"}, description = "HDFS output file to store the BAM alignments according to the GA4GH/Avro model", required = true, arity = 1)
        public String output = null;

        @Parameter(names = {"-x", "--compression"}, description = "Accepted values: snappy, deflate, bzip2, xz, null. Default: snappy", required = false, arity = 1)
        public String compression = "snappy";

        //@Parameter(names = {"--to-avro"}, description = "", required = false)
        //public boolean toAvro;

        @Parameter(names = {"--to-parquet"}, description = "To save the output file in Parquet", required = false)
        public boolean toParquet;

        //@Parameter(names = {"--to-fastq"}, description = "", required = false)
        //public boolean toFastq;

        @Parameter(names = {"--adjust-quality"}, description = "Compress quality field using 8 quality levels. Will loss information", required = false)
        public boolean adjustQuality;

    }

    @Parameters(commandNames = {"stats"}, commandDescription = "Compute some stats for a file containing alignments in GA4GH/Avro model")
    class StatsAlignmentCommandOptions {

        @ParametersDelegate
        public CommonCommandOptions commonOptions = commonCommandOptions;

        @Parameter(names = {"-i", "--input"}, description = "HDFS input file containing alignments stored according to the GA4GH/Avro model)", required = true, arity = 1)
        public String input = null;

        @Parameter(names = {"-o", "--output"}, description = "Local output directory to save stats results in JSON format", required = true, arity = 1)
        public String output = null;

        //@Parameter(names = {"-f", "--filter"}, description = "", required = false, arity = 1)
        //public String filter = null;

    }

    @Parameters(commandNames = {"depth"}, commandDescription = "Compute the depth for a given file containing alignments in GA4GH/Avro model")
    class DepthAlignmentCommandOptions {

        @ParametersDelegate
        public CommonCommandOptions commonOptions = commonCommandOptions;

        @Parameter(names = {"-i", "--input"}, description = "HDFS input file containing alignments stored according to the GA4GH/Avro model)", required = true, arity = 1)
        public String input = null;

        @Parameter(names = {"-o", "--output"}, description = "Local output directory to save the depth in a text file", required = true, arity = 1)
        public String output = null;

        @Parameter(names = {"-r", "--regions"}, description = "Compute depth for the mentioned regions separated by commas. The region format is: chromosome:start-end. Example: 3:230000000-250000000,12:435050000-435100000", required = false, arity = 1)
        public String regions = null;

        @Parameter(names = {"-q", "--min-mapq"}, description = "Compute depth for alignments whose mapping quality is greater that this minimum mapping quality", required = false, arity = 1)
        public int minMapQ = 0;

    }


    /*
     * Variant (VCF) CLI options
     */
    @Parameters(commandNames = {"variant"}, commandDescription = "Implements different tools for working with gVCF/VCF files")
    public class VariantCommandOptions extends CommandOptions {

        ConvertVariantCommandOptions convertVariantCommandOptions;
        IndexVariantCommandOptions indexVariantCommandOptions;

        public VariantCommandOptions() {
            this.convertVariantCommandOptions = new ConvertVariantCommandOptions();
            this.indexVariantCommandOptions = new IndexVariantCommandOptions();
        }
    }

    @Parameters(commandNames = {"convert"}, commandDescription = "Convert gVCF/VCF files to different data formats such as Avro, Parquet and Json.")
    class ConvertVariantCommandOptions {

        @ParametersDelegate
        public CommonCommandOptions commonOptions = commonCommandOptions;

        @Parameter(names = {"-i", "--input"}, description = "Input file name, usually a gVCF/VCF but it can be an Avro file when converting to Parquet.", required = true, arity = 1)
        public String input;

        @Parameter(names = {"--to"}, description = "Destination Serialization format. Accepted values: avro, parquet and json", required = true)
        public String to;

        @Parameter(names = {"-o", "--output"}, description = "Output file name.", required = false, arity = 1)
        public String output;

        @Parameter(names = {"-O"}, description = "Use the standard output.", required = false, arity = 0)
        public boolean stdOutput;

        @Parameter(names = {"--from"}, description = "Accepted values: vcf, avro", required = false)
        public String from;

        @Parameter(names = {"-x", "--compression"}, description = "Only for commands 'to-avro' and 'to-parquet'. Values: snappy, deflate, bzip2, xz", required = false, arity = 1)
        public String compression = "snappy";

        @Parameter(names = {"--region"}, description = "Filter variant by regions, comma separated list of regions, e.g.: 1:300000-400000000,15:343453463-8787665654", required = false)
        public String regions;

    }

    @Parameters(commandNames = {"index"}, commandDescription = "Load avro gVCF/VCF files into different NoSQL, only HBase implemented so far")
    public class IndexVariantCommandOptions {

        @ParametersDelegate
        public CommonCommandOptions commonOptions = commonCommandOptions;

        @Parameter(names = {"-i", "--input"}, description = "GA4GH Avro input file", required = true, arity = 1)
        public String input = null;

        @Parameter(names = {"-t", "--type"}, description = "Type can be: vcf, bed, or gff", arity = 1)
        public String type = "vcf";

        @Parameter(names = {"-se", "--storage-engine"}, description = "Database, values: hbase, hive, impala", arity = 1)
        public String database = "hbase";

        // NOTE(review): this description does not match the option name and looks copy-pasted - confirm the intended text.
        @Parameter(names = {"-r", "--regions"}, description = "Database to load data, values: hbase", arity = 1)
        public String regions = null;

        // NOTE(review): boolean option declared with arity = 1 - confirm this is intended.
        @Parameter(names = {"-g", "--genome"}, description = "Load whole genome from gVCF - including non-variant regions", required = false, arity = 1)
        public boolean includeNonVariants = false;

        // NOTE(review): boolean option declared with arity = 1 - confirm this is intended.
        @Parameter(names = {"-e", "--expand"}, description = "Expand non-variant gVCF regions to one entry per base", required = false, arity = 1)
        public boolean expand = false;

        @Parameter(names = {"--credentials"}, description = "Database credentials: user, password, host, port", arity = 1)
        public String credentials;

    }


    /**
     * Prints a usage message to standard error. Which message is printed depends on what was
     * parsed: the program banner and command list when there is no command, the subcommand list
     * when there is a command but no subcommand, and the option list of the subcommand otherwise.
     */
    public void printUsage() {
        if (getCommand().isEmpty()) {
            System.err.println("");
            System.err.println("Program: HPG BigData for Hadoop (OpenCB)");
            System.err.println("Version: 0.2.0");
            System.err.println("Description: Hadoop-based tools for working with NGS data");
            System.err.println("");
            System.err.println("Usage: hpg-bigdata.sh [-h|--help] [--version] <command> <subcommand> [options]");
            System.err.println("");
            System.err.println("Commands:");
            printMainUsage();
            System.err.println("");
        } else {
            String parsedCommand = getCommand();
            if (getSubCommand().isEmpty()) {
                System.err.println("");
                System.err.println("Usage: hpg-bigdata.sh " + parsedCommand + " <subcommand> [options]");
                System.err.println("");
                System.err.println("Subcommands:");
                printCommandUsage(jcommander.getCommands().get(getCommand()));
                System.err.println("");
            } else {
                String parsedSubCommand = getSubCommand();
                System.err.println("");
                System.err.println("Usage: hpg-bigdata.sh " + parsedCommand + " " + parsedSubCommand + " [options]");
                System.err.println("");
                System.err.println("Options:");
                printSubCommandUsage(jcommander.getCommands().get(parsedCommand).getCommands().get(parsedSubCommand));
                System.err.println("");
            }
        }
    }

    /**
     * Prints one line per registered top-level command with its description.
     */
    private void printMainUsage() {
        // TODO This is a nasty hack. The descriptions are kept in this local map instead of being
        // read from the @Parameters annotations of the command options classes.
        // NOTE(review): the 'alignment' text here ("SAM/BAM") differs from the annotation ("BAM");
        // decide which one is intended before replacing this map with jcommander.getCommandDescription().
        Map<String, String> commandDescription = new HashMap<>();
        commandDescription.put("sequence", "Implements different tools for working with Fastq files");
        commandDescription.put("alignment", "Implements different tools for working with SAM/BAM files");
        commandDescription.put("variant", "Implements different tools for working with gVCF/VCF files");

        for (String s : jcommander.getCommands().keySet()) {
            System.err.printf("%12s  %s\n", s, commandDescription.get(s));
        }
    }

    /**
     * Prints one line per subcommand of the given command with its description.
     *
     * @param commander JCommander object of a top-level command
     */
    private void printCommandUsage(JCommander commander) {
        for (Map.Entry<String, JCommander> entry : commander.getCommands().entrySet()) {
            System.err.printf("%12s  %s\n", entry.getKey(), commander.getCommandDescription(entry.getKey()));
        }
    }

    /**
     * Prints one line per parameter of the given subcommand: a '*' when it is required, its names,
     * its value type (only for parameters with arity greater than 0), description and default value.
     *
     * @param commander JCommander object of a subcommand
     */
    private void printSubCommandUsage(JCommander commander) {
        for (ParameterDescription parameterDescription : commander.getParameters()) {
            String type = "";
            if (parameterDescription.getParameterized().getParameter().arity() > 0) {
                // Upper-cased type name of the field with the "java.lang." prefix removed
                type = parameterDescription.getParameterized().getGenericType().getTypeName()
                        .replace("java.lang.", "").toUpperCase();
            }
            System.err.printf("%5s %-20s %-10s %s [%s]\n",
                    parameterDescription.getParameterized().getParameter().required() ? "*" : "",
                    parameterDescription.getNames(),
                    type,
                    parameterDescription.getDescription(),
                    parameterDescription.getDefault());
        }
    }

    public GeneralOptions getGeneralOptions() {
        return generalOptions;
    }

    public CommandOptions getCommandOptions() {
        return commandOptions;
    }

    public CommonCommandOptions getCommonCommandOptions() {
        return commonCommandOptions;
    }

    public SequenceCommandOptions getSequenceCommandOptions() {
        return sequenceCommandOptions;
    }

    public AlignmentCommandOptions getAlignmentCommandOptions() {
        return alignmentCommandOptions;
    }

    public VariantCommandOptions getVariantCommandOptions() {
        return variantCommandOptions;
    }

}