// Copyright (C) 2011-2012 CRS4.
//
// This file is part of Seal.
//
// Seal is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// Seal is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with Seal. If not, see <http://www.gnu.org/licenses/>.
package it.crs4.seal.read_sort;
import it.crs4.seal.common.SealToolParser;
import it.crs4.seal.common.ClusterUtils;
import java.util.ArrayList;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.commons.cli.*;
/**
 * Command-line parser for the {@code seal read_sort} tool.
 *
 * <p>On top of the options handled by {@link SealToolParser}, it defines:
 * <ul>
 *   <li>{@code -ann / --annotations <ref.ann>}: the BWA reference annotation file;</li>
 *   <li>{@code -distref / --distributed-reference <archive>}: a reference archive
 *       to be registered as a cache archive and symlinked as {@code reference}.</li>
 * </ul>
 *
 * <p>The annotation option is always required. When {@code -distref} is also given,
 * the annotation value is interpreted as a path relative to the {@code reference}
 * symlink; otherwise it is stored in the configuration as-is.
 */
public class ReadSortOptionParser extends SealToolParser {

	public static final String ConfigSection = "ReadSort";

	/** Configuration key holding the comma-separated list of cache archives. */
	private static final String CACHE_ARCHIVES_PROP = "mapred.cache.archives";

	private final Option ann;
	private final Option distReference;

	/**
	 * Builds the parser, registers the {@code -ann} and {@code -distref} options
	 * and sets the minimum number of reduce tasks to 1.
	 */
	@SuppressWarnings("static") // for OptionBuilder
	public ReadSortOptionParser()
	{
		super(ConfigSection, "seal read_sort");

		// define the custom options
		ann = OptionBuilder
		        .withDescription("annotation file (.ann) of the BWA reference used to create the SAM data")
		        .hasArg()
		        .withArgName("ref.ann")
		        .withLongOpt("annotations")
		        .create("ann");
		options.addOption(ann);

		distReference = OptionBuilder
		        .withDescription("BWA reference on HDFS used to create the SAM data, to be distributed by DistributedCache")
		        .hasArg()
		        .withArgName("archive")
		        .withLongOpt("distributed-reference")
		        .create("distref");
		options.addOption(distReference);

		this.setMinReduceTasks(1);
	}

	/**
	 * Parses the command line and stores the read_sort settings in {@code conf}.
	 *
	 * <p>After delegating to the superclass, this method sets
	 * {@code ReadSort.REF_ANN_PROP_NAME} (and, with {@code -distref}, the cache
	 * archive and symlink properties), then writes the value of
	 * {@code getNReduceTasks()} to {@code ClusterUtils.NUM_RED_TASKS_PROPERTY}.
	 *
	 * @param conf configuration to populate
	 * @param args raw command-line arguments
	 * @return the parsed command line
	 * @throws IOException    if the file system of the distributed reference cannot be obtained
	 * @throws ParseException if the annotation file option is missing, or if the
	 *                        superclass rejects the arguments
	 */
	@Override
	protected CommandLine parseOptions(Configuration conf, String[] args)
	  throws IOException, ParseException
	{
		CommandLine line = super.parseOptions(conf, args);

		/********* distributed reference and annotations *********/
		final boolean hasAnnotation = line.hasOption(ann.getOpt());

		if (line.hasOption(distReference.getOpt()))
		{
			// Validate before touching conf or the file system, so that a usage
			// error does not leave the configuration partially modified.
			if (!hasAnnotation) {
				throw new ParseException("You must specify the name of the annotation file within the distributed reference archive with -" + ann.getOpt());
			}

			// Distribute the reference archive, and create a symlink
			// "reference" to the unpacked directory.
			Path archive = new Path(line.getOptionValue(distReference.getOpt()));
			archive = archive.makeQualified(archive.getFileSystem(conf));
			Path cachePath = new Path(archive.toString() + "#reference");

			addCacheArchive(conf, cachePath.toString());
			conf.set("mapred.create.symlink", "yes");

			// The annotation file is addressed through the "reference" symlink.
			conf.set(ReadSort.REF_ANN_PROP_NAME, "reference/" + line.getOptionValue(ann.getOpt()));
		}
		else if (hasAnnotation)
		{
			// direct access to the reference annotation
			conf.set(ReadSort.REF_ANN_PROP_NAME, line.getOptionValue(ann.getOpt()));
		}
		else
		{
			throw new ParseException("You must provide the path the reference annotation file (<ref>.ann)");
		}

		conf.set(ClusterUtils.NUM_RED_TASKS_PROPERTY, String.valueOf(getNReduceTasks()));

		return line;
	}

	/**
	 * Adds {@code archiveUri} to the cache archive list in {@code conf}, keeping
	 * any archives that are already registered instead of overwriting them.
	 */
	private static void addCacheArchive(Configuration conf, String archiveUri)
	{
		String existing = conf.get(CACHE_ARCHIVES_PROP);
		if (existing == null || existing.isEmpty()) {
			conf.set(CACHE_ARCHIVES_PROP, archiveUri);
		} else {
			conf.set(CACHE_ARCHIVES_PROP, existing + "," + archiveUri);
		}
	}
}