/*
 * Copyright 2015 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.hpg.bigdata.app.cli.hadoop;

import java.io.IOException;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.opencb.hpg.bigdata.app.cli.CommandExecutor;
import org.opencb.hpg.bigdata.core.utils.PathUtils;
import org.opencb.hpg.bigdata.tools.sequence.Fastq2AvroMR;
import org.opencb.hpg.bigdata.tools.sequence.stats.ReadKmersMR;
import org.opencb.hpg.bigdata.tools.sequence.stats.ReadStatsMR;

/**
 * Created by imedina on 03/02/15.
 */
public class SequenceCommandExecutor extends CommandExecutor {

    private CliOptionsParser.SequenceCommandOptions sequenceCommandOptions;

    public SequenceCommandExecutor(CliOptionsParser.SequenceCommandOptions sequenceCommandOptions) {
        this.sequenceCommandOptions = sequenceCommandOptions;
    }

    /*
     * Parse specific 'sequence' command options
     */
    public void execute() throws Exception {
        String subCommand = sequenceCommandOptions.getParsedSubCommand();

        switch (subCommand) {
            case "convert":
                convert();
                break;
            case "stats":
                stats();
                break;
            case "align":
                System.out.println("Sub-command 'align' is not yet implemented for the 'sequence' command!");
                break;
            default:
                break;
        }
    }

    private void convert() throws Exception {
        CliOptionsParser.ConvertSequenceCommandOptions convertSequenceCommandOptions =
                sequenceCommandOptions.convertSequenceCommandOptions;

        // get input parameters
        String input = convertSequenceCommandOptions.input;
        String output = convertSequenceCommandOptions.output;
        String codecName = convertSequenceCommandOptions.compression;

        // sanity check: treat the literal "null" as no compression
        if ("null".equals(codecName)) {
            codecName = null;
        }

        // run MapReduce job to convert to the GA4GH/Avro model
        Fastq2AvroMR.run(input, output, codecName);
    }

    private void stats() throws Exception {
        CliOptionsParser.StatsSequenceCommandOptions statsSequenceCommandOptions =
                sequenceCommandOptions.statsSequenceCommandOptions;

        // get input parameters
        String input = statsSequenceCommandOptions.input;
        String output = statsSequenceCommandOptions.output;
        int kvalue = statsSequenceCommandOptions.kmers;

        // prepare a temporary HDFS output folder named after the current timestamp
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        String outHdfsDirname = Long.toString(new Date().getTime());

        // run MapReduce job to compute stats
        ReadStatsMR.run(input, outHdfsDirname, kvalue);

        // post-processing: copy the reducer output to the local output folder
        Path outFile = new Path(outHdfsDirname + "/part-r-00000");

        try {
            if (!fs.exists(outFile)) {
                logger.error("Stats results file not found: {}", outFile.getName());
            } else {
                String outRawFileName = output + "/stats.json";
                fs.copyToLocalFile(outFile, new Path(outRawFileName));
                //Utils.parseStatsFile(outRawFileName, out);
            }
        } finally {
            // clean up the temporary HDFS folder even if the copy fails
            fs.delete(new Path(outHdfsDirname), true);
        }
    }

    @Deprecated
    private void kmers(String input, String output, int kvalue) throws Exception {
        // clean paths
        String in = PathUtils.clean(input);
        String out = PathUtils.clean(output);

        if (!PathUtils.isHdfs(input)) {
            throw new IOException("To run fastq kmers, the input file '" + input
                    + "' must be stored in HDFS/Hadoop. Use the command 'convert fastq2sa' to import your file.");
        }

        ReadKmersMR.run(in, out, kvalue);
    }
}