/* * Copyright 2015 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.hpg.bigdata.app.cli.local; import htsjdk.samtools.fastq.FastqReader; import org.apache.avro.file.DataFileStream; import org.apache.avro.specific.SpecificDatumReader; import org.opencb.biodata.models.sequence.Read; import org.opencb.biodata.tools.alignment.stats.SequenceStats; import org.opencb.biodata.tools.alignment.stats.SequenceStatsCalculator; import org.opencb.hpg.bigdata.app.cli.CommandExecutor; import org.opencb.hpg.bigdata.core.converters.FastqRecord2ReadConverter; import org.opencb.hpg.bigdata.core.io.avro.AvroWriter; import org.opencb.hpg.bigdata.core.utils.AvroUtils; import java.io.*; /** * Created by imedina on 03/02/15. */ public class SequenceCommandExecutor extends CommandExecutor { private LocalCliOptionsParser.SequenceCommandOptions sequenceCommandOptions; public SequenceCommandExecutor(LocalCliOptionsParser.SequenceCommandOptions sequenceCommandOptions) { this.sequenceCommandOptions = sequenceCommandOptions; } /** * Parse specific 'sequence' command options. * * @throws IOException Exception thrown if file does not exist */ public void execute() throws IOException { String subCommand = sequenceCommandOptions.getParsedSubCommand(); switch (subCommand) { case "convert": convert(); break; case "stats": stats(); break; default: break; } } private void convert() throws IOException { LocalCliOptionsParser.ConvertSequenceCommandOptions convertSequenceCommandOptions = sequenceCommandOptions.convertSequenceCommandOptions; // get input parameters String input = convertSequenceCommandOptions.input; String output = convertSequenceCommandOptions.output; String codecName = convertSequenceCommandOptions.compression; try { // reader FastqReader reader = new FastqReader(new File(input)); // writer OutputStream os = new FileOutputStream(output); AvroWriter<Read> writer = new AvroWriter<>(Read.getClassSchema(), AvroUtils.getCodec(codecName), os); // main loop FastqRecord2ReadConverter converter = new FastqRecord2ReadConverter(); while (reader.hasNext()) { writer.write(converter.forward(reader.next())); } // close reader.close(); writer.close(); os.close(); } catch (Exception e) { throw e; } } private void stats() throws IOException { LocalCliOptionsParser.StatsSequenceCommandOptions statsSequenceCommandOptions = sequenceCommandOptions.statsSequenceCommandOptions; // get input parameters String input = statsSequenceCommandOptions.input; String output = statsSequenceCommandOptions.output; int kvalue = statsSequenceCommandOptions.kmers; try { // reader InputStream is = new FileInputStream(input); DataFileStream<Read> reader = new DataFileStream<>(is, new SpecificDatumReader<>(Read.class)); SequenceStats stats; SequenceStats totalStats = new SequenceStats(kvalue); SequenceStatsCalculator calculator = new SequenceStatsCalculator(); // main loop for (Read read : reader) { stats = calculator.compute(read, kvalue); calculator.update(stats, totalStats); } // close reader reader.close(); is.close(); // write results PrintWriter writer = new PrintWriter(new File(output + "/stats.json")); writer.write(totalStats.toJSON()); writer.close(); } catch (Exception e) { throw e; } } }