/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*
*/
package org.opencb.opencga.storage.hadoop.variant;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.io.DatumReader;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.VariantAvro;
/**
* @author Matthias Haimel mh719+git@cam.ac.uk
*
*/
public class Echo extends Mapper<AvroKey<VariantAvro>, NullWritable, ImmutableBytesWritable, Put> {
/**
*
*/
public Echo() {
// TODO Auto-generated constructor stub
}
@Override
protected void map(AvroKey<VariantAvro> key, NullWritable value,
Mapper<AvroKey<VariantAvro>, NullWritable, ImmutableBytesWritable, Put>.Context context) throws IOException,
InterruptedException {
// VariantAvro varAvro = key.datum();
// Variant variant = new Variant(varAvro);
context.getCounter("Echo", "Count").increment(1);
}
/**
* @param args
* abc
* @throws Exception
* x
*/
public static void main(String[] args) throws Exception {
System.out.println(String.join(",", args));
if (args.length > 1) {
System.exit(localMain(args));
} else {
System.exit(privateMain(args));
}
}
private static int localMain(String[] args) {
File f = new File(args[0]);
DatumReader<VariantAvro> reader = new SpecificDatumReader<VariantAvro>(VariantAvro.class);
int cnt = 0;
try (DataFileReader<VariantAvro> freader = new DataFileReader<VariantAvro>(f, reader)) {
while (freader.hasNext()) {
VariantAvro next = freader.next();
Variant variant = new Variant(next);
++cnt;
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
System.out.println("Read " + cnt + " entries !!!");
return 0;
}
private static int privateMain(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.create();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
Echo echo = new Echo();
/* Alternative to using tool runner */
// int exitCode = ToolRunner.run(conf,new GenomeVariantDriver(), args);
return echo.run(conf, otherArgs);
}
public int run(Configuration conf, String[] args) throws Exception {
URI inputFile = URI.create(args[0]);
/* JOB setup */
Job job = Job.getInstance(conf, "Echo");
job.setJarByClass(getClass());
conf = job.getConfiguration();
conf.set("mapreduce.job.user.classpath.first", "true");
// input
FileInputFormat.addInputPath(job, new Path(inputFile));
AvroJob.setInputKeySchema(job, VariantAvro.getClassSchema());
job.setInputFormatClass(AvroKeyInputFormat.class);
// mapper
job.setMapperClass(Echo.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setNumReduceTasks(0);
// TODO: Update list of indexed files
return job.waitForCompletion(true) ? 0 : 1;
}
}