package org.opencb.opencga.storage.hadoop.variant.annotation;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil;
import org.apache.phoenix.util.SchemaUtil;
import org.opencb.opencga.storage.hadoop.variant.AbstractAnalysisTableDriver;
import org.opencb.opencga.storage.hadoop.variant.index.phoenix.VariantPhoenixHelper.VariantColumn;
import java.io.IOException;
import java.util.Arrays;
/**
 * Driver for a map-only job that runs {@link AnalysisAnnotateMapper} over the input table
 * and writes {@link PhoenixVariantAnnotationWritable} records back to that same table
 * through the Phoenix output configuration.
 *
 * Created by mh719 on 15/12/2016.
 */
public class AnalysisTableAnnotateDriver extends AbstractAnalysisTableDriver {

    /** Configuration key holding the value copied into the job's running-map limit. */
    public static final String CONFIG_VARIANT_TABLE_ANNOTATE_PARALLEL = "opencga.variant.table.annotate.parallel";

    /** Hadoop property that receives the configured parallelism. */
    private static final String RUNNING_MAP_LIMIT_KEY = "mapreduce.job.running.map.limit";
    /** Phoenix property set to {@link #UPSERT_BATCH_SIZE}. */
    private static final String PHOENIX_UPSERT_BATCH_SIZE_KEY = "phoenix.upsert.batch.size";

    /** Parallelism used when {@link #CONFIG_VARIANT_TABLE_ANNOTATE_PARALLEL} is not set. */
    private static final int DEFAULT_PARALLEL = 5;
    /** Value written to {@link #PHOENIX_UPSERT_BATCH_SIZE_KEY}. */
    private static final long UPSERT_BATCH_SIZE = 200L;
    /** Scanner caching value applied to the job before it is initialised. */
    private static final int SCANNER_CACHING = 200;

    /**
     * Creates a driver without an explicit configuration.
     */
    public AnalysisTableAnnotateDriver() {
        super();
    }

    /**
     * Creates a driver backed by the given configuration.
     *
     * @param conf configuration handed to the parent driver
     */
    public AnalysisTableAnnotateDriver(Configuration conf) {
        super(conf);
    }

    /**
     * Copies the annotate parallelism (default {@value #DEFAULT_PARALLEL}) into the job's
     * running-map limit and fixes the Phoenix upsert batch size.
     */
    @Override
    protected void parseAndValidateParameters() {
        // NOTE(review): assumes getConf() returns the same Configuration on every call - confirm.
        final Configuration conf = getConf();
        final int mapLimit = conf.getInt(CONFIG_VARIANT_TABLE_ANNOTATE_PARALLEL, DEFAULT_PARALLEL);
        conf.setInt(RUNNING_MAP_LIMIT_KEY, mapLimit);
        conf.setLong(PHOENIX_UPSERT_BATCH_SIZE_KEY, UPSERT_BATCH_SIZE);
    }

    /**
     * @return the mapper class executed by this job
     */
    @Override
    protected Class<? extends TableMapper> getMapperClass() {
        return AnalysisAnnotateMapper.class;
    }

    /**
     * Sets the scanner caching, delegates the generic job setup to the parent driver and then
     * configures Phoenix output to the input table with no reduce phase.
     *
     * @param inTable          table that is read and also used as the Phoenix output table
     * @param job              job being configured
     * @param scan             scan forwarded to the parent initialisation
     * @param addDependencyJar forwarded unchanged to the parent initialisation
     * @throws IOException if the parent initialisation or the output setup fails
     */
    @Override
    protected void initMapReduceJob(String inTable, Job job, Scan scan, boolean addDependencyJar) throws IOException {
        // Scanner caching is applied before the parent initialises the job, as in the original order.
        TableMapReduceUtil.setScannerCaching(job, SCANNER_CACHING);
        super.initMapReduceJob(inTable, job, scan, addDependencyJar);

        final String outputTable = SchemaUtil.getEscapedFullTableName(inTable);
        PhoenixMapReduceUtil.setOutput(job, outputTable, variantColumnNames());

        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(PhoenixVariantAnnotationWritable.class);
        job.setNumReduceTasks(0); // map-only job
    }

    /**
     * Builds the output field list from the string form of every {@link VariantColumn} value,
     * in declaration order.
     */
    private static String[] variantColumnNames() {
        return Arrays.stream(VariantColumn.values())
                .map(Object::toString)
                .toArray(String[]::new);
    }

    /**
     * Command line entry point; exits with the driver's return code, or with 1 when an
     * exception escapes.
     *
     * @param args command line arguments forwarded to the driver
     * @throws Exception declared for compatibility; exceptions are caught and reported here
     */
    public static void main(String[] args) throws Exception {
        try {
            final int exitCode = privateMain(args, null, new AnalysisTableAnnotateDriver());
            System.exit(exitCode);
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }
}