package com.produban.openbus.esdump;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.elasticsearch.hadoop.mr.EsInputFormat;
import org.elasticsearch.hadoop.mr.LinkedMapWritable;
/**
 * A MapReduce job to dump an Elasticsearch index into HDFS files.
 *
 * <p>The job is map-only (zero reduce tasks): records are read through
 * {@link EsInputFormat}, passed to {@code ESDumpMapper}, and written under the
 * HDFS output path given on the command line.
 *
 * <p>Positional arguments, after any Hadoop generic options:
 * <ol>
 *   <li>ES nodes, e.g. {@code EShost1:9200,EShost2:9200}</li>
 *   <li>ES resource, e.g. {@code twitter/tweet}</li>
 *   <li>HDFS output path, e.g. {@code /tmp/esdump/}</li>
 * </ol>
 */
public class ESDumpDriver extends Configured implements Tool {

    /** Number of positional arguments read by {@link #run(String[])}. */
    private static final int REQUIRED_ARGS = 3;

    /**
     * Configures and runs the dump job, blocking until it completes.
     *
     * @param args positional arguments: ES nodes, ES resource, HDFS output path;
     *             any further arguments are ignored
     * @return {@code -1} if too few arguments were supplied (usage is printed to
     *         standard error), {@code 0} if the job succeeded, {@code 1} if it failed
     * @throws Exception if the job cannot be set up or submitted
     */
    @Override
    public int run(String[] args) throws Exception {
        // Only three positional arguments are consumed, matching the usage text.
        if (args.length < REQUIRED_ARGS) {
            System.err.printf("Usage: %s [generic options] <ES nodes> <ES resource> <HDFS output path>\n" +
                    "Example: %s EShost1:9200,EShost2:9200 twitter/tweet /tmp/esdump/\n\n",
                    getClass().getSimpleName(), getClass().getSimpleName());
            ToolRunner.printGenericCommandUsage(System.err);
            return -1;
        }

        String esNodes = args[0];
        String esResource = args[1];
        String outputHdfsPath = args[2];

        // Start from the configuration held by this Tool so that generic options
        // (-D, -conf, -fs, ...) are honoured; fall back to a fresh Configuration
        // when none has been set on this instance.
        Configuration base = getConf();
        Configuration conf = (base != null) ? new Configuration(base) : new Configuration();
        conf.set("es.nodes", esNodes);
        conf.set("es.resource", esResource);

        Job job = new Job(conf);
        job.setJarByClass(ESDumpDriver.class);

        // Map-only job: no reduce phase.
        job.setNumReduceTasks(0);
        job.setInputFormatClass(EsInputFormat.class);
        job.setMapperClass(ESDumpMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LinkedMapWritable.class);

        FileOutputFormat.setOutputPath(job, new Path(outputHdfsPath));

        return job.waitForCompletion(true) ? 0 : 1;
    }
}