package edu.isi.dig.elasticsearch.mapreduce.driver;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import edu.isi.dig.elasticsearch.mapreduce.inputformat.ESInputFormat;
/**
 * Hadoop {@link Tool} that builds and runs a map-only job: input comes from
 * {@link ESInputFormat}, records are mapped by {@code ESMapper}, and the
 * resulting {@code Text}/{@code Text} pairs are written as a sequence file.
 *
 * <p>Job settings are read from a Java properties file whose path is the
 * first command-line argument.
 */
public class ESProcessor extends Configured implements Tool{

    /** Property keys that are copied as-is from the properties file into the job configuration. */
    private static final String[] ELASTICSEARCH_KEYS = {
        "elasticsearch.hostname",
        "elasticsearch.port",
        "elasticsearch.batchsize",
        "elasticsearch.index",
        "elasticsearch.username",
        "elasticsearch.password",
        "elasticsearch.starttimestamp",
        "elasticsearch.endtimestamp",
        "elasticsearch.protocol",
        "elasticsearch.doctype"
    };

    /** Property key naming the directory the job writes its output to. */
    private static final String OUTPUT_DIRECTORY_KEY = "output.directory";

    /**
     * Creates the job from the given properties.
     *
     * <p>Each {@code elasticsearch.*} key present in {@code p} is copied into the
     * configuration with {@code setIfUnset}, so a value that is already set in
     * the configuration (for example through a generic {@code -D} option handed
     * to this tool) is kept and the properties file only fills the gaps.
     *
     * @param p job properties; must contain {@code output.directory}
     * @return the configured, not yet submitted job
     * @throws IllegalArgumentException if {@code output.directory} is missing or empty
     * @throws Exception if the job instance cannot be created
     */
    public Job configure(Properties p) throws Exception
    {
        // Validate up front so a bad properties file fails with a clear message
        // instead of an obscure "Can not create a Path from a null string".
        String outputDirectory = p.getProperty(OUTPUT_DIRECTORY_KEY);
        if (outputDirectory == null || outputDirectory.isEmpty()) {
            throw new IllegalArgumentException(
                    "Missing required property '" + OUTPUT_DIRECTORY_KEY + "'");
        }

        // Start from the configuration injected into this Tool (if any) so that
        // generic options are honoured; work on a copy to leave it untouched.
        Configuration toolConf = getConf();
        Configuration conf = (toolConf == null)
                ? new Configuration()
                : new Configuration(toolConf);

        for (String key : ELASTICSEARCH_KEYS) {
            String value = p.getProperty(key);
            if (value != null) {
                conf.setIfUnset(key, value);
            }
        }

        Job job = Job.getInstance(conf);
        job.setJarByClass(ESProcessor.class);
        job.setInputFormatClass(ESInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setMapperClass(ESMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path(outputDirectory));
        // Map-only job: mapper output is written directly, no reduce phase.
        job.setNumReduceTasks(0);
        return job;
    }

    /**
     * Loads the properties file named by {@code args[0]}, then configures the
     * job and waits for it to complete.
     *
     * @param args command-line arguments; {@code args[0]} is the properties file path
     * @return {@code 0} if the job completed successfully, {@code -1} if the
     *         arguments are missing, the properties file cannot be loaded, or
     *         the job did not finish successfully
     * @throws Exception if configuring or running the job fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 1) {
            System.err.println("Usage: ESProcessor <properties-file>");
            return -1;
        }

        Properties p = new Properties();
        // try-with-resources guarantees the stream is closed even when load() fails.
        try (FileInputStream in = new FileInputStream(new File(args[0]))) {
            p.load(in);
        } catch (IOException | IllegalArgumentException e) {
            // Continuing with empty properties would only fail later and less clearly.
            System.err.println("Error: unable to load properties file '" + args[0] + "'");
            e.printStackTrace();
            return -1;
        }

        Job job = configure(p);
        if (!job.waitForCompletion(true)) {
            System.err.println("Unable to finish job");
            return -1;
        }
        return 0;
    }
}