package com.esri;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
 * Command-line driver that configures and runs the MapReduce job built from
 * {@link FreqDistMapper} and {@link FreqDistReducer}.
 *
 * <p>The job reads text from an input path and writes text to an output path;
 * both paths are taken from the command line.
 *
 * <p>Build and run:
 * <pre>
 * mvn -P job clean package
 * hadoop fs -rm -R -skipTrash output
 * hadoop jar target/HBaseToolbox-1.0-SNAPSHOT-job.jar /user/cloudera/points.txt /user/cloudera/output
 * </pre>
 */
public class FreqDistJob extends Configured implements Tool
{
    /** Usage text shown when the two required path arguments are missing. */
    private static final String USAGE = "Arguments format: input-path output-path";

    /**
     * Program entry point; delegates to {@link ToolRunner} and exits the JVM
     * with the code returned by {@link #run(String[])}.
     *
     * @param args raw command-line arguments
     * @throws Exception if the tool fails with an exception
     */
    public static void main(final String[] args) throws Exception
    {
        System.exit(ToolRunner.run(new FreqDistJob(), args));
    }

    /**
     * Builds a fully configured, not yet submitted job.
     *
     * @param configuration configuration the job is created from
     * @param args          {@code args[0]} is the input path, {@code args[1]} the output path;
     *                      any further elements are ignored
     * @return the configured job, ready to be submitted
     * @throws IOException              if the job or its paths cannot be set up
     * @throws IllegalArgumentException if {@code args} is {@code null} or has fewer than two elements
     */
    public static Job createSubmittableJob(
        final Configuration configuration,
        final String[] args
    ) throws IOException
    {
        // This method is public, so it may be called without the length check done in run().
        // Fail with the usage text rather than a bare ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2)
        {
            throw new IllegalArgumentException(USAGE);
        }

        final Job job = Job.getInstance(configuration, FreqDistJob.class.getSimpleName());
        job.setJarByClass(FreqDistJob.class);

        // Input side: plain text read from args[0].
        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.setInputFormatClass(TextInputFormat.class);

        // Map phase emits (LongWritable, IntWritable) pairs.
        job.setMapperClass(FreqDistMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // The reducer class is also registered as the combiner; its output types
        // are declared identical to the map output types.
        job.setCombinerClass(FreqDistReducer.class);
        job.setReducerClass(FreqDistReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Output side: text written to args[1].
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setOutputFormatClass(TextOutputFormat.class);

        return job;
    }

    /**
     * Parses the command line, then runs the job and waits for it to finish.
     *
     * @param args command-line arguments; after generic Hadoop options are removed,
     *             exactly two must remain: input path and output path
     * @return {@code 0} if the job succeeded, {@code 1} if it failed,
     *         {@code -1} if the arguments were malformed
     * @throws Exception if job setup or execution throws
     */
    @Override
    public int run(final String[] args) throws Exception
    {
        // Replace the current configuration with one created by HBaseConfiguration from it.
        setConf(HBaseConfiguration.create(getConf()));

        final String[] remainingArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
        if (remainingArgs.length != 2)
        {
            System.err.println(USAGE);
            ToolRunner.printGenericCommandUsage(System.err);
            return -1;
        }

        final Job job = createSubmittableJob(getConf(), remainingArgs);
        return job.waitForCompletion(true) ? 0 : 1;
    }
}