package com.hadooparchitecturebook.mapreduce.joinfilter; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import com.cloudera.sa.examples.crunch.JoinFilterExampleCrunch; public class JoinFilterExampleMRJob implements Tool { public static final String FOO_TABLE_CONF = "custom.foo.table.file"; public static final String BAR_TABLE_CONF = "custom.bar.table.file"; public static final String FOO_VAL_MAX_CONF = "custom.foo.val.max"; public static final String JOIN_VAL_MAX_CONF = "custom.join.val.max"; public static final String FOO_SORT_FLAG = "B"; public static final String BAR_SORT_FLAG = "A"; public static void main(String[] args) throws Exception { ToolRunner.run(new Configuration(), new JoinFilterExampleMRJob(), args); } public int run(String[] args) throws Exception { String inputFoo = args[0]; String inputBar = args[1]; String output = args[2]; String fooValueMaxFilter = args[3]; String joinValueMaxFilter = args[4]; int numberOfReducers = Integer.parseInt(args[5]); //A Job job = Job.getInstance(); //B job.setJarByClass(JoinFilterExampleMRJob.class); job.setJobName("JoinFilterExampleMRJob"); //C Configuration config = job.getConfiguration(); config.set(FOO_TABLE_CONF, inputFoo); config.set(BAR_TABLE_CONF, inputBar); config.set(FOO_VAL_MAX_CONF, fooValueMaxFilter); config.set(JOIN_VAL_MAX_CONF, joinValueMaxFilter); // D job.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(job, new Path(inputFoo)); TextInputFormat.addInputPath(job, new Path(inputBar)); // E job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(output)); // F job.setMapperClass(JoinFilterMapper.class); job.setReducerClass(JoinFilterReducer.class); job.setPartitionerClass(JoinFilterPartitioner.class); // G job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); //H job.setNumReduceTasks(numberOfReducers); // I job.waitForCompletion(true); return 0; } @Override public void setConf(Configuration conf) { // TODO Auto-generated method stub } @Override public Configuration getConf() { // TODO Auto-generated method stub return null; } }