package hip.ch6.joins.semijoin; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; import java.util.HashSet; import java.util.Set; public class UniqueHashedKeyJob { public static void runJob(Configuration conf, Path inputPath, Path outputPath) throws Exception { Job job = new Job(conf); job.setJarByClass(UniqueHashedKeyJob.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(KeyValueTextInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); outputPath.getFileSystem(conf).delete(outputPath, true); FileInputFormat.setInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); if (!job.waitForCompletion(true)) { throw new Exception("Job failed"); } } public static class Map extends Mapper<Text, Text, Text, NullWritable> { private Set<String> keys = new HashSet<String>(); @Override protected void map(Text key, Text value, Context context) throws IOException, InterruptedException { System.out.println("K[" + key + "]"); keys.add(key.toString()); } @Override protected void cleanup( Context context) throws IOException, InterruptedException { Text outputKey = new Text(); for (String key : keys) { System.out.println("OutK[" + key + "]"); outputKey.set(key); context.write(outputKey, NullWritable.get()); } } } public static class Reduce extends Reducer<Text, NullWritable, Text, NullWritable> { @Override protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException { context.write(key, NullWritable.get()); } } }