package com.manning.hip.ch11;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
import java.io.IOException;
import java.util.*;
/**
 * MapReduce job that inverts its input: for every input record it emits the
 * second whitespace-delimited token of the record's value as the output key,
 * and the record's original key as the output value. The reducer then writes
 * one line per distinct output key, listing the de-duplicated original keys
 * joined by {@code ":"}.
 *
 * <p>Input is read with {@link KeyValueTextInputFormat}, so the mapper's key
 * and value are whatever that format produces for each line (by default the
 * text before and after the first tab - confirm against the job's
 * configuration). The reducer refers to the collected values as "ips", so
 * the input keys are presumably IP addresses from a log file - verify
 * against the actual input data.
 *
 * <p>Note: the nested mapper class is named {@code Map}, which shadows
 * {@code java.util.Map} inside this class.
 */
public final class LogMapReduce {

  /** Counter group under which this job reports its own counters. */
  private static final String COUNTER_GROUP = "LogMapReduce";

  /** Counter incremented for each input record skipped by the mapper. */
  private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

  /**
   * Mapper that re-keys each record by the second whitespace-delimited token
   * of its value and emits the original key as the value.
   */
  public static class Map extends Mapper<Text, Text, Text, Text> {

    /** Reused across calls to avoid allocating a new {@code Text} per record. */
    private final Text outputKey = new Text();

    /**
     * Emits {@code (second token of value, key)} for one input record.
     *
     * <p>Records whose value has fewer than two whitespace-delimited tokens
     * are skipped and counted under the
     * {@code LogMapReduce / MALFORMED_RECORDS} counter instead of failing the
     * task with an {@link ArrayIndexOutOfBoundsException}.
     *
     * @param key     the input record's key
     * @param value   the input record's value; split on whitespace
     * @param context the task context used to emit output and counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(Text key, Text value, Context context)
        throws IOException, InterruptedException {
      String[] parts = StringUtils.split(value.toString());
      if (parts.length < 2) {
        // No second token to key on: skip the record rather than crash the task.
        context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
        return;
      }
      outputKey.set(parts[1]);
      context.write(outputKey, key);
    }
  }

  /**
   * Reducer that collapses all values seen for a key into a single
   * {@code ":"}-separated string of distinct values.
   */
  public static class Reduce extends Reducer<Text, Text, Text, Text> {

    /** Reused across calls to avoid allocating a new {@code Text} per key. */
    private final Text outputVal = new Text();

    /**
     * Writes {@code (key, distinct values joined by ":")} for one key.
     *
     * @param key     the grouping key emitted by the mapper
     * @param values  all values emitted for {@code key}; duplicates are dropped
     * @param context the task context used to emit output
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      // Copy each value to a String before storing it in the set.
      // The order of the joined values follows HashSet iteration order,
      // which is unspecified.
      Set<String> ips = new HashSet<String>();
      for (Text val : values) {
        ips.add(val.toString());
      }
      outputVal.set(StringUtils.join(ips, ":"));
      context.write(key, outputVal);
    }
  }

  /**
   * Command-line entry point.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} the output
   *             path; any further arguments are ignored
   * @throws IllegalArgumentException if fewer than two arguments are given
   * @throws Exception                if the job cannot be run or fails
   */
  public static void main(String... args) throws Exception {
    if (args.length < 2) {
      throw new IllegalArgumentException(
          "Usage: LogMapReduce <input path> <output path>");
    }
    runJob(args[0], args[1]);
  }

  /**
   * Configures and runs the job, blocking until it finishes.
   *
   * <p><strong>Warning:</strong> the output path is deleted recursively
   * before the job is submitted.
   *
   * @param input  the input path handed to {@link FileInputFormat}
   * @param output the output path; removed recursively before the job runs
   * @throws IOException if the job completes unsuccessfully
   * @throws Exception   if job setup, output-path deletion, or execution fails
   */
  public static void runJob(String input, String output) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf);
    job.setJarByClass(LogMapReduce.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, new Path(input));
    Path outPath = new Path(output);
    FileOutputFormat.setOutputPath(job, outPath);

    // Clear any previous output at this path before the job runs.
    outPath.getFileSystem(conf).delete(outPath, true);

    // Surface job failure to the caller instead of silently discarding it.
    if (!job.waitForCompletion(true)) {
      throw new IOException(
          "MapReduce job failed (input=" + input + ", output=" + output + ")");
    }
  }
}