package hip.ch8;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.ReflectionUtils;
import java.io.IOException;
import java.util.Iterator;
/**
 * MapReduce job (old {@code org.apache.hadoop.mapred} API) that emits the
 * first two dot-separated tokens of each input line as a key/value pair and
 * uses a combiner to drop adjacent duplicate values per key on the map side.
 *
 * <p>Usage: {@code CombineJob <input path> <output path>}. The output path is
 * deleted recursively before the job is submitted.
 */
public class CombineJob {

  /**
   * Mapper that emits {@code (first token, second token)} for every input line
   * that does not start with {@code "10."}. Tokens are separated by
   * {@code '.'}; at most three tokens are produced per line, so everything
   * after the second separator is ignored.
   */
  public static class Map implements
      Mapper<LongWritable, Text, Text, Text> {

    /** Counter group used to report records this mapper had to skip. */
    private static final String COUNTER_GROUP = "CombineJob";

    /** Counter name for lines with fewer than two dot-separated tokens. */
    private static final String MALFORMED_COUNTER = "MalformedRecords";

    /** Reused across calls to avoid allocating a new key per record. */
    private final Text outputKey = new Text();

    /** Reused across calls to avoid allocating a new value per record. */
    private final Text outputValue = new Text();

    /** No configuration is needed by this mapper. */
    @Override
    public void configure(JobConf job) {
    }

    /**
     * Splits the line on {@code '.'} and emits the first token as the key and
     * the second token as the value.
     *
     * @param key      input key supplied by the input format; not used
     * @param value    one line of input text
     * @param output   collector receiving the {@code (token0, token1)} pair
     * @param reporter used only to count skipped malformed lines; may be null
     * @throws IOException if the collector fails to accept the pair
     */
    @Override
    public void map(LongWritable key, Text value,
                    OutputCollector<Text, Text> output,
                    Reporter reporter) throws IOException {
      String v = value.toString();
      // Lines beginning with "10." are filtered out entirely.
      if (v.startsWith("10.")) {
        return;
      }

      String[] parts = StringUtils.split(v, ".", 3);
      // Blank lines and lines without a second token used to raise
      // ArrayIndexOutOfBoundsException and fail the task; skip them instead.
      if (parts == null || parts.length < 2) {
        if (reporter != null) {
          reporter.incrCounter(COUNTER_GROUP, MALFORMED_COUNTER, 1);
        }
        return;
      }

      outputKey.set(parts[0]);
      outputValue.set(parts[1]);
      output.collect(outputKey, outputValue);
    }

    /** Nothing to release. */
    @Override
    public void close() throws IOException {
    }
  }

  /**
   * Combiner that forwards a value only when it differs from the value seen
   * immediately before it for the same key. Only adjacent duplicates are
   * removed; equal values separated by a different value are all emitted.
   */
  public static class Combine
      implements Reducer<Text, Text, Text, Text> {

    /** Job configuration, needed by {@link ReflectionUtils#copy}. */
    private JobConf job;

    /** Remembers the job configuration for later value copies. */
    @Override
    public void configure(JobConf job) {
      this.job = job;
    }

    /**
     * Emits each value for {@code key} unless it equals the previous value.
     *
     * @param key      the key shared by all {@code values}
     * @param values   values grouped under {@code key}
     * @param output   collector receiving the de-duplicated pairs
     * @param reporter not used
     * @throws IOException if copying a value or collecting a pair fails
     */
    @Override
    public void reduce(Text key, Iterator<Text> values,
                       OutputCollector<Text, Text> output,
                       Reporter reporter) throws IOException {
      Text prev = null;
      while (values.hasNext()) {
        Text t = values.next();
        if (!t.equals(prev)) {
          output.collect(key, t);
          // Keep a copy rather than the reference: the framework may reuse
          // the Text instance handed out by the iterator. The copy is only
          // needed when the value changed; when it equals prev the contents
          // are already identical.
          prev = ReflectionUtils.copy(job, t, prev);
        }
      }
    }

    /** Nothing to release. */
    @Override
    public void close() throws IOException {
    }
  }

  /**
   * Configures and runs the job.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} is the
   *             output path (deleted recursively if it already exists)
   * @throws IllegalArgumentException if fewer than two arguments are given
   * @throws Exception if the file system cannot be accessed or the job fails
   */
  public static void main(String... args) throws Exception {
    if (args == null || args.length < 2) {
      throw new IllegalArgumentException(
          "Usage: CombineJob <input path> <output path>");
    }

    JobConf job = new JobConf();
    job.setJarByClass(CombineJob.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Remove any previous output so the job does not fail on an existing dir.
    output.getFileSystem(job).delete(output, true);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Combine.class);

    // Declare every key/value type explicitly. Without this the job's output
    // key class stays at the JobConf default (LongWritable) even though the
    // records emitted are Text/Text.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    JobClient.runJob(job);
  }
}