package com.manning.hip.ch3.binary;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Map-only MapReduce job that reads records through
 * {@code CustomBinaryInputFormat} and, for every record, writes the record's
 * key together with the byte length of the record's value.
 *
 * <p>Usage: {@code CustomBinaryMapReduce <input path> <output path>}. Any
 * existing content at the output path is deleted before the job runs.
 */
public final class CustomBinaryMapReduce {

  /**
   * Emits, for each input record, the incoming key paired with the number of
   * valid bytes held by the {@link BytesWritable} value.
   *
   * <p>NOTE(review): the meaning of the key (presumably a record offset or
   * index) is decided by {@code CustomBinaryInputFormat} - confirm there.
   */
  public static class Map
      extends Mapper<LongWritable, BytesWritable, LongWritable, LongWritable> {

    /**
     * Writes {@code (key, value.getLength())} to the job output.
     *
     * @param key     key supplied by the input format, passed through as-is
     * @param value   binary payload of the record
     * @param context task context used to emit the output pair
     * @throws IOException          if the output pair cannot be written
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, BytesWritable value, Context context)
        throws IOException, InterruptedException {
      // getLength() is the size of the valid data, not the capacity of the
      // backing array, which may be larger.
      long payloadLength = value.getLength();
      context.write(key, new LongWritable(payloadLength));
    }
  }

  /**
   * Command-line entry point.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} the output
   *             path; additional arguments are ignored
   * @throws IllegalArgumentException if fewer than two arguments are given
   * @throws Exception                if the job cannot be set up or fails
   */
  public static void main(String... args) throws Exception {
    if (args == null || args.length < 2) {
      throw new IllegalArgumentException(
          "Usage: " + CustomBinaryMapReduce.class.getName()
              + " <input path> <output path>");
    }
    runJob(args[0], args[1]);
  }

  /**
   * Configures and runs the map-only job, blocking until it finishes.
   *
   * @param input  path of the data to read
   * @param output path the job writes to; it is recursively deleted first
   * @throws IOException if the job completes unsuccessfully
   * @throws Exception   if the job cannot be configured or submitted
   */
  public static void runJob(String input,
                            String output)
      throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf);
    job.setJarByClass(CustomBinaryMapReduce.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(CustomBinaryInputFormat.class);

    // No reducers: mapper output is the final job output, so the declared
    // output types must be the types the mapper actually emits. The map
    // output classes default to these when not set separately.
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.setInputPaths(job, new Path(input));
    Path outPath = new Path(output);
    FileOutputFormat.setOutputPath(job, outPath);

    // Remove output left over from a previous run before this job writes
    // to the same location.
    outPath.getFileSystem(conf).delete(outPath, true);

    // Surface job failure to the caller instead of silently returning.
    if (!job.waitForCompletion(true)) {
      throw new IOException(
          "MapReduce job failed (input=" + input + ", output=" + output + ")");
    }
  }
}