package com.manning.hip.ch3.binary;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Map-only MapReduce job that reads records through {@link CustomBinaryInputFormat}
 * and, for each record, writes the record's key together with the byte length of
 * its value.
 */
public final class CustomBinaryMapReduce {

  /**
   * Mapper that emits {@code (key, value.getLength())} for every input record.
   * The meaning of the key is defined by the input format's record reader,
   * which is not visible here.
   */
  public static class Map
      extends Mapper<LongWritable, BytesWritable, LongWritable, LongWritable> {

    /** Reused across calls so that no new writable is allocated per record. */
    private final LongWritable length = new LongWritable();

    /**
     * Writes the incoming key paired with the number of valid bytes in {@code value}.
     *
     * @param key     record key supplied by the input format; passed through unchanged
     * @param value   binary record payload
     * @param context task context used to emit the output pair
     * @throws IOException          if the output pair cannot be written
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, BytesWritable value, Context context)
        throws IOException, InterruptedException {
      // getLength() is the count of valid bytes, not the capacity of the backing array.
      length.set(value.getLength());
      context.write(key, length);
    }
  }

  /**
   * Command-line entry point.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} the output path
   * @throws Exception if the job cannot be configured or does not complete successfully
   */
  public static void main(String... args) throws Exception {
    if (args == null || args.length < 2) {
      System.err.println(
          "Usage: " + CustomBinaryMapReduce.class.getName() + " <input> <output>");
      System.exit(2);
      return;
    }
    runJob(args[0], args[1]);
  }

  /**
   * Configures and runs the map-only job, blocking until it finishes.
   *
   * <p>Any existing file or directory at {@code output} is deleted recursively
   * before the job is submitted.
   *
   * @param input  path of the input data
   * @param output path of the output directory; removed first if it already exists
   * @throws IOException if the job finishes unsuccessfully
   * @throws Exception   if the job cannot be configured or submitted
   */
  public static void runJob(String input, String output) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf);
    job.setJarByClass(CustomBinaryMapReduce.class);

    job.setInputFormatClass(CustomBinaryInputFormat.class);
    job.setMapperClass(Map.class);

    // No reducers: the mapper's output is the job's output, so the declared
    // job output types must be the types the mapper actually emits.
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.setInputPaths(job, new Path(input));
    Path outPath = new Path(output);
    FileOutputFormat.setOutputPath(job, outPath);

    // Clear any previous output so a stale directory does not block this run.
    outPath.getFileSystem(conf).delete(outPath, true);

    // Surface job failure to the caller instead of returning as if it succeeded.
    if (!job.waitForCompletion(true)) {
      throw new IOException(
          "Job failed: input=" + input + ", output=" + output);
    }
  }
}