package hip.ch4;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
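
/**
 * A MapReduce job that compresses both its intermediate map output and its
 * final job output with a codec named on the command line, e.g.
 * org.apache.hadoop.io.compress.SnappyCodec. The mapper and reducer are the
 * identity implementations, so the job simply rewrites its input compressed.
 *
 * A sample invocation (the jar name here is illustrative):
 *
 *   hadoop jar hip.jar hip.ch4.CompressedMapReduce \
 *       /path/to/input /path/to/output \
 *       org.apache.hadoop.io.compress.SnappyCodec
 */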
public class CompressedMapReduce {

  public static void main(String[] args) throws Exception {
    if (args.length != 3) {
      System.err.println(
          "Usage: CompressedMapReduce <input> <output> <codec class>");
      System.exit(1);
    }

    Configuration conf = new Configuration();

    Path inputFile = new Path(args[0]);
    Path outputFile = new Path(args[1]);

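    // Delete any existing output directory up front; MapReduce refuses to
    // start a job whose output path already exists.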
    FileSystem hdfs = outputFile.getFileSystem(conf);
    hdfs.delete(outputFile, true);

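    // Load the codec class named on the command line, failing fast if it
    // does not implement CompressionCodec.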
    Class<? extends CompressionCodec> codecClass =
        Class.forName(args[2]).asSubclass(CompressionCodec.class);

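    // Compress the final job output. These are the original (pre-Hadoop-2)
    // property names; newer releases still accept them but prefer
    // mapreduce.output.fileoutputformat.compress[.codec].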
conf.setBoolean("mapred.output.compress", true);
conf.setClass("mapred.output.compression.codec",
codecClass,
CompressionCodec.class);
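    // Also compress the intermediate map output shuffled to the reducers;
    // this is often the bigger win, since shuffle data crosses the network.
    // (Hadoop 2 names: mapreduce.map.output.compress[.codec].)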
conf.setBoolean("mapred.compress.map.output", true);
conf.setClass("mapred.map.output.compression.codec",
codecClass,
CompressionCodec.class);
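    // The base Mapper and Reducer classes are identity implementations, so
    // records pass through unchanged; only the encoding of the output differs.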
    Job job = new Job(conf);
    job.setJarByClass(CompressedMapReduce.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, inputFile);
    FileOutputFormat.setOutputPath(job, outputFile);

    // Surface the job outcome in the process exit code so shell scripts and
    // schedulers can detect failure.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}