package demo.cards.drivers; /** * Created with IntelliJ IDEA. * User: cloudera * Date: 9/22/13 * Time: 1:27 PM * To change this template use File | Settings | File Templates. */ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.Mapper.Context; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import demo.cards.parsers.CardParser; import java.io.IOException; /* This will demonstrate how we should design and develop efficient map reduce programs * select suit, count(1) from deck where pip='J' and color='RED' group by suit; * You will learn using fs package to eliminate unnecessary directories, significance * of setup method in map reduce life cycle as well as passing parameters from command line to * map/reduce functions. * * 1) Understand requirements * 2) Design directories and files (partition by pip) * 3) Eliminate unnecessary directories as part of job configuration using org.apache.hadoop.fs * 4) Pass on color as parameter to map function and filter out all non RED colored cards * 5) Review counters * * Usage: hadoop jar performance_tuning.jar * demo.cards.drivers.CardCountBySuitFilteredByPipAndColor * /user/hduser/cards /user/hduser/output.cards * J RED */ public class CardCountBySuitFilteredByPipAndColor extends Configured implements Tool { public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> { private final static IntWritable one = new IntWritable(1); private CardParser parser = new CardParser(); private String suit; private Configuration jobconf = null; public void setup(Context context) throws IOException, InterruptedException { this.jobconf = context.getConfiguration(); } public void map(LongWritable key, Text value, Context output) throws IOException, InterruptedException { parser.parse(value.toString()); String param = jobconf.get("param.color"); if (parser.getColor().equals(param)) { suit = parser.getSuit(); output.write(new Text(suit), one); } } } public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> { public void reduce(Text key, Iterable<IntWritable> values, Context output) throws IOException, InterruptedException { int sum = 0; while (values.iterator().hasNext()) { sum += values.iterator().next().get(); } output.write(key, new IntWritable(sum)); } } public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = getConf(); conf.set("param.color", args[3]); Job job = Job.getInstance(conf, "select suit, count(1) from deck " + "where pip='J' and color='RED' group by suit"); job.setJarByClass(getClass()); String pip = args[2]; Path inputPath = new Path(args[0] + "/" + "pip=" + pip); FileInputFormat.setInputPaths(job, inputPath); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); // job.setGroupingComparatorClass(TextPair.Comparator.class); // job.setSortComparatorClass(); // job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setNumReduceTasks(2); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 0 : 1; } public static void main(String[] args) throws Exception { int exitCode = ToolRunner.run( new CardCountBySuitFilteredByPipAndColor(), args); System.exit(exitCode); } }