// WordCount.java example
//
// Explorer
// tap-master
package thinkbig;


import java.util.StringTokenizer;

import org.apache.hadoop.util.GenericOptionsParser;

import tap.CommandOptions;
import tap.Pipe;
import tap.Tap;
import tap.TapMapper;
import tap.TapReducer;


import thinkbig.examples.messages.*;


/**
 * Word-count example built on the Tap pipeline API.
 *
 * <p>{@link Mapper} splits every input line into tokens and emits one
 * {@code WordCountRecord} per token with a count of 1. The phase groups the
 * records by the {@code word} field, and {@link Reducer} emits a single record
 * per group carrying the summed count.
 */
public class WordCount {

    /**
     * Builds the single-phase word-count pipeline and runs it.
     *
     * <p>If the input or output location is missing, a message is written to
     * {@code System.err} and the method returns without running the pipeline.
     *
     * @param args command-line arguments, handed to {@link CommandOptions}
     * @throws Exception if any of the pipeline calls fails
     */
    public static void main(String[] args) throws Exception {
        /* Parse options - just use the standard options - input and output location, time window, etc. */
        CommandOptions o = new CommandOptions(args);

        /* Set up a basic pipeline of map reduce */
        Tap wordcount = new Tap(o).named("wordcount");

        if (o.input == null) {
            System.err.println("Must specify input directory");
            return;
        }
        if (o.output == null) {
            System.err.println("Must specify output directory");
            return;
        }

        wordcount.getConf().setJarByClass(WordCount.class);

        /* One phase: tokenize lines, group the records by word, total each group. */
        wordcount.createPhase()
                .reads(o.input)
                .writes(o.output)
                .map(Mapper.class)
                .groupBy("word")
                .reduce(Reducer.class);

        wordcount.make();
    }

    /**
     * Emits one {@code WordCountRecord} with count 1 for every token of a line.
     */
    public static class Mapper extends TapMapper<String,Wordcountmsg.WordCountRecord> {

        /**
         * Tokenizes {@code line} with {@link StringTokenizer}'s default
         * delimiters (whitespace) and writes one record per token.
         *
         * @param line the input line to split
         * @param out  pipe receiving one record per token
         */
        @Override
        public void map(String line, Pipe<Wordcountmsg.WordCountRecord> out) {
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                // Built per token as a local; no state is kept between calls.
                Wordcountmsg.WordCountRecord record = Wordcountmsg.WordCountRecord.newBuilder()
                        .setWord(tokenizer.nextToken())
                        .setCount(1)
                        .build();
                out.put(record);
            }
        }
    }

    /**
     * Collapses the records of one group into a single record with the total count.
     */
    public static class Reducer extends TapReducer<Wordcountmsg.WordCountRecord,Wordcountmsg.WordCountRecord> {

        /**
         * Sums the {@code count} field of every record in {@code in} and emits
         * one record holding the word and that total. The emitted record is
         * also echoed to {@code System.out}.
         *
         * @param in  records of one group (the phase groups by {@code word})
         * @param out pipe receiving the aggregated record
         */
        @Override
        public void reduce(Pipe<Wordcountmsg.WordCountRecord> in, Pipe<Wordcountmsg.WordCountRecord> out) {
            int count = 0;
            String word = null;
            for (Wordcountmsg.WordCountRecord rec : in) {
                word = rec.getWord();
                // Add the record's own count rather than assuming it is 1, so
                // partially aggregated input is still totalled correctly.
                count += rec.getCount();
            }
            if (word == null) {
                // No records were seen, so there is nothing to emit.
                // NOTE(review): presumably the framework never calls reduce with an
                // empty group; the guard only avoids passing null to setWord.
                return;
            }
            Wordcountmsg.WordCountRecord result = Wordcountmsg.WordCountRecord.newBuilder()
                    .setWord(word)
                    .setCount(count)
                    .build();
            out.put(result);
            System.out.println(result.toString());
        }
    }
}