package com.ikai.mapperdemo.mappers;
import java.util.logging.Logger;
import org.apache.hadoop.io.NullWritable;
import com.google.appengine.api.datastore.Entity;
import com.google.appengine.api.datastore.Key;
import com.google.appengine.tools.mapreduce.AppEngineMapper;
/**
*
* This mapper counts the number of total words across all comments. It cheats a
* bit by just splitting on whitespace and just using the length. This mapper
* demonstrates use of counters as well as using a completion callback.
*
* @author Ikai Lan
*
*/
public class CountWordsMapper extends
AppEngineMapper<Key, Entity, NullWritable, NullWritable> {
private static final Logger log = Logger.getLogger(CountWordsMapper.class
.getName());
/*
* This is a bit of a lazy implementation more to prove a point than to
* actually be correct. Split on whitespace, count words
*/
@Override
public void map(Key key, Entity value, Context context) {
String comment = (String) value.getProperty("comment");
if (comment != null) {
String[] words = comment.split("\\s+");
int wordCount = words.length;
// Takes a "group" and a "counter"
// We'll use these later to store the final count back in the
// datastore
context.getCounter("CommentWords", "count").increment(wordCount);
}
}
}