package uk.bl.wa.hadoop.outlinks;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import uk.bl.wa.hadoop.mapreduce.MutableInt;
/**
 * Reducer that, for a single key, counts how many times each distinct value
 * string occurs and emits one record per distinct value.
 *
 * <p>Each emitted record keeps the incoming key and carries a value of the
 * form {@code <value> TAB <count>}, where the count is whatever the
 * {@link MutableInt} counter reports after being incremented once per
 * occurrence of that value.</p>
 *
 * <p>NOTE(review): the deprecation warning is suppressed presumably because
 * this class is built on the old {@code org.apache.hadoop.mapred} API -
 * confirm against the Hadoop version in use.</p>
 */
@SuppressWarnings( { "deprecation" } )
public class FrequencyCountingReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {

    /**
     * Tallies the values received for {@code key} and writes one output
     * record per distinct value, with the tally appended after a tab.
     *
     * <p>All distinct values for the key are held in memory until the input
     * iterator is exhausted. Records are emitted in the iteration order of a
     * {@link HashMap}, which is unspecified.</p>
     *
     * @param key      the key shared by all values in this call; reused
     *                 unchanged as the key of every emitted record
     * @param iterator the values grouped under {@code key}
     * @param output   collector receiving one record per distinct value
     * @param reporter progress reporter (not used by this implementation)
     * @throws IOException if the collector fails to accept a record
     */
    @Override
    public void reduce( Text key, Iterator<Text> iterator, OutputCollector<Text, Text> output, Reporter reporter ) throws IOException {
        // Map each distinct value string to its occurrence counter:
        Map<String, MutableInt> matches = new HashMap<String, MutableInt>();
        while( iterator.hasNext() ) {
            // Copy the value out as a String so the map key is independent
            // of the Text instance handed out by the iterator.
            String m = iterator.next().toString();
            // Get or set up the counter:
            MutableInt value = matches.get( m );
            if( value == null ) {
                value = new MutableInt();
                matches.put( m, value );
            }
            // Increment the counter for this match:
            value.inc();
        }
        // Emit every distinct match together with its count. Iterating the
        // entry set avoids a second hash lookup per key.
        for( Map.Entry<String, MutableInt> entry : matches.entrySet() ) {
            output.collect( key, new Text( entry.getKey() + "\t" + entry.getValue().get() ) );
        }
    }
}