package com.mongodb.hadoop.examples.enron;

import com.mongodb.hadoop.io.BSONWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Mapper;
import org.bson.BSONObject;

import java.io.IOException;

/**
 * Mapper that turns one mail document into one {@code (MailPair, 1)} record per recipient.
 *
 * <p>Each input document is expected to carry a {@code headers} sub-document holding
 * {@code From} and {@code To} string fields. {@code To} is treated as a comma-separated
 * list; every non-blank entry yields one output record pairing the sender with that
 * recipient.
 *
 * <p>The class implements both the {@code org.apache.hadoop.mapreduce} and the
 * {@code org.apache.hadoop.mapred} mapper contracts; both entry points emit the same
 * pairs for the same document.
 */
public class EnronMailMapper extends Mapper<Object, BSONObject, MailPair, IntWritable>
    implements org.apache.hadoop.mapred.Mapper<Object, BSONWritable, MailPair, IntWritable> {

    /** Constant count of 1 emitted with every pair. */
    private final IntWritable intw;

    /** Single pair instance that is re-populated and emitted for every recipient. */
    private final MailPair mp;

    /** Creates the mapper and allocates the reusable output key and value. */
    public EnronMailMapper() {
        super();
        intw = new IntWritable(1);
        mp = new MailPair();
    }

    /**
     * New-API ({@code org.apache.hadoop.mapreduce}) entry point.
     *
     * @param key     input key; not used for the output
     *                (NOTE(review): presumably the document {@code _id} - confirm against the input format)
     * @param val     mail document containing a {@code headers} sub-document
     * @param context sink that receives one {@code (MailPair, 1)} record per recipient
     * @throws IOException          if writing to {@code context} fails
     * @throws InterruptedException if writing to {@code context} is interrupted
     */
    @Override
    public void map(final Object key, final BSONObject val, final Context context)
        throws IOException, InterruptedException {
        final BSONObject headers = (BSONObject) val.get("headers");
        if (headers == null) {
            // No headers means no sender/recipient information: skip the document.
            return;
        }

        // The sender comes from the "From" header, exactly as in the old-API map below.
        // Casting the input key to String made the two entry points disagree and fails
        // with ClassCastException for any non-String key.
        final String from = (String) headers.get("From");
        final String to = (String) headers.get("To");
        if (to == null) {
            return;
        }

        for (final String rawRecipient : to.split(",")) {
            final String recipient = rawRecipient.trim();
            if (recipient.length() > 0) {
                mp.setFrom(from);
                mp.setTo(recipient);
                context.write(mp, intw);
            }
        }
    }

    /**
     * Old-API ({@code org.apache.hadoop.mapred}) entry point.
     *
     * @param key      input key; not used
     * @param writable wrapper around the mail document containing a {@code headers} sub-document
     * @param output   collector that receives one {@code (MailPair, 1)} record per recipient
     * @param reporter progress reporter; not used
     * @throws IOException if collecting an output record fails
     */
    @Override
    public void map(final Object key, final BSONWritable writable,
                    final OutputCollector<MailPair, IntWritable> output, final Reporter reporter)
        throws IOException {
        final BSONObject headers = (BSONObject) writable.getDoc().get("headers");
        if (headers == null) {
            // No headers means no sender/recipient information: skip the document.
            return;
        }

        final String from = (String) headers.get("From");
        final String to = (String) headers.get("To");
        if (to == null) {
            return;
        }

        for (final String rawRecipient : to.split(",")) {
            final String recipient = rawRecipient.trim();
            if (recipient.length() > 0) {
                mp.setFrom(from);
                mp.setTo(recipient);
                output.collect(mp, intw);
            }
        }
    }

    /** No resources to release. */
    @Override
    public void close() throws IOException {
    }

    /** No job configuration is read by this mapper. */
    @Override
    public void configure(final JobConf job) {
    }
}