package com.mongodb.hadoop.mapred;
import com.mongodb.hadoop.io.BSONWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import static com.mongodb.hadoop.testutils.BaseHadoopTest.EXAMPLE_DATA_HOME;
import static org.junit.Assert.assertEquals;
/**
 * Tests reading a BSON dump file through the {@code mapred}-API {@link BSONFileInputFormat}.
 */
public class BSONFileInputFormatTest {

    /**
     * Reads {@code dump/enron_mail/messages.bson} (resolved under {@code EXAMPLE_DATA_HOME})
     * through {@link BSONFileInputFormat}, iterates every record of every split returned by
     * {@code getSplits}, and asserts that exactly 501513 records were read in total.
     *
     * @throws IOException if computing the splits, reading a record, or closing a reader fails
     */
    @Test
    public void enronEmails() throws IOException {
        BSONFileInputFormat inputFormat = new BSONFileInputFormat();
        JobConf job = new JobConf();

        // Despite the "input dir" property names below, this is the URI of a single BSON file.
        String inputPath =
            new File(EXAMPLE_DATA_HOME, "/dump/enron_mail/messages.bson")
                .getAbsoluteFile().toURI().toString();

        // The input location is set under both property names so the test works
        // regardless of which Hadoop line is on the classpath.
        // Hadoop 2.X
        job.set("mapreduce.input.fileinputformat.inputdir", inputPath);
        // Hadoop 1.2.X
        job.set("mapred.input.dir", inputPath);

        FileSplit[] splits = inputFormat.getSplits(job, 5);

        int count = 0;
        // A single value holder is reused for every record; only the number of records matters.
        BSONWritable writable = new BSONWritable();
        for (FileSplit split : splits) {
            RecordReader<NullWritable, BSONWritable> recordReader =
                inputFormat.getRecordReader(split, job, null);
            // try/finally rather than try-with-resources: the mapred RecordReader is not
            // Closeable on the Hadoop 1.2.X line this test also targets.
            try {
                while (recordReader.next(null, writable)) {
                    count++;
                }
            } finally {
                // Release the reader for this split even if reading fails part-way through.
                recordReader.close();
            }
        }

        assertEquals("There are 501513 messages in the enron corpus", 501513, count);
    }
}