package com.mongodb.hadoop.splitter;

import com.mongodb.BasicDBObjectBuilder;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.hadoop.input.MongoInputSplit;
import com.mongodb.hadoop.testutils.BaseHadoopTest;
import com.mongodb.hadoop.util.MongoConfigUtil;
import junit.framework.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

import java.util.ArrayList;
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assume.assumeFalse;
import static org.junit.Assume.assumeTrue;

/**
 * Tests for {@link SampleSplitter} that run against a MongoDB instance
 * listening on {@code localhost:27017}.
 *
 * <p>The class-level fixture fills one collection with padded documents;
 * each test then configures a splitter against that collection and checks
 * the splits it calculates. Tests that need the sample operator are skipped
 * (via JUnit assumptions) when {@code isSampleOperatorSupported} reports
 * that the server does not support it, and vice versa.
 */
public class SampleSplitterTest extends BaseHadoopTest {

    /** Number of documents inserted by the fixture (10 * 512). */
    private static final int DOCUMENT_COUNT = 10 * 512;

    /** Length, in characters, of the padding string stored in each document. */
    private static final int PADDING_LENGTH = 2048;

    /**
     * Number of splits the fixture is expected to yield when the split size
     * is set to 1.
     */
    private static final int EXPECTED_SPLIT_COUNT = 12;

    private final SampleSplitter splitter = new SampleSplitter();
    private static MongoClient client = new MongoClient("localhost:27017");
    // NOTE(review): the database name "mongo_hadop" looks like a typo for
    // "mongo_hadoop". It is left unchanged because tearDownClass() drops the
    // whole database named here; confirm that no other test relies on a
    // "mongo_hadoop" database before renaming.
    private static MongoClientURI uri = new MongoClientURI(
      "mongodb://localhost:27017/mongo_hadop.sample_splitter");

    /**
     * Populates the input collection with {@link #DOCUMENT_COUNT} padded
     * documents whose {@code _id} and {@code i} fields both hold the
     * document's index.
     */
    @BeforeClass
    public static void setUpClass() {
        DBCollection inputCollection =
          client.getDB(uri.getDatabase())
            .getCollection(uri.getCollection());

        // Start from an empty collection: the _id values below are fixed, so
        // documents left over from an aborted earlier run would otherwise
        // make the insert fail with duplicate keys.
        inputCollection.drop();

        // Fill up with 10MB. Average object size is just over 2KB.
        StringBuilder paddingBuilder = new StringBuilder(PADDING_LENGTH);
        for (int i = 0; i < PADDING_LENGTH; ++i) {
            paddingBuilder.append('-');
        }
        String padding = paddingBuilder.toString();

        List<DBObject> documents = new ArrayList<DBObject>(DOCUMENT_COUNT);
        for (int i = 0; i < DOCUMENT_COUNT; i++) {
            documents.add(
              new BasicDBObjectBuilder()
                .add("_id", i)
                .add("i", i)
                .add("padding", padding).get());
        }
        inputCollection.insert(documents);
    }

    /**
     * Drops the test database and releases the shared client's connections.
     */
    @AfterClass
    public static void tearDownClass() {
        try {
            client.dropDatabase(uri.getDatabase());
        } finally {
            // Close even if the drop fails so connections are not leaked.
            client.close();
        }
    }

    /**
     * With a split size of 1 and the default split key, the fixture is
     * expected to be divided into {@link #EXPECTED_SPLIT_COUNT} ascending
     * ranges over {@code _id}.
     *
     * @throws SplitFailedException if the splitter cannot calculate splits
     */
    @Test
    public void testCalculateSplits() throws SplitFailedException {
        assumeTrue(isSampleOperatorSupported(uri));
        Configuration conf = new Configuration();
        MongoConfigUtil.setInputURI(conf, uri.getURI());
        MongoConfigUtil.setSplitSize(conf, 1);
        splitter.setConfiguration(conf);

        List<InputSplit> splits = splitter.calculateSplits();
        assertEquals(EXPECTED_SPLIT_COUNT, splits.size());
        assertOpenEndedAscendingRanges(splits, "_id");
    }

    /**
     * With a split size large enough for the whole collection, a single
     * split with no lower or upper bound is expected.
     *
     * @throws SplitFailedException if the splitter cannot calculate splits
     */
    @Test
    public void testAllOnOneSplit() throws SplitFailedException {
        assumeTrue(isSampleOperatorSupported(uri));
        Configuration conf = new Configuration();
        MongoConfigUtil.setInputURI(conf, uri.getURI());
        // Split size is enough to encapsulate all documents.
        MongoConfigUtil.setSplitSize(conf, 12);
        splitter.setConfiguration(conf);

        List<InputSplit> splits = splitter.calculateSplits();
        assertEquals(1, splits.size());
        MongoInputSplit firstSplit = (MongoInputSplit) splits.get(0);
        assertTrue(firstSplit.getMin().toMap().isEmpty());
        assertTrue(firstSplit.getMax().toMap().isEmpty());
    }

    /**
     * Same expectations as {@link #testCalculateSplits()}, but with the
     * split key pattern set to the {@code i} field instead of the default.
     *
     * @throws SplitFailedException if the splitter cannot calculate splits
     */
    @Test
    public void testAlternateSplitKey() throws SplitFailedException {
        assumeTrue(isSampleOperatorSupported(uri));
        Configuration conf = new Configuration();
        MongoConfigUtil.setInputURI(conf, uri.getURI());
        MongoConfigUtil.setSplitSize(conf, 1);
        MongoConfigUtil.setInputSplitKeyPattern(conf, "{\"i\": 1}");
        splitter.setConfiguration(conf);

        List<InputSplit> splits = splitter.calculateSplits();
        assertEquals(EXPECTED_SPLIT_COUNT, splits.size());
        assertOpenEndedAscendingRanges(splits, "i");
    }

    /**
     * When the sample operator is not supported, calculating splits is
     * expected to fail with a {@link SplitFailedException}.
     */
    @Test
    public void testSampleSplitterOldMongoDB() {
        assumeFalse(isSampleOperatorSupported(uri));
        Configuration conf = new Configuration();
        MongoConfigUtil.setInputURI(conf, uri.getURI());
        MongoConfigUtil.setSplitSize(conf, 1);
        splitter.setConfiguration(conf);

        try {
            splitter.calculateSplits();
            Assert.fail(
              "SampleSplitter should throw SplitFailedException on"
                + " MongoDB < 3.2.");
        } catch (SplitFailedException e) {
            // Good: this is the expected outcome, nothing more to check.
        }
    }

    /**
     * Asserts that the first split has no lower bound, the last split has no
     * upper bound, and the upper bounds of all splits but the last are
     * strictly ascending in {@code key}.
     *
     * @param splits the calculated splits; must hold at least two
     *               {@link MongoInputSplit} elements
     * @param key    name of the integer field used as the split key
     */
    private static void assertOpenEndedAscendingRanges(
      final List<InputSplit> splits, final String key) {
        int lastIndex = splits.size() - 1;

        MongoInputSplit firstSplit = (MongoInputSplit) splits.get(0);
        assertTrue(firstSplit.getMin().toMap().isEmpty());
        MongoInputSplit lastSplit = (MongoInputSplit) splits.get(lastIndex);
        assertTrue(lastSplit.getMax().toMap().isEmpty());

        // Ranges for splits are ascending. The last split is left out of
        // the loop because its upper bound is empty.
        int lastKey = (Integer) firstSplit.getMax().get(key);
        for (int i = 1; i < lastIndex; i++) {
            MongoInputSplit split = (MongoInputSplit) splits.get(i);
            int currentKey = (Integer) split.getMax().get(key);
            assertTrue(currentKey > lastKey);
            lastKey = currentKey;
        }
    }
}