package com.mongodb.hadoop.splitter; import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObjectBuilder; import com.mongodb.DBCollection; import com.mongodb.DBObject; import com.mongodb.MongoClient; import com.mongodb.MongoClientURI; import com.mongodb.hadoop.input.MongoInputSplit; import com.mongodb.hadoop.util.MongoClientURIBuilder; import com.mongodb.hadoop.util.MongoConfigUtil; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.InputSplit; import org.junit.BeforeClass; import org.junit.Test; import java.net.UnknownHostException; import java.util.List; import static com.mongodb.hadoop.splitter.MongoSplitterTestUtils.assertSplitsCount; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; public class StandaloneMongoSplitterTest { private static MongoClientURI uri; private static DBCollection collection; @BeforeClass public static void setUp() { MongoClient client = new MongoClient("localhost", 27017); uri = new MongoClientURIBuilder() .collection("mongo_hadoop", "splitter_test") .build(); collection = client.getDB(uri.getDatabase()).getCollection(uri.getCollection()); collection.drop(); collection.createIndex("value"); for (int i = 0; i < 40000; i++) { collection.insert(new BasicDBObject("_id", i).append("value", i)); } } @Test public void unshardedCollection() throws UnknownHostException, SplitFailedException { Configuration config = new Configuration(); StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config); MongoConfigUtil.setInputURI(config, uri); List<InputSplit> inputSplits = splitter.calculateSplits(); assertFalse("Should find at least one split", inputSplits.isEmpty()); } @Test public void unshardedCollectionMinMax() throws UnknownHostException, SplitFailedException { Configuration config = new Configuration(); StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config); MongoConfigUtil.setInputURI(config, uri); DBObject inputSplitKey = BasicDBObjectBuilder.start("value", 1).get(); MongoConfigUtil.setInputSplitKey(config, inputSplitKey); MongoConfigUtil.setSplitSize(config, 1); List<InputSplit> regularSplits = splitter.calculateSplits(); MongoConfigUtil.setMinSplitKey(config, "{value:100}"); MongoConfigUtil.setMaxSplitKey(config, "{value:39900}"); List<InputSplit> inputSplits = splitter.calculateSplits(); assertTrue("should be fewer splits with min/max set", regularSplits.size() >= inputSplits.size()); } @Test public void testNullBounds() throws Exception { Configuration config = new Configuration(); StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config); MongoInputSplit split = splitter.createSplitFromBounds(null, null); assertEquals(new BasicDBObject(), split.getMin()); assertEquals(new BasicDBObject(), split.getMax()); } @Test public void testNullLowerBound() throws Exception { Configuration config = new Configuration(); StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config); BasicDBObject upperBound = new BasicDBObject("a", 10); MongoInputSplit split = splitter.createSplitFromBounds(null, upperBound); assertEquals(new BasicDBObject(), split.getMin()); assertEquals(10, split.getMax().get("a")); } @Test public void testNullUpperBound() throws Exception { Configuration config = new Configuration(); StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config); BasicDBObject lowerBound = new BasicDBObject("a", 10); MongoInputSplit split = splitter.createSplitFromBounds(lowerBound, null); assertEquals(10, split.getMin().get("a")); assertEquals(new BasicDBObject(), split.getMax()); } @Test public void testLowerUpperBounds() throws Exception { Configuration config = new Configuration(); StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config); BasicDBObject lowerBound = new BasicDBObject("a", 0); BasicDBObject upperBound = new BasicDBObject("a", 10); MongoInputSplit split = splitter.createSplitFromBounds(lowerBound, upperBound); assertEquals(0, split.getMin().get("a")); assertEquals(10, split.getMax().get("a")); } @Test public void testFilterEmptySplitsNoQuery() throws SplitFailedException { Configuration config = new Configuration(); MongoConfigUtil.setInputURI(config, uri); MongoConfigUtil.setEnableFilterEmptySplits(config, true); MongoConfigUtil.setSplitSize(config, 1); StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config); List<InputSplit> splits = splitter.calculateSplits(); // No splits should be elided, because there's no query. for (InputSplit split : splits) { assertNotEquals( 0, (((MongoInputSplit) split).getCursor().itcount())); } assertSplitsCount(collection.count(), splits); } @Test public void testFilterEmptySplits() throws SplitFailedException { Configuration config = new Configuration(); DBObject query = new BasicDBObject( "$or", new BasicDBObject[]{ new BasicDBObject("value", new BasicDBObject("$lt", 20000)), new BasicDBObject("value", new BasicDBObject("$gt", 35000))}); MongoConfigUtil.setInputURI(config, uri); MongoConfigUtil.setEnableFilterEmptySplits(config, true); MongoConfigUtil.setQuery(config, query); // 1 MB per document results in 4 splits; the 3rd one is empty per // the above query. MongoConfigUtil.setSplitSize(config, 1); StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config); List<InputSplit> splits = splitter.calculateSplits(); // No splits are empty. for (InputSplit split : splits) { // Cursor is closed on the split, so copy it to create a new one. MongoInputSplit mis = new MongoInputSplit((MongoInputSplit) split); assertNotEquals(0, mis.getCursor().itcount()); } assertSplitsCount(collection.count(query), splits); } }