package com.mongodb.hadoop.splitter;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.client.MongoCollection;
import com.mongodb.hadoop.input.MongoInputSplit;
import com.mongodb.hadoop.util.MongoConfigUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.bson.Document;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import static com.mongodb.hadoop.splitter.MongoSplitterTestUtils.assertSplitRange;
import static com.mongodb.hadoop.splitter.MongoSplitterTestUtils.assertSplitsCount;
import static org.junit.Assert.assertEquals;
/**
 * Integration tests for {@link MongoPaginatingSplitter}.
 *
 * <p>These tests connect to a MongoDB server at {@code localhost:27017} and
 * rebuild the {@code mongo_hadoop.pag_split_test} collection before every
 * test, so a server must be reachable at that address for them to run.
 */
public class MongoPaginatingSplitterTest {

    /** Number of documents loaded into the test collection. */
    private static final int DOCUMENT_COUNT = 40000;

    /** Minimum number of documents per split passed to the splitter. */
    private static final int MIN_DOCS_PER_SPLIT = 5000;

    /** Input URI handed to the splitter through the Hadoop configuration. */
    private static final MongoClientURI INPUT_URI = new MongoClientURI(
        "mongodb://localhost:27017/mongo_hadoop.pag_split_test");

    /** Client owned by the current test; closed again in {@link #tearDown()}. */
    private MongoClient client;

    /** Handle on the freshly populated test collection. */
    private MongoCollection<Document> collection;

    /**
     * Drops and repopulates the test collection with documents whose
     * {@code _id} and {@code value} fields both run from 0 up to
     * {@code DOCUMENT_COUNT - 1}.
     */
    @Before
    public void setUp() {
        client = new MongoClient("localhost", 27017);
        collection =
            client.getDatabase("mongo_hadoop").getCollection("pag_split_test");
        collection.drop();

        // Load the fixture with one bulk call instead of one round trip per
        // document.
        List<Document> documents = new ArrayList<Document>(DOCUMENT_COUNT);
        for (int i = 0; i < DOCUMENT_COUNT; ++i) {
            documents.add(new Document("_id", i).append("value", i));
        }
        collection.insertMany(documents);
    }

    /**
     * Closes the client opened in {@link #setUp()} so that each test does not
     * leave an open client behind.
     */
    @After
    public void tearDown() {
        if (client != null) {
            client.close();
            client = null;
        }
    }

    /**
     * Builds the configuration shared by both tests: the test input URI,
     * range queries enabled, and the minimum documents per split.
     *
     * @return a new configuration ready to be passed to the splitter
     */
    private static Configuration newSplitterConfiguration() {
        Configuration conf = new Configuration();
        MongoConfigUtil.setInputURI(conf, INPUT_URI);
        MongoConfigUtil.setRangeQueryEnabled(conf, true);
        MongoConfigUtil.setInputSplitMinDocs(conf, MIN_DOCS_PER_SPLIT);
        return conf;
    }

    /**
     * Splits the collection with a query that excludes the documents whose
     * {@code value} lies in [25000, 31000) and checks the resulting seven
     * split ranges and the total number of documents they cover.
     *
     * @throws SplitFailedException if the splitter cannot calculate splits
     */
    @Test
    public void testQuery() throws SplitFailedException {
        Configuration conf = newSplitterConfiguration();
        DBObject query = new BasicDBObject(
            "$or", new BasicDBObject[]{
                new BasicDBObject("value", new BasicDBObject("$lt", 25000)),
                new BasicDBObject("value", new BasicDBObject("$gte", 31000))});
        MongoConfigUtil.setQuery(conf, query);

        MongoPaginatingSplitter splitter = new MongoPaginatingSplitter(conf);
        List<InputSplit> splits = splitter.calculateSplits();

        assertEquals(7, splits.size());
        assertSplitRange((MongoInputSplit) splits.get(0), null, 5000);
        assertSplitRange((MongoInputSplit) splits.get(1), 5000, 10000);
        assertSplitRange((MongoInputSplit) splits.get(2), 10000, 15000);
        assertSplitRange((MongoInputSplit) splits.get(3), 15000, 20000);
        // This split is expected to span the hole left by the query
        // (values 25000..30999 do not match).
        assertSplitRange((MongoInputSplit) splits.get(4), 20000, 31000);
        assertSplitRange((MongoInputSplit) splits.get(5), 31000, 36000);
        assertSplitRange((MongoInputSplit) splits.get(6), 36000, null);

        // 6000 documents excluded by query.
        final int excludedByQuery = 6000;
        assertSplitsCount(collection.count() - excludedByQuery, splits);
    }

    /**
     * Splits the whole collection without a query and checks that it yields
     * eight consecutive ranges of {@code MIN_DOCS_PER_SPLIT} documents, the
     * first without a lower bound and the last without an upper bound.
     *
     * @throws SplitFailedException if the splitter cannot calculate splits
     */
    @Test
    public void testNoQuery() throws SplitFailedException {
        Configuration conf = newSplitterConfiguration();

        MongoPaginatingSplitter splitter = new MongoPaginatingSplitter(conf);
        List<InputSplit> splits = splitter.calculateSplits();

        assertEquals(8, splits.size());
        int lastIndex = splits.size() - 1;
        for (int i = 0; i <= lastIndex; ++i) {
            // A null bound stands for an open end of the range.
            Integer min = i == 0 ? null : i * MIN_DOCS_PER_SPLIT;
            Integer max = i == lastIndex ? null : (i + 1) * MIN_DOCS_PER_SPLIT;
            assertSplitRange((MongoInputSplit) splits.get(i), min, max);
        }
        assertSplitsCount(collection.count(), splits);
    }
}