package com.mongodb.hadoop;

import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoClientURI;
import com.mongodb.ReadPreference;
import com.mongodb.WriteConcern;
import com.mongodb.hadoop.examples.treasury.TreasuryYieldXMLConfig;
import com.mongodb.hadoop.mapred.output.MongoOutputCommitter;
import com.mongodb.hadoop.testutils.MapReduceJob;
import com.mongodb.hadoop.util.MongoClientURIBuilder;
import org.junit.Test;

import java.util.List;

import static com.mongodb.hadoop.util.MongoConfigUtil.INPUT_MONGOS_HOSTS;
import static com.mongodb.hadoop.util.MongoConfigUtil.INPUT_QUERY;
import static com.mongodb.hadoop.util.MongoConfigUtil.SPLITS_SLAVE_OK;
import static com.mongodb.hadoop.util.MongoConfigUtil.SPLITS_USE_CHUNKS;
import static com.mongodb.hadoop.util.MongoConfigUtil.SPLITS_USE_RANGEQUERY;
import static com.mongodb.hadoop.util.MongoConfigUtil.SPLITS_USE_SHARDS;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;

public class TestSharded extends BaseShardedTest {

    // Runs the treasury yield job against the sharded cluster through mongos and
    // compares the output collection with the reference results.
    @Test
    public void testBasicInputSource() {
        MapReduceJob job = new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
                               .jar(JOBJAR_PATH)
                               .inputUris(getInputUri())
                               .outputUri(getOutputUri());
        if (isHadoopV1()) {
            job.outputCommitter(MongoOutputCommitter.class);
        }
        job.execute(isRunTestInVm());
        compareResults(getMongos().getDB("mongo_hadoop").getCollection("yield_historical.out"), getReference());
    }

    // Spreads the input load across multiple mongos instances via INPUT_MONGOS_HOSTS.
    @Test
    public void testMultiMongos() {
        MongoClientURI outputUri = getOutputUri();
        MapReduceJob job = new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
                               .jar(JOBJAR_PATH)
                               .param(INPUT_MONGOS_HOSTS, "localhost:27017 localhost:27018")
                               .inputUris(getInputUri())
                               .outputUri(outputUri);
        if (isHadoopV1()) {
            job.outputCommitter(MongoOutputCommitter.class);
        }
        job.execute(isRunTestInVm());
        compareResults(getMongos().getDB(outputUri.getDatabase()).getCollection(outputUri.getCollection()),
                       getReference());
    }

    // Splits the input using range queries, then verifies that supplying a conflicting
    // INPUT_QUERY causes the job to fail rather than produce output.
    @Test
    public void testRangeQueries() {
        DBCollection collection = getMongos().getDB(getOutputUri().getDatabase())
                                             .getCollection(getOutputUri().getCollection());
        collection.drop();

        MapReduceJob job = new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
                               .jar(JOBJAR_PATH)
                               .inputUris(getInputUri())
                               .outputUri(getOutputUri())
                               .param(SPLITS_USE_RANGEQUERY, "true");
        if (isHadoopV1()) {
            job.outputCommitter(MongoOutputCommitter.class);
        }
        job.execute(isRunTestInVm());
        compareResults(collection, getReference());

        collection.drop();
        job.param(INPUT_QUERY, "{\"_id\":{\"$gt\":{\"$date\":1182470400000}}}").execute(isRunTestInVm());
        // Make sure that this fails when rangequery is used with a query that conflicts
        assertFalse("This collection shouldn't exist because of the failure",
                    getMongos().getDB("mongo_hadoop").getCollectionNames().contains("yield_historical.out"));
    }

    // Reads directly from the shards (bypassing mongos), with and without chunk-based splitting.
    public void testDirectAccess() {
        DBCollection collection = getMongos().getDB("mongo_hadoop").getCollection("yield_historical.out");
        collection.drop();

        // HADOOP-61 - simulate a failed migration by having some docs from one chunk
        // also exist on another shard that does not own that chunk (duplicates)
        DB config = getMongos().getDB("config");

        DBObject chunk = config.getCollection("chunks").findOne(new BasicDBObject("shard", "sh01"));
        DBObject query = new BasicDBObject("_id",
            new BasicDBObject("$gte", ((DBObject) chunk.get("min")).get("_id"))
                .append("$lt", ((DBObject) chunk.get("max")).get("_id")));
        List<DBObject> data =
            toList(getMongos().getDB("mongo_hadoop").getCollection("yield_historical.in").find(query));
        DBCollection destination = getShard().getDB("mongo_hadoop").getCollection("yield_historical.in");
        for (DBObject doc : data) {
            destination.insert(doc, WriteConcern.UNACKNOWLEDGED);
        }

        MapReduceJob job = new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
                               .jar(JOBJAR_PATH)
                               .param(SPLITS_SLAVE_OK, "true")
                               .param(SPLITS_USE_SHARDS, "true")
                               .param(SPLITS_USE_CHUNKS, "false")
                               .inputUris(new MongoClientURIBuilder(getInputUri())
                                              .readPreference(ReadPreference.secondary())
                                              .build());
        if (isHadoopV1()) {
            job.outputCommitter(MongoOutputCommitter.class);
        }
        job.execute(isRunTestInVm());
        compareResults(collection, getReference());

        collection.drop();

        MapReduceJob jobWithChunks = new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
                                         .jar(JOBJAR_PATH)
                                         .inputUris(new MongoClientURIBuilder(getInputUri())
                                                        .readPreference(ReadPreference.secondary())
                                                        .build())
                                         .param(SPLITS_SLAVE_OK, "true")
                                         .param(SPLITS_USE_SHARDS, "true")
                                         .param(SPLITS_USE_CHUNKS, "true");
        if (isHadoopV1()) {
            jobWithChunks.outputCommitter(MongoOutputCommitter.class);
        }
        jobWithChunks.execute(isRunTestInVm());
        compareResults(collection, getReference());
    }

    // Same conflicting-query check as testRangeQueries, using the $gt/$lt query format.
    @Test
    public void testShardedClusterWithGtLtQueryFormats() {
        DBCollection collection = getMongos().getDB("mongo_hadoop").getCollection("yield_historical.out");
        collection.drop();

        MapReduceJob job = new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
                               .jar(JOBJAR_PATH)
                               .inputUris(getInputUri())
                               .outputUri(getOutputUri())
                               .param(SPLITS_USE_RANGEQUERY, "true");
        if (isHadoopV1()) {
            job.outputCommitter(MongoOutputCommitter.class);
        }
        job.execute(isRunTestInVm());
        compareResults(collection, getReference());

        collection.drop();
        job.param(INPUT_QUERY, "{\"_id\":{\"$gt\":{\"$date\":1182470400000}}}")
           .inputUris(getInputUri())
           .execute(isRunTestInVm());
        // Make sure that this fails when rangequery is used with a query that conflicts
        assertEquals(0, collection.count());
    }
}