package com.mongodb.hadoop.pig; import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObjectBuilder; import com.mongodb.DB; import com.mongodb.DBCollection; import com.mongodb.DBCursor; import com.mongodb.MongoClient; import com.mongodb.MongoClientURI; import com.mongodb.client.ListIndexesIterable; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; import com.mongodb.hadoop.testutils.BaseHadoopTest; import org.apache.pig.tools.parameters.ParseException; import org.bson.Document; import org.junit.After; import org.junit.Before; import org.junit.Test; import java.io.IOException; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.UUID; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; public class PigTest extends BaseHadoopTest { private static final MongoClientURI URI = new MongoClientURI("mongodb://localhost:27017/mongo_hadoop.pigtests"); private MongoClient mongoClient; private DB db; @Before public void setup() throws UnknownHostException { mongoClient = new MongoClient(URI); db = mongoClient.getDB("mongo_hadoop"); db.dropDatabase(); } @After public void tearDown() { db.dropDatabase(); mongoClient.close(); } public void runMongoUpdateStorageTest( final String scriptName, final String[] expected) throws IOException, ParseException { runMongoUpdateStorageTest(scriptName, expected, "results"); } public void runMongoUpdateStorageTest( final String scriptName, final String[] expected, final String alias) throws IOException, ParseException { org.apache.pig.pigunit.PigTest pigTest = new org.apache.pig.pigunit .PigTest(getClass().getResource(scriptName).getPath()); // Let the STORE statement do its job so we can test MongoUpdateStorage. pigTest.unoverride("STORE"); pigTest.assertOutput(alias, expected); } public static void runScript(final String scriptName) throws IOException, ParseException { org.apache.pig.pigunit.PigTest pigTest = new org.apache.pig.pigunit .PigTest(PigTest.class.getResource(scriptName).getPath()); pigTest.unoverride("STORE"); pigTest.runScript(); } private boolean indexExists( final MongoCollection<Document> collection, final String indexName) { ListIndexesIterable<Document> indexes = collection.listIndexes(); for (Document indexSpec : indexes) { String idxName = (String) indexSpec.get("name"); if (idxName.equals(indexName)) { return true; } } return false; } @Test public void mongoUpdateStorage() throws IOException, ParseException { runMongoUpdateStorageTest( "/pig/update_simple_mus.pig", new String[]{ "(Daniel,Alabi,([car#a],[car#b],[car#c],[car#a],[car#b],[car#c]))", "(Tolu,Alabi,([car#d],[car#e],[car#f],[car#d],[car#e],[car#f]))", "(Tinuke,Dada,([car#g],[car#g]))" } ); } @Test public void mongoUpdateStorageMulti() throws IOException, ParseException { runMongoUpdateStorageTest( "/pig/update_age_alabis_mus.pig", new String[]{ "(Daniel,Alabi,22.0)", "(Tolu,Alabi,24.0)", "(Tinuke,Dada,53.0)" } ); } @Test public void testPigUUID() throws IOException, ParseException { UUID uuid = UUID.randomUUID(); BasicDBObject doc = new BasicDBObject("uuid", uuid); db.getCollection("uuid_test").insert(doc); org.apache.pig.pigunit.PigTest test = new org.apache.pig.pigunit.PigTest( getClass().getResource("/pig/pig_uuid.pig").getPath()); test.assertOutput(new String[]{"(" + uuid.toString() + ")"}); } @Test public void testDates() throws IOException, ParseException { mongoClient .getDatabase(URI.getDatabase()) .getCollection(URI.getCollection()).insertOne(new Document( "today", new Date())); MongoCollection<Document> outputCollection = mongoClient .getDatabase("mongo_hadoop") .getCollection("datetests"); PigTest.runScript("/pig/datestest.pig"); for (Document doc : outputCollection.find()) { Object today = doc.get("today"); assertTrue( "Expected a Date, but got a " + today.getClass().getName(), today instanceof Date); } } @Test public void testPigProjection() throws IOException, ParseException { DBCollection collection = mongoClient .getDB("mongo_hadoop").getCollection("projection_test"); String[] expected = new String[100]; for (int i = 0; i < expected.length; ++i) { String letter = String.valueOf((char) ('a' + (i % 26))); // {"_id": ObjectId(...), "i": <int>, // "d": {"s": <string>, "j": <int>, "k": <int>}} collection.insert( new BasicDBObjectBuilder() .add("i", i).push("d") .add("s", letter) .add("j", i + 1) .add("k", i % 5).pop().get()); expected[i] = "(" + i + "," + letter + "," + i % 5 + ")"; } org.apache.pig.pigunit.PigTest test = new org.apache.pig.pigunit.PigTest( getClass().getResource("/pig/projection.pig").getPath()); test.assertOutput(expected); } @Test public void testPigBSONOutput() throws IOException, ParseException { runMongoUpdateStorageTest( "/pig/bson_test.pig", new String[]{ "(Daniel,Alabi,19.0)", "(Tolu,Alabi,21.0)", "(Tinuke,Dada,50.0)" }, "persons_read" ); } @Test public void testPigSchemaless() throws IOException, ParseException { // Seed data used by "schemaless.pig" MongoDatabase db = mongoClient.getDatabase("mongo_hadoop"); List<Document> documents = new ArrayList<Document>(1000); for (int i = 0; i < 1000; ++i) { documents.add(new Document("_id", i)); } db.getCollection("pig.schemaless").insertMany(documents); runScript("/pig/schemaless.pig"); assertEquals(1000, db.getCollection("pig.schemaless.out").count()); assertNotNull( db.getCollection("pig.schemaless.out").find( new Document("_id", 999)).first()); } @Test public void testPigSchemalessFromBSON() throws IOException, ParseException { runMongoUpdateStorageTest( "/pig/bson_schemaless.pig", new String[]{ "(Daniel,Alabi,19.0)", "(Tolu,Alabi,21.0)", "(Tinuke,Dada,50.0)" } ); } @Test public void testMongoStorageEnsureIndex() throws IOException, ParseException { runScript("/pig/ensure_index.pig"); MongoClient client = new MongoClient("localhost:27017"); // There should be an index on the "last" field, ascending. MongoCollection<Document> coll = client.getDatabase("mongo_hadoop") .getCollection("ensure_indexes"); assertTrue("Should have the index \"last_1\"", indexExists(coll, "last_1")); // Drop the index. coll.dropIndex("last_1"); // Run the second pig script, which ensures a different index. runScript("/pig/ensure_index_2.pig"); assertTrue("Should have the index \"first_1\"", indexExists(coll, "first_1")); assertFalse("Should not have the index \"last_1\"", indexExists(coll, "last_1")); } @Test public void testPigUpdateReplace() throws IOException, ParseException { DBCollection replaceCollection = db.getCollection("replace_test"); for (int i = 0; i < 10; ++i) { replaceCollection.insert(new BasicDBObject("i", i)); } runScript("/pig/replace_mus.pig"); DBCursor cursor = replaceCollection.find().sort(new BasicDBObject("i", 1)); for (int i = 1; i <= 10; ++i) { assertEquals(i, cursor.next().get("i")); } } }