package com.mongodb.hadoop.pig;
import com.mongodb.DBRef;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.client.MongoCollection;
import org.apache.pig.tools.parameters.ParseException;
import org.bson.Document;
import org.bson.types.Binary;
import org.bson.types.MaxKey;
import org.bson.types.MinKey;
import org.bson.types.ObjectId;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class UDFTest {
private static final MongoClient CLIENT = new MongoClient(
new MongoClientURI("mongodb://localhost:27017/mongo_hadoop"));
private static final MongoCollection<Document> INPUT_COLLECTION =
CLIENT.getDatabase("mongo_hadoop").getCollection("udftest.input");
private static final MongoCollection<Document> OUTPUT_COLLECTION =
CLIENT.getDatabase("mongo_hadoop").getCollection("udftest.output");
private static List<Document> insertedDocuments;
@Before
public void setUp() {
OUTPUT_COLLECTION.drop();
}
@BeforeClass
public static void setUpClass() {
INPUT_COLLECTION.drop();
insertedDocuments = new ArrayList<Document>(100);
for (int i = 0; i < 100; ++i) {
ObjectId id = new ObjectId();
insertedDocuments.add(
new Document("_id", id)
.append("minkey", new MinKey())
.append("maxkey", new MaxKey())
.append("dbref", new DBRef("othercollection", new ObjectId()))
.append("binary", new Binary(new byte[]{1, 2, 3, 4, 5}))
.append("oidBytes", new Binary(id.toByteArray())));
}
INPUT_COLLECTION.insertMany(insertedDocuments);
}
@AfterClass
public static void tearDownClass() {
INPUT_COLLECTION.drop();
OUTPUT_COLLECTION.drop();
}
@Test
public void testAsObjectId() throws IOException, ParseException {
PigTest.runScript("/pig/toobjectid.pig");
assertEquals(insertedDocuments.size(), OUTPUT_COLLECTION.count());
Iterator<Document> it = insertedDocuments.iterator();
for (Document outputDoc : OUTPUT_COLLECTION.find()) {
ObjectId expectedId = it.next().getObjectId("_id");
assertEquals(expectedId, outputDoc.get("_id"));
assertEquals(expectedId, outputDoc.get("otherid"));
}
}
@Test
public void testAsBinary() throws IOException, ParseException {
PigTest.runScript("/pig/tobinary.pig");
for (Document doc : OUTPUT_COLLECTION.find()) {
Object binary = doc.get("binary");
assertTrue(binary instanceof Binary);
assertArrayEquals(
new byte[]{1, 2, 3, 4, 5},
((Binary) binary).getData());
}
}
@Test
public void testAsDBRef() throws IOException, ParseException {
PigTest.runScript("/pig/todbref.pig");
assertEquals(insertedDocuments.size(), OUTPUT_COLLECTION.count());
Iterator<Document> it = insertedDocuments.iterator();
for (Document outputDoc : OUTPUT_COLLECTION.find()) {
assertEquals(it.next().get("dbref"), outputDoc.get("dbref"));
}
}
@Test
public void testMinMaxKey() throws IOException, ParseException {
PigTest.runScript("/pig/genminmaxkeys.pig");
for (Document doc : OUTPUT_COLLECTION.find()) {
assertTrue(doc.get("newMin") instanceof MinKey);
assertTrue(doc.get("newMax") instanceof MaxKey);
}
}
@Test
public void testObjectIdToSeconds() throws IOException, ParseException {
PigTest.runScript("/pig/oidtoseconds.pig");
assertEquals(insertedDocuments.size(), OUTPUT_COLLECTION.count());
Iterator<Document> it = insertedDocuments.iterator();
for (Document outputDoc : OUTPUT_COLLECTION.find()) {
int seconds = outputDoc.getInteger("seconds");
int seconds2 = outputDoc.getInteger("seconds2");
int expectedSeconds = it.next().getObjectId("_id").getTimestamp();
assertEquals(expectedSeconds, seconds);
assertEquals(expectedSeconds, seconds2);
}
}
@Test
public void testUDFsSchemaless() throws IOException, ParseException {
// Test that one of our UDFs can work without any schemas being
// specified. This mostly tests that BSONStorage can infer the type
// correctly.
PigTest.runScript("/pig/udfschemaless.pig");
assertEquals(insertedDocuments.size(), OUTPUT_COLLECTION.count());
Iterator<Document> it = insertedDocuments.iterator();
for (Document doc : OUTPUT_COLLECTION.find()) {
// We don't know what Pig will call the fields that aren't "_id".
ObjectId expectedId = it.next().getObjectId("_id");
for (Map.Entry<String, Object> entry : doc.entrySet()) {
// _id field contains a different ObjectId than the one we're
// interested in.
if ("_id".equals(entry.getKey())) {
continue;
}
assertEquals(expectedId, entry.getValue());
}
}
}
}