package com.mongodb.hadoop.pig;
import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.BasicDBObjectBuilder;
import com.mongodb.hadoop.input.MongoRecordReader;
import com.mongodb.hadoop.util.MongoConfigUtil;
import com.mongodb.util.JSON;
import org.apache.pig.LoadPushDown;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.util.UDFContext;
import org.bson.types.Binary;
import org.joda.time.DateTime;
import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
/**
 * Unit tests for {@link MongoLoader} and the field conversion performed by
 * {@code BSONLoader.readField}.
 *
 * <p>The tests fall into three groups: loading a document without a schema
 * (through a mocked {@link MongoRecordReader}), converting single values
 * according to a user-supplied Pig schema, and projection push-down.</p>
 */
public class MongoLoaderTest {

    /**
     * Loads a single document through a schema-less {@link MongoLoader}.
     *
     * <p>The record reader is mocked so that it always reports another
     * key/value pair and hands back {@code document} as the current value.</p>
     *
     * @param document the document the mocked reader returns
     * @return the Map found at position 0 of the Tuple produced by the loader
     * @throws IOException if the loader fails to prepare or to read
     */
    @SuppressWarnings("unchecked")
    private static Map<String, Object> loadWithoutSchema(
        final BasicDBObject document) throws IOException {
        MongoRecordReader reader = mock(MongoRecordReader.class);
        when(reader.nextKeyValue()).thenReturn(true);
        when(reader.getCurrentValue()).thenReturn(document);

        // No explicit schema.
        MongoLoader loader = new MongoLoader();
        loader.prepareToRead(reader, null);
        Tuple result = loader.getNext();

        // Tuple just contains a Map.
        return (Map<String, Object>) result.get(0);
    }

    /**
     * Without a schema, a BSON {@link Binary} value is expected to show up
     * in the resulting Map as a {@link DataByteArray} with the same bytes.
     */
    @Test
    public void testBinaryNoSchema() throws IOException {
        byte[] data = new byte[] {1, 2, 3};
        Map<String, Object> tupleContents =
            loadWithoutSchema(new BasicDBObject("bytes", new Binary(data)));

        // Map contains DataByteArray with binary data.
        assertArrayEquals(
            data, ((DataByteArray) tupleContents.get("bytes")).get());
    }

    /**
     * Without a schema, a raw {@code byte[]} value is expected to show up
     * in the resulting Map as a {@link DataByteArray} with the same bytes.
     */
    @Test
    public void testByteArrayNoSchema() throws IOException {
        byte[] data = new byte[] {1, 2, 3};
        Map<String, Object> tupleContents =
            loadWithoutSchema(new BasicDBObject("bytes", data));

        // Map contains DataByteArray with binary data.
        assertArrayEquals(
            data, ((DataByteArray) tupleContents.get("bytes")).get());
    }

    /** A {@code byte[]} read as {@code bytearray} keeps its contents. */
    @Test
    public void testSimpleBytearray() throws IOException {
        byte[] data = new byte[] {1, 2, 3};
        String userSchema = "d:bytearray";
        MongoLoader ml = new MongoLoader(userSchema);

        Object result = BSONLoader.readField(data, ml.getFields()[0]);

        assertArrayEquals(data, ((DataByteArray) result).get());
    }

    /** A {@link Binary} read as {@code bytearray} keeps its contents. */
    @Test
    public void testSimpleBinary() throws IOException {
        byte[] data = new byte[] {1, 2, 3};
        String userSchema = "d:bytearray";
        MongoLoader ml = new MongoLoader(userSchema);

        Object result = BSONLoader.readField(
            new Binary(data), ml.getFields()[0]);

        assertArrayEquals(data, ((DataByteArray) result).get());
    }

    /** A String read as {@code chararray} is returned unchanged. */
    @Test
    public void testSimpleChararray() throws IOException {
        String userSchema = "d:chararray";
        MongoLoader ml = new MongoLoader(userSchema);

        Object result = BSONLoader.readField("value", ml.getFields()[0]);

        assertEquals("value", result);
    }

    /** A float read as {@code float} is returned unchanged. */
    @Test
    public void testSimpleFloat() throws IOException {
        String userSchema = "d:float";
        MongoLoader ml = new MongoLoader(userSchema);

        Object result = BSONLoader.readField(1.1F, ml.getFields()[0]);

        assertEquals(1.1F, result);
    }

    /**
     * A double read under a {@code float} schema is expected to come back
     * as the equivalent Float.
     */
    @Test
    public void testSimpleFloatAsDouble() throws IOException {
        String userSchema = "d:float";
        MongoLoader ml = new MongoLoader(userSchema);

        Object result = BSONLoader.readField(1.1D, ml.getFields()[0]);

        assertEquals(1.1F, result);
    }

    /**
     * A {@link Date} read as {@code datetime} is expected to come back as
     * the Joda {@link DateTime} built from that same Date.
     */
    @Test
    public void testSimpleDate() throws IOException {
        String userSchema = "d:datetime";
        MongoLoader ml = new MongoLoader(userSchema);
        Date in = Calendar.getInstance().getTime();
        DateTime out = new DateTime(in);

        Object result = BSONLoader.readField(in, ml.getFields()[0]);

        assertEquals(out, result);
    }

    /** A document read as a tuple yields its fields in schema order. */
    @Test
    public void testSimpleTuple() throws IOException {
        String userSchema = "t:tuple(t1:chararray, t2:chararray)";
        Object val = new BasicDBObject()
            .append("t1", "t1_value")
            .append("t2", "t2_value");
        MongoLoader ml = new MongoLoader(userSchema);

        Tuple t = (Tuple) BSONLoader.readField(val, ml.getFields()[0]);

        assertEquals(2, t.size());
        assertEquals("t1_value", t.get(0));
        assertEquals("t2_value", t.get(1));
    }

    /**
     * A schema field that is absent from the document still occupies a slot
     * in the tuple, and that slot is null.
     */
    @Test
    public void testSimpleTupleMissingField() throws IOException {
        String userSchema =
            "t:tuple(t1:chararray, t2:chararray, t3:chararray)";
        Object val = new BasicDBObject()
            .append("t1", "t1_value")
            .append("t2", "t2_value");
        MongoLoader ml = new MongoLoader(userSchema);

        Tuple t = (Tuple) BSONLoader.readField(val, ml.getFields()[0]);

        assertEquals(3, t.size());
        assertEquals("t1_value", t.get(0));
        assertEquals("t2_value", t.get(1));
        assertNull(t.get(2));
    }

    /**
     * A String value under a {@code float} tuple field is expected to be
     * read as null while the other field is read normally.
     */
    @Test
    public void testSimpleTupleIncorrectFieldType() throws IOException {
        String userSchema = "t:tuple(t1:chararray, t2:float)";
        Object val = new BasicDBObject()
            .append("t1", "t1_value")
            .append("t2", "t2_value");
        MongoLoader ml = new MongoLoader(userSchema);

        Tuple t = (Tuple) BSONLoader.readField(val, ml.getFields()[0]);

        assertEquals(2, t.size());
        assertEquals("t1_value", t.get(0));
        assertNull(t.get(1));
    }

    /** A list of documents read as a bag yields one tuple per document. */
    @Test
    public void testSimpleBag() throws IOException {
        String userSchema = "b:{t:tuple(t1:chararray, t2:chararray)}";
        BasicDBList bag = new BasicDBList();
        bag.add(new BasicDBObject()
            .append("t1", "t11_value")
            .append("t2", "t12_value"));
        bag.add(new BasicDBObject()
            .append("t1", "t21_value")
            .append("t2", "t22_value"));
        MongoLoader ml = new MongoLoader(userSchema);

        DataBag b = (DataBag) BSONLoader.readField(bag, ml.getFields()[0]);

        Iterator<Tuple> bit = b.iterator();

        Tuple firstInnerT = bit.next();
        assertEquals(2, firstInnerT.size());
        assertEquals("t11_value", firstInnerT.get(0));
        assertEquals("t12_value", firstInnerT.get(1));

        Tuple secondInnerT = bit.next();
        assertEquals(2, secondInnerT.size());
        assertEquals("t21_value", secondInnerT.get(0));
        assertEquals("t22_value", secondInnerT.get(1));

        assertFalse(bit.hasNext());
    }

    /**
     * A plain document (not a list) under a bag schema is expected to be
     * read as null.
     */
    @Test
    public void testBagThatIsNotABag() throws IOException {
        String userSchema = "b:{t:tuple(t1:chararray, t2:chararray)}";
        BasicDBObject notABag = new BasicDBObject();
        notABag.append("f1", new BasicDBObject()
            .append("t1", "t11_value")
            .append("t2", "t12_value"));
        notABag.append("f2", new BasicDBObject()
            .append("t1", "t21_value")
            .append("t2", "t22_value"));
        MongoLoader ml = new MongoLoader(userSchema);

        Object result = BSONLoader.readField(notABag, ml.getFields()[0]);

        assertNull(result);
    }

    /**
     * A bag nested inside a tuple inside a bag is converted recursively;
     * the test inspects the first tuple of the inner bag.
     */
    @Test
    public void testDeepness() throws IOException {
        String userSchema =
            "b:{t:tuple(t1:chararray, b:{t:tuple(i1:int, i2:int)})}";
        BasicDBList innerBag = new BasicDBList();
        innerBag.add(new BasicDBObject()
            .append("i1", 1)
            .append("i2", 2));
        innerBag.add(new BasicDBObject()
            .append("i1", 3)
            .append("i2", 4));
        BasicDBList bag = new BasicDBList();
        bag.add(new BasicDBObject()
            .append("t1", "t1_value")
            .append("b", innerBag));
        MongoLoader ml = new MongoLoader(userSchema);

        DataBag result =
            (DataBag) BSONLoader.readField(bag, ml.getFields()[0]);

        assertEquals(1, result.size());
        Tuple t = result.iterator().next();
        assertEquals(2, t.size());

        // Position 1 of the outer tuple holds the nested bag.
        DataBag innerBagResult = (DataBag) t.get(1);
        assertEquals(2, innerBagResult.size());

        Tuple innerT = innerBagResult.iterator().next();
        assertEquals(2, innerT.get(1));
    }

    /** A document read as an untyped map keeps its keys and values. */
    @Test
    public void testSimpleMap() throws Exception {
        //String userSchema = "m:[int]";
        // Note: before pig 0.9, explicitly setting the type for
        // map values was not allowed, so can't test that here :(
        String userSchema = "m:[]";
        BasicDBObject obj = new BasicDBObject()
            .append("k1", 1)
            .append("k2", 2);
        MongoLoader ml = new MongoLoader(userSchema);

        Map<?, ?> m = (Map<?, ?>) BSONLoader.readField(obj, ml.getFields()[0]);

        assertEquals(2, m.size());
        assertEquals(1, m.get("k1"));
        assertEquals(2, m.get("k2"));
    }

    /**
     * A document whose values are themselves documents, read as an untyped
     * map, yields one entry per key.
     */
    @Test
    public void testMapWithTuple() throws Exception {
        //String userSchema = "m:[(t1:chararray, t2:int)]";
        // Note: before pig 0.9, explicitly setting the type for
        // map values was not allowed, so can't test that here :(
        String userSchema = "m:[]";
        BasicDBObject v1 = new BasicDBObject()
            .append("t1", "t11 value")
            .append("t2", 12);
        BasicDBObject v2 = new BasicDBObject()
            .append("t1", "t21 value")
            .append("t2", 22);
        BasicDBObject obj = new BasicDBObject()
            .append("v1", v1)
            .append("v2", v2);
        MongoLoader ml = new MongoLoader(userSchema);

        Map<?, ?> m = (Map<?, ?>) BSONLoader.readField(obj, ml.getFields()[0]);

        assertEquals(2, m.size());
        // The values of "v1" and "v2" are deliberately not inspected: with
        // the untyped "m:[]" schema they can't safely be cast to Tuple,
        // because pig < 0.9 doesn't allow declaring the value type.
    }

    /**
     * Pushing down a projection of {@code a} and the map sub-field
     * {@code m.x} is accepted, and the resulting field selection stored in
     * the UDF properties includes those two fields and excludes
     * {@code _id}.
     */
    @Test
    public void testPushProjection() throws FrontendException {
        String userSchema = "a:int, m:[]";
        String signature = "signature";
        MongoLoader ml = new MongoLoader(userSchema);
        ml.setUDFContextSignature(signature);

        LoadPushDown.RequiredField aField =
            new LoadPushDown.RequiredField("a", 0, null, DataType.INTEGER);
        List<LoadPushDown.RequiredField> mSubFields =
            Collections.singletonList(
                new LoadPushDown.RequiredField(
                    "x", 0, null, DataType.INTEGER));
        LoadPushDown.RequiredField mField =
            new LoadPushDown.RequiredField(
                "m", 1, mSubFields, DataType.MAP);
        LoadPushDown.RequiredFieldList requiredFields =
            new LoadPushDown.RequiredFieldList(
                Arrays.asList(aField, mField));

        LoadPushDown.RequiredFieldResponse response =
            ml.pushProjection(requiredFields);

        assertTrue(response.getRequiredFieldResponse());

        // The projection is read back from the properties registered for
        // this loader class under the signature set above.
        Properties props = UDFContext.getUDFContext().getUDFProperties(
            MongoLoader.class, new String[]{signature});
        assertEquals(
            new BasicDBObjectBuilder()
                .add("a", true).add("m.x", true).add("_id", false).get(),
            JSON.parse(props.getProperty(MongoConfigUtil.INPUT_FIELDS)));
    }
}