package com.mongodb.hadoop.hive.input;
import com.mongodb.BasicDBObject;
import com.mongodb.BasicDBObjectBuilder;
import com.mongodb.DBObject;
import com.mongodb.hadoop.hive.BSONSerDe;
import com.mongodb.hadoop.hive.HiveTest;
import com.mongodb.util.JSON;
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.mapred.JobConf;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import static org.junit.Assert.assertEquals;
public class HiveMongoInputFormatTest extends HiveTest {
private static HiveMongoInputFormat inputFormat;
private static IndexPredicateAnalyzer analyzer;
private static Map<String, String> colNameMapping;
@BeforeClass
public static void setUpClass() {
inputFormat = new HiveMongoInputFormat();
colNameMapping = new HashMap<String, String>() {{
put("i", "mongo_i");
put("j", "mongo_j");
put("id", "_id");
}};
analyzer = IndexPredicateAnalyzer.createAnalyzer(false);
for (String colName : colNameMapping.keySet()) {
analyzer.allowColumnName(colName);
}
}
private DBObject filterForExpr(
final ExprNodeGenericFuncDesc expr) {
List<IndexSearchCondition> conditions =
new LinkedList<IndexSearchCondition>();
analyzer.analyzePredicate(expr, conditions);
return inputFormat.getFilter(conditions, colNameMapping);
}
@Test
public void testTranslateEqualsOp() {
// WHERE i = 20
GenericUDFOPEqual equal = new GenericUDFOPEqual();
ExprNodeDesc[] children = {
new ExprNodeColumnDesc(new SimpleMockColumnInfo("i")),
new ExprNodeConstantDesc(20)
};
ExprNodeGenericFuncDesc expr = new ExprNodeGenericFuncDesc(
TypeInfoFactory.booleanTypeInfo,
equal,
Arrays.asList(children));
Assert.assertEquals(
new BasicDBObject("mongo_i", 20),
filterForExpr(expr));
}
@Test
public void testTranslateCompareOp() {
// WHERE i >= 20
GenericUDFOPEqualOrGreaterThan gte =
new GenericUDFOPEqualOrGreaterThan();
ExprNodeDesc[] children = {
new ExprNodeColumnDesc(new SimpleMockColumnInfo("i")),
new ExprNodeConstantDesc(20)
};
ExprNodeGenericFuncDesc expr = new ExprNodeGenericFuncDesc(
TypeInfoFactory.booleanTypeInfo,
gte,
Arrays.asList(children));
Assert.assertEquals(
new BasicDBObject("mongo_i", new BasicDBObject("$gte", 20)),
filterForExpr(expr));
}
@Test
public void testTranslateConjoinedQuery() {
// i < 50
GenericUDFOPLessThan lt = new GenericUDFOPLessThan();
ExprNodeDesc[] iLt50Children = {
new ExprNodeColumnDesc(new SimpleMockColumnInfo("i")),
new ExprNodeConstantDesc(50)
};
ExprNodeGenericFuncDesc iLt50 = new ExprNodeGenericFuncDesc(
TypeInfoFactory.booleanTypeInfo,
lt,
Arrays.asList(iLt50Children));
// j > 20
GenericUDFOPGreaterThan gt = new GenericUDFOPGreaterThan();
ExprNodeDesc[] jGt20Children = {
new ExprNodeColumnDesc(new SimpleMockColumnInfo("j")),
new ExprNodeConstantDesc(20)
};
ExprNodeGenericFuncDesc jGt20 = new ExprNodeGenericFuncDesc(
TypeInfoFactory.booleanTypeInfo,
gt,
Arrays.asList(jGt20Children));
// i < 50 AND j > 20
ExprNodeDesc[] andExprChildren = {iLt50, jGt20};
ExprNodeGenericFuncDesc expr = new ExprNodeGenericFuncDesc(
TypeInfoFactory.booleanTypeInfo,
new GenericUDFOPAnd(),
Arrays.asList(andExprChildren));
assertEquals(
// {"$and": [{"i": {"$lt": 50}}, {"j": {"$gt": 20}}]}
new BasicDBObjectBuilder()
.push("mongo_i").add("$lt", 50).pop()
.push("mongo_j").add("$gt", 20).pop().get(),
filterForExpr(expr));
}
@Test
public void testProjection() {
String selectedColumns = "i,j";
JobConf conf = new JobConf();
conf.set(
ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, selectedColumns);
conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
assertEquals(
new BasicDBObjectBuilder()
.add("i", 1).add("j", 1).add("_id", 0).get(),
inputFormat.getProjection(conf, null));
}
@Test
public void testProjectionWithColumnMapping() {
DBObject mapping = new BasicDBObjectBuilder()
.add("i", "mongo_i").add("j", "mongo_j").add("id", "_id").get();
String selectedColumns = "id,i";
JobConf conf = new JobConf();
conf.set(
ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, selectedColumns);
conf.set(BSONSerDe.MONGO_COLS, JSON.serialize(mapping));
conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
// _id field is implicitly mapped to id field in Hive.
assertEquals(
new BasicDBObjectBuilder().add("mongo_i", 1).add("_id", 1).get(),
inputFormat.getProjection(conf, colNameMapping));
}
}