/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.physical;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import junit.framework.Assert;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.*;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Mode;
import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncAbsLongToLong;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.*;
import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
import org.apache.hadoop.hive.ql.udf.generic.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.junit.Before;
import org.junit.Test;
public class TestVectorizer {
// Vectorization context handed to the Vectorizer by the tests; reassigned by setUp().
static VectorizationContext vContext = null;

/**
 * Rebuilds {@code vContext} before each test over four columns named
 * col0, col1, col2 and col3.
 */
@Before
public void setUp() {
  List<String> columnNames = new ArrayList<String>();
  for (int i = 0; i < 4; i++) {
    columnNames.add("col" + i);
  }
  // Generate vectorized expression
  vContext = new VectorizationContext("name", columnNames);
}
/**
 * Stub {@link GenericUDF} with no real behavior: {@code initialize} and
 * {@code evaluate} both return null and the display string is always "fake".
 * testValidateNestedExpressions uses it as an expression that validation must reject.
 */
@Description(name = "fake", value = "FAKE")
static class FakeGenericUDF extends GenericUDF {

  /** Returns the fixed display string regardless of the children passed in. */
  @Override
  public String getDisplayString(String[] children) {
    return "fake";
  }

  /** Performs no initialization and reports no output inspector. */
  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    return null;
  }

  /** Ignores its arguments and produces no value. */
  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    return null;
  }
}
/**
 * Builds a HASH-mode group-by that computes sum(abs(col1)) keyed by col2, then checks
 * that the vectorized operator aggregates with {@link VectorUDAFSumLong} and feeds it
 * from a {@link FuncAbsLongToLong} input expression.
 */
@Test
public void testAggregateOnUDF() throws HiveException {
  // abs(col1): the argument of the aggregate.
  ExprNodeColumnDesc absInputCol = new ExprNodeColumnDesc(Integer.class, "col1", "T", false);
  List<ExprNodeDesc> absArgs = new ArrayList<ExprNodeDesc>();
  absArgs.add(absInputCol);
  ExprNodeGenericFuncDesc absExpr =
      new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFAbs(), absArgs);

  // sum(abs(col1)) evaluated in PARTIAL1 mode.
  ArrayList<ExprNodeDesc> sumArgs = new ArrayList<ExprNodeDesc>();
  sumArgs.add(absExpr);
  List<ObjectInspector> sumArgInspectors = new ArrayList<ObjectInspector>();
  sumArgInspectors.add(absExpr.getWritableObjectInspector());
  GenericUDAFEvaluator sumEvaluator =
      FunctionRegistry.getGenericUDAFEvaluator("sum", sumArgInspectors, false, false);
  AggregationDesc sumDesc = new AggregationDesc(
      "sum", sumEvaluator, sumArgs, false, GenericUDAFEvaluator.Mode.PARTIAL1);

  // Group-by descriptor: one aggregate, one key (col2), one output column.
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  aggregations.add(sumDesc);
  ArrayList<ExprNodeDesc> groupingKeys = new ArrayList<ExprNodeDesc>();
  groupingKeys.add(new ExprNodeColumnDesc(Integer.class, "col2", "T", false));
  ArrayList<String> outputNames = new ArrayList<String>();
  outputNames.add("_col0");

  GroupByDesc groupByDesc = new GroupByDesc();
  groupByDesc.setVectorDesc(new VectorGroupByDesc());
  groupByDesc.setOutputColumnNames(outputNames);
  groupByDesc.setAggregators(aggregations);
  groupByDesc.setKeys(groupingKeys);

  // The operator is created first; the mode is set on the descriptor afterwards.
  Operator<? extends OperatorDesc> groupByOp =
      OperatorFactory.get(new CompilationOpContext(), groupByDesc);
  groupByDesc.setMode(GroupByDesc.Mode.HASH);

  Vectorizer vectorizer = new Vectorizer();
  vectorizer.testSetCurrentBaseWork(new MapWork());
  Assert.assertTrue(vectorizer.validateMapWorkOperator(groupByOp, null, false));

  VectorGroupByOperator vectorizedOp =
      (VectorGroupByOperator) vectorizer.vectorizeOperator(groupByOp, vContext, false, null);
  Object firstAggregator = vectorizedOp.getAggregators()[0];
  Assert.assertEquals(VectorUDAFSumLong.class, firstAggregator.getClass());
  VectorUDAFSumLong sumAggregator = (VectorUDAFSumLong) firstAggregator;
  Assert.assertEquals(FuncAbsLongToLong.class, sumAggregator.getInputExpression().getClass());
}
/**
 * Builds the expression {@code (col1 > 10) AND fake()}, where {@code fake()} is a
 * {@link FakeGenericUDF} call with no children, and asserts that expression validation
 * fails for it in both FILTER and PROJECTION mode.
 */
@Test
public void testValidateNestedExpressions() {
  // Left operand: col1 > 10.
  ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
  // Integer.valueOf replaces the deprecated Integer(int) boxing constructor.
  ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(Integer.valueOf(10));
  ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
  greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  greaterExprDesc.setGenericUDF(new GenericUDFOPGreaterThan());
  List<ExprNodeDesc> greaterChildren = new ArrayList<ExprNodeDesc>(2);
  greaterChildren.add(col1Expr);
  greaterChildren.add(constDesc);
  greaterExprDesc.setChildren(greaterChildren);

  // Right operand: a call to the stub UDF, typed as boolean.
  ExprNodeGenericFuncDesc nonSupportedExpr = new ExprNodeGenericFuncDesc();
  nonSupportedExpr.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  nonSupportedExpr.setGenericUDF(new FakeGenericUDF());

  // AND of the two operands.
  ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc();
  andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  andExprDesc.setGenericUDF(new GenericUDFOPAnd());
  List<ExprNodeDesc> andChildren = new ArrayList<ExprNodeDesc>(2);
  andChildren.add(greaterExprDesc);
  andChildren.add(nonSupportedExpr);
  andExprDesc.setChildren(andChildren);

  Vectorizer v = new Vectorizer();
  v.testSetCurrentBaseWork(new MapWork());
  Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, "test", VectorExpressionDescriptor.Mode.FILTER));
  Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, "test", VectorExpressionDescriptor.Mode.PROJECTION));
}
/**
 * Populates a join descriptor for the SMB and Map join tests: big table at position 0,
 * key/value expressions for tags 0 and 1, a two-entry tag order, and a filter list for
 * tag 0.
 *
 * @param map the join operator under test; this helper does not read or modify it
 * @param mjdesc the descriptor to populate
 */
private void prepareAbstractMapJoin(AbstractMapJoinOperator<? extends MapJoinDesc> map, MapJoinDesc mjdesc) {
  mjdesc.setPosBigTable(0);

  // Tag 0 is keyed on T.col1, tag 1 on T1.col2.
  List<ExprNodeDesc> expr = new ArrayList<ExprNodeDesc>();
  expr.add(new ExprNodeColumnDesc(Integer.class, "col1", "T", false));
  List<ExprNodeDesc> smallTableExpr = new ArrayList<ExprNodeDesc>();
  smallTableExpr.add(new ExprNodeColumnDesc(Integer.class, "col2", "T1", false));
  Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>();
  keyMap.put((byte) 0, expr);
  keyMap.put((byte) 1, smallTableExpr);

  // The same map serves as both the join keys and the value expressions.
  mjdesc.setKeys(keyMap);
  mjdesc.setExprs(keyMap);

  Byte[] order = new Byte[] {(byte) 0, (byte) 1};
  mjdesc.setTagOrder(order);

  // Set filter expression
  // NOTE(review): the filter registered for tag 0 is the tag-0 key expression list.
  // An equality filter (T1.col2 = T2.col3) used to be constructed here but was never
  // registered, so that dead construction was dropped. Confirm whether a real filter
  // predicate was intended before changing what is validated.
  Map<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
  filterMap.put((byte) 0, expr);
  mjdesc.setFilters(filterMap);
}
/**
 * Asserts that validateMapWorkOperator accepts a Map join operator whose descriptor
 * was populated by prepareAbstractMapJoin.
 */
@Test
public void testValidateMapJoinOperator() {
  MapJoinOperator joinOp = new MapJoinOperator(new CompilationOpContext());
  MapJoinDesc joinDesc = new MapJoinDesc();
  prepareAbstractMapJoin(joinOp, joinDesc);
  joinOp.setConf(joinDesc);

  Vectorizer validator = new Vectorizer();
  validator.testSetCurrentBaseWork(new MapWork());
  boolean vectorizable = validator.validateMapWorkOperator(joinOp, null, false);
  Assert.assertTrue(vectorizable);
}
/**
 * Asserts that validateMapWorkOperator accepts an SMB join operator whose descriptor
 * was populated by prepareAbstractMapJoin.
 */
@Test
public void testValidateSMBJoinOperator() {
  SMBMapJoinOperator smbJoinOp = new SMBMapJoinOperator(new CompilationOpContext());
  SMBJoinDesc smbJoinDesc = new SMBJoinDesc();
  prepareAbstractMapJoin(smbJoinOp, smbJoinDesc);
  smbJoinOp.setConf(smbJoinDesc);

  Vectorizer validator = new Vectorizer();
  validator.testSetCurrentBaseWork(new MapWork());
  boolean vectorizable = validator.validateMapWorkOperator(smbJoinOp, null, false);
  Assert.assertTrue(vectorizable);
}
/**
 * Asserts that a string-typed dynamic value expression passes expression validation
 * in FILTER mode and then in PROJECTION mode.
 */
@Test
public void testExprNodeDynamicValue() {
  DynamicValue dynamicValue = new DynamicValue("id1", TypeInfoFactory.stringTypeInfo);
  ExprNodeDesc dynamicExpr = new ExprNodeDynamicValueDesc(dynamicValue);
  Vectorizer vectorizer = new Vectorizer();
  for (Mode mode : new Mode[] {Mode.FILTER, Mode.PROJECTION}) {
    Assert.assertTrue(vectorizer.validateExprNodeDesc(dynamicExpr, "Test", mode));
  }
}
/**
 * Builds a BETWEEN expression over a string column whose lower and upper bounds are
 * dynamic values, and asserts that it passes expression validation in FILTER mode.
 */
@Test
public void testExprNodeBetweenWithDynamicValue() {
  // Children are added in the order: boolean constant FALSE, column, min, max.
  ExprNodeDesc notBetween = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.FALSE);
  ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(String.class, "col1", "table", false);
  ExprNodeDesc minExpr = new ExprNodeDynamicValueDesc(new DynamicValue("id1", TypeInfoFactory.stringTypeInfo));
  ExprNodeDesc maxExpr = new ExprNodeDynamicValueDesc(new DynamicValue("id2", TypeInfoFactory.stringTypeInfo));

  ExprNodeGenericFuncDesc betweenExpr = new ExprNodeGenericFuncDesc();
  GenericUDF betweenUdf = new GenericUDFBetween();
  betweenExpr.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  betweenExpr.setGenericUDF(betweenUdf);

  // Capacity hint matches the four children actually added.
  List<ExprNodeDesc> betweenChildren = new ArrayList<ExprNodeDesc>(4);
  betweenChildren.add(notBetween);
  betweenChildren.add(colExpr);
  betweenChildren.add(minExpr);
  betweenChildren.add(maxExpr);
  betweenExpr.setChildren(betweenChildren);

  Vectorizer v = new Vectorizer();
  v.testSetCurrentBaseWork(new MapWork());
  boolean valid = v.validateExprNodeDesc(betweenExpr, "Test", Mode.FILTER);
  Assert.assertTrue(valid);
}
}