/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.calcite.avatica.util.DateTimeUtils;
import org.apache.calcite.avatica.util.TimeUnit;
import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.CorrelationId;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexSubQuery;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlCollation;
import org.apache.calcite.sql.SqlIntervalQualifier;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.fun.SqlCastFunction;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.ConversionUtil;
import org.apache.calcite.util.NlsString;
import org.apache.hadoop.hive.common.type.Decimal128;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate;
import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableMap;
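/**
* Converts Hive {@link ExprNodeDesc} expression trees into Calcite {@link RexNode}
* expressions during CBO planning. Constant, column, field-access, generic-function
* and subquery descriptors are each handled by a dedicated convert() overload.
*/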
public class RexNodeConverter {
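// Bookkeeping for one input relation: its Calcite row type, the Hive column-name-to-position
// map, the Hive RowResolver, and the column offset of this input within the combined Calcite schema.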
private static class InputCtx {
private final RelDataType calciteInpDataType;
private final ImmutableMap<String, Integer> hiveNameToPosMap;
private final RowResolver hiveRR;
private final int offsetInCalciteSchema;
private InputCtx(RelDataType calciteInpDataType, ImmutableMap<String, Integer> hiveNameToPosMap,
RowResolver hiveRR, int offsetInCalciteSchema) {
this.calciteInpDataType = calciteInpDataType;
this.hiveNameToPosMap = hiveNameToPosMap;
this.hiveRR = hiveRR;
this.offsetInCalciteSchema = offsetInCalciteSchema;
}
}
private final RelOptCluster cluster;
private final ImmutableList<InputCtx> inputCtxs;
private final boolean flattenExpr;
//outerRR belongs to outer query and is required to resolve correlated references
private final RowResolver outerRR;
private final ImmutableMap<String, Integer> outerNameToPosMap;
private int correlatedId;
//Constructor used by HiveRexExecutorImpl
public RexNodeConverter(RelOptCluster cluster) {
this(cluster, new ArrayList<InputCtx>(), false);
}
//subqueries will need outer query's row resolver
public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType,
ImmutableMap<String, Integer> outerNameToPosMap,
ImmutableMap<String, Integer> nameToPosMap, RowResolver hiveRR, RowResolver outerRR, int offset, boolean flattenExpr, int correlatedId) {
this.cluster = cluster;
this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, hiveRR , offset));
this.flattenExpr = flattenExpr;
this.outerRR = outerRR;
this.outerNameToPosMap = outerNameToPosMap;
this.correlatedId = correlatedId;
}
public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType,
ImmutableMap<String, Integer> nameToPosMap, int offset, boolean flattenExpr) {
this.cluster = cluster;
this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, null, offset));
this.flattenExpr = flattenExpr;
this.outerRR = null;
this.outerNameToPosMap = null;
}
public RexNodeConverter(RelOptCluster cluster, List<InputCtx> inpCtxLst, boolean flattenExpr) {
this.cluster = cluster;
this.inputCtxs = ImmutableList.<InputCtx> builder().addAll(inpCtxLst).build();
this.flattenExpr = flattenExpr;
this.outerRR = null;
this.outerNameToPosMap = null;
}
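// Dispatches on the concrete ExprNodeDesc subtype; each overload below handles one kind of node.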
public RexNode convert(ExprNodeDesc expr) throws SemanticException {
if (expr instanceof ExprNodeGenericFuncDesc) {
return convert((ExprNodeGenericFuncDesc) expr);
} else if (expr instanceof ExprNodeConstantDesc) {
return convert((ExprNodeConstantDesc) expr);
} else if (expr instanceof ExprNodeColumnDesc) {
return convert((ExprNodeColumnDesc) expr);
} else if (expr instanceof ExprNodeFieldDesc) {
return convert((ExprNodeFieldDesc) expr);
} else if(expr instanceof ExprNodeSubQueryDesc) {
return convert((ExprNodeSubQueryDesc) expr);
} else {
throw new RuntimeException("Unsupported Expression");
}
// TODO: handle ExprNodeColumnListDesc
}
private RexNode convert(final ExprNodeSubQueryDesc subQueryDesc) throws SemanticException {
if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.IN ) {
/*
* Check.5.h :: For In and Not In the SubQuery must implicitly or
* explicitly only contain one select item.
*/
if(subQueryDesc.getRexSubQuery().getRowType().getFieldCount() > 1) {
throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
"SubQuery can contain only 1 item in Select List."));
}
//create RexNode for LHS
RexNode rexNodeLhs = convert(subQueryDesc.getSubQueryLhs());
//create RexSubQuery node
RexNode rexSubQuery = RexSubQuery.in(subQueryDesc.getRexSubQuery(),
ImmutableList.<RexNode>of(rexNodeLhs) );
return rexSubQuery;
}
else if( subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.EXISTS) {
RexNode subQueryNode = RexSubQuery.exists(subQueryDesc.getRexSubQuery());
return subQueryNode;
}
else if( subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.SCALAR){
if(subQueryDesc.getRexSubQuery().getRowType().getFieldCount() > 1) {
throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
"SubQuery can contain only 1 item in Select List."));
}
//create RexSubQuery node
RexNode rexSubQuery = RexSubQuery.scalar(subQueryDesc.getRexSubQuery());
return rexSubQuery;
}
else {
throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
"Invalid subquery: " + subQueryDesc.getType()));
}
}
private RexNode convert(final ExprNodeFieldDesc fieldDesc) throws SemanticException {
RexNode rexNode = convert(fieldDesc.getDesc());
if (rexNode.getType().isStruct()) {
// regular case of accessing nested field in a column
return cluster.getRexBuilder().makeFieldAccess(rexNode, fieldDesc.getFieldName(), true);
} else {
// This may happen for schema-less tables, where columns are dynamically
// supplied by serdes.
throw new CalciteSemanticException("Unexpected rexnode : "
+ rexNode.getClass().getCanonicalName(), UnsupportedFeature.Schema_less_table);
}
}
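// Converts a generic function call: derive a common comparison type where needed, cast the
// children accordingly, detect explicit casts, map the UDF to a Calcite operator, rewrite
// CASE/EXTRACT/FLOOR argument lists into Calcite's shape, and optionally flatten nested calls.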
private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException {
ExprNodeDesc tmpExprNode;
RexNode tmpRN;
List<RexNode> childRexNodeLst = new ArrayList<RexNode>();
Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType> builder();
// TODO: 1) Expand to other functions as needed 2) What about types other than primitive.
TypeInfo tgtDT = null;
GenericUDF tgtUdf = func.getGenericUDF();
boolean isNumeric = (tgtUdf instanceof GenericUDFBaseBinary
&& func.getTypeInfo().getCategory() == Category.PRIMITIVE
&& (PrimitiveGrouping.NUMERIC_GROUP == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory())));
boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;
boolean isWhenCase = tgtUdf instanceof GenericUDFWhen || tgtUdf instanceof GenericUDFCase;
boolean isTransformableTimeStamp = func.getGenericUDF() instanceof GenericUDFUnixTimeStamp &&
func.getChildren().size() != 0;
boolean isBetween = !isNumeric && tgtUdf instanceof GenericUDFBetween;
boolean isIN = !isNumeric && tgtUdf instanceof GenericUDFIn;
if (isNumeric) {
tgtDT = func.getTypeInfo();
assert func.getChildren().size() == 2;
// TODO: checking 2 children is useless, compare already does that.
} else if (isCompare && (func.getChildren().size() == 2)) {
tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0)
.getTypeInfo(), func.getChildren().get(1).getTypeInfo());
} else if (isWhenCase) {
// If it is a CASE or WHEN, we need to check that children do not contain stateful functions
// as they are not allowed
if (checkForStatefulFunctions(func.getChildren())) {
throw new SemanticException("Stateful expressions cannot be used inside of CASE");
}
} else if (isTransformableTimeStamp) {
// unix_timestamp(args) -> to_unix_timestamp(args)
func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), func.getChildren());
} else if (isBetween) {
assert func.getChildren().size() == 4;
// Skip the first child: it is not an operand but the invert flag (true for NOT BETWEEN)
// The target type needs to account for all three operands
tgtDT = FunctionRegistry.getCommonClassForComparison(
func.getChildren().get(1).getTypeInfo(),
FunctionRegistry.getCommonClassForComparison(
func.getChildren().get(2).getTypeInfo(),
func.getChildren().get(3).getTypeInfo()));
} else if (isIN) {
// We're only considering the first element of the IN list for the type
assert func.getChildren().size() > 1;
tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0)
.getTypeInfo(), func.getChildren().get(1).getTypeInfo());
}
for (int i =0; i < func.getChildren().size(); ++i) {
ExprNodeDesc childExpr = func.getChildren().get(i);
tmpExprNode = childExpr;
if (tgtDT != null
&& TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
if (isCompare || isBetween || isIN) {
// For comparisons, convert the children that require it
// For BETWEEN, skip the first child (the invert flag)
if (!isBetween || i > 0) {
tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
}
} else if (isNumeric) {
// For numeric ops, apply the minimum necessary cast; casting straight to the
// expression's result type can produce incorrect results.
PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(childExpr, tgtDT);
tmpExprNode = ParseUtils.createConversionCast(childExpr, minArgType);
} else {
throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
}
}
argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory()));
tmpRN = convert(tmpExprNode);
childRexNodeLst.add(tmpRN);
}
// See if this is an explicit cast.
RexNode expr = null;
RelDataType retType = null;
expr = handleExplicitCast(func, childRexNodeLst);
if (expr == null) {
// This is not a cast; process the function.
retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory());
SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(func.getFuncText(),
func.getGenericUDF(), argTypeBldr.build(), retType);
if (calciteOp.getKind() == SqlKind.CASE) {
// If it is a case operator, we need to rewrite it
childRexNodeLst = rewriteCaseChildren(func, childRexNodeLst);
} else if (HiveExtractDate.ALL_FUNCTIONS.contains(calciteOp)) {
// If it is a extract operator, we need to rewrite it
childRexNodeLst = rewriteExtractDateChildren(calciteOp, childRexNodeLst);
} else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) {
// If it is a floor <date> operator, we need to rewrite it
childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst);
}
expr = cluster.getRexBuilder().makeCall(calciteOp, childRexNodeLst);
} else {
retType = expr.getType();
}
// TODO: Calcite's cast function has a bug where inferring the type of a cast
// throws an exception, so casts are excluded from flattening here.
if (flattenExpr && (expr instanceof RexCall)
&& !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
RexCall call = (RexCall) expr;
expr = cluster.getRexBuilder().makeCall(retType, call.getOperator(),
RexUtil.flatten(call.getOperands(), call.getOperator()));
}
return expr;
}
private boolean castExprUsingUDFBridge(GenericUDF gUDF) {
boolean castExpr = false;
if (gUDF != null && gUDF instanceof GenericUDFBridge) {
String udfClassName = ((GenericUDFBridge) gUDF).getUdfClassName();
if (udfClassName != null) {
int sp = udfClassName.lastIndexOf('.');
// TODO: add method to UDFBridge to say if it is a cast func
if (sp >= 0 && (sp + 1) < udfClassName.length()) {
udfClassName = udfClassName.substring(sp + 1);
if (udfClassName.equals("UDFToBoolean") || udfClassName.equals("UDFToByte")
|| udfClassName.equals("UDFToDouble") || udfClassName.equals("UDFToInteger")
|| udfClassName.equals("UDFToLong") || udfClassName.equals("UDFToShort")
|| udfClassName.equals("UDFToFloat") || udfClassName.equals("UDFToString"))
castExpr = true;
}
}
}
return castExpr;
}
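// Returns a Calcite CAST over the single child if the UDF is one of Hive's cast functions,
// otherwise null so the caller treats it as a regular function call.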
private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List<RexNode> childRexNodeLst)
throws CalciteSemanticException {
RexNode castExpr = null;
if (childRexNodeLst != null && childRexNodeLst.size() == 1) {
GenericUDF udf = func.getGenericUDF();
if ((udf instanceof GenericUDFToChar) || (udf instanceof GenericUDFToVarchar)
|| (udf instanceof GenericUDFToDecimal) || (udf instanceof GenericUDFToDate)
// Calcite cannot specify the scale for timestamp; as a result, the
// millisecond part would be lost
|| (udf instanceof GenericUDFTimestamp)
|| (udf instanceof GenericUDFToBinary) || castExprUsingUDFBridge(udf)) {
castExpr = cluster.getRexBuilder().makeAbstractCast(
TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()),
childRexNodeLst.get(0));
}
}
return castExpr;
}
/*
* Hive syntax allows CASE expressions to be written in two ways:
* - CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END (translated into the
* "case" function; the ELSE clause is optional)
* - CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END (translated into the
* "when" function; the ELSE clause is optional)
* However, Calcite only has the equivalent of the "when" Hive function. Thus,
* we need to transform the "case" function into "when". Further, the ELSE
* clause is not optional in Calcite.
*
* Example. Consider the following statement:
* CASE x + y WHEN 1 THEN 'fee' WHEN 2 THEN 'fie' END
* It will be transformed into:
* CASE WHEN =(x + y, 1) THEN 'fee' WHEN =(x + y, 2) THEN 'fie' ELSE null END
*/
private List<RexNode> rewriteCaseChildren(ExprNodeGenericFuncDesc func, List<RexNode> childRexNodeLst)
throws SemanticException {
List<RexNode> newChildRexNodeLst = new ArrayList<RexNode>();
if (FunctionRegistry.getNormalizedFunctionName(func.getFuncText()).equals("case")) {
RexNode firstPred = childRexNodeLst.get(0);
int length = childRexNodeLst.size() % 2 == 1 ?
childRexNodeLst.size() : childRexNodeLst.size() - 1;
for (int i = 1; i < length; i++) {
if (i % 2 == 1) {
// Odd positions hold WHEN comparison values; rewrite them as =(firstPred, value)
newChildRexNodeLst.add(
cluster.getRexBuilder().makeCall(
SqlStdOperatorTable.EQUALS, firstPred, childRexNodeLst.get(i)));
} else {
newChildRexNodeLst.add(childRexNodeLst.get(i));
}
}
// The else clause
if (length != childRexNodeLst.size()) {
newChildRexNodeLst.add(childRexNodeLst.get(childRexNodeLst.size()-1));
}
} else {
newChildRexNodeLst.addAll(childRexNodeLst);
}
// Calcite always needs the else clause to be defined explicitly
if (newChildRexNodeLst.size() % 2 == 0) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeNullLiteral(
newChildRexNodeLst.get(newChildRexNodeLst.size()-1).getType().getSqlTypeName()));
}
return newChildRexNodeLst;
}
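// Calcite's EXTRACT expects a TimeUnitRange flag followed by the operand; prepend the flag
// corresponding to the Hive extract operator.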
private List<RexNode> rewriteExtractDateChildren(SqlOperator op, List<RexNode> childRexNodeLst)
throws SemanticException {
List<RexNode> newChildRexNodeLst = new ArrayList<RexNode>();
if (op == HiveExtractDate.YEAR) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.YEAR));
} else if (op == HiveExtractDate.QUARTER) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.QUARTER));
} else if (op == HiveExtractDate.MONTH) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MONTH));
} else if (op == HiveExtractDate.WEEK) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.WEEK));
} else if (op == HiveExtractDate.DAY) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.DAY));
} else if (op == HiveExtractDate.HOUR) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.HOUR));
} else if (op == HiveExtractDate.MINUTE) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MINUTE));
} else if (op == HiveExtractDate.SECOND) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.SECOND));
}
assert childRexNodeLst.size() == 1;
newChildRexNodeLst.add(childRexNodeLst.get(0));
return newChildRexNodeLst;
}
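// Calcite's FLOOR(<datetime> TO <unit>) expects the operand followed by a TimeUnitRange flag;
// append the flag corresponding to the Hive floor operator.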
private List<RexNode> rewriteFloorDateChildren(SqlOperator op, List<RexNode> childRexNodeLst)
throws SemanticException {
List<RexNode> newChildRexNodeLst = new ArrayList<RexNode>();
assert childRexNodeLst.size() == 1;
newChildRexNodeLst.add(childRexNodeLst.get(0));
if (op == HiveFloorDate.YEAR) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.YEAR));
} else if (op == HiveFloorDate.QUARTER) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.QUARTER));
} else if (op == HiveFloorDate.MONTH) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MONTH));
} else if (op == HiveFloorDate.WEEK) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.WEEK));
} else if (op == HiveFloorDate.DAY) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.DAY));
} else if (op == HiveFloorDate.HOUR) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.HOUR));
} else if (op == HiveFloorDate.MINUTE) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MINUTE));
} else if (op == HiveFloorDate.SECOND) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.SECOND));
}
return newChildRexNodeLst;
}
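// Recursively checks whether any function in the given expression trees is stateful.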
private static boolean checkForStatefulFunctions(List<ExprNodeDesc> list) {
for (ExprNodeDesc node : list) {
if (node instanceof ExprNodeGenericFuncDesc) {
GenericUDF nodeUDF = ((ExprNodeGenericFuncDesc) node).getGenericUDF();
// Stateful?
if (FunctionRegistry.isStateful(nodeUDF)) {
return true;
}
if (node.getChildren() != null && !node.getChildren().isEmpty() &&
checkForStatefulFunctions(node.getChildren())) {
return true;
}
}
}
return false;
}
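// Finds the single input context that can resolve the given column; returns null when no input
// matches (e.g., a correlated reference to the outer query) and fails if the match is ambiguous.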
private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException {
InputCtx ctxLookingFor = null;
if (inputCtxs.size() == 1 && inputCtxs.get(0).hiveRR == null) {
ctxLookingFor = inputCtxs.get(0);
} else {
String tableAlias = col.getTabAlias();
String colAlias = col.getColumn();
int noInp = 0;
for (InputCtx ic : inputCtxs) {
if (tableAlias == null || ic.hiveRR.hasTableAlias(tableAlias)) {
if (ic.hiveRR.getPosition(colAlias) >= 0) {
ctxLookingFor = ic;
noInp++;
}
}
}
if (noInp > 1) {
throw new RuntimeException("Ambiguous column mapping");
}
}
return ctxLookingFor;
}
protected RexNode convert(ExprNodeColumnDesc col) throws SemanticException {
// If this column is correlated we need to make a RexCorrelVariable (with id and type);
// the id and type should be retrieved from outerRR
InputCtx ic = getInputCtx(col);
if(ic == null) {
// we have correlated column, build data type from outer rr
RelDataType rowType = TypeConverter.getType(cluster, this.outerRR, null);
if (this.outerNameToPosMap.get(col.getColumn()) == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN_NAME.getMsg(col.getColumn()));
}
int pos = this.outerNameToPosMap.get(col.getColumn());
CorrelationId colCorr = new CorrelationId(this.correlatedId);
RexNode corExpr = cluster.getRexBuilder().makeCorrel(rowType, colCorr);
return cluster.getRexBuilder().makeFieldAccess(corExpr, pos);
}
int pos = ic.hiveNameToPosMap.get(col.getColumn());
return cluster.getRexBuilder().makeInputRef(
ic.calciteInpDataType.getFieldList().get(pos).getType(), pos + ic.offsetInCalciteSchema);
}
private static final BigInteger MIN_LONG_BI = BigInteger.valueOf(Long.MIN_VALUE),
MAX_LONG_BI = BigInteger.valueOf(Long.MAX_VALUE);
private static NlsString asUnicodeString(String text) {
return new NlsString(text, ConversionUtil.NATIVE_UTF16_CHARSET_NAME, SqlCollation.IMPLICIT);
}
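// Translates a Hive constant into a Calcite literal, switching on the primitive category of the
// constant's type; a null value is treated as VOID.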
protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticException {
RexBuilder rexBuilder = cluster.getRexBuilder();
RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo();
RelDataType calciteDataType = TypeConverter.convert(hiveType, dtFactory);
PrimitiveCategory hiveTypeCategory = hiveType.getPrimitiveCategory();
ConstantObjectInspector coi = literal.getWritableObjectInspector();
Object value = ObjectInspectorUtils.copyToStandardJavaObject(coi.getWritableConstantValue(),
coi);
RexNode calciteLiteral = null;
// If value is null, the type should also be VOID.
if (value == null) {
hiveTypeCategory = PrimitiveCategory.VOID;
}
// TODO: Verify if we need to use ConstantObjectInspector to unwrap data
switch (hiveTypeCategory) {
case BOOLEAN:
calciteLiteral = rexBuilder.makeLiteral(((Boolean) value).booleanValue());
break;
case BYTE:
calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Byte) value), calciteDataType);
break;
case SHORT:
calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Short) value), calciteDataType);
break;
case INT:
calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Integer) value));
break;
case LONG:
calciteLiteral = rexBuilder.makeBigintLiteral(new BigDecimal((Long) value));
break;
// TODO: is Decimal an exact numeric or approximate numeric?
case DECIMAL:
if (value instanceof HiveDecimal) {
value = ((HiveDecimal) value).bigDecimalValue();
} else if (value instanceof Decimal128) {
value = ((Decimal128) value).toBigDecimal();
}
if (value == null) {
// We have found an invalid decimal value while enforcing precision and scale.
// Ideally, we would replace it with null here, which is what Hive does. However,
// we would need to plumb this through somehow, because otherwise having a different
// expression type in the AST causes plan generation to fail after CBO, probably due
// to some residual state in SA/QB.
// For now, we will not run CBO in the presence of invalid decimal literals.
throw new CalciteSemanticException("Expression " + literal.getExprString()
+ " is not a valid decimal", UnsupportedFeature.Invalid_decimal);
// TODO: return createNullLiteral(literal);
}
BigDecimal bd = (BigDecimal) value;
BigInteger unscaled = bd.unscaledValue();
if (unscaled.compareTo(MIN_LONG_BI) >= 0 && unscaled.compareTo(MAX_LONG_BI) <= 0) {
calciteLiteral = rexBuilder.makeExactLiteral(bd);
} else {
// CBO doesn't support unlimited precision decimals. In practice, this will work...
// An alternative would be to throw CboSemanticException and fall back to no CBO.
RelDataType relType = cluster.getTypeFactory().createSqlType(SqlTypeName.DECIMAL,
unscaled.toString().length(), bd.scale());
calciteLiteral = rexBuilder.makeExactLiteral(bd, relType);
}
break;
case FLOAT:
calciteLiteral = rexBuilder.makeApproxLiteral(
new BigDecimal(Float.toString((Float)value)), calciteDataType);
break;
case DOUBLE:
// TODO: The best solution is to support NaN in expression reduction.
if (Double.isNaN((Double) value)) {
throw new CalciteSemanticException("NaN", UnsupportedFeature.Invalid_decimal);
}
calciteLiteral = rexBuilder.makeApproxLiteral(
new BigDecimal(Double.toString((Double)value)), calciteDataType);
break;
case CHAR:
if (value instanceof HiveChar) {
value = ((HiveChar) value).getValue();
}
calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
break;
case VARCHAR:
if (value instanceof HiveVarchar) {
value = ((HiveVarchar) value).getValue();
}
calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
break;
case STRING:
calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
break;
case DATE:
// The Calcite literal is in GMT, this will be converted back to JVM locale
// by ASTBuilder.literal during Calcite->Hive plan conversion
final Calendar cal = Calendar.getInstance(DateTimeUtils.GMT_ZONE, Locale.getDefault());
cal.setTime((Date) value);
calciteLiteral = rexBuilder.makeDateLiteral(cal);
break;
case TIMESTAMP:
// The Calcite literal is in GMT, this will be converted back to JVM locale
// by ASTBuilder.literal during Calcite->Hive plan conversion
final Calendar calt = Calendar.getInstance(DateTimeUtils.GMT_ZONE, Locale.getDefault());
if (value instanceof Calendar) {
final Calendar c = (Calendar) value;
long timeMs = c.getTimeInMillis();
calt.setTimeInMillis(timeMs);
} else {
final Timestamp ts = (Timestamp) value;
// CALCITE-1690
// Calcite cannot represent TIMESTAMP literals with precision higher than 3
if (ts.getNanos() % 1000000 != 0) {
throw new CalciteSemanticException(
"High Precision Timestamp: " + String.valueOf(ts),
UnsupportedFeature.HighPrecissionTimestamp);
}
calt.setTimeInMillis(ts.getTime());
}
// Must call makeLiteral, not makeTimestampLiteral
// to have the RexBuilder.roundTime logic kick in
calciteLiteral = rexBuilder.makeLiteral(
calt,
rexBuilder.getTypeFactory().createSqlType(
SqlTypeName.TIMESTAMP,
rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP)),
false);
break;
case INTERVAL_YEAR_MONTH:
// Calcite represents a year-month interval literal as the total number of months (BigDecimal)
BigDecimal totalMonths = BigDecimal.valueOf(((HiveIntervalYearMonth) value).getTotalMonths());
calciteLiteral = rexBuilder.makeIntervalLiteral(totalMonths,
new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1,1)));
break;
case INTERVAL_DAY_TIME:
// Calcite represents a day-time interval literal as a millisecond count (BigDecimal)
// Seconds converted to millis
BigDecimal secsValueBd = BigDecimal
.valueOf(((HiveIntervalDayTime) value).getTotalSeconds() * 1000);
// Nanos converted to millis
BigDecimal nanosValueBd = BigDecimal.valueOf(((HiveIntervalDayTime)
value).getNanos(), 6);
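// Illustrative example: an interval of 1 day 2:03:04.005 has getTotalSeconds() = 93784
// (i.e. 93784000 ms) and getNanos() = 5000000 (i.e. 5 ms), so the literal value is 93784005 ms.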
calciteLiteral =
rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd),
new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new
SqlParserPos(1, 1)));
break;
case VOID:
calciteLiteral = rexBuilder.makeLiteral(null,
dtFactory.createSqlType(SqlTypeName.NULL), true);
break;
case BINARY:
case UNKNOWN:
default:
throw new RuntimeException("Unsupported literal type: " + hiveTypeCategory);
}
return calciteLiteral;
}
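// Builds one InputCtx per join input, with a running column offset into the combined Calcite
// schema, and converts the join condition expression against those inputs.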
public static RexNode convert(RelOptCluster cluster, ExprNodeDesc joinCondnExprNode,
List<RelNode> inputRels, LinkedHashMap<RelNode, RowResolver> relToHiveRR,
Map<RelNode, ImmutableMap<String, Integer>> relToHiveColNameCalcitePosMap, boolean flattenExpr)
throws SemanticException {
List<InputCtx> inputCtxLst = new ArrayList<InputCtx>();
int offSet = 0;
for (RelNode r : inputRels) {
inputCtxLst.add(new InputCtx(r.getRowType(), relToHiveColNameCalcitePosMap.get(r), relToHiveRR
.get(r), offSet));
offSet += r.getRowType().getFieldCount();
}
return (new RexNodeConverter(cluster, inputCtxLst, flattenExpr)).convert(joinCondnExprNode);
}
}