/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;
import java.math.BigDecimal;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import org.apache.calcite.rel.RelNode;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc;
import org.apache.hadoop.hive.ql.udf.SettableUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFNvl;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.NullWritable;
import org.apache.hive.common.util.DateUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
/**
* The Factory for creating typecheck processors. The typecheck processors are
 * used to process the syntax trees for expressions and convert them into
 * expression node descriptor (ExprNodeDesc) trees. They also introduce the correct conversion
* functions to do proper implicit conversion.
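 *
 * <p>A minimal usage sketch (assuming the caller has already parsed an
 * expression into an {@code ASTNode} named {@code exprAst} and populated a
 * {@code TypeCheckCtx} named {@code tcCtx}; both names are illustrative):
 *
 * <pre>{@code
 * Map<ASTNode, ExprNodeDesc> nodeToDesc =
 *     TypeCheckProcFactory.genExprNode(exprAst, tcCtx);
 * ExprNodeDesc rootDesc = nodeToDesc.get(exprAst);
 * }</pre>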
*/
public class TypeCheckProcFactory {
protected static final Logger LOG = LoggerFactory.getLogger(TypeCheckProcFactory.class
.getName());
protected TypeCheckProcFactory() {
// prevent instantiation
}
/**
 * Function to do group-by subexpression elimination. This is called by all
 * the processors initially. As an example, consider the query
 * "select a+b, count(1) from T group by a+b". Here a+b is already
 * precomputed in the group-by operator's key, so we substitute a+b in the
 * select list with the internal column name of the a+b expression that
 * appears in the input row resolver.
*
* @param nd
* The node that is being inspected.
* @param procCtx
* The processor context.
*
 * @return the ExprNodeColumnDesc for the precomputed expression, or null if none.
*/
public static ExprNodeDesc processGByExpr(Node nd, Object procCtx)
throws SemanticException {
    // We recursively create the exprNodeDesc. Base cases: when we encounter
    // a column ref, we convert that into an exprNodeColumnDesc; when we
    // encounter a constant, we convert that into an exprNodeConstantDesc.
    // For others we just build the exprNodeFuncDesc with recursively built
    // children.
    ASTNode expr = (ASTNode) nd;
    TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
    // Check ctx for null before dereferencing it below.
    if (ctx == null) {
      return null;
    }
    // bypass only if outerRR is not null. Otherwise we need to look for expressions in outerRR for
    // subqueries e.g. select min(b.value) from table b group by b.key
    // having key in (select .. where a = min(b.value))
    if (!ctx.isUseCaching() && ctx.getOuterRR() == null) {
      return null;
    }
    RowResolver input = ctx.getInputRR();
    if ((input == null) || (!ctx.getAllowGBExprElimination())) {
      return null;
    }
    ExprNodeDesc desc = null;
// If the current subExpression is pre-calculated, as in Group-By etc.
ColumnInfo colInfo = input.getExpression(expr);
// try outer row resolver
RowResolver outerRR = ctx.getOuterRR();
    if (colInfo == null && outerRR != null) {
      colInfo = outerRR.getExpression(expr);
    }
    if (colInfo != null) {
      desc = new ExprNodeColumnDesc(colInfo);
      ASTNode source = input.getExpressionSource(expr);
      if (source != null) {
        ctx.getUnparseTranslator().addCopyTranslation(expr, source);
      }
    }
    return desc;
}
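  /**
   * Generates an ExprNodeDesc for every node of the given expression AST by
   * walking the tree with the rule set below. The returned map has an entry
   * for each visited ASTNode; the value may be null for nodes that have no
   * expression translation (e.g. windowing tokens).
   */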
public static Map<ASTNode, ExprNodeDesc> genExprNode(ASTNode expr, TypeCheckCtx tcCtx)
throws SemanticException {
return genExprNode(expr, tcCtx, new TypeCheckProcFactory());
}
protected static Map<ASTNode, ExprNodeDesc> genExprNode(ASTNode expr,
TypeCheckCtx tcCtx, TypeCheckProcFactory tf) throws SemanticException {
    // Create the walker, the rules dispatcher and the context.
    // The walker walks the tree in a DFS manner while maintaining the
    // operator stack; the dispatcher generates the plan from the operator
    // tree.
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new RuleRegExp("R1", HiveParser.TOK_NULL + "%"),
tf.getNullExprProcessor());
opRules.put(new RuleRegExp("R2", HiveParser.Number + "%|" +
HiveParser.IntegralLiteral + "%|" +
HiveParser.NumberLiteral + "%"),
tf.getNumExprProcessor());
opRules
.put(new RuleRegExp("R3", HiveParser.Identifier + "%|"
+ HiveParser.StringLiteral + "%|" + HiveParser.TOK_CHARSETLITERAL + "%|"
+ HiveParser.TOK_STRINGLITERALSEQUENCE + "%|"
+ "%|" + HiveParser.KW_IF + "%|" + HiveParser.KW_CASE + "%|"
+ HiveParser.KW_WHEN + "%|" + HiveParser.KW_IN + "%|"
+ HiveParser.KW_ARRAY + "%|" + HiveParser.KW_MAP + "%|"
+ HiveParser.KW_STRUCT + "%|" + HiveParser.KW_EXISTS + "%|"
+ HiveParser.TOK_SUBQUERY_OP_NOTIN + "%"),
tf.getStrExprProcessor());
opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|"
+ HiveParser.KW_FALSE + "%"), tf.getBoolExprProcessor());
opRules.put(new RuleRegExp("R5", HiveParser.TOK_DATELITERAL + "%|"
+ HiveParser.TOK_TIMESTAMPLITERAL + "%"), tf.getDateTimeExprProcessor());
opRules.put(new RuleRegExp("R6", HiveParser.TOK_INTERVAL_YEAR_MONTH_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_DAY_TIME_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_YEAR_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_MONTH_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_DAY_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_HOUR_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_MINUTE_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_SECOND_LITERAL + "%"), tf.getIntervalExprProcessor());
opRules.put(new RuleRegExp("R7", HiveParser.TOK_TABLE_OR_COL + "%"),
tf.getColumnExprProcessor());
opRules.put(new RuleRegExp("R8", HiveParser.TOK_SUBQUERY_EXPR + "%"),
tf.getSubQueryExprProcessor());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(),
opRules, tcCtx);
GraphWalker ogw = new ExpressionWalker(disp);
// Create a list of top nodes
ArrayList<Node> topNodes = Lists.<Node>newArrayList(expr);
HashMap<Node, Object> nodeOutputs = new LinkedHashMap<Node, Object>();
ogw.startWalking(topNodes, nodeOutputs);
return convert(nodeOutputs);
}
// temporary type-safe casting
private static Map<ASTNode, ExprNodeDesc> convert(Map<Node, Object> outputs) {
Map<ASTNode, ExprNodeDesc> converted = new LinkedHashMap<ASTNode, ExprNodeDesc>();
for (Map.Entry<Node, Object> entry : outputs.entrySet()) {
if (entry.getKey() instanceof ASTNode &&
(entry.getValue() == null || entry.getValue() instanceof ExprNodeDesc)) {
converted.put((ASTNode)entry.getKey(), (ExprNodeDesc)entry.getValue());
} else {
LOG.warn("Invalid type entry " + entry);
}
}
return converted;
}
/**
* Processor for processing NULL expression.
*/
public static class NullExprProcessor implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
if (ctx.getError() != null) {
return null;
}
ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
if (desc != null) {
return desc;
}
return new ExprNodeConstantDesc(TypeInfoFactory.getPrimitiveTypeInfoFromPrimitiveWritable(NullWritable.class), null);
}
}
/**
* Factory method to get NullExprProcessor.
*
* @return NullExprProcessor.
*/
public NullExprProcessor getNullExprProcessor() {
return new NullExprProcessor();
}
/**
* Processor for processing numeric constants.
*/
public static class NumExprProcessor implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
if (ctx.getError() != null) {
return null;
}
ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
if (desc != null) {
return desc;
}
Number v = null;
ExprNodeConstantDesc decimalNode = null;
ASTNode expr = (ASTNode) nd;
      // The expression can be a Double, a Long or an Integer. We try to
      // parse it as Double, then Long, then Integer: each successful parse
      // overwrites the previous value, and the first NumberFormatException
      // jumps to the catch block, so the most specific type that can hold
      // the literal wins.
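      // Suffix examples (matching the branches below): 100Y -> tinyint,
      // 100S -> smallint, 100L -> bigint, 1.0BD -> decimal, 1.0D -> double.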
try {
if (expr.getText().endsWith("L")) {
// Literal bigint.
v = Long.valueOf(expr.getText().substring(0, expr.getText().length() - 1));
} else if (expr.getText().endsWith("S")) {
// Literal smallint.
v = Short.valueOf(expr.getText().substring(0, expr.getText().length() - 1));
} else if (expr.getText().endsWith("Y")) {
// Literal tinyint.
v = Byte.valueOf(expr.getText().substring(0, expr.getText().length() - 1));
} else if (expr.getText().endsWith("BD")) {
// Literal decimal
String strVal = expr.getText().substring(0, expr.getText().length() - 2);
return createDecimal(strVal, false);
} else if (expr.getText().endsWith("D")) {
// Literal double.
v = Double.valueOf(expr.getText().substring(0, expr.getText().length() - 1));
} else {
v = Double.valueOf(expr.getText());
if (expr.getText() != null && !expr.getText().toLowerCase().contains("e")) {
decimalNode = createDecimal(expr.getText(), true);
if (decimalNode != null) {
v = null; // We will use decimal if all else fails.
}
}
v = Long.valueOf(expr.getText());
v = Integer.valueOf(expr.getText());
}
} catch (NumberFormatException e) {
// do nothing here, we will throw an exception in the following block
}
if (v == null && decimalNode == null) {
throw new SemanticException(ErrorMsg.INVALID_NUMERICAL_CONSTANT.getMsg(expr));
}
return v != null ? new ExprNodeConstantDesc(v) : decimalNode;
}
public static ExprNodeConstantDesc createDecimal(String strVal, boolean notNull) {
// Note: the normalize() call with rounding in HiveDecimal will currently reduce the
// precision and scale of the value by throwing away trailing zeroes. This may or may
// not be desirable for the literals; however, this used to be the default behavior
// for explicit decimal literals (e.g. 1.0BD), so we keep this behavior for now.
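      // e.g. "1.0" currently normalizes to HiveDecimal 1, so the resulting
      // type is decimal(1,0) rather than decimal(2,1).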
HiveDecimal hd = HiveDecimal.create(strVal);
if (notNull && hd == null) return null;
int prec = 1;
int scale = 0;
if (hd != null) {
prec = hd.precision();
scale = hd.scale();
}
DecimalTypeInfo typeInfo = TypeInfoFactory.getDecimalTypeInfo(prec, scale);
return new ExprNodeConstantDesc(typeInfo, hd);
}
}
/**
* Factory method to get NumExprProcessor.
*
* @return NumExprProcessor.
*/
public NumExprProcessor getNumExprProcessor() {
return new NumExprProcessor();
}
/**
* Processor for processing string constants.
*/
public static class StrExprProcessor implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
if (ctx.getError() != null) {
return null;
}
ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
if (desc != null) {
return desc;
}
ASTNode expr = (ASTNode) nd;
String str = null;
switch (expr.getToken().getType()) {
case HiveParser.StringLiteral:
str = BaseSemanticAnalyzer.unescapeSQLString(expr.getText());
break;
case HiveParser.TOK_STRINGLITERALSEQUENCE:
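        // Adjacent string literals, e.g. 'ab' 'cd', are concatenated into a
        // single constant ("abcd").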
StringBuilder sb = new StringBuilder();
for (Node n : expr.getChildren()) {
sb.append(
BaseSemanticAnalyzer.unescapeSQLString(((ASTNode)n).getText()));
}
str = sb.toString();
break;
case HiveParser.TOK_CHARSETLITERAL:
str = BaseSemanticAnalyzer.charSetString(expr.getChild(0).getText(),
expr.getChild(1).getText());
break;
default:
        // HiveParser.Identifier | HiveParser.KW_IF | HiveParser.KW_LEFT |
        // HiveParser.KW_RIGHT
str = BaseSemanticAnalyzer.unescapeIdentifier(expr.getText().toLowerCase());
break;
}
return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, str);
}
}
/**
* Factory method to get StrExprProcessor.
*
* @return StrExprProcessor.
*/
public StrExprProcessor getStrExprProcessor() {
return new StrExprProcessor();
}
/**
* Processor for boolean constants.
*/
public static class BoolExprProcessor implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
if (ctx.getError() != null) {
return null;
}
ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
if (desc != null) {
return desc;
}
ASTNode expr = (ASTNode) nd;
Boolean bool = null;
switch (expr.getToken().getType()) {
case HiveParser.KW_TRUE:
bool = Boolean.TRUE;
break;
case HiveParser.KW_FALSE:
bool = Boolean.FALSE;
break;
default:
assert false;
}
return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, bool);
}
}
/**
* Factory method to get BoolExprProcessor.
*
* @return BoolExprProcessor.
*/
public BoolExprProcessor getBoolExprProcessor() {
return new BoolExprProcessor();
}
/**
 * Processor for date and timestamp constants.
*/
public static class DateTimeExprProcessor implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
if (ctx.getError() != null) {
return null;
}
ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
if (desc != null) {
return desc;
}
ASTNode expr = (ASTNode) nd;
String timeString = BaseSemanticAnalyzer.stripQuotes(expr.getText());
// Get the string value and convert to a Date value.
try {
// todo replace below with joda-time, which supports timezone
if (expr.getType() == HiveParser.TOK_DATELITERAL) {
PrimitiveTypeInfo typeInfo = TypeInfoFactory.dateTypeInfo;
return new ExprNodeConstantDesc(typeInfo,
Date.valueOf(timeString));
}
if (expr.getType() == HiveParser.TOK_TIMESTAMPLITERAL) {
return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo,
Timestamp.valueOf(timeString));
}
throw new IllegalArgumentException("Invalid time literal type " + expr.getType());
} catch (Exception err) {
throw new SemanticException(
"Unable to convert time literal '" + timeString + "' to time value.", err);
}
}
}
/**
* Processor for interval constants.
*/
public static class IntervalExprProcessor implements NodeProcessor {
private static final BigDecimal NANOS_PER_SEC_BD = new BigDecimal(DateUtils.NANOS_PER_SEC);
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
if (ctx.getError() != null) {
return null;
}
ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
if (desc != null) {
return desc;
}
ASTNode expr = (ASTNode) nd;
String intervalString = BaseSemanticAnalyzer.stripQuotes(expr.getText());
      // Get the string value and convert to an Interval value.
try {
switch (expr.getType()) {
case HiveParser.TOK_INTERVAL_YEAR_MONTH_LITERAL:
return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo,
HiveIntervalYearMonth.valueOf(intervalString));
case HiveParser.TOK_INTERVAL_DAY_TIME_LITERAL:
return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo,
HiveIntervalDayTime.valueOf(intervalString));
case HiveParser.TOK_INTERVAL_YEAR_LITERAL:
return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo,
new HiveIntervalYearMonth(Integer.parseInt(intervalString), 0));
case HiveParser.TOK_INTERVAL_MONTH_LITERAL:
return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo,
new HiveIntervalYearMonth(0, Integer.parseInt(intervalString)));
case HiveParser.TOK_INTERVAL_DAY_LITERAL:
return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo,
new HiveIntervalDayTime(Integer.parseInt(intervalString), 0, 0, 0, 0));
case HiveParser.TOK_INTERVAL_HOUR_LITERAL:
return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo,
new HiveIntervalDayTime(0, Integer.parseInt(intervalString), 0, 0, 0));
case HiveParser.TOK_INTERVAL_MINUTE_LITERAL:
return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo,
new HiveIntervalDayTime(0, 0, Integer.parseInt(intervalString), 0, 0));
case HiveParser.TOK_INTERVAL_SECOND_LITERAL:
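          // Split the fractional seconds into whole seconds plus nanoseconds,
          // e.g. "5.1" becomes 5 seconds and 100000000 nanos.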
BigDecimal bd = new BigDecimal(intervalString);
BigDecimal bdSeconds = new BigDecimal(bd.toBigInteger());
BigDecimal bdNanos = bd.subtract(bdSeconds);
return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo,
new HiveIntervalDayTime(0, 0, 0, bdSeconds.intValueExact(),
bdNanos.multiply(NANOS_PER_SEC_BD).intValue()));
default:
throw new IllegalArgumentException("Invalid time literal type " + expr.getType());
}
} catch (Exception err) {
throw new SemanticException(
"Unable to convert interval literal '" + intervalString + "' to interval value.", err);
}
}
}
/**
* Factory method to get IntervalExprProcessor.
*
* @return IntervalExprProcessor.
*/
public IntervalExprProcessor getIntervalExprProcessor() {
return new IntervalExprProcessor();
}
/**
 * Factory method to get DateTimeExprProcessor.
 *
 * @return DateTimeExprProcessor.
*/
public DateTimeExprProcessor getDateTimeExprProcessor() {
return new DateTimeExprProcessor();
}
/**
* Processor for table columns.
*/
public static class ColumnExprProcessor implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
if (ctx.getError() != null) {
return null;
}
ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
if (desc != null) {
return desc;
}
ASTNode expr = (ASTNode) nd;
ASTNode parent = stack.size() > 1 ? (ASTNode) stack.get(stack.size() - 2) : null;
RowResolver input = ctx.getInputRR();
if (expr.getType() != HiveParser.TOK_TABLE_OR_COL) {
ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr), expr);
return null;
}
assert (expr.getChildCount() == 1);
String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr
.getChild(0).getText());
boolean isTableAlias = input.hasTableAlias(tableOrCol);
ColumnInfo colInfo = input.get(null, tableOrCol);
// try outer row resolver
      if (ctx.getOuterRR() != null && colInfo == null && !isTableAlias) {
RowResolver outerRR = ctx.getOuterRR();
isTableAlias = outerRR.hasTableAlias(tableOrCol);
colInfo = outerRR.get(null, tableOrCol);
}
if (isTableAlias) {
if (colInfo != null) {
if (parent != null && parent.getType() == HiveParser.DOT) {
// It's a table alias.
return null;
}
// It's a column.
return toExprNodeDesc(colInfo);
} else {
// It's a table alias.
// We will process that later in DOT.
return null;
}
} else {
if (colInfo == null) {
// It's not a column or a table alias.
if (input.getIsExprResolver()) {
ASTNode exprNode = expr;
if (!stack.empty()) {
ASTNode tmp = (ASTNode) stack.pop();
if (!stack.empty()) {
exprNode = (ASTNode) stack.peek();
}
stack.push(tmp);
}
ctx.setError(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(exprNode), expr);
return null;
} else {
List<String> possibleColumnNames = input.getReferenceableColumnAliases(tableOrCol, -1);
String reason = String.format("(possible column names are: %s)",
StringUtils.join(possibleColumnNames, ", "));
ctx.setError(ErrorMsg.INVALID_TABLE_OR_COLUMN.getMsg(expr.getChild(0), reason),
expr);
LOG.debug(ErrorMsg.INVALID_TABLE_OR_COLUMN.toString() + ":"
+ input.toString());
return null;
}
} else {
// It's a column.
return toExprNodeDesc(colInfo);
}
}
}
}
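  /**
   * Converts a ColumnInfo to an ExprNodeDesc: a constant primitive column is
   * folded into an ExprNodeConstantDesc, while anything else becomes an
   * ExprNodeColumnDesc that carries over the skewed-column flag.
   */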
private static ExprNodeDesc toExprNodeDesc(ColumnInfo colInfo) {
ObjectInspector inspector = colInfo.getObjectInspector();
if (inspector instanceof ConstantObjectInspector &&
inspector instanceof PrimitiveObjectInspector) {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector;
Object constant = ((ConstantObjectInspector) inspector).getWritableConstantValue();
return new ExprNodeConstantDesc(colInfo.getType(), poi.getPrimitiveJavaObject(constant));
}
// non-constant or non-primitive constants
ExprNodeColumnDesc column = new ExprNodeColumnDesc(colInfo);
column.setSkewedCol(colInfo.isSkewedCol());
return column;
}
/**
* Factory method to get ColumnExprProcessor.
*
* @return ColumnExprProcessor.
*/
public ColumnExprProcessor getColumnExprProcessor() {
return new ColumnExprProcessor();
}
/**
* The default processor for typechecking.
*/
public static class DefaultExprProcessor implements NodeProcessor {
static HashMap<Integer, String> specialUnaryOperatorTextHashMap;
static HashMap<Integer, String> conversionFunctionTextHashMap;
static HashSet<Integer> windowingTokens;
static {
specialUnaryOperatorTextHashMap = new HashMap<Integer, String>();
specialUnaryOperatorTextHashMap.put(HiveParser.PLUS, "positive");
specialUnaryOperatorTextHashMap.put(HiveParser.MINUS, "negative");
conversionFunctionTextHashMap = new HashMap<Integer, String>();
conversionFunctionTextHashMap.put(HiveParser.TOK_BOOLEAN,
serdeConstants.BOOLEAN_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_TINYINT,
serdeConstants.TINYINT_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_SMALLINT,
serdeConstants.SMALLINT_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_INT,
serdeConstants.INT_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_BIGINT,
serdeConstants.BIGINT_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_FLOAT,
serdeConstants.FLOAT_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_DOUBLE,
serdeConstants.DOUBLE_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_STRING,
serdeConstants.STRING_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_CHAR,
serdeConstants.CHAR_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_VARCHAR,
serdeConstants.VARCHAR_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_BINARY,
serdeConstants.BINARY_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_DATE,
serdeConstants.DATE_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_TIMESTAMP,
serdeConstants.TIMESTAMP_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_TIMESTAMPTZ,
serdeConstants.TIMESTAMPTZ_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_INTERVAL_YEAR_MONTH,
serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_INTERVAL_DAY_TIME,
serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_DECIMAL,
serdeConstants.DECIMAL_TYPE_NAME);
windowingTokens = new HashSet<Integer>();
windowingTokens.add(HiveParser.KW_OVER);
windowingTokens.add(HiveParser.TOK_PARTITIONINGSPEC);
windowingTokens.add(HiveParser.TOK_DISTRIBUTEBY);
windowingTokens.add(HiveParser.TOK_SORTBY);
windowingTokens.add(HiveParser.TOK_CLUSTERBY);
windowingTokens.add(HiveParser.TOK_WINDOWSPEC);
windowingTokens.add(HiveParser.TOK_WINDOWRANGE);
windowingTokens.add(HiveParser.TOK_WINDOWVALUES);
windowingTokens.add(HiveParser.KW_UNBOUNDED);
windowingTokens.add(HiveParser.KW_PRECEDING);
windowingTokens.add(HiveParser.KW_FOLLOWING);
windowingTokens.add(HiveParser.KW_CURRENT);
windowingTokens.add(HiveParser.TOK_TABSORTCOLNAMEASC);
windowingTokens.add(HiveParser.TOK_TABSORTCOLNAMEDESC);
windowingTokens.add(HiveParser.TOK_NULLS_FIRST);
windowingTokens.add(HiveParser.TOK_NULLS_LAST);
}
protected static boolean isRedundantConversionFunction(ASTNode expr,
boolean isFunction, ArrayList<ExprNodeDesc> children) {
if (!isFunction) {
return false;
}
// conversion functions take a single parameter
if (children.size() != 1) {
return false;
}
String funcText = conversionFunctionTextHashMap.get(((ASTNode) expr
.getChild(0)).getType());
// not a conversion function
if (funcText == null) {
return false;
}
      // return true when the child type and the conversion target type are
      // the same
return ((PrimitiveTypeInfo) children.get(0).getTypeInfo()).getTypeName()
.equalsIgnoreCase(funcText);
}
public static String getFunctionText(ASTNode expr, boolean isFunction) {
String funcText = null;
if (!isFunction) {
        // For an operator, the function name is the operator text, unless
        // it's in our special dictionary
if (expr.getChildCount() == 1) {
funcText = specialUnaryOperatorTextHashMap.get(expr.getType());
}
if(funcText == null) {
funcText = expr.getText();
}
} else {
        // For TOK_FUNCTION, the function name is stored in the first child,
        // unless it's in our special dictionary.
assert (expr.getChildCount() >= 1);
int funcType = ((ASTNode) expr.getChild(0)).getType();
if (funcText == null) {
funcText = conversionFunctionTextHashMap.get(funcType);
}
if (funcText == null) {
funcText = ((ASTNode) expr.getChild(0)).getText();
}
}
return BaseSemanticAnalyzer.unescapeIdentifier(funcText);
}
/**
     * This function creates an ExprNodeDesc for a UDF function given the
* children (arguments). It will insert implicit type conversion functions
* if necessary.
*
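     * <p>An illustrative sketch (the descriptors here are hypothetical;
     * "concat" is merely an example of a registered UDF name):
     *
     * <pre>{@code
     * ExprNodeDesc concatDesc =
     *     getFuncExprNodeDesc("concat", columnDesc, constantDesc);
     * }</pre>
     *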
* @throws UDFArgumentException
*/
static ExprNodeDesc getFuncExprNodeDescWithUdfData(String udfName, TypeInfo typeInfo,
ExprNodeDesc... children) throws UDFArgumentException {
FunctionInfo fi;
try {
fi = FunctionRegistry.getFunctionInfo(udfName);
} catch (SemanticException e) {
throw new UDFArgumentException(e);
}
if (fi == null) {
throw new UDFArgumentException(udfName + " not found.");
}
GenericUDF genericUDF = fi.getGenericUDF();
if (genericUDF == null) {
throw new UDFArgumentException(udfName
+ " is an aggregation function or a table function.");
}
// Add udfData to UDF if necessary
if (typeInfo != null) {
if (genericUDF instanceof SettableUDF) {
((SettableUDF)genericUDF).setTypeInfo(typeInfo);
}
}
List<ExprNodeDesc> childrenList = new ArrayList<ExprNodeDesc>(children.length);
childrenList.addAll(Arrays.asList(children));
return ExprNodeGenericFuncDesc.newInstance(genericUDF,
childrenList);
}
public static ExprNodeDesc getFuncExprNodeDesc(String udfName,
ExprNodeDesc... children) throws UDFArgumentException {
return getFuncExprNodeDescWithUdfData(udfName, null, children);
}
protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, FunctionInfo fi,
List<ExprNodeDesc> children, GenericUDF genericUDF) throws SemanticException {
      // Detect UDTFs in nested SELECT, GROUP BY, etc. as they aren't
      // supported
if (fi.getGenericUDTF() != null) {
throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg());
}
      // UDAF in filter condition, group-by clause, parameter of a function, etc.
if (fi.getGenericUDAFResolver() != null) {
if (isFunction) {
throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg((ASTNode) expr
.getChild(0)));
} else {
throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg(expr));
}
}
if (!ctx.getAllowStatefulFunctions() && (genericUDF != null)) {
if (FunctionRegistry.isStateful(genericUDF)) {
throw new SemanticException(ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg());
}
}
}
protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
boolean isFunction, ArrayList<ExprNodeDesc> children, TypeCheckCtx ctx)
throws SemanticException, UDFArgumentException {
// return the child directly if the conversion is redundant.
if (isRedundantConversionFunction(expr, isFunction, children)) {
assert (children.size() == 1);
assert (children.get(0) != null);
return children.get(0);
}
String funcText = getFunctionText(expr, isFunction);
ExprNodeDesc desc;
if (funcText.equals(".")) {
// "." : FIELD Expression
assert (children.size() == 2);
// Only allow constant field name for now
assert (children.get(1) instanceof ExprNodeConstantDesc);
ExprNodeDesc object = children.get(0);
ExprNodeConstantDesc fieldName = (ExprNodeConstantDesc) children.get(1);
assert (fieldName.getValue() instanceof String);
// Calculate result TypeInfo
String fieldNameString = (String) fieldName.getValue();
TypeInfo objectTypeInfo = object.getTypeInfo();
// Allow accessing a field of list element structs directly from a list
boolean isList = (object.getTypeInfo().getCategory() == ObjectInspector.Category.LIST);
if (isList) {
objectTypeInfo = ((ListTypeInfo) objectTypeInfo).getListElementTypeInfo();
}
if (objectTypeInfo.getCategory() != Category.STRUCT) {
throw new SemanticException(ErrorMsg.INVALID_DOT.getMsg(expr));
}
TypeInfo t = ((StructTypeInfo) objectTypeInfo).getStructFieldTypeInfo(fieldNameString);
if (isList) {
t = TypeInfoFactory.getListTypeInfo(t);
}
desc = new ExprNodeFieldDesc(t, children.get(0), fieldNameString, isList);
} else if (funcText.equals("[")) {
// "[]" : LSQUARE/INDEX Expression
        if (!ctx.getallowIndexExpr()) {
          throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr));
        }
assert (children.size() == 2);
// Check whether this is a list or a map
TypeInfo myt = children.get(0).getTypeInfo();
if (myt.getCategory() == Category.LIST) {
// Only allow integer index for now
if (!TypeInfoUtils.implicitConvertible(children.get(1).getTypeInfo(),
TypeInfoFactory.intTypeInfo)) {
throw new SemanticException(SemanticAnalyzer.generateErrorMessage(
expr, ErrorMsg.INVALID_ARRAYINDEX_TYPE.getMsg()));
}
// Calculate TypeInfo
TypeInfo t = ((ListTypeInfo) myt).getListElementTypeInfo();
desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry.getGenericUDFForIndex(), children);
} else if (myt.getCategory() == Category.MAP) {
if (!TypeInfoUtils.implicitConvertible(children.get(1).getTypeInfo(),
((MapTypeInfo) myt).getMapKeyTypeInfo())) {
throw new SemanticException(ErrorMsg.INVALID_MAPINDEX_TYPE
.getMsg(expr));
}
// Calculate TypeInfo
TypeInfo t = ((MapTypeInfo) myt).getMapValueTypeInfo();
desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry.getGenericUDFForIndex(), children);
} else {
throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr, myt.getTypeName()));
}
} else {
// other operators or functions
FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcText);
if (fi == null) {
if (isFunction) {
throw new SemanticException(ErrorMsg.INVALID_FUNCTION
.getMsg((ASTNode) expr.getChild(0)));
} else {
throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr));
}
}
// getGenericUDF() actually clones the UDF. Just call it once and reuse.
GenericUDF genericUDF = fi.getGenericUDF();
if (!fi.isNative()) {
ctx.getUnparseTranslator().addIdentifierTranslation(
(ASTNode) expr.getChild(0));
}
// Handle type casts that may contain type parameters
if (isFunction) {
ASTNode funcNameNode = (ASTNode)expr.getChild(0);
switch (funcNameNode.getType()) {
case HiveParser.TOK_CHAR:
// Add type params
CharTypeInfo charTypeInfo = ParseUtils.getCharTypeInfo(funcNameNode);
if (genericUDF != null) {
((SettableUDF)genericUDF).setTypeInfo(charTypeInfo);
}
break;
case HiveParser.TOK_VARCHAR:
VarcharTypeInfo varcharTypeInfo = ParseUtils.getVarcharTypeInfo(funcNameNode);
if (genericUDF != null) {
((SettableUDF)genericUDF).setTypeInfo(varcharTypeInfo);
}
break;
case HiveParser.TOK_DECIMAL:
DecimalTypeInfo decTypeInfo = ParseUtils.getDecimalTypeTypeInfo(funcNameNode);
if (genericUDF != null) {
((SettableUDF)genericUDF).setTypeInfo(decTypeInfo);
}
break;
default:
// Do nothing
break;
}
}
validateUDF(expr, isFunction, ctx, fi, children, genericUDF);
        // Try to infer the type of the constant only if there are two
        // children: one is a column and the other a numeric constant.
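        // e.g. in "bigintCol = 0" the literal 0 is re-typed from int to
        // bigint so the comparison does not force a cast on the column side.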
if (genericUDF instanceof GenericUDFBaseCompare
&& children.size() == 2
&& ((children.get(0) instanceof ExprNodeConstantDesc
&& children.get(1) instanceof ExprNodeColumnDesc)
|| (children.get(0) instanceof ExprNodeColumnDesc
&& children.get(1) instanceof ExprNodeConstantDesc))) {
int constIdx =
children.get(0) instanceof ExprNodeConstantDesc ? 0 : 1;
String constType = children.get(constIdx).getTypeString().toLowerCase();
String columnType = children.get(1 - constIdx).getTypeString().toLowerCase();
final PrimitiveTypeInfo colTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(columnType);
// Try to narrow type of constant
Object constVal = ((ExprNodeConstantDesc) children.get(constIdx)).getValue();
try {
            if (PrimitiveObjectInspectorUtils.intTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) {
              children.set(constIdx, new ExprNodeConstantDesc(Integer.valueOf(constVal.toString())));
            } else if (PrimitiveObjectInspectorUtils.longTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) {
              children.set(constIdx, new ExprNodeConstantDesc(Long.valueOf(constVal.toString())));
            } else if (PrimitiveObjectInspectorUtils.doubleTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) {
              children.set(constIdx, new ExprNodeConstantDesc(Double.valueOf(constVal.toString())));
            } else if (PrimitiveObjectInspectorUtils.floatTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) {
              children.set(constIdx, new ExprNodeConstantDesc(Float.valueOf(constVal.toString())));
            } else if (PrimitiveObjectInspectorUtils.byteTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) {
              children.set(constIdx, new ExprNodeConstantDesc(Byte.valueOf(constVal.toString())));
            } else if (PrimitiveObjectInspectorUtils.shortTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) {
              children.set(constIdx, new ExprNodeConstantDesc(Short.valueOf(constVal.toString())));
            }
}
} catch (NumberFormatException nfe) {
LOG.trace("Failed to narrow type of constant", nfe);
if ((genericUDF instanceof GenericUDFOPEqual && !NumberUtils.isNumber(constVal.toString()))) {
return new ExprNodeConstantDesc(false);
}
}
// if column type is char and constant type is string, then convert the constant to char
// type with padded spaces.
if (constType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) &&
colTypeInfo instanceof CharTypeInfo) {
final Object originalValue = ((ExprNodeConstantDesc) children.get(constIdx)).getValue();
final String constValue = originalValue.toString();
final int length = TypeInfoUtils.getCharacterLengthForType(colTypeInfo);
final HiveChar newValue = new HiveChar(constValue, length);
children.set(constIdx, new ExprNodeConstantDesc(colTypeInfo, newValue));
}
}
if (genericUDF instanceof GenericUDFOPOr) {
// flatten OR
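          // e.g. OR(OR(a, b), c) is rebuilt as OR(a, b, c)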
List<ExprNodeDesc> childrenList = new ArrayList<ExprNodeDesc>(
children.size());
for (ExprNodeDesc child : children) {
if (FunctionRegistry.isOpOr(child)) {
childrenList.addAll(child.getChildren());
} else {
childrenList.add(child);
}
}
desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText,
childrenList);
} else if (genericUDF instanceof GenericUDFOPAnd) {
// flatten AND
List<ExprNodeDesc> childrenList = new ArrayList<ExprNodeDesc>(
children.size());
for (ExprNodeDesc child : children) {
if (FunctionRegistry.isOpAnd(child)) {
childrenList.addAll(child.getChildren());
} else {
childrenList.add(child);
}
}
desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText,
childrenList);
} else if (ctx.isFoldExpr() && canConvertIntoNvl(genericUDF, children)) {
// Rewrite CASE into NVL
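          // e.g. "CASE WHEN a THEN TRUE ELSE FALSE END" becomes NVL(a, false);
          // if the THEN branch is FALSE, the NVL is additionally wrapped in NOT.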
desc = ExprNodeGenericFuncDesc.newInstance(new GenericUDFNvl(),
Lists.newArrayList(children.get(0), new ExprNodeConstantDesc(false)));
if (Boolean.FALSE.equals(((ExprNodeConstantDesc) children.get(1)).getValue())) {
desc = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(),
Lists.newArrayList(desc));
}
} else {
desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText,
children);
}
// If the function is deterministic and the children are constants,
// we try to fold the expression to remove e.g. cast on constant
if (ctx.isFoldExpr() && desc instanceof ExprNodeGenericFuncDesc &&
FunctionRegistry.isDeterministic(genericUDF) &&
ExprNodeDescUtils.isAllConstants(children)) {
ExprNodeDesc constantExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc)desc);
if (constantExpr != null) {
desc = constantExpr;
}
}
}
      // UDFOPPositive is a no-op.
      // However, we still create it and then remove it here, to make sure we
      // only allow "+" for numeric types.
if (FunctionRegistry.isOpPositive(desc)) {
assert (desc.getChildren().size() == 1);
desc = desc.getChildren().get(0);
}
assert (desc != null);
return desc;
}
private boolean canConvertIntoNvl(GenericUDF genericUDF, ArrayList<ExprNodeDesc> children) {
if (genericUDF instanceof GenericUDFWhen && children.size() == 3 &&
children.get(1) instanceof ExprNodeConstantDesc &&
children.get(2) instanceof ExprNodeConstantDesc) {
ExprNodeConstantDesc constThen = (ExprNodeConstantDesc) children.get(1);
ExprNodeConstantDesc constElse = (ExprNodeConstantDesc) children.get(2);
Object thenVal = constThen.getValue();
Object elseVal = constElse.getValue();
if (thenVal instanceof Boolean && elseVal instanceof Boolean) {
return true;
}
}
return false;
}
/**
* Returns true if des is a descendant of ans (ancestor)
*/
private boolean isDescendant(Node ans, Node des) {
if (ans.getChildren() == null) {
return false;
}
for (Node c : ans.getChildren()) {
if (c == des) {
return true;
}
if (isDescendant(c, des)) {
return true;
}
}
return false;
}
protected ExprNodeDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr,
Object... nodeOutputs) throws SemanticException {
RowResolver input = ctx.getInputRR();
String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0)
.getText());
// NOTE: tableAlias must be a valid non-ambiguous table alias,
// because we've checked that in TOK_TABLE_OR_COL's process method.
String colName;
if (nodeOutputs[1] instanceof ExprNodeConstantDesc) {
colName = ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString();
} else if (nodeOutputs[1] instanceof ExprNodeColumnDesc) {
colName = ((ExprNodeColumnDesc)nodeOutputs[1]).getColumn();
} else {
throw new SemanticException("Unexpected ExprNode : " + nodeOutputs[1]);
}
ColumnInfo colInfo = input.get(tableAlias, colName);
// Try outer Row resolver
      if (colInfo == null && ctx.getOuterRR() != null) {
RowResolver outerRR = ctx.getOuterRR();
colInfo = outerRR.get(tableAlias, colName);
}
if (colInfo == null) {
ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
return null;
}
return toExprNodeDesc(colInfo);
}
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
if (desc != null) {
// Here we know nd represents a group by expression.
// During the DFS traversal of the AST, a descendant of nd likely set an
// error because a sub-tree of nd is unlikely to also be a group by
// expression. For example, in a query such as
// SELECT *concat(key)* FROM src GROUP BY concat(key), 'key' will be
// processed before 'concat(key)' and since 'key' is not a group by
// expression, an error will be set in ctx by ColumnExprProcessor.
// We can clear the global error when we see that it was set in a
// descendant node of a group by expression because
        // processGByExpr() returns an ExprNodeDesc that effectively ignores
// its children. Although the error can be set multiple times by
// descendant nodes, DFS traversal ensures that the error only needs to
// be cleared once. Also, for a case like
// SELECT concat(value, concat(value))... the logic still works as the
// error is only set with the first 'value'; all node processors quit
// early if the global error is set.
if (isDescendant(nd, ctx.getErrorSrcNode())) {
ctx.setError(null, null);
}
return desc;
}
if (ctx.getError() != null) {
return null;
}
ASTNode expr = (ASTNode) nd;
      /*
       * A Windowing specification gets added as a child to a UDAF invocation to distinguish it
       * from similar UDAFs but on different windows.
       * The UDAF is translated to a WindowFunction invocation in the PTFTranslator.
       * So here we just return null for tokens that appear in a Window Specification.
       * When the traversal reaches up to the UDAF invocation, its ExprNodeDesc is built using
       * the ColumnInfo in the InputRR. This is similar to how UDAFs are handled in Select
       * lists; the difference is that the Window related tokens are translated later, in the
       * PTFTranslator, so we just return null here.
       */
if (windowingTokens.contains(expr.getType())) {
        if (!ctx.getallowWindowing()) {
          throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
              ErrorMsg.INVALID_FUNCTION.getMsg("Windowing is not supported in the context")));
        }
return null;
}
      if (expr.getType() == HiveParser.TOK_SUBQUERY_OP || expr.getType() == HiveParser.TOK_QUERY) {
return null;
}
if (expr.getType() == HiveParser.TOK_TABNAME) {
return null;
}
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
        if (!ctx.getallowAllColRef()) {
          throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
              ErrorMsg.INVALID_COLUMN
                  .getMsg("All column reference is not supported in the context")));
        }
RowResolver input = ctx.getInputRR();
ExprNodeColumnListDesc columnList = new ExprNodeColumnListDesc();
assert expr.getChildCount() <= 1;
if (expr.getChildCount() == 1) {
// table aliased (select a.*, for example)
ASTNode child = (ASTNode) expr.getChild(0);
assert child.getType() == HiveParser.TOK_TABNAME;
assert child.getChildCount() == 1;
String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(child.getChild(0).getText());
HashMap<String, ColumnInfo> columns = input.getFieldMap(tableAlias);
if (columns == null) {
throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(child));
}
for (Map.Entry<String, ColumnInfo> colMap : columns.entrySet()) {
ColumnInfo colInfo = colMap.getValue();
if (!colInfo.getIsVirtualCol()) {
columnList.addColumn(toExprNodeDesc(colInfo));
}
}
} else {
// all columns (select *, for example)
for (ColumnInfo colInfo : input.getColumnInfos()) {
if (!colInfo.getIsVirtualCol()) {
columnList.addColumn(toExprNodeDesc(colInfo));
}
}
}
return columnList;
}
// If the first child is a TOK_TABLE_OR_COL, and nodeOutput[0] is NULL,
// and the operator is a DOT, then it's a table column reference.
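      // e.g. for "t.col" the DOT's first child is TOK_TABLE_OR_COL("t"), for
      // which ColumnExprProcessor returned null because "t" is a table alias.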
if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&& nodeOutputs[0] == null) {
return processQualifiedColRef(ctx, expr, nodeOutputs);
}
// Return nulls for conversion operators
if (conversionFunctionTextHashMap.keySet().contains(expr.getType())
|| expr.getToken().getType() == HiveParser.CharSetName
|| expr.getToken().getType() == HiveParser.CharSetLiteral) {
return null;
}
boolean isFunction = (expr.getType() == HiveParser.TOK_FUNCTION ||
expr.getType() == HiveParser.TOK_FUNCTIONSTAR ||
expr.getType() == HiveParser.TOK_FUNCTIONDI);
if (!ctx.getAllowDistinctFunctions() && expr.getType() == HiveParser.TOK_FUNCTIONDI) {
throw new SemanticException(
SemanticAnalyzer.generateErrorMessage(expr, ErrorMsg.DISTINCT_NOT_SUPPORTED.getMsg()));
}
// Create all children
int childrenBegin = (isFunction ? 1 : 0);
ArrayList<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(
expr.getChildCount() - childrenBegin);
for (int ci = childrenBegin; ci < expr.getChildCount(); ci++) {
if (nodeOutputs[ci] instanceof ExprNodeColumnListDesc) {
children.addAll(((ExprNodeColumnListDesc) nodeOutputs[ci]).getChildren());
} else {
children.add((ExprNodeDesc) nodeOutputs[ci]);
}
}
if (expr.getType() == HiveParser.TOK_FUNCTIONSTAR) {
        if (!ctx.getallowFunctionStar()) {
          throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
              ErrorMsg.INVALID_COLUMN
                  .getMsg(".* reference is not supported in the context")));
        }
RowResolver input = ctx.getInputRR();
for (ColumnInfo colInfo : input.getColumnInfos()) {
if (!colInfo.getIsVirtualCol()) {
children.add(toExprNodeDesc(colInfo));
}
}
}
      // If any of the children is null, then return null;
      // this is a hack for now to handle the group by case.
if (children.contains(null)) {
List<String> possibleColumnNames = getReferenceableColumnAliases(ctx);
String reason = String.format("(possible column names are: %s)",
StringUtils.join(possibleColumnNames, ", "));
ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(0), reason),
expr);
return null;
}
// Create function desc
try {
return getXpathOrFuncExprNodeDesc(expr, isFunction, children, ctx);
} catch (UDFArgumentTypeException e) {
throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_TYPE.getMsg(expr
.getChild(childrenBegin + e.getArgumentId()), e.getMessage()), e);
} catch (UDFArgumentLengthException e) {
throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_LENGTH.getMsg(
expr, e.getMessage()), e);
} catch (UDFArgumentException e) {
throw new SemanticException(ErrorMsg.INVALID_ARGUMENT.getMsg(expr, e
.getMessage()), e);
}
}
protected List<String> getReferenceableColumnAliases(TypeCheckCtx ctx) {
return ctx.getInputRR().getReferenceableColumnAliases(null, -1);
}
}
/**
* Factory method to get DefaultExprProcessor.
*
* @return DefaultExprProcessor.
*/
public DefaultExprProcessor getDefaultExprProcessor() {
return new DefaultExprProcessor();
}
/**
 * Processor for subquery expressions.
*/
public static class SubQueryExprProcessor implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
if (ctx.getError() != null) {
return null;
}
ASTNode expr = (ASTNode) nd;
ASTNode sqNode = (ASTNode) expr.getParent().getChild(1);
      if (!ctx.getallowSubQueryExpr()) {
        throw new CalciteSubquerySemanticException(SemanticAnalyzer.generateErrorMessage(sqNode,
            ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg("Currently SubQuery expressions are only allowed as " +
                "Where and Having Clause predicates")));
      }
ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
if (desc != null) {
return desc;
}
      // TOK_SUBQUERY_EXPR should have either 2 or 3 children
      assert (expr.getChildren().size() == 3 || expr.getChildren().size() == 2);
      // The first child must be the subquery operator
      assert (expr.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP);
ASTNode subqueryOp = (ASTNode) expr.getChild(0);
boolean isIN = (subqueryOp.getChildCount() > 0) && (subqueryOp.getChild(0).getType() == HiveParser.KW_IN
|| subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTIN);
boolean isEXISTS = (subqueryOp.getChildCount() > 0) && (subqueryOp.getChild(0).getType() == HiveParser.KW_EXISTS
|| subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTEXISTS);
      boolean isScalar = subqueryOp.getChildCount() == 0;
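      // e.g. "key IN (select ...)" -> isIN; "EXISTS (select ...)" -> isEXISTS;
      // a scalar comparison such as "key = (select max(c) from t)" leaves the
      // operator node without children -> isScalar.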
      // subqueryToRelNode might be null if the subquery expression appears anywhere
      // other than where it is expected, i.e. in a filter (WHERE/HAVING); in that
      // case we should throw an appropriate error message.
      Map<ASTNode, RelNode> subqueryToRelNode = ctx.getSubqueryToRelNode();
      if (subqueryToRelNode == null) {
throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
" Currently SubQuery expressions are only allowed as " +
"Where and Having Clause predicates"));
}
RelNode subqueryRel = subqueryToRelNode.get(expr);
//For now because subquery is only supported in filter
// we will create subquery expression of boolean type
      if (isEXISTS) {
        return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryRel,
            ExprNodeSubQueryDesc.SubqueryType.EXISTS);
      } else if (isIN) {
        assert (nodeOutputs[2] != null);
        ExprNodeDesc lhs = (ExprNodeDesc) nodeOutputs[2];
        return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryRel,
            ExprNodeSubQueryDesc.SubqueryType.IN, lhs);
      } else if (isScalar) {
        // only a single subquery expression is supported
        if (subqueryRel.getRowType().getFieldCount() != 1) {
throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
"More than one column expression in subquery"));
}
// figure out subquery expression column's type
TypeInfo subExprType = TypeConverter.convert(subqueryRel.getRowType().getFieldList().get(0).getType());
return new ExprNodeSubQueryDesc(subExprType, subqueryRel,
ExprNodeSubQueryDesc.SubqueryType.SCALAR);
}
/*
* Restriction.1.h :: SubQueries only supported in the SQL Where Clause.
*/
ctx.setError(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(sqNode,
"Currently only IN & EXISTS SubQuery expressions are allowed"),
sqNode);
return null;
}
}
/**
* Factory method to get SubQueryExprProcessor.
*
 * @return SubQueryExprProcessor.
*/
public SubQueryExprProcessor getSubQueryExprProcessor() {
return new SubQueryExprProcessor();
}
}