/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.optimizer.index;

import java.io.Serializable;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;

/**
 * Factory of methods used by {@link RewriteGBUsingIndex}
 * to determine if the rewrite optimization can be applied to the input query.
 */
public final class RewriteCanApplyProcFactory {

  private static RewriteCanApplyCtx canApplyCtx = null;

  private RewriteCanApplyProcFactory() {
    //this prevents the class from getting instantiated
  }
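  /*
   * Each processor below inspects a single operator type and records in the shared
   * RewriteCanApplyCtx any condition that rules out the rewrite: an aggregate function
   * other than count, missing group-by keys or predicate columns, and the column lists
   * that are later matched against the index key columns.
   */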
  /**
   * Check for conditions in FilterOperator that do not meet rewrite criteria.
   */
  private static class CheckFilterProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      FilterOperator operator = (FilterOperator) nd;
      canApplyCtx = (RewriteCanApplyCtx) ctx;
      FilterDesc conf = (FilterDesc) operator.getConf();
      //The filter operator should have a predicate of ExprNodeGenericFuncDesc type,
      //which represents the comparison operator
      ExprNodeGenericFuncDesc oldengfd = (ExprNodeGenericFuncDesc) conf.getPredicate();
      if (oldengfd == null) {
        canApplyCtx.setWhrClauseColsFetchException(true);
        //return early: dereferencing a null predicate below would throw a NullPointerException
        return null;
      }
      //The predicate should have valid left and right columns
      List<String> colList = oldengfd.getCols();
      if (colList == null || colList.size() == 0) {
        canApplyCtx.setWhrClauseColsFetchException(true);
        return null;
      }
      //Add the predicate columns to RewriteCanApplyCtx's predColRefs list to check later
      //if index keys contain all filter predicate columns and vice versa
      for (String col : colList) {
        canApplyCtx.getPredicateColumnsList().add(col);
      }
      return null;
    }
  }

  public static CheckFilterProc canApplyOnFilterOperator() {
    return new CheckFilterProc();
  }

  /**
   * Check for conditions in GroupByOperator that do not meet rewrite criteria.
   */
  private static class CheckGroupByProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      GroupByOperator operator = (GroupByOperator) nd;
      canApplyCtx = (RewriteCanApplyCtx) ctx;
      //for each group-by clause in the query, only one GroupByOperator of the
      //GBY-RS-GBY sequence is stored in getGroupOpToInputTables;
      //we need to process only this operator.
      //Also, we do not rewrite cases where the same query branch has multiple group-by constructs
      if (canApplyCtx.getParseContext().getGroupOpToInputTables().containsKey(operator) &&
          !canApplyCtx.isQueryHasGroupBy()) {

        canApplyCtx.setQueryHasGroupBy(true);
        GroupByDesc conf = (GroupByDesc) operator.getConf();
        List<AggregationDesc> aggrList = conf.getAggregators();
        if (aggrList != null && aggrList.size() > 0) {
          for (AggregationDesc aggregationDesc : aggrList) {
            canApplyCtx.setAggFuncCnt(canApplyCtx.getAggFuncCnt() + 1);
            //In the current implementation, we do not support more than one aggregate
            //function in the group-by
            if (canApplyCtx.getAggFuncCnt() > 1) {
              return false;
            }
            String aggFunc = aggregationDesc.getGenericUDAFName();
            if (!("count".equals(aggFunc))) {
              canApplyCtx.setAggFuncIsNotCount(true);
            } else {
              List<ExprNodeDesc> para = aggregationDesc.getParameters();
              //for a valid aggregation, it needs to have a non-null parameter list
              if (para == null) {
                canApplyCtx.setAggFuncColsFetchException(true);
              } else if (para.size() == 0) {
                //count(*) case
                canApplyCtx.setCountOnAllCols(true);
                canApplyCtx.setAggFunction("_count_of_all");
              } else {
                assert para.size() == 1;
                for (int i = 0; i < para.size(); i++) {
                  ExprNodeDesc expr = para.get(i);
                  if (expr instanceof ExprNodeColumnDesc) {
                    //Add the columns to RewriteCanApplyCtx's selectColumnsList
                    //to check later if index keys contain all select clause columns
                    //and vice versa. We get the 'actual' select column names only here,
                    //when an aggregate function appears along with the group-by;
                    //SelectOperator has internal names in its colList data structure
                    canApplyCtx.getSelectColumnsList().add(
                        ((ExprNodeColumnDesc) expr).getColumn());
                    //Add the columns to RewriteCanApplyCtx's aggFuncColList to check later
                    //if the columns contained in the agg func are index key columns
                    canApplyCtx.getAggFuncColList().add(
                        ((ExprNodeColumnDesc) expr).getColumn());
                    canApplyCtx.setAggFunction("_count_of_" +
                        ((ExprNodeColumnDesc) expr).getColumn());
                  } else if (expr instanceof ExprNodeConstantDesc) {
                    //count(1) case
                    canApplyCtx.setCountOfOne(true);
                    canApplyCtx.setAggFunction("_count_of_1");
                  }
                }
              }
            }
          }
        }
        //we need to have non-null group-by keys for a valid group-by operator
        List<ExprNodeDesc> keyList = conf.getKeys();
        if (keyList == null || keyList.size() == 0) {
          canApplyCtx.setGbyKeysFetchException(true);
          //return early: iterating over a null key list below would throw a NullPointerException
          return null;
        }
        for (ExprNodeDesc expr : keyList) {
          checkExpression(expr);
        }
      }
      return null;
    }
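    /**
     * Recursively collects the columns referenced by a group-by key expression so that
     * they can later be compared against the index key columns.
     */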
    private void checkExpression(ExprNodeDesc expr) {
      if (expr instanceof ExprNodeColumnDesc) {
        //Add the group-by keys to RewriteCanApplyCtx's gbKeyNameList to check later
        //if all keys are from index columns
        canApplyCtx.getGbKeyNameList().addAll(expr.getCols());
      } else if (expr instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) expr;
        List<ExprNodeDesc> childExprs = funcExpr.getChildExprs();
        for (ExprNodeDesc childExpr : childExprs) {
          if (childExpr instanceof ExprNodeColumnDesc) {
            canApplyCtx.getGbKeyNameList().addAll(expr.getCols());
            canApplyCtx.getSelectColumnsList().add(((ExprNodeColumnDesc) childExpr).getColumn());
          } else if (childExpr instanceof ExprNodeGenericFuncDesc) {
            checkExpression(childExpr);
          }
        }
      }
    }
  }

  public static CheckGroupByProc canApplyOnGroupByOperator() {
    return new CheckGroupByProc();
  }

  /**
   * Check for conditions in SelectOperator that do not meet rewrite criteria.
   */
  private static class CheckSelectProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      SelectOperator operator = (SelectOperator) nd;
      canApplyCtx = (RewriteCanApplyCtx) ctx;

      List<Operator<? extends Serializable>> childrenList = operator.getChildOperators();
      Operator<? extends Serializable> child = childrenList.get(0);
      if (child instanceof FileSinkOperator) {
        //get the internal-name to alias mapping from the operator's row schema
        Map<String, String> internalToAlias = new LinkedHashMap<String, String>();
        RowSchema rs = operator.getSchema();
        List<ColumnInfo> sign = rs.getSignature();
        for (ColumnInfo columnInfo : sign) {
          internalToAlias.put(columnInfo.getInternalName(), columnInfo.getAlias());
        }

        //if the FilterOperator predicate has internal column names,
        //we need to retrieve the 'actual' column names to
        //check if index keys contain all filter predicate columns and vice versa.
        //Collect the renames first and apply them after the iteration; removing from
        //the list while iterating over it would throw a ConcurrentModificationException
        Map<String, String> renamedPredCols = new LinkedHashMap<String, String>();
        Iterator<String> predItr = canApplyCtx.getPredicateColumnsList().iterator();
        while (predItr.hasNext()) {
          String predCol = predItr.next();
          String newPredCol = internalToAlias.get(predCol);
          if (newPredCol != null) {
            renamedPredCols.put(predCol, newPredCol);
          }
        }
        for (Map.Entry<String, String> entry : renamedPredCols.entrySet()) {
          canApplyCtx.getPredicateColumnsList().remove(entry.getKey());
          canApplyCtx.getPredicateColumnsList().add(entry.getValue());
        }
      }
      return null;
    }
  }

  public static CheckSelectProc canApplyOnSelectOperator() {
    return new CheckSelectProc();
  }
}