/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.optimizer.ppr;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.Transform;
import org.apache.hadoop.hive.ql.parse.ErrorMsg;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.thrift.TException;

/**
 * The transformation step that does partition pruning.
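 *
 * The {@link #transform} step walks the operator tree looking for a TableScan
 * followed by one or two Filter operators and collects the pruning predicate
 * for each table scan; {@link #prune} later evaluates that predicate to decide
 * which partitions of the table need to be read.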
 */
public class PartitionPruner implements Transform {

  // The log
  private static final Log LOG = LogFactory
      .getLog("hive.ql.optimizer.ppr.PartitionPruner");

  /*
   * (non-Javadoc)
   *
   * @see
   * org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop
   * .hive.ql.parse.ParseContext)
   */
  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {

    // create the context for walking operators
    OpWalkerCtx opWalkerCtx = new OpWalkerCtx(pctx.getOpToPartPruner());

    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", "(TS%FIL%)|(TS%FIL%FIL%)"), OpProcFactory
        .getFilterProc());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(),
        opRules, opWalkerCtx);
    GraphWalker ogw = new DefaultGraphWalker(disp);

    // Create a list of top op nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);
    pctx.setHasNonPartCols(opWalkerCtx.getHasNonPartCols());

    return pctx;
  }

  /**
   * Find out whether the condition only contains partitioned columns. Note that
   * if the table is not partitioned, the function always returns true.
   *
   * @param tab
   *          the table object
   * @param expr
   *          the pruner expression for the table
   */
  public static boolean onlyContainsPartnCols(Table tab, ExprNodeDesc expr) {
    if (!tab.isPartitioned() || (expr == null)) {
      return true;
    }

    if (expr instanceof ExprNodeColumnDesc) {
      String colName = ((ExprNodeColumnDesc) expr).getColumn();
      return tab.isPartitionKey(colName);
    }

    // It cannot contain a non-deterministic function
    if ((expr instanceof ExprNodeGenericFuncDesc)
        && !FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) expr)
        .getGenericUDF())) {
      return false;
    }

    // All columns of the expression must be partitioned columns
    List<ExprNodeDesc> children = expr.getChildren();
    if (children != null) {
      for (int i = 0; i < children.size(); i++) {
        if (!onlyContainsPartnCols(tab, children.get(i))) {
          return false;
        }
      }
    }

    return true;
  }

  /**
   * Get the partition list for the table that satisfies the partition pruner
   * condition.
   *
   * @param tab
   *          the table object for the alias
   * @param prunerExpr
   *          the pruner expression for the alias
   * @param conf
   *          for checking whether "strict" mode is on.
   * @param alias
   *          for generating error message only.
   * @return the partition list for the table that satisfies the partition
   *         pruner condition.
   * @throws HiveException
   */
  public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
      HiveConf conf, String alias,
      Map<String, PrunedPartitionList> prunedPartitionsMap)
      throws HiveException {
    LOG.trace("Started pruning partition");
    LOG.trace("dbname = " + tab.getDbName());
    LOG.trace("tabname = " + tab.getTableName());
    LOG.trace("prune Expression = " + prunerExpr);
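
    // The pruned partition list is cached in prunedPartitionsMap under a
    // "<db>.<table>;<pruner expression>" key, so repeated calls for the same
    // table and filter reuse the earlier result.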
    String key = tab.getDbName() + "." + tab.getTableName() + ";";

    if (prunerExpr != null) {
      key = key + prunerExpr.getExprString();
    }
    PrunedPartitionList ret = prunedPartitionsMap.get(key);
    if (ret != null) {
      return ret;
    }

    LinkedHashSet<Partition> true_parts = new LinkedHashSet<Partition>();
    LinkedHashSet<Partition> unkn_parts = new LinkedHashSet<Partition>();
    LinkedHashSet<Partition> denied_parts = new LinkedHashSet<Partition>();

    try {
      StructObjectInspector rowObjectInspector = (StructObjectInspector) tab
          .getDeserializer().getObjectInspector();
      Object[] rowWithPart = new Object[2];

      if (tab.isPartitioned()) {
        // If the "strict" mode is on, we have to provide partition pruner for
        // each table.
        if ("strict".equalsIgnoreCase(HiveConf.getVar(conf,
            HiveConf.ConfVars.HIVEMAPREDMODE))) {
          if (!hasColumnExpr(prunerExpr)) {
            throw new SemanticException(ErrorMsg.NO_PARTITION_PREDICATE
                .getMsg("for Alias \"" + alias + "\" Table \""
                + tab.getTableName() + "\""));
          }
        }

        if (prunerExpr == null) {
          // This can happen when hive.mapred.mode=nonstrict and there are no
          // predicates at all. Add all partitions to true_parts since nothing
          // can be pruned without a predicate.
          true_parts.addAll(Hive.get().getPartitions(tab));
        } else {
          // remove non-partition columns
          ExprNodeDesc compactExpr = prunerExpr.clone();
          compactExpr = compactExpr(compactExpr);
          LOG.debug("Filter w/ compacting: "
              + ((compactExpr != null) ? compactExpr.getExprString() : "null")
              + "; filter w/o compacting: "
              + ((prunerExpr != null) ? prunerExpr.getExprString() : "null"));
          if (compactExpr == null) {
            // This could happen when hive.mapred.mode=nonstrict and all the predicates
            // are on non-partition columns.
            unkn_parts.addAll(Hive.get().getPartitions(tab));
          } else if (Utilities.checkJDOPushDown(tab, compactExpr)) {
            String filter = compactExpr.getExprString();
            String oldFilter = prunerExpr.getExprString();

            if (filter.equals(oldFilter)) {
              // pruneExpr contains only partition columns
              pruneByPushDown(tab, true_parts, filter);
            } else {
              // pruneExpr contains non-partition columns
              pruneByPushDown(tab, unkn_parts, filter);
            }
          } else {
            pruneBySequentialScan(tab, true_parts, unkn_parts, denied_parts,
                prunerExpr, rowObjectInspector);
          }
        }
        LOG.debug("tabname = " + tab.getTableName() + " is partitioned");
      } else {
        true_parts.addAll(Hive.get().getPartitions(tab));
      }
    } catch (HiveException e) {
      throw e;
    } catch (Exception e) {
      throw new HiveException(e);
    }

    // Now return the set of partitions
    ret = new PrunedPartitionList(true_parts, unkn_parts, denied_parts);
    prunedPartitionsMap.put(key, ret);
    return ret;
  }

  /**
   * Taking a partition pruning expression, remove the null operands.
   *
   * @param expr original partition pruning expression.
   * @return partition pruning expression that only contains partition columns.
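   *         For a hypothetical partition column {@code ds}, for example,
   *         {@code and(null, (ds = '2008-04-08'))} compacts to
   *         {@code (ds = '2008-04-08')}, and an expression that is entirely
   *         null compacts to null.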
   */
  static private ExprNodeDesc compactExpr(ExprNodeDesc expr) {
    if (expr instanceof ExprNodeConstantDesc) {
      if (((ExprNodeConstantDesc) expr).getValue() == null) {
        return null;
      } else {
        return expr;
      }
    } else if (expr instanceof ExprNodeGenericFuncDesc) {
      GenericUDF udf = ((ExprNodeGenericFuncDesc) expr).getGenericUDF();
      if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr) {
        List<ExprNodeDesc> children = ((ExprNodeGenericFuncDesc) expr).getChildren();
        ExprNodeDesc left = children.get(0);
        children.set(0, compactExpr(left));
        ExprNodeDesc right = children.get(1);
        children.set(1, compactExpr(right));
        if (children.get(0) == null && children.get(1) == null) {
          return null;
        } else if (children.get(0) == null) {
          return children.get(1);
        } else if (children.get(1) == null) {
          return children.get(0);
        }
      }
      return expr;
    }
    return expr;
  }

  /**
   * Pruning partitions using JDO filtering.
   *
   * @param tab the table containing the partitions.
   * @param true_parts the resulting partitions.
   * @param filter the SQL predicate that involves only partition columns
   * @throws HiveException
   * @throws MetaException
   * @throws NoSuchObjectException
   * @throws TException
   */
  static private void pruneByPushDown(Table tab, Set<Partition> true_parts,
      String filter) throws HiveException, MetaException, NoSuchObjectException, TException {
    Hive db = Hive.get();
    List<Partition> parts = db.getPartitionsByFilter(tab, filter);
    true_parts.addAll(parts);
    return;
  }
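
  // Illustrative note (hypothetical values, not from the original source): for a table
  // partitioned on a column ds, a compacted filter string such as "(ds > '2008-04-08')"
  // is handed to getPartitionsByFilter() above, which lets the metastore evaluate the
  // predicate instead of fetching every partition and evaluating it client-side as
  // pruneBySequentialScan() below does.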

  /**
   * Pruning partitions by getting the partition names first and pruning them using the
   * Hive expression evaluator.
   *
   * @param tab the table containing the partitions.
   * @param true_parts the resulting partitions if the partition pruning expression only
   *        contains partition columns.
   * @param unkn_parts the resulting partitions if the partition pruning expression also
   *        contains non-partition columns.
   * @param denied_parts pruned out partitions.
   * @param prunerExpr the SQL predicate that involves partition columns.
   * @param rowObjectInspector object inspector used by the evaluator
   * @throws Exception
   */
  static private void pruneBySequentialScan(Table tab, Set<Partition> true_parts,
      Set<Partition> unkn_parts, Set<Partition> denied_parts,
      ExprNodeDesc prunerExpr, StructObjectInspector rowObjectInspector)
      throws Exception {

    List<String> trueNames = null;
    List<String> unknNames = null;

    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(LOG, PerfLogger.PRUNE_LISTING);

    List<String> partNames = Hive.get().getPartitionNames(tab.getDbName(),
        tab.getTableName(), (short) -1);

    List<FieldSchema> pCols = tab.getPartCols();
    List<String> partCols = new ArrayList<String>(pCols.size());
    List<String> values = new ArrayList<String>(pCols.size());
    Object[] objectWithPart = new Object[2];

    for (FieldSchema pCol : pCols) {
      partCols.add(pCol.getName());
    }

    Map<PrimitiveObjectInspector, ExprNodeEvaluator> handle = PartExprEvalUtils.prepareExpr(
        prunerExpr, partCols, rowObjectInspector);

    for (String partName : partNames) {
      // Set all the variables here
      LinkedHashMap<String, String> partSpec = Warehouse
          .makeSpecFromName(partName);

      values.clear();
      for (Map.Entry<String, String> kv : partSpec.entrySet()) {
        values.add(kv.getValue());
      }
      objectWithPart[1] = values;

      // evaluate the expression tree
      Boolean r = (Boolean) PartExprEvalUtils.evaluateExprOnPart(handle, objectWithPart);
      if (r == null) {
        if (unknNames == null) {
          unknNames = new LinkedList<String>();
        }
        unknNames.add(partName);
        LOG.debug("retained unknown partition: " + partName);
      } else if (Boolean.TRUE.equals(r)) {
        if (trueNames == null) {
          trueNames = new LinkedList<String>();
        }
        trueNames.add(partName);
        LOG.debug("retained partition: " + partName);
      }
    }
    perfLogger.PerfLogEnd(LOG, PerfLogger.PRUNE_LISTING);

    perfLogger.PerfLogBegin(LOG, PerfLogger.PARTITION_RETRIEVING);
    if (trueNames != null) {
      List<Partition> parts = Hive.get().getPartitionsByNames(tab, trueNames);
      true_parts.addAll(parts);
    }
    if (unknNames != null) {
      List<Partition> parts = Hive.get().getPartitionsByNames(tab, unknNames);
      unkn_parts.addAll(parts);
    }
    perfLogger.PerfLogEnd(LOG, PerfLogger.PARTITION_RETRIEVING);
  }

  /**
   * Whether the expression contains a column node or not.
   */
  public static boolean hasColumnExpr(ExprNodeDesc desc) {
    // Return false for null
    if (desc == null) {
      return false;
    }
    // Return true for exprNodeColumnDesc
    if (desc instanceof ExprNodeColumnDesc) {
      return true;
    }
    // Return true in case one of the children is column expr.
    List<ExprNodeDesc> children = desc.getChildren();
    if (children != null) {
      for (int i = 0; i < children.size(); i++) {
        if (hasColumnExpr(children.get(i))) {
          return true;
        }
      }
    }
    // Return false otherwise
    return false;
  }

}