/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Stack;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree;
import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree.Operator;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.PrunerOperatorFactory.FilterPruner;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import com.google.common.base.Preconditions;

/**
 * Fixed bucket pruning optimizer goes through all the table scans and annotates them
 * with a bucketing inclusion bit-set.
 */
public class FixedBucketPruningOptimizer extends Transform {

  private static final Log LOG = LogFactory
      .getLog(FixedBucketPruningOptimizer.class.getName());

  private final boolean compat;

  public FixedBucketPruningOptimizer(boolean compat) {
    this.compat = compat;
  }

  public class NoopWalker implements NodeProcessor {
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      // do nothing
      return null;
    }
  }
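  /*
   * Example of the intended effect (a sketch, assuming the standard Hive bucket
   * hash of (hashCode & Integer.MAX_VALUE) % numBuckets): for a table declared as
   *
   *   CREATE TABLE t (userid INT) CLUSTERED BY (userid) INTO 32 BUCKETS;
   *
   * a predicate WHERE userid = 42 can only match rows in the single bucket file
   * numbered (hash(42) & Integer.MAX_VALUE) % 32, so the table scan may skip the
   * other 31 bucket files entirely.
   */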
  public class FixedBucketPartitionWalker extends FilterPruner {

    @Override
    protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
        TableScanOperator top) throws SemanticException, UDFArgumentException {
      FixedBucketPruningOptimizerCtxt ctxt = ((FixedBucketPruningOptimizerCtxt) procCtx);
      Table tbl = top.getConf().getTableMetadata();
      if (tbl.getNumBuckets() > 0) {
        final int nbuckets = tbl.getNumBuckets();
        ctxt.setNumBuckets(nbuckets);
        ctxt.setBucketCols(tbl.getBucketCols());
        ctxt.setSchema(tbl.getFields());
        if (tbl.isPartitioned()) {
          // Run partition pruner to get partitions
          ParseContext parseCtx = ctxt.pctx;
          PrunedPartitionList prunedPartList;
          try {
            String alias = (String) parseCtx.getTopOps().keySet().toArray()[0];
            prunedPartList = PartitionPruner.prune(top, parseCtx, alias);
          } catch (HiveException e) {
            throw new SemanticException(e.getMessage(), e);
          }
          if (prunedPartList != null) {
            ctxt.setPartitions(prunedPartList);
            for (Partition p : prunedPartList.getPartitions()) {
              if (nbuckets != p.getBucketCount()) {
                // disable feature
                ctxt.setNumBuckets(-1);
                break;
              }
            }
          }
        }
      }
    }
  }

  public static class BucketBitsetGenerator extends FilterPruner {

    @Override
    protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
        TableScanOperator top) throws SemanticException, UDFArgumentException {
      FixedBucketPruningOptimizerCtxt ctxt = ((FixedBucketPruningOptimizerCtxt) procCtx);
      if (ctxt.getNumBuckets() <= 0 || ctxt.getBucketCols().size() != 1) {
        // bucketing isn't consistent, or there is more than one bucket column;
        // the optimizer does not extract multi-column predicates for this
        return;
      }
      ExprNodeGenericFuncDesc filter = top.getConf().getFilterExpr();
      if (filter == null) {
        return;
      }
      // the sargs are closely tied to hive.optimize.index.filter
      SearchArgument sarg = ConvertAstToSearchArg.create(ctxt.pctx.getConf(), filter);
      if (sarg == null) {
        return;
      }
      final String bucketCol = ctxt.getBucketCols().get(0);
      StructField bucketField = null;
      for (StructField fs : ctxt.getSchema()) {
        if (fs.getFieldName().equals(bucketCol)) {
          bucketField = fs;
        }
      }
      Preconditions.checkArgument(bucketField != null);
      List<Object> literals = new ArrayList<Object>();
      List<PredicateLeaf> leaves = sarg.getLeaves();
      Set<PredicateLeaf> bucketLeaves = new HashSet<PredicateLeaf>();
      for (PredicateLeaf l : leaves) {
        if (bucketCol.equals(l.getColumnName())) {
          switch (l.getOperator()) {
          case EQUALS:
          case IN:
            // supported
            break;
          case IS_NULL:
            // TODO: (a = 1) and NOT (a is NULL) can be potentially folded earlier into a NO-OP
            // fall through
          case BETWEEN:
            // TODO: for ordinal types you can produce a range (BETWEEN 1444442100 1444442107)
            // fall through
          default:
            // cannot optimize any others
            return;
          }
          bucketLeaves.add(l);
        }
      }
      if (bucketLeaves.size() == 0) {
        return;
      }
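      /*
       * Illustrative note (a sketch, not from the original source): for a filter
       * such as
       *   WHERE userid IN (1, 2) AND ds = '2016-01-01'
       * only the userid leaf lands in bucketLeaves. EQUALS and IN are the only
       * operators kept, because they are the only ones whose exact literal set
       * can be recovered for bucket hashing.
       */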
      // TODO: Add support for AND clauses under OR clauses
      // first-cut takes a known minimal tree and no others.
      // $expr = (a=1)
      //         (a=1 or a=2)
      //         (a in (1,2))
      //         ($expr and *)
      //         (* and $expr)
      ExpressionTree expr = sarg.getExpression();
      if (expr.getOperator() == Operator.LEAF) {
        PredicateLeaf l = leaves.get(expr.getLeaf());
        if (!addLiteral(literals, l)) {
          return;
        }
      } else if (expr.getOperator() == Operator.AND) {
        boolean found = false;
        for (ExpressionTree subExpr : expr.getChildren()) {
          if (subExpr.getOperator() != Operator.LEAF) {
            return;
          }
          // one of the branches is definitely a bucket-leaf
          PredicateLeaf l = leaves.get(subExpr.getLeaf());
          if (bucketLeaves.contains(l)) {
            if (!addLiteral(literals, l)) {
              return;
            }
            found = true;
          }
        }
        if (!found) {
          return;
        }
      } else if (expr.getOperator() == Operator.OR) {
        for (ExpressionTree subExpr : expr.getChildren()) {
          if (subExpr.getOperator() != Operator.LEAF) {
            return;
          }
          PredicateLeaf l = leaves.get(subExpr.getLeaf());
          if (bucketLeaves.contains(l)) {
            if (!addLiteral(literals, l)) {
              return;
            }
          } else {
            // all of the OR branches need to be bucket-leaves
            return;
          }
        }
      }
      // invariant: bucket-col IN literals of type bucketField
      BitSet bs = new BitSet(ctxt.getNumBuckets());
      bs.clear();
      PrimitiveObjectInspector bucketOI =
          (PrimitiveObjectInspector) bucketField.getFieldObjectInspector();
      PrimitiveObjectInspector constOI = PrimitiveObjectInspectorFactory
          .getPrimitiveWritableObjectInspector(bucketOI.getPrimitiveCategory());
      for (Object literal : literals) {
        PrimitiveObjectInspector origOI = PrimitiveObjectInspectorFactory
            .getPrimitiveObjectInspectorFromClass(literal.getClass());
        Converter conv = ObjectInspectorConverters.getConverter(origOI, constOI);
        // exact type conversion or get out
        if (conv == null) {
          return;
        }
        Object[] convCols = new Object[] {conv.convert(literal)};
        int n = ObjectInspectorUtils.getBucketNumber(convCols,
            new ObjectInspector[] {constOI}, ctxt.getNumBuckets());
        bs.set(n);
        if (ctxt.isCompat()) {
          int h = ObjectInspectorUtils.getBucketHashCode(convCols,
              new ObjectInspector[] {constOI});
          // negative hashcodes were converted to positive in different ways in the past;
          // abs() is now obsolete and all inserts now use & Integer.MAX_VALUE.
          // Compat mode assumes that old data could've been loaded using the other conversion.
          n = ObjectInspectorUtils.getBucketNumber(Math.abs(h), ctxt.getNumBuckets());
          bs.set(n);
        }
      }
      if (bs.cardinality() < ctxt.getNumBuckets()) {
        // there is a valid bucket pruning filter
        top.getConf().setIncludedBuckets(bs);
        top.getConf().setNumBuckets(ctxt.getNumBuckets());
      }
    }

    private boolean addLiteral(List<Object> literals, PredicateLeaf leaf) {
      switch (leaf.getOperator()) {
      case EQUALS:
        return literals.add(leaf.getLiteral());
      case IN:
        return literals.addAll(leaf.getLiteralList());
      default:
        return false;
      }
    }
  }
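  /*
   * Worked example for the compat path above (a sketch; arithmetic checked by hand):
   * Math.abs() and (& Integer.MAX_VALUE) disagree on negative hash codes.
   * With hash code h = -7 and 32 buckets:
   *   Math.abs(-7) % 32             == 7
   *   (-7 & Integer.MAX_VALUE) % 32 == 2147483641 % 32 == 25
   * Rows written by an old abs()-based writer may therefore sit in a different
   * bucket file than the current scheme predicts, so compat mode sets both
   * candidate bits. Note also that Math.abs(Integer.MIN_VALUE) is itself
   * negative, a well-known quirk of Java's Math.abs.
   */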
  public final class FixedBucketPruningOptimizerCtxt implements NodeProcessorCtx {

    public final ParseContext pctx;

    private final boolean compat;
    private int numBuckets;
    private PrunedPartitionList partitions;
    private List<String> bucketCols;
    private List<StructField> schema;

    public FixedBucketPruningOptimizerCtxt(boolean compat, ParseContext pctx) {
      this.compat = compat;
      this.pctx = pctx;
    }

    public void setSchema(ArrayList<StructField> fields) {
      this.schema = fields;
    }

    public List<StructField> getSchema() {
      return this.schema;
    }

    public void setBucketCols(List<String> bucketCols) {
      this.bucketCols = bucketCols;
    }

    public List<String> getBucketCols() {
      return this.bucketCols;
    }

    public void setPartitions(PrunedPartitionList partitions) {
      this.partitions = partitions;
    }

    public PrunedPartitionList getPartitions() {
      return this.partitions;
    }

    public int getNumBuckets() {
      return numBuckets;
    }

    public void setNumBuckets(int numBuckets) {
      this.numBuckets = numBuckets;
    }

    // compatibility mode enabled
    public boolean isCompat() {
      return this.compat;
    }
  }

  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    // create the context for walking operators
    FixedBucketPruningOptimizerCtxt opPartWalkerCtx =
        new FixedBucketPruningOptimizerCtxt(compat, pctx);
    // Retrieve all partitions generated from partition pruner and partition
    // column pruner
    PrunerUtils.walkOperatorTree(pctx, opPartWalkerCtx,
        new FixedBucketPartitionWalker(), new NoopWalker());
    if (opPartWalkerCtx.getNumBuckets() < 0) {
      // bail out
      return pctx;
    } else {
      // walk operator tree to create expression tree for filter buckets
      PrunerUtils.walkOperatorTree(pctx, opPartWalkerCtx,
          new BucketBitsetGenerator(), new NoopWalker());
    }
    return pctx;
  }
}
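// Usage note (an assumption about the surrounding Hive configuration, not stated in
// this file): this transform appears to be enabled through the Tez bucket-pruning
// settings (hive.tez.bucket.pruning, with hive.tez.bucket.pruning.compat feeding the
// compat flag), and it can only take effect when hive.optimize.index.filter is on,
// since that is what makes SearchArguments available for the filter expressions above.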