/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree;
import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree.Operator;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.PrunerOperatorFactory.FilterPruner;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import com.google.common.base.Preconditions;
/**
 * Fixed bucket pruning optimizer: walks all the table scans and annotates
 * them with a bucket-inclusion bit-set, so that the storage layer can skip
 * buckets whose contents cannot match the filter predicate.
 */
public class FixedBucketPruningOptimizer extends Transform {
private static final Log LOG = LogFactory
.getLog(FixedBucketPruningOptimizer.class.getName());
private final boolean compat;
public FixedBucketPruningOptimizer(boolean compat) {
this.compat = compat;
}
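  // Worked illustration (hypothetical table, for exposition only): given
  //   CREATE TABLE t (id int, ...) CLUSTERED BY (id) INTO 32 BUCKETS
  // the filter "WHERE id = 7" hashes the literal 7 to a single bucket, so
  // the annotated table scan can skip the other 31 bucket files. In compat
  // mode the abs()-hashed bucket is also kept, so at most 2 of 32 survive.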
public class NoopWalker implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
// do nothing
return null;
}
}
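  /**
   * First pass: records the table's bucket count, bucket columns and schema
   * in the walker context, and disables the optimization (numBuckets = -1)
   * if any surviving partition was written with a different bucket count.
   */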
public class FixedBucketPartitionWalker extends FilterPruner {
@Override
protected void generatePredicate(NodeProcessorCtx procCtx,
FilterOperator fop, TableScanOperator top) throws SemanticException,
UDFArgumentException {
FixedBucketPruningOptimizerCtxt ctxt = ((FixedBucketPruningOptimizerCtxt) procCtx);
Table tbl = top.getConf().getTableMetadata();
if (tbl.getNumBuckets() > 0) {
final int nbuckets = tbl.getNumBuckets();
ctxt.setNumBuckets(nbuckets);
ctxt.setBucketCols(tbl.getBucketCols());
ctxt.setSchema(tbl.getFields());
if (tbl.isPartitioned()) {
// Run partition pruner to get partitions
ParseContext parseCtx = ctxt.pctx;
PrunedPartitionList prunedPartList;
try {
            // NOTE: this assumes a single top-level table scan; with
            // multiple aliases an arbitrary one is picked from the map
            String alias = (String) parseCtx.getTopOps().keySet().toArray()[0];
prunedPartList = PartitionPruner.prune(top, parseCtx, alias);
} catch (HiveException e) {
throw new SemanticException(e.getMessage(), e);
}
if (prunedPartList != null) {
ctxt.setPartitions(prunedPartList);
for (Partition p : prunedPartList.getPartitions()) {
              if (nbuckets != p.getBucketCount()) {
                // the bucket count is not uniform across partitions, so
                // pruning cannot be applied safely; disable the feature
                ctxt.setNumBuckets(-1);
break;
}
}
}
}
}
}
}
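  /**
   * Second pass: converts the table scan's filter into a SearchArgument,
   * extracts the literals compared against the single bucket column, hashes
   * each literal to its bucket, and annotates the scan with the resulting
   * inclusion bit-set.
   */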
public static class BucketBitsetGenerator extends FilterPruner {
@Override
protected void generatePredicate(NodeProcessorCtx procCtx,
FilterOperator fop, TableScanOperator top) throws SemanticException,
UDFArgumentException {
FixedBucketPruningOptimizerCtxt ctxt = ((FixedBucketPruningOptimizerCtxt) procCtx);
      if (ctxt.getNumBuckets() <= 0 || ctxt.getBucketCols().size() != 1) {
        // bucketing isn't consistent, or there is more than one bucket
        // column; this optimizer does not extract multi-column predicates
        return;
}
ExprNodeGenericFuncDesc filter = top.getConf().getFilterExpr();
if (filter == null) {
return;
}
// the sargs are closely tied to hive.optimize.index.filter
SearchArgument sarg = ConvertAstToSearchArg.create(ctxt.pctx.getConf(), filter);
if (sarg == null) {
return;
}
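      // Rough sketch (illustrative, not the exact SARG syntax): a filter
      // such as (id = 7 AND ds = '2015-10-10') yields two predicate leaves,
      // (EQUALS id 7) and (EQUALS ds '2015-10-10'), joined by an AND node
      // in the expression tree examined below.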
final String bucketCol = ctxt.getBucketCols().get(0);
StructField bucketField = null;
      for (StructField fs : ctxt.getSchema()) {
        if (fs.getFieldName().equals(bucketCol)) {
          bucketField = fs;
          break;
        }
      }
Preconditions.checkArgument(bucketField != null);
List<Object> literals = new ArrayList<Object>();
List<PredicateLeaf> leaves = sarg.getLeaves();
Set<PredicateLeaf> bucketLeaves = new HashSet<PredicateLeaf>();
for (PredicateLeaf l : leaves) {
if (bucketCol.equals(l.getColumnName())) {
switch (l.getOperator()) {
case EQUALS:
case IN:
// supported
break;
case IS_NULL:
            // TODO: (a = 1) AND NOT (a IS NULL) can potentially be folded
            // into a no-op earlier
// fall through
case BETWEEN:
// TODO: for ordinal types you can produce a range (BETWEEN 1444442100 1444442107)
// fall through
default:
// cannot optimize any others
return;
}
bucketLeaves.add(l);
}
}
      if (bucketLeaves.isEmpty()) {
return;
}
      // TODO: Add support for AND clauses under OR clauses
      // The first cut only accepts a known minimal set of tree shapes:
      //   $expr = (a = 1)
      //           (a = 1 or a = 2)
      //           (a in (1, 2))
      //           ($expr and *)
      //           (* and $expr)
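      // Classification sketch (illustrative column names):
      //   (bucket_col = 1 and other_col < 10) -> literals {1}, prune
      //   (bucket_col = 1 or bucket_col = 2)  -> literals {1, 2}, prune
      //   (bucket_col = 1 or other_col < 10)  -> mixed OR branches, give up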
ExpressionTree expr = sarg.getExpression();
if (expr.getOperator() == Operator.LEAF) {
PredicateLeaf l = leaves.get(expr.getLeaf());
if (!addLiteral(literals, l)) {
return;
}
} else if (expr.getOperator() == Operator.AND) {
boolean found = false;
for (ExpressionTree subExpr : expr.getChildren()) {
if (subExpr.getOperator() != Operator.LEAF) {
return;
}
          // at least one of the AND branches must be a bucket-leaf
PredicateLeaf l = leaves.get(subExpr.getLeaf());
if (bucketLeaves.contains(l)) {
if (!addLiteral(literals, l)) {
return;
}
found = true;
}
}
if (!found) {
return;
}
} else if (expr.getOperator() == Operator.OR) {
for (ExpressionTree subExpr : expr.getChildren()) {
if (subExpr.getOperator() != Operator.LEAF) {
return;
}
PredicateLeaf l = leaves.get(subExpr.getLeaf());
if (bucketLeaves.contains(l)) {
if (!addLiteral(literals, l)) {
return;
}
} else {
// all of the OR branches need to be bucket-leaves
return;
}
}
      } else {
        // any other top-level operator (e.g. NOT) is unsupported; bail out
        // rather than annotate the scan with an empty bit-set, which would
        // wrongly prune every bucket
        return;
      }
// invariant: bucket-col IN literals of type bucketField
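      // Bucket assignment follows the usual Hive convention,
      //   bucket = (hashCode(value) & Integer.MAX_VALUE) % numBuckets
      // e.g. a literal hashing to 103 lands in bucket 7 of 32.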
      BitSet bs = new BitSet(ctxt.getNumBuckets()); // all bits start clear
      PrimitiveObjectInspector bucketOI =
          (PrimitiveObjectInspector) bucketField.getFieldObjectInspector();
      PrimitiveObjectInspector constOI = PrimitiveObjectInspectorFactory
          .getPrimitiveWritableObjectInspector(bucketOI.getPrimitiveCategory());
for (Object literal: literals) {
PrimitiveObjectInspector origOI = PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(literal.getClass());
Converter conv = ObjectInspectorConverters.getConverter(origOI, constOI);
        // require a usable conversion to the bucket column's type, or give up
        if (conv == null) {
          return;
        }
        Object[] convCols = new Object[] {conv.convert(literal)};
int n = ObjectInspectorUtils.getBucketNumber(convCols, new ObjectInspector[]{constOI}, ctxt.getNumBuckets());
bs.set(n);
if (ctxt.isCompat()) {
int h = ObjectInspectorUtils.getBucketHashCode(convCols, new ObjectInspector[]{constOI});
          // negative hash codes were historically converted to positive
          // bucket numbers in two different ways; abs() is now obsolete and
          // all inserts use (hash & Integer.MAX_VALUE), but compat mode
          // assumes old data may have been loaded with the abs() conversion
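          // Illustrative arithmetic with 32 buckets, for h = -103:
          //   (h & Integer.MAX_VALUE) % 32 = 2147483545 % 32 = 25  (current)
          //   Math.abs(h) % 32             = 103 % 32        = 7   (legacy)
          // so both bit 25 and bit 7 must be kept.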
n = ObjectInspectorUtils.getBucketNumber(Math.abs(h), ctxt.getNumBuckets());
bs.set(n);
}
}
      if (bs.cardinality() < ctxt.getNumBuckets()) {
        // at least one bucket is excluded: record the pruning bit-set
top.getConf().setIncludedBuckets(bs);
top.getConf().setNumBuckets(ctxt.getNumBuckets());
}
}
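    /**
     * Collects the literal(s) from an EQUALS or IN leaf into the literal
     * list; e.g. a leaf for {@code id = 7} contributes 7 and a leaf for
     * {@code id IN (1, 2)} contributes both values. Any other operator
     * returns false, which makes the caller give up on pruning.
     */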
private boolean addLiteral(List<Object> literals, PredicateLeaf leaf) {
switch (leaf.getOperator()) {
case EQUALS:
return literals.add(leaf.getLiteral());
case IN:
return literals.addAll(leaf.getLiteralList());
default:
return false;
}
}
}
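  /**
   * Shared state for the two operator-tree walks: the table's bucket count
   * and bucket columns, its schema, the pruned partition list, and whether
   * hash-compatibility mode is enabled.
   */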
public final class FixedBucketPruningOptimizerCtxt implements
NodeProcessorCtx {
public final ParseContext pctx;
private final boolean compat;
private int numBuckets;
private PrunedPartitionList partitions;
private List<String> bucketCols;
private List<StructField> schema;
public FixedBucketPruningOptimizerCtxt(boolean compat, ParseContext pctx) {
this.compat = compat;
this.pctx = pctx;
}
    public void setSchema(List<StructField> fields) {
this.schema = fields;
}
public List<StructField> getSchema() {
return this.schema;
}
public void setBucketCols(List<String> bucketCols) {
this.bucketCols = bucketCols;
}
public List<String> getBucketCols() {
return this.bucketCols;
}
public void setPartitions(PrunedPartitionList partitions) {
this.partitions = partitions;
}
public PrunedPartitionList getPartitions() {
return this.partitions;
}
public int getNumBuckets() {
return numBuckets;
}
public void setNumBuckets(int numBuckets) {
this.numBuckets = numBuckets;
}
    // whether hash-compatibility mode is enabled
public boolean isCompat() {
return this.compat;
}
}
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    // create the context for walking operators
FixedBucketPruningOptimizerCtxt opPartWalkerCtx = new FixedBucketPruningOptimizerCtxt(compat,
pctx);
    // first pass: collect bucket metadata and the partitions produced by
    // the partition pruner and the partition column pruner
PrunerUtils.walkOperatorTree(pctx, opPartWalkerCtx,
new FixedBucketPartitionWalker(), new NoopWalker());
    if (opPartWalkerCtx.getNumBuckets() < 0) {
      // bucketing is absent or inconsistent; leave the plan untouched
return pctx;
} else {
      // second pass: walk the operator tree and annotate each table scan
      // with the bucket inclusion bit-set derived from its filter
PrunerUtils.walkOperatorTree(pctx, opPartWalkerCtx,
new BucketBitsetGenerator(), new NoopWalker());
}
return pctx;
}
}