/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
import java.math.BigDecimal;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.metadata.RelColumnOrigin;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexShuttle;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.util.Pair;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.apache.hadoop.hive.ql.plan.ColStatistics.Range;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
/**
 * This rule simplifies the condition in Filter operators using the
 * column statistics (if available).
 *
 * <p>For instance, given the predicate {@code a > 5}, we can infer that the
 * predicate will evaluate to false if the max value for column {@code a}
 * is 4.
 *
 * <p>Currently we support the simplification of {@code =}, {@code >=},
 * {@code <=}, {@code >}, {@code <}, and {@code IN} operations whose operands
 * are a column reference (or a struct of column references, for IN) and
 * numeric literals.
 */
public class HiveReduceExpressionsWithStatsRule extends RelOptRule {

  protected static final Logger LOG = LoggerFactory.getLogger(
      HiveReduceExpressionsWithStatsRule.class);

  public static final HiveReduceExpressionsWithStatsRule INSTANCE =
      new HiveReduceExpressionsWithStatsRule();

  /** Binary comparison kinds that can be reduced against a [min, max] range. */
  private static final Set<SqlKind> COMPARISON = EnumSet.of(SqlKind.EQUALS,
      SqlKind.GREATER_THAN_OR_EQUAL,
      SqlKind.LESS_THAN_OR_EQUAL,
      SqlKind.GREATER_THAN,
      SqlKind.LESS_THAN);

  /** Matches any Filter sitting on top of any RelNode. */
  private HiveReduceExpressionsWithStatsRule() {
    super(operand(Filter.class, operand(RelNode.class, any())));
  }

  /**
   * Rewrites the matched filter's condition using column statistics and
   * registers the new filter only when the condition actually changed.
   *
   * @param call the rule call whose first relational expression is the Filter
   */
  @Override
  public void onMatch(RelOptRuleCall call) {
    final Filter filter = call.rel(0);

    final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
    final RelMetadataQuery metadataProvider = RelMetadataQuery.instance();

    // 1. Recompose filter possibly by pulling out common elements from DNF
    // expressions
    RexNode newFilterCondition = RexUtil.pullFactors(rexBuilder, filter.getCondition());

    // 2. Reduce filter with stats information
    RexReplacer replacer = new RexReplacer(filter, rexBuilder, metadataProvider);
    newFilterCondition = replacer.apply(newFilterCondition);

    // 3. Transform if we have created a new filter operator. The comparison is
    // done on the string form so that structurally identical conditions do not
    // trigger another transformation.
    if (!filter.getCondition().toString().equals(newFilterCondition.toString())) {
      Filter newFilter = filter.copy(filter.getTraitSet(), filter.getInput(), newFilterCondition);
      call.transformTo(newFilter);
    }
  }

  /**
   * Replaces expressions with their reductions. Note that we only have to
   * look for RexCall, since nothing else is reducible in the first place.
   */
  protected static class RexReplacer extends RexShuttle {
    private final Filter filterOp;
    private final RexBuilder rexBuilder;
    private final RelMetadataQuery metadataProvider;

    RexReplacer(Filter filterOp, RexBuilder rexBuilder, RelMetadataQuery metadataProvider) {
      this.filterOp = filterOp;
      this.rexBuilder = rexBuilder;
      this.metadataProvider = metadataProvider;
    }

    /**
     * Tries to reduce comparison and IN calls using column statistics; any
     * other call is traversed recursively and simplified if a child changed.
     *
     * @param call the call to visit
     * @return a boolean literal, a pruned IN call, the simplified call, or the
     *         original call when nothing could be reduced
     */
    @Override
    public RexNode visitCall(RexCall call) {
      final SqlKind callKind = call.getOperator().getKind();
      if (COMPARISON.contains(callKind)) {
        return reduceComparison(call);
      }
      if (callKind == SqlKind.IN) {
        return reduceIn(call);
      }

      // If we did not reduce, check the children nodes
      RexNode node = super.visitCall(call);
      if (node != call) {
        node = RexUtil.simplify(rexBuilder, node);
      }
      return node;
    }

    /**
     * Reduces {@code ref <op> literal} or {@code literal <op> ref}; in the
     * latter case the operator is reversed so that the reference is always
     * treated as the left-hand side.
     */
    private RexNode reduceComparison(RexCall call) {
      final RexNode left = call.operands.get(0);
      final RexNode right = call.operands.get(1);

      final RexInputRef ref;
      final RexLiteral literal;
      final SqlKind kind;
      if (left instanceof RexInputRef && right instanceof RexLiteral) {
        ref = (RexInputRef) left;
        literal = (RexLiteral) right;
        kind = call.getOperator().getKind();
      } else if (right instanceof RexInputRef && left instanceof RexLiteral) {
        ref = (RexInputRef) right;
        literal = (RexLiteral) left;
        kind = call.getOperator().getKind().reverse();
      } else {
        // Not a shape we know how to reduce
        return call;
      }

      // reduceCall returns null when stats are missing or nothing can be inferred
      final Pair<Number, Number> maxMin = extractMaxMin(ref);
      final RexNode reduced = reduceCall(literal, kind, maxMin.left, maxMin.right);
      return reduced != null ? reduced : call;
    }

    /** Dispatches an IN call to the scalar or struct reduction. */
    private RexNode reduceIn(RexCall call) {
      final List<RexNode> operands = call.getOperands();
      if (operands.isEmpty()) {
        return call;
      }
      final RexNode first = operands.get(0);
      if (first instanceof RexInputRef) {
        return reduceScalarIn(call, (RexInputRef) first);
      }
      if (first.getKind() == SqlKind.ROW && first instanceof RexCall) {
        return reduceStructIn(call, (RexCall) first);
      }
      // We cannot apply the reduction
      return call;
    }

    /**
     * Reduces {@code ref IN (v1, v2, ...)} by dropping every literal that lies
     * outside the [min, max] range of the column.
     */
    private RexNode reduceScalarIn(RexCall call, RexInputRef ref) {
      final Pair<Number, Number> maxMin = extractMaxMin(ref);
      final Number max = maxMin.left;
      final Number min = maxMin.right;
      if (max == null || min == null) {
        // No stats available
        return call;
      }

      final List<RexNode> operands = call.getOperands();
      final List<RexNode> newOperands = Lists.newArrayList();
      newOperands.add(ref);
      for (int i = 1; i < operands.size(); i++) {
        final RexNode operand = operands.get(i);
        if (!(operand instanceof RexLiteral)) {
          // Non-literal values are kept untouched
          newOperands.add(operand);
          continue;
        }
        final RexNode reduced = reduceCall((RexLiteral) operand, SqlKind.EQUALS, max, min);
        if (reduced == null) {
          // Value may match, keep it
          newOperands.add(operand);
        } else if (reduced.isAlwaysTrue()) {
          return rexBuilder.makeLiteral(true);
        }
        // Otherwise the value can never match and is dropped
      }
      if (newOperands.size() == 1) {
        // Every candidate value was discarded
        return rexBuilder.makeLiteral(false);
      }
      return rexBuilder.makeCall(HiveIn.INSTANCE, newOperands);
    }

    /**
     * Reduces {@code (ref1, ref2, ...) IN ((v11, v12, ...), ...)} by dropping
     * every tuple that contains a literal outside the range of its column.
     * Bails out (returning the original call) if the struct is not made only of
     * column references, or if a candidate is not a tuple of matching arity.
     */
    private RexNode reduceStructIn(RexCall call, RexCall struct) {
      final List<RexNode> fields = struct.getOperands();
      final List<Pair<Number, Number>> maxMinStats = Lists.newArrayList();
      for (RexNode field : fields) {
        if (!(field instanceof RexInputRef)) {
          // Cannot simplify, we bail out
          return call;
        }
        maxMinStats.add(extractMaxMin((RexInputRef) field));
      }

      // Try to reduce
      final List<RexNode> operands = call.getOperands();
      final List<RexNode> newOperands = Lists.newArrayList();
      newOperands.add(struct);
      for (int i = 1; i < operands.size(); i++) {
        final RexNode candidate = operands.get(i);
        if (!(candidate instanceof RexCall)
            || ((RexCall) candidate).getOperands().size() != fields.size()) {
          // Unexpected shape, do not risk an invalid rewrite
          return call;
        }
        final RexCall constStruct = (RexCall) candidate;
        final List<RexNode> values = constStruct.getOperands();
        boolean allTrue = true;
        boolean addOperand = true;
        // Iterate over the fields of this tuple (index j, bounded by the tuple size)
        for (int j = 0; j < values.size(); j++) {
          final RexNode value = values.get(j);
          if (!(value instanceof RexLiteral)) {
            allTrue = false;
            continue;
          }
          final Pair<Number, Number> stats = maxMinStats.get(j);
          final RexNode reduced =
              reduceCall((RexLiteral) value, SqlKind.EQUALS, stats.left, stats.right);
          if (reduced == null) {
            allTrue = false;
          } else if (reduced.isAlwaysFalse()) {
            // One field can never match, hence the whole tuple cannot match
            allTrue = false;
            addOperand = false;
            break;
          }
        }
        if (allTrue) {
          return rexBuilder.makeLiteral(true);
        }
        if (addOperand) {
          newOperands.add(constStruct);
        }
      }
      if (newOperands.size() == 1) {
        // Every candidate tuple was discarded
        return rexBuilder.makeLiteral(false);
      }
      return rexBuilder.makeCall(HiveIn.INSTANCE, newOperands);
    }

    /**
     * Looks up the range statistics of the table column that the given
     * reference originates from.
     *
     * @param ref reference to a column of the filter
     * @return pair (max, min); both components are null when the column origin
     *         cannot be determined, the origin is not a Hive table, or no
     *         up-to-date range statistics exist
     */
    private Pair<Number, Number> extractMaxMin(RexInputRef ref) {
      Number max = null;
      Number min = null;
      final RelColumnOrigin columnOrigin =
          this.metadataProvider.getColumnOrigin(filterOp, ref.getIndex());
      if (columnOrigin != null && columnOrigin.getOriginTable() instanceof RelOptHiveTable) {
        final RelOptHiveTable table = (RelOptHiveTable) columnOrigin.getOriginTable();
        final List<ColStatistics> stats =
            table.getColStat(Lists.newArrayList(columnOrigin.getOriginColumnOrdinal()));
        final ColStatistics colStats =
            (stats == null || stats.isEmpty()) ? null : stats.get(0);
        if (colStats != null && StatsSetupConst.areColumnStatsUptoDate(
            table.getHiveTableMD().getParameters(), colStats.getColumnName())) {
          final Range range = colStats.getRange();
          if (range != null) {
            max = range.maxValue;
            min = range.minValue;
          }
        }
      }
      return Pair.<Number, Number>of(max, min);
    }

    /**
     * Evaluates {@code column <kind> literal} against the [min, max] range of
     * the column.
     *
     * <p>NOTE(review): the range says nothing about NULL values in the column,
     * and this method does not consult any null count; a reduction to TRUE
     * therefore assumes the column value is not NULL. Confirm that this is
     * acceptable for all callers.
     *
     * @param literal the constant side of the comparison
     * @param kind comparison kind, with the column as left-hand side
     * @param max maximum value of the column, or null if unknown
     * @param min minimum value of the column, or null if unknown
     * @return a TRUE/FALSE literal if the outcome can be inferred, otherwise null
     */
    private RexNode reduceCall(RexLiteral literal, SqlKind kind, Number max, Number min) {
      if (max == null || min == null) {
        return null;
      }
      // Only numeric literals can be compared to the range; this also rules
      // out NULL literals, whose value is null.
      final Object rawValue = literal.getValue();
      if (!(rawValue instanceof BigDecimal)) {
        return null;
      }
      final BigDecimal value = (BigDecimal) rawValue;
      final BigDecimal maxVal = toBigDecimal(max);
      final BigDecimal minVal = toBigDecimal(min);
      if (maxVal == null || minVal == null) {
        return null;
      }

      final int minCmp = minVal.compareTo(value);
      final int maxCmp = maxVal.compareTo(value);
      switch (kind) {
      case EQUALS:
        if (minCmp > 0 || maxCmp < 0) {
          return rexBuilder.makeLiteral(false);
        }
        break;
      case GREATER_THAN:
        if (minCmp > 0) {
          return rexBuilder.makeLiteral(true);
        } else if (maxCmp <= 0) {
          return rexBuilder.makeLiteral(false);
        }
        break;
      case GREATER_THAN_OR_EQUAL:
        if (minCmp >= 0) {
          return rexBuilder.makeLiteral(true);
        } else if (maxCmp < 0) {
          return rexBuilder.makeLiteral(false);
        }
        break;
      case LESS_THAN:
        if (minCmp >= 0) {
          return rexBuilder.makeLiteral(false);
        } else if (maxCmp < 0) {
          return rexBuilder.makeLiteral(true);
        }
        break;
      case LESS_THAN_OR_EQUAL:
        if (minCmp > 0) {
          return rexBuilder.makeLiteral(false);
        } else if (maxCmp <= 0) {
          return rexBuilder.makeLiteral(true);
        }
        break;
      default:
        break;
      }
      return null;
    }

    /**
     * Converts a statistics value to BigDecimal without narrowing it to float,
     * which would round large integers and distort decimal fractions.
     *
     * @return the converted value, or null if it has no decimal representation
     *         (e.g. NaN or infinity)
     */
    private static BigDecimal toBigDecimal(Number value) {
      if (value instanceof BigDecimal) {
        return (BigDecimal) value;
      }
      if (value instanceof Long || value instanceof Integer
          || value instanceof Short || value instanceof Byte) {
        return BigDecimal.valueOf(value.longValue());
      }
      try {
        // The canonical string form keeps e.g. the double 0.1 equal to the
        // decimal literal 0.1
        return new BigDecimal(value.toString());
      } catch (NumberFormatException e) {
        return null;
      }
    }
  }
}