/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.drill.exec.store.parquet; import com.google.common.collect.ImmutableSet; import org.apache.drill.common.expression.BooleanOperator; import org.apache.drill.common.expression.FunctionHolderExpression; import org.apache.drill.common.expression.LogicalExpression; import org.apache.drill.common.expression.PathSegment; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.common.expression.ValueExpressions; import org.apache.drill.common.expression.fn.CastFunctions; import org.apache.drill.common.expression.fn.FuncHolder; import org.apache.drill.common.expression.visitors.AbstractExprVisitor; import org.apache.drill.common.types.TypeProtos; import org.apache.drill.exec.expr.fn.DrillSimpleFuncHolder; import org.apache.drill.exec.expr.fn.FunctionGenerationHelper; import org.apache.drill.exec.expr.fn.interpreter.InterpreterEvaluator; import org.apache.drill.exec.expr.holders.BigIntHolder; import org.apache.drill.exec.expr.holders.DateHolder; import org.apache.drill.exec.expr.holders.Float4Holder; import org.apache.drill.exec.expr.holders.Float8Holder; import org.apache.drill.exec.expr.holders.IntHolder; import org.apache.drill.exec.expr.holders.TimeHolder; import org.apache.drill.exec.expr.holders.TimeStampHolder; import org.apache.drill.exec.expr.holders.ValueHolder; import org.apache.drill.exec.expr.stat.ParquetPredicates; import org.apache.drill.exec.expr.stat.TypedFieldExpr; import org.apache.drill.exec.ops.UdfUtilities; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; import java.util.Set; /** * A visitor which visits a materialized logical expression, and build ParquetFilterPredicate * If a visitXXX method returns null, that means the corresponding filter branch is not qualified for pushdown. */ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression, Set<LogicalExpression>, RuntimeException> { static final Logger logger = LoggerFactory.getLogger(ParquetFilterBuilder.class); private final UdfUtilities udfUtilities; /** * @param expr materialized filter expression * @param constantBoundaries set of constant expressions * @param udfUtilities */ public static LogicalExpression buildParquetFilterPredicate(LogicalExpression expr, final Set<LogicalExpression> constantBoundaries, UdfUtilities udfUtilities) { final LogicalExpression predicate = expr.accept(new ParquetFilterBuilder(udfUtilities), constantBoundaries); return predicate; } private ParquetFilterBuilder(UdfUtilities udfUtilities) { this.udfUtilities = udfUtilities; } @Override public LogicalExpression visitUnknown(LogicalExpression e, Set<LogicalExpression> value) { if (e instanceof TypedFieldExpr && ! containsArraySeg(((TypedFieldExpr) e).getPath()) && e.getMajorType().getMode() != TypeProtos.DataMode.REPEATED) { // A filter is not qualified for push down, if // 1. it contains an array segment : a.b[1], a.b[1].c.d // 2. it's repeated type. return e; } return null; } @Override public LogicalExpression visitIntConstant(ValueExpressions.IntExpression intExpr, Set<LogicalExpression> value) throws RuntimeException { return intExpr; } @Override public LogicalExpression visitDoubleConstant(ValueExpressions.DoubleExpression dExpr, Set<LogicalExpression> value) throws RuntimeException { return dExpr; } @Override public LogicalExpression visitFloatConstant(ValueExpressions.FloatExpression fExpr, Set<LogicalExpression> value) throws RuntimeException { return fExpr; } @Override public LogicalExpression visitLongConstant(ValueExpressions.LongExpression intExpr, Set<LogicalExpression> value) throws RuntimeException { return intExpr; } @Override public LogicalExpression visitDateConstant(ValueExpressions.DateExpression dateExpr, Set<LogicalExpression> value) throws RuntimeException { return dateExpr; } @Override public LogicalExpression visitTimeStampConstant(ValueExpressions.TimeStampExpression tsExpr, Set<LogicalExpression> value) throws RuntimeException { return tsExpr; } @Override public LogicalExpression visitTimeConstant(ValueExpressions.TimeExpression timeExpr, Set<LogicalExpression> value) throws RuntimeException { return timeExpr; } @Override public LogicalExpression visitBooleanOperator(BooleanOperator op, Set<LogicalExpression> value) { List<LogicalExpression> childPredicates = new ArrayList<>(); String functionName = op.getName(); for (LogicalExpression arg : op.args) { LogicalExpression childPredicate = arg.accept(this, value); if (childPredicate == null) { if (functionName.equals("booleanOr")) { // we can't include any leg of the OR if any of the predicates cannot be converted return null; } } else { childPredicates.add(childPredicate); } } if (childPredicates.size() == 0) { return null; // none leg is qualified, return null. } else if (childPredicates.size() == 1) { return childPredicates.get(0); // only one leg is qualified, remove boolean op. } else { if (functionName.equals("booleanOr")) { return new ParquetPredicates.OrPredicate(op.getName(), childPredicates, op.getPosition()); } else { return new ParquetPredicates.AndPredicate(op.getName(), childPredicates, op.getPosition()); } } } private boolean containsArraySeg(final SchemaPath schemaPath) { PathSegment seg = schemaPath.getRootSegment(); while (seg != null) { if (seg.isArray()) { return true; } seg = seg.getChild(); } return false; } private LogicalExpression getValueExpressionFromConst(ValueHolder holder, TypeProtos.MinorType type) { switch (type) { case INT: return ValueExpressions.getInt(((IntHolder) holder).value); case BIGINT: return ValueExpressions.getBigInt(((BigIntHolder) holder).value); case FLOAT4: return ValueExpressions.getFloat4(((Float4Holder) holder).value); case FLOAT8: return ValueExpressions.getFloat8(((Float8Holder) holder).value); case DATE: return ValueExpressions.getDate(((DateHolder) holder).value); case TIMESTAMP: return ValueExpressions.getTimeStamp(((TimeStampHolder) holder).value); case TIME: return ValueExpressions.getTime(((TimeHolder) holder).value); default: return null; } } @Override public LogicalExpression visitFunctionHolderExpression(FunctionHolderExpression funcHolderExpr, Set<LogicalExpression> value) throws RuntimeException { FuncHolder holder = funcHolderExpr.getHolder(); if (! (holder instanceof DrillSimpleFuncHolder)) { return null; } if (value.contains(funcHolderExpr)) { ValueHolder result ; try { result = InterpreterEvaluator.evaluateConstantExpr(udfUtilities, funcHolderExpr); } catch (Exception e) { logger.warn("Error in evaluating function of {}", funcHolderExpr.getName()); return null; } logger.debug("Reduce a constant function expression into a value expression"); return getValueExpressionFromConst(result, funcHolderExpr.getMajorType().getMinorType()); } final String funcName = ((DrillSimpleFuncHolder) holder).getRegisteredNames()[0]; if (isCompareFunction(funcName)) { return handleCompareFunction(funcHolderExpr, value); } if (CastFunctions.isCastFunction(funcName)) { List<LogicalExpression> newArgs = new ArrayList(); for (LogicalExpression arg : funcHolderExpr.args) { final LogicalExpression newArg = arg.accept(this, value); if (newArg == null) { return null; } newArgs.add(newArg); } return funcHolderExpr.copy(newArgs); } else { return null; } } private LogicalExpression handleCompareFunction(FunctionHolderExpression functionHolderExpression, Set<LogicalExpression> value) { List<LogicalExpression> newArgs = new ArrayList(); for (LogicalExpression arg : functionHolderExpression.args) { LogicalExpression newArg = arg.accept(this, value); if (newArg == null) { return null; } newArgs.add(newArg); } String funcName = ((DrillSimpleFuncHolder) functionHolderExpression.getHolder()).getRegisteredNames()[0]; switch (funcName) { case FunctionGenerationHelper.EQ : return new ParquetPredicates.EqualPredicate(newArgs.get(0), newArgs.get(1)); case FunctionGenerationHelper.GT : return new ParquetPredicates.GTPredicate(newArgs.get(0), newArgs.get(1)); case FunctionGenerationHelper.GE : return new ParquetPredicates.GEPredicate(newArgs.get(0), newArgs.get(1)); case FunctionGenerationHelper.LT : return new ParquetPredicates.LTPredicate(newArgs.get(0), newArgs.get(1)); case FunctionGenerationHelper.LE : return new ParquetPredicates.LEPredicate(newArgs.get(0), newArgs.get(1)); case FunctionGenerationHelper.NE : return new ParquetPredicates.NEPredicate(newArgs.get(0), newArgs.get(1)); default: return null; } } private LogicalExpression handleCastFunction(FunctionHolderExpression functionHolderExpression, Set<LogicalExpression> value) { for (LogicalExpression arg : functionHolderExpression.args) { LogicalExpression newArg = arg.accept(this, value); if (newArg == null) { return null; } } String funcName = ((DrillSimpleFuncHolder) functionHolderExpression.getHolder()).getRegisteredNames()[0]; return null; } private static boolean isCompareFunction(String funcName) { return COMPARE_FUNCTIONS_SET.contains(funcName); } private static final ImmutableSet<String> COMPARE_FUNCTIONS_SET; static { ImmutableSet.Builder<String> builder = ImmutableSet.builder(); COMPARE_FUNCTIONS_SET = builder .add(FunctionGenerationHelper.EQ) .add(FunctionGenerationHelper.GT) .add(FunctionGenerationHelper.GE) .add(FunctionGenerationHelper.LT) .add(FunctionGenerationHelper.LE) .add(FunctionGenerationHelper.NE) .build(); } }