/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * <p/> * http://www.apache.org/licenses/LICENSE-2.0 * <p/> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.drill.exec.store.parquet; import com.google.common.collect.Sets; import org.apache.drill.common.expression.ErrorCollector; import org.apache.drill.common.expression.ErrorCollectorImpl; import org.apache.drill.common.expression.LogicalExpression; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.common.expression.visitors.AbstractExprVisitor; import org.apache.drill.exec.compile.sig.ConstantExpressionIdentifier; import org.apache.drill.exec.expr.ExpressionTreeMaterializer; import org.apache.drill.exec.expr.fn.FunctionImplementationRegistry; import org.apache.drill.exec.expr.stat.ParquetFilterPredicate; import org.apache.drill.exec.expr.stat.RangeExprEvaluator; import org.apache.drill.exec.ops.FragmentContext; import org.apache.drill.exec.ops.UdfUtilities; import org.apache.drill.exec.server.options.OptionManager; import org.apache.drill.exec.store.parquet.stat.ColumnStatCollector; import org.apache.drill.exec.store.parquet.stat.ColumnStatistics; import org.apache.drill.exec.store.parquet.stat.ParquetFooterStatCollector; import org.apache.parquet.hadoop.metadata.ParquetMetadata; import java.util.HashMap; import java.util.Map; import java.util.Set; public class ParquetRGFilterEvaluator { static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ParquetRGFilterEvaluator.class); public static boolean evalFilter(LogicalExpression expr, ParquetMetadata footer, int rowGroupIndex, OptionManager options, FragmentContext fragmentContext) { final HashMap<String, String> emptyMap = new HashMap<String, String>(); return evalFilter(expr, footer, rowGroupIndex, options, fragmentContext, emptyMap); } public static boolean evalFilter(LogicalExpression expr, ParquetMetadata footer, int rowGroupIndex, OptionManager options, FragmentContext fragmentContext, Map<String, String> implicitColValues) { // figure out the set of columns referenced in expression. final Set<SchemaPath> schemaPathsInExpr = expr.accept(new FieldReferenceFinder(), null); final ColumnStatCollector columnStatCollector = new ParquetFooterStatCollector(footer, rowGroupIndex, implicitColValues,true, options); Map<SchemaPath, ColumnStatistics> columnStatisticsMap = columnStatCollector.collectColStat(schemaPathsInExpr); boolean canDrop = canDrop(expr, columnStatisticsMap, footer.getBlocks().get(rowGroupIndex).getRowCount(), fragmentContext, fragmentContext.getFunctionRegistry()); return canDrop; } public static boolean canDrop(ParquetFilterPredicate parquetPredicate, Map<SchemaPath, ColumnStatistics> columnStatisticsMap, long rowCount) { boolean canDrop = false; if (parquetPredicate != null) { RangeExprEvaluator rangeExprEvaluator = new RangeExprEvaluator(columnStatisticsMap, rowCount); canDrop = parquetPredicate.canDrop(rangeExprEvaluator); } return canDrop; } public static boolean canDrop(LogicalExpression expr, Map<SchemaPath, ColumnStatistics> columnStatisticsMap, long rowCount, UdfUtilities udfUtilities, FunctionImplementationRegistry functionImplementationRegistry) { ErrorCollector errorCollector = new ErrorCollectorImpl(); LogicalExpression materializedFilter = ExpressionTreeMaterializer.materializeFilterExpr( expr, columnStatisticsMap, errorCollector, functionImplementationRegistry); if (errorCollector.hasErrors()) { logger.error("{} error(s) encountered when materialize filter expression : {}", errorCollector.getErrorCount(), errorCollector.toErrorString()); return false; } Set<LogicalExpression> constantBoundaries = ConstantExpressionIdentifier.getConstantExpressionSet(materializedFilter); ParquetFilterPredicate parquetPredicate = (ParquetFilterPredicate) ParquetFilterBuilder.buildParquetFilterPredicate( materializedFilter, constantBoundaries, udfUtilities); return canDrop(parquetPredicate, columnStatisticsMap, rowCount); } /** * Search through a LogicalExpression, finding all internal schema path references and returning them in a set. */ public static class FieldReferenceFinder extends AbstractExprVisitor<Set<SchemaPath>, Void, RuntimeException> { @Override public Set<SchemaPath> visitSchemaPath(SchemaPath path, Void value) { Set<SchemaPath> set = Sets.newHashSet(); set.add(path); return set; } @Override public Set<SchemaPath> visitUnknown(LogicalExpression e, Void value) { Set<SchemaPath> paths = Sets.newHashSet(); for (LogicalExpression ex : e) { paths.addAll(ex.accept(this, null)); } return paths; } } }