/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.expr.stat;
import org.apache.drill.common.expression.BooleanOperator;
import org.apache.drill.common.expression.ExpressionPosition;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.LogicalExpressionBase;
import org.apache.drill.common.expression.visitors.ExprVisitor;
import org.apache.parquet.column.statistics.Statistics;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public abstract class ParquetPredicates {
/**
 * Base class for binary comparison predicates that can be evaluated against statistics.
 * It holds the two operands and exposes them, left first, through {@link #iterator()}.
 */
public static abstract class ParquetCompPredicate extends LogicalExpressionBase implements ParquetFilterPredicate {
  /** Left-hand operand of the comparison. */
  protected final LogicalExpression left;
  /** Right-hand operand of the comparison. */
  protected final LogicalExpression right;

  /**
   * Creates a comparison over two operands. The position handed to the superclass
   * is taken from the left operand.
   *
   * @param left  left-hand operand
   * @param right right-hand operand
   */
  public ParquetCompPredicate(LogicalExpression left, LogicalExpression right) {
    super(left.getPosition());
    this.left = left;
    this.right = right;
  }

  /**
   * @return an iterator over a freshly built two-element list: the left operand, then the right one
   */
  @Override
  public Iterator<LogicalExpression> iterator() {
    final List<LogicalExpression> operands = new ArrayList<>(2);
    operands.add(left);
    operands.add(right);
    return operands.iterator();
  }

  /**
   * Dispatches to {@code visitUnknown} on the given visitor.
   */
  @Override
  public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V value) throws E {
    return visitor.visitUnknown(this, value);
  }
}
/**
 * Base class for boolean combinations (such as AND / OR) of filter predicates.
 * Construction is delegated entirely to {@link BooleanOperator}.
 */
public static abstract class ParquetBooleanPredicate extends BooleanOperator implements ParquetFilterPredicate {

  /**
   * @param name name of the boolean operator, passed through to the superclass
   * @param args child expressions, passed through to the superclass
   * @param pos  expression position, passed through to the superclass
   */
  public ParquetBooleanPredicate(String name, List<LogicalExpression> args, ExpressionPosition pos) {
    super(name, args, pos);
  }

  /**
   * Dispatches to {@code visitBooleanOperator} on the given visitor.
   */
  @Override
  public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V value) throws E {
    return visitor.visitBooleanOperator(this, value);
  }
}
/**
 * Conjunction of filter predicates.
 */
public static class AndPredicate extends ParquetBooleanPredicate {

  /**
   * @param name name of the boolean operator, passed through to the superclass
   * @param args child expressions of the conjunction
   * @param pos  expression position, passed through to the superclass
   */
  public AndPredicate(String name, List<LogicalExpression> args, ExpressionPosition pos) {
    super(name, args, pos);
  }

  /**
   * Decides whether the data can be skipped for this conjunction.
   * <p>
   * "and": as long as one branch is OK to drop, the whole conjunction can be dropped.
   * A child that is not a {@link ParquetFilterPredicate} cannot prove anything, so it is
   * skipped instead of being cast blindly (which would throw a {@link ClassCastException}).
   *
   * @param evaluator evaluator handed to each child's {@code canDrop}
   * @return {@code true} if at least one child predicate reports that it can be dropped;
   *         {@code false} otherwise, including when there are no children
   */
  @Override
  public boolean canDrop(RangeExprEvaluator evaluator) {
    for (LogicalExpression child : this) {
      if (child instanceof ParquetFilterPredicate
          && ((ParquetFilterPredicate) child).canDrop(evaluator)) {
        return true;
      }
    }
    return false;
  }
}
/**
 * Disjunction of filter predicates.
 */
public static class OrPredicate extends ParquetBooleanPredicate {

  /**
   * @param name name of the boolean operator, passed through to the superclass
   * @param args child expressions of the disjunction
   * @param pos  expression position, passed through to the superclass
   */
  public OrPredicate(String name, List<LogicalExpression> args, ExpressionPosition pos) {
    super(name, args, pos);
  }

  /**
   * Decides whether the data can be skipped for this disjunction.
   * <p>
   * "or": as long as one branch is NOT ok to drop, the whole disjunction can NOT be dropped.
   * A child that is not a {@link ParquetFilterPredicate} cannot be proven droppable, so it
   * conservatively blocks the drop instead of being cast blindly (which would throw a
   * {@link ClassCastException}).
   *
   * @param evaluator evaluator handed to each child's {@code canDrop}
   * @return {@code true} only if every child is a filter predicate that reports it can be
   *         dropped (vacuously {@code true} when there are no children); {@code false} otherwise
   */
  @Override
  public boolean canDrop(RangeExprEvaluator evaluator) {
    for (LogicalExpression child : this) {
      if (!(child instanceof ParquetFilterPredicate)
          || !((ParquetFilterPredicate) child).canDrop(evaluator)) {
        return false;
      }
    }
    return true;
  }
}
/**
 * Is this column chunk composed entirely of nulls?
 * Assumes the column chunk's statistics is not empty.
 *
 * @param stat     statistics whose null count is inspected
 * @param rowCount number of rows the null count is compared against
 * @return {@code true} when the null count equals {@code rowCount}
 */
protected static boolean isAllNulls(Statistics stat, long rowCount) {
  final long nullCount = stat.getNumNulls();
  return nullCount == rowCount;
}
/**
 * Are there any nulls in this column chunk?
 * Assumes the column chunk's statistics is not empty.
 *
 * @param stat statistics whose null count is inspected
 * @return {@code true} when the null count is greater than zero
 */
protected static boolean hasNulls(Statistics stat) {
  final long nullCount = stat.getNumNulls();
  return nullCount > 0;
}
/**
 * EQ ({@code =}) predicate.
 */
public static class EqualPredicate extends ParquetCompPredicate {
  public EqualPredicate(LogicalExpression left, LogicalExpression right) {
    super(left, right);
  }

  /**
   * Decides whether the data can be skipped for {@code left = right}.
   * <p>
   * The semantics of canDrop() are very similar to what is implemented in the Parquet library's
   * {@link org.apache.parquet.filter2.statisticslevel.StatisticsFilter} and
   * {@link org.apache.parquet.filter2.predicate.FilterPredicate}.
   * Main differences:
   * <ol>
   * <li>A RangeExprEvaluator is used to compute the min/max of an expression, such as a CAST function
   * of a column. A CAST function could be explicitly added by the Drill user (it is recommended to use
   * the CAST function after DRILL-4372 if the user wants to reduce planning time for a limit 0 query),
   * or implicitly inserted by Drill when the types of the compare operands are not identical.
   * Therefore, it is important to allow the CAST function to appear in the filter predicate.</li>
   * <li>We do not require a list of ColumnChunkMetaData to do the evaluation, while the Parquet library's
   * StatisticsFilter has such a requirement. Drill's ParquetTableMetaData does not maintain
   * ColumnChunkMetaData, making it impossible to directly use the Parquet library's StatisticsFilter
   * at query planning time.</li>
   * <li>We allow both sides of the comparison operator to be a min/max range. As such, we support
   * {@code expression_of(Column1) < expression_of(Column2)},
   * where Column1 and Column2 are from the same parquet table.</li>
   * </ol>
   *
   * @param evaluator visitor used to obtain statistics for both operands, and the row count
   * @return {@code true} if no row can satisfy the equality; {@code false} if that cannot be
   *         proven or statistics are missing or empty for either operand
   */
  @Override
  public boolean canDrop(RangeExprEvaluator evaluator) {
    final Statistics lhsStats = left.accept(evaluator, null);
    final Statistics rhsStats = right.accept(evaluator, null);

    // Without usable statistics on both sides nothing can be proven.
    final boolean statsUnusable =
        lhsStats == null || rhsStats == null || lhsStats.isEmpty() || rhsStats.isEmpty();
    if (statsUnusable) {
      return false;
    }

    // If either side is ALL null, "=" is evaluated to UNKNOWN -> canDrop.
    if (isAllNulls(lhsStats, evaluator.getRowCount())
        || isAllNulls(rhsStats, evaluator.getRowCount())) {
      return true;
    }

    // The ranges are disjoint when left's max < right's min, or right's max < left's min.
    return lhsStats.genericGetMax().compareTo(rhsStats.genericGetMin()) < 0
        || rhsStats.genericGetMax().compareTo(lhsStats.genericGetMin()) < 0;
  }

  /**
   * @return the two operands' string forms joined by {@code " = "}
   */
  @Override
  public String toString() {
    return left.toString() + " = " + right.toString();
  }
}
/**
 * GT ({@code >}) predicate.
 */
public static class GTPredicate extends ParquetCompPredicate {
  public GTPredicate(LogicalExpression left, LogicalExpression right) {
    super(left, right);
  }

  /**
   * Decides whether the data can be skipped for {@code left > right}.
   *
   * @param evaluator visitor used to obtain statistics for both operands, and the row count
   * @return {@code true} if either side is entirely null or left's max is not greater than
   *         right's min; {@code false} otherwise, or when statistics are missing or empty
   */
  @Override
  public boolean canDrop(RangeExprEvaluator evaluator) {
    final Statistics lhsStats = left.accept(evaluator, null);
    final Statistics rhsStats = right.accept(evaluator, null);

    // Without usable statistics on both sides nothing can be proven.
    final boolean statsUnusable =
        lhsStats == null || rhsStats == null || lhsStats.isEmpty() || rhsStats.isEmpty();
    if (statsUnusable) {
      return false;
    }

    // If either side is ALL null, ">" is evaluated to UNKNOWN -> canDrop.
    if (isAllNulls(lhsStats, evaluator.getRowCount())
        || isAllNulls(rhsStats, evaluator.getRowCount())) {
      return true;
    }

    // Can drop when left's max <= right's min.
    return lhsStats.genericGetMax().compareTo(rhsStats.genericGetMin()) <= 0;
  }
}
/**
 * GE ({@code >=}) predicate.
 */
public static class GEPredicate extends ParquetCompPredicate {
  public GEPredicate(LogicalExpression left, LogicalExpression right) {
    super(left, right);
  }

  /**
   * Decides whether the data can be skipped for {@code left >= right}.
   *
   * @param evaluator visitor used to obtain statistics for both operands, and the row count
   * @return {@code true} if either side is entirely null or left's max is strictly less than
   *         right's min; {@code false} otherwise, or when statistics are missing or empty
   */
  @Override
  public boolean canDrop(RangeExprEvaluator evaluator) {
    final Statistics lhsStats = left.accept(evaluator, null);
    final Statistics rhsStats = right.accept(evaluator, null);

    // Without usable statistics on both sides nothing can be proven.
    final boolean statsUnusable =
        lhsStats == null || rhsStats == null || lhsStats.isEmpty() || rhsStats.isEmpty();
    if (statsUnusable) {
      return false;
    }

    // If either side is ALL null, ">=" is evaluated to UNKNOWN -> canDrop.
    if (isAllNulls(lhsStats, evaluator.getRowCount())
        || isAllNulls(rhsStats, evaluator.getRowCount())) {
      return true;
    }

    // Can drop when left's max < right's min.
    return lhsStats.genericGetMax().compareTo(rhsStats.genericGetMin()) < 0;
  }
}
/**
 * LT ({@code <}) predicate.
 */
public static class LTPredicate extends ParquetCompPredicate {
  public LTPredicate(LogicalExpression left, LogicalExpression right) {
    super(left, right);
  }

  /**
   * Decides whether the data can be skipped for {@code left < right}.
   *
   * @param evaluator visitor used to obtain statistics for both operands, and the row count
   * @return {@code true} if either side is entirely null or right's max is not greater than
   *         left's min; {@code false} otherwise, or when statistics are missing or empty
   */
  @Override
  public boolean canDrop(RangeExprEvaluator evaluator) {
    final Statistics lhsStats = left.accept(evaluator, null);
    final Statistics rhsStats = right.accept(evaluator, null);

    // Without usable statistics on both sides nothing can be proven.
    final boolean statsUnusable =
        lhsStats == null || rhsStats == null || lhsStats.isEmpty() || rhsStats.isEmpty();
    if (statsUnusable) {
      return false;
    }

    // If either side is ALL null, "<" is evaluated to UNKNOWN -> canDrop.
    if (isAllNulls(lhsStats, evaluator.getRowCount())
        || isAllNulls(rhsStats, evaluator.getRowCount())) {
      return true;
    }

    // Can drop when right's max <= left's min.
    return rhsStats.genericGetMax().compareTo(lhsStats.genericGetMin()) <= 0;
  }
}
/**
 * LE ({@code <=}) predicate.
 */
public static class LEPredicate extends ParquetCompPredicate {
  public LEPredicate(LogicalExpression left, LogicalExpression right) {
    super(left, right);
  }

  /**
   * Decides whether the data can be skipped for {@code left <= right}.
   *
   * @param evaluator visitor used to obtain statistics for both operands, and the row count
   * @return {@code true} if either side is entirely null or right's max is strictly less than
   *         left's min; {@code false} otherwise, or when statistics are missing or empty
   */
  @Override
  public boolean canDrop(RangeExprEvaluator evaluator) {
    final Statistics lhsStats = left.accept(evaluator, null);
    final Statistics rhsStats = right.accept(evaluator, null);

    // Without usable statistics on both sides nothing can be proven.
    final boolean statsUnusable =
        lhsStats == null || rhsStats == null || lhsStats.isEmpty() || rhsStats.isEmpty();
    if (statsUnusable) {
      return false;
    }

    // If either side is ALL null, "<=" is evaluated to UNKNOWN -> canDrop.
    if (isAllNulls(lhsStats, evaluator.getRowCount())
        || isAllNulls(rhsStats, evaluator.getRowCount())) {
      return true;
    }

    // Can drop when right's max < left's min.
    return rhsStats.genericGetMax().compareTo(lhsStats.genericGetMin()) < 0;
  }
}
/**
 * NE ({@code !=}) predicate.
 */
public static class NEPredicate extends ParquetCompPredicate {
  public NEPredicate(LogicalExpression left, LogicalExpression right) {
    super(left, right);
  }

  /**
   * Decides whether the data can be skipped for {@code left != right}.
   *
   * @param evaluator visitor used to obtain statistics for both operands, and the row count
   * @return {@code true} if either side is entirely null, or if each side has min equal to max
   *         and the two maxima are equal; {@code false} otherwise, or when statistics are
   *         missing or empty
   */
  @Override
  public boolean canDrop(RangeExprEvaluator evaluator) {
    final Statistics lhsStats = left.accept(evaluator, null);
    final Statistics rhsStats = right.accept(evaluator, null);

    // Without usable statistics on both sides nothing can be proven.
    final boolean statsUnusable =
        lhsStats == null || rhsStats == null || lhsStats.isEmpty() || rhsStats.isEmpty();
    if (statsUnusable) {
      return false;
    }

    // If either side is ALL null, the comparison is evaluated to UNKNOWN -> canDrop.
    if (isAllNulls(lhsStats, evaluator.getRowCount())
        || isAllNulls(rhsStats, evaluator.getRowCount())) {
      return true;
    }

    // Can drop when there is only one unique value: each side's min equals its max,
    // and that single value is the same on both sides.
    return lhsStats.genericGetMin().compareTo(lhsStats.genericGetMax()) == 0
        && rhsStats.genericGetMin().compareTo(rhsStats.genericGetMax()) == 0
        && lhsStats.genericGetMax().compareTo(rhsStats.genericGetMax()) == 0;
  }
}
}