/** * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.io.parquet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.parquet.filter2.predicate.FilterApi; import org.apache.parquet.filter2.predicate.FilterPredicate; import org.apache.parquet.io.api.Binary; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Type; import static org.apache.parquet.filter2.predicate.FilterApi.eq; import static org.apache.parquet.filter2.predicate.FilterApi.lt; import static org.apache.parquet.filter2.predicate.FilterApi.ltEq; import static org.apache.parquet.filter2.predicate.FilterApi.binaryColumn; import static org.apache.parquet.filter2.predicate.FilterApi.booleanColumn; import static org.apache.parquet.filter2.predicate.FilterApi.doubleColumn; import static org.apache.parquet.filter2.predicate.FilterApi.floatColumn; import static org.apache.parquet.filter2.predicate.FilterApi.intColumn; public class LeafFilterFactory { private static final Logger LOG = LoggerFactory.getLogger(LeafFilterFactory.class); class IntFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { /** * @param op consists of EQUALS, NULL_SAFE_EQUALS, LESS_THAN, LESS_THAN_EQUALS, IS_NULL * @param literal * @param columnName * @return */ @Override public FilterPredicate buildPredict(Operator op, Object literal, String columnName) { switch (op) { case LESS_THAN: return lt(intColumn(columnName), ((Number) literal).intValue()); case IS_NULL: case EQUALS: case NULL_SAFE_EQUALS: return eq(intColumn(columnName), (literal == null) ? null : ((Number) literal).intValue()); case LESS_THAN_EQUALS: return ltEq(intColumn(columnName), ((Number) literal).intValue()); default: throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op); } } } class LongFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { @Override public FilterPredicate buildPredict(Operator op, Object constant, String columnName) { switch (op) { case LESS_THAN: return lt(FilterApi.longColumn(columnName), ((Number) constant).longValue()); case IS_NULL: case EQUALS: case NULL_SAFE_EQUALS: return eq(FilterApi.longColumn(columnName), (constant == null) ? null : ((Number) constant).longValue()); case LESS_THAN_EQUALS: return ltEq(FilterApi.longColumn(columnName), ((Number) constant).longValue()); default: throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op); } } } class FloatFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { @Override public FilterPredicate buildPredict(Operator op, Object constant, String columnName) { switch (op) { case LESS_THAN: return lt(floatColumn(columnName), ((Number) constant).floatValue()); case IS_NULL: case EQUALS: case NULL_SAFE_EQUALS: return eq(floatColumn(columnName), (constant == null) ? null : ((Number) constant).floatValue()); case LESS_THAN_EQUALS: return ltEq(FilterApi.floatColumn(columnName), ((Number) constant).floatValue()); default: throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op); } } } class DoubleFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { @Override public FilterPredicate buildPredict(Operator op, Object constant, String columnName) { switch (op) { case LESS_THAN: return lt(doubleColumn(columnName), ((Number) constant).doubleValue()); case IS_NULL: case EQUALS: case NULL_SAFE_EQUALS: return eq(doubleColumn(columnName), (constant == null) ? null : ((Number) constant).doubleValue()); case LESS_THAN_EQUALS: return ltEq(FilterApi.doubleColumn(columnName), ((Number) constant).doubleValue()); default: throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op); } } } class BooleanFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { @Override public FilterPredicate buildPredict(Operator op, Object constant, String columnName) throws Exception{ switch (op) { case IS_NULL: case EQUALS: case NULL_SAFE_EQUALS: return eq(booleanColumn(columnName), (constant == null) ? null : ((Boolean) constant).booleanValue()); default: throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op); } } } class BinaryFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { @Override public FilterPredicate buildPredict(Operator op, Object constant, String columnName) throws Exception{ switch (op) { case LESS_THAN: return lt(binaryColumn(columnName), Binary.fromString((String) constant)); case IS_NULL: case EQUALS: case NULL_SAFE_EQUALS: return eq(binaryColumn(columnName), (constant == null) ? null : Binary.fromString((String) constant)); case LESS_THAN_EQUALS: return ltEq(binaryColumn(columnName), Binary.fromString((String) constant)); default: // should never be executed throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op); } } } /** * get leaf filter builder by FilterPredicateType, currently date, decimal and timestamp is not * supported yet. * @param type FilterPredicateType * @return * @throws HiveException Exception is thrown for unsupported data types so we can skip filtering */ public FilterPredicateLeafBuilder getLeafFilterBuilderByType( PredicateLeaf.Type type, Type parquetType) throws HiveException { switch (type){ case LONG: if (parquetType.asPrimitiveType().getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT32) { return new IntFilterPredicateLeafBuilder(); } else { return new LongFilterPredicateLeafBuilder(); } case FLOAT: if (parquetType.asPrimitiveType().getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.FLOAT) { return new FloatFilterPredicateLeafBuilder(); } else { return new DoubleFilterPredicateLeafBuilder(); } case STRING: // string, char, varchar return new BinaryFilterPredicateLeafBuilder(); case BOOLEAN: return new BooleanFilterPredicateLeafBuilder(); case DATE: case DECIMAL: case TIMESTAMP: default: String msg = "Conversion to Parquet FilterPredicate not supported for " + type; LOG.debug(msg); throw new HiveException(msg); } } }