/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.accumulo.predicate;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.data.Range;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding;
import org.apache.hadoop.hive.accumulo.columns.ColumnMapper;
import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping;
import org.apache.hadoop.hive.accumulo.predicate.compare.CompareOp;
import org.apache.hadoop.hive.accumulo.predicate.compare.DoubleCompare;
import org.apache.hadoop.hive.accumulo.predicate.compare.Equal;
import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThan;
import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual;
import org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare;
import org.apache.hadoop.hive.accumulo.predicate.compare.LessThan;
import org.apache.hadoop.hive.accumulo.predicate.compare.LessThanOrEqual;
import org.apache.hadoop.hive.accumulo.predicate.compare.Like;
import org.apache.hadoop.hive.accumulo.predicate.compare.LongCompare;
import org.apache.hadoop.hive.accumulo.predicate.compare.NotEqual;
import org.apache.hadoop.hive.accumulo.predicate.compare.PrimitiveComparison;
import org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare;
import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.udf.UDFLike;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
 * Supporting operations dealing with Hive predicate pushdown to iterators and ranges.
 *
 * See {@link PrimitiveComparisonFilter}
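 *
 * <p>
 * A minimal usage sketch (illustrative only; assumes a {@code Configuration} named {@code conf}
 * and a {@code ColumnMapper} named {@code columnMapper} already populated for the table):
 * </p>
 *
 * <pre>
 * AccumuloPredicateHandler handler = AccumuloPredicateHandler.getInstance();
 * List&lt;Range&gt; ranges = handler.getRanges(conf, columnMapper);
 * List&lt;IteratorSetting&gt; iterators = handler.getIterators(conf, columnMapper);
 * </pre>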
 */
public class AccumuloPredicateHandler {
  private static final List<Range> TOTAL_RANGE = Collections.singletonList(new Range());

  private static AccumuloPredicateHandler handler = new AccumuloPredicateHandler();
  private static Map<String, Class<? extends CompareOp>> compareOps = Maps.newHashMap();
  private static Map<String, Class<? extends PrimitiveComparison>> pComparisons = Maps.newHashMap();

  // Want to start sufficiently high in the iterator stack
  private static int iteratorCount = 50;

  private static final Logger LOG = LoggerFactory.getLogger(AccumuloPredicateHandler.class);

  static {
    compareOps.put(GenericUDFOPEqual.class.getName(), Equal.class);
    compareOps.put(GenericUDFOPNotEqual.class.getName(), NotEqual.class);
    compareOps.put(GenericUDFOPGreaterThan.class.getName(), GreaterThan.class);
    compareOps.put(GenericUDFOPEqualOrGreaterThan.class.getName(), GreaterThanOrEqual.class);
    compareOps.put(GenericUDFOPEqualOrLessThan.class.getName(), LessThanOrEqual.class);
    compareOps.put(GenericUDFOPLessThan.class.getName(), LessThan.class);
    compareOps.put(UDFLike.class.getName(), Like.class);

    pComparisons.put("bigint", LongCompare.class);
    pComparisons.put("int", IntCompare.class);
    pComparisons.put("double", DoubleCompare.class);
    pComparisons.put("string", StringCompare.class);
  }

  public static AccumuloPredicateHandler getInstance() {
    return handler;
  }

  /**
   * @return set of all UDF class names with matching CompareOp implementations.
   */
  public Set<String> cOpKeyset() {
    return compareOps.keySet();
  }

  /**
   * @return set of all Hive data types with matching PrimitiveComparison implementations.
   */
  public Set<String> pComparisonKeyset() {
    return pComparisons.keySet();
  }

  /**
   * @param udfType
   *          GenericUDF classname to look up the matching CompareOp
   * @return the matching {@code Class<? extends CompareOp>}
   */
  public Class<? extends CompareOp> getCompareOpClass(String udfType)
      throws NoSuchCompareOpException {
    if (!compareOps.containsKey(udfType)) {
      throw new NoSuchCompareOpException("Null compare op for specified key: " + udfType);
    }
    return compareOps.get(udfType);
  }

  public CompareOp getCompareOp(String udfType, IndexSearchCondition sc)
      throws NoSuchCompareOpException, SerDeException {
    Class<? extends CompareOp> clz = getCompareOpClass(udfType);

    try {
      return clz.newInstance();
    } catch (ClassCastException e) {
      throw new SerDeException("Column type mismatch in WHERE clause "
          + sc.getIndexExpr().getExprString() + " found type "
          + sc.getConstantDesc().getTypeString() + " instead of "
          + sc.getColumnDesc().getTypeString());
    } catch (IllegalAccessException e) {
      throw new SerDeException("Could not instantiate class for WHERE clause", e);
    } catch (InstantiationException e) {
      throw new SerDeException("Could not instantiate class for WHERE clause", e);
    }
  }
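
  // Illustrative note (not in the original source): for a predicate such as `key = 'v'` over a
  // string column, the analyzer reports GenericUDFOPEqual, which the static registry above
  // resolves to Equal, while the "string" column type resolves to StringCompare below.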

  /**
   * @param type
   *          Hive column type to look up the matching PrimitiveComparison
   * @return the matching {@code Class<? extends PrimitiveComparison>}
   */
  public Class<? extends PrimitiveComparison> getPrimitiveComparisonClass(String type)
      throws NoSuchPrimitiveComparisonException {
    if (!pComparisons.containsKey(type)) {
      throw new NoSuchPrimitiveComparisonException("Null primitive comparison for specified key: "
          + type);
    }
    return pComparisons.get(type);
  }

  public PrimitiveComparison getPrimitiveComparison(String type, IndexSearchCondition sc)
      throws NoSuchPrimitiveComparisonException, SerDeException {
    Class<? extends PrimitiveComparison> clz = getPrimitiveComparisonClass(type);

    try {
      return clz.newInstance();
    } catch (ClassCastException e) {
      throw new SerDeException("Column type mismatch in WHERE clause "
          + sc.getIndexExpr().getExprString() + " found type "
          + sc.getConstantDesc().getTypeString() + " instead of "
          + sc.getColumnDesc().getTypeString());
    } catch (IllegalAccessException e) {
      throw new SerDeException("Could not instantiate class for WHERE clause", e);
    } catch (InstantiationException e) {
      throw new SerDeException("Could not instantiate class for WHERE clause", e);
    }
  }

  private AccumuloPredicateHandler() {}

  /**
   * Loop through search conditions and build ranges for predicates involving the rowID column, if
   * any.
   */
  public List<Range> getRanges(Configuration conf, ColumnMapper columnMapper)
      throws SerDeException {
    if (!columnMapper.hasRowIdMapping()) {
      return TOTAL_RANGE;
    }

    int rowIdOffset = columnMapper.getRowIdOffset();
    String[] hiveColumnNamesArr = conf.getStrings(serdeConstants.LIST_COLUMNS);

    if (null == hiveColumnNamesArr) {
      throw new IllegalArgumentException("Could not find Hive columns in configuration");
    }

    // Already verified that we should have the rowId mapping
    String hiveRowIdColumnName = hiveColumnNamesArr[rowIdOffset];

    ExprNodeDesc root = this.getExpression(conf);

    // No expression, therefore scan the whole table
    if (null == root) {
      return TOTAL_RANGE;
    }

    Object result = generateRanges(conf, columnMapper, hiveRowIdColumnName, root);

    if (null == result) {
      LOG.info("Calculated null set of ranges, scanning full table");
      return TOTAL_RANGE;
    } else if (result instanceof Range) {
      LOG.info("Computed a single Range for the query: " + result);
      return Collections.singletonList((Range) result);
    } else if (result instanceof List) {
      LOG.info("Computed a collection of Ranges for the query: " + result);
      @SuppressWarnings("unchecked")
      List<Range> ranges = (List<Range>) result;
      return ranges;
    } else {
      throw new IllegalArgumentException("Unhandled return from Range generation: " + result);
    }
  }
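
  // Illustrative note (not in the original source): getRanges() degrades gracefully. With no
  // rowid mapping, no serialized filter expression, or a null traversal result, it returns
  // TOTAL_RANGE (a full-table scan); only predicates over the rowid-mapped Hive column are
  // collapsed into Ranges by the AccumuloRangeGenerator walk below.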

  /**
   * Encapsulates the traversal over some {@link ExprNodeDesc} tree for the generation of Accumulo
   * Ranges using expressions involving the Hive column mapped to the Accumulo rowid.
   *
   * @param conf
   *          Hadoop configuration
   * @param columnMapper
   *          Mapping of Hive to Accumulo columns for the query
   * @param hiveRowIdColumnName
   *          Name of the Hive column mapped to the Accumulo rowid
   * @param root
   *          Root of some ExprNodeDesc tree to traverse, the WHERE clause
   * @return An object representing the result from the ExprNodeDesc tree traversal using the
   *         AccumuloRangeGenerator
   */
  protected Object generateRanges(Configuration conf, ColumnMapper columnMapper,
      String hiveRowIdColumnName, ExprNodeDesc root) {
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler,
        columnMapper.getRowIdMapping(), hiveRowIdColumnName);
    Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator,
        Collections.<Rule, NodeProcessor> emptyMap(), null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    List<Node> roots = new ArrayList<Node>();
    roots.add(root);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();

    try {
      ogw.startWalking(roots, nodeOutput);
    } catch (SemanticException ex) {
      throw new RuntimeException(ex);
    }

    return nodeOutput.get(root);
  }

  /**
   * Loop through search conditions and build iterator settings for predicates involving columns
   * other than rowID, if any.
   *
   * @param conf
   *          Configuration
   * @param columnMapper
   *          Mapping of Hive to Accumulo columns for the query
   * @throws SerDeException
   */
  public List<IteratorSetting> getIterators(Configuration conf, ColumnMapper columnMapper)
      throws SerDeException {
    List<IteratorSetting> itrs = Lists.newArrayList();
    boolean shouldPushdown = conf.getBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY,
        AccumuloSerDeParameters.ITERATOR_PUSHDOWN_DEFAULT);
    if (!shouldPushdown) {
      LOG.info("Iterator pushdown is disabled for this table");
      return itrs;
    }

    boolean binaryEncodedRow = ColumnEncoding.BINARY.getName().equalsIgnoreCase(
        conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE));

    int rowIdOffset = columnMapper.getRowIdOffset();
    String[] hiveColumnNamesArr = conf.getStrings(serdeConstants.LIST_COLUMNS);

    if (null == hiveColumnNamesArr) {
      throw new IllegalArgumentException("Could not find Hive columns in configuration");
    }

    String hiveRowIdColumnName = null;

    if (rowIdOffset >= 0 && rowIdOffset < hiveColumnNamesArr.length) {
      hiveRowIdColumnName = hiveColumnNamesArr[rowIdOffset];
    }

    List<String> hiveColumnNames = Arrays.asList(hiveColumnNamesArr);

    for (IndexSearchCondition sc : getSearchConditions(conf)) {
      String col = sc.getColumnDesc().getColumn();
      if (hiveRowIdColumnName == null || !hiveRowIdColumnName.equals(col)) {
        HiveAccumuloColumnMapping mapping = (HiveAccumuloColumnMapping) columnMapper
            .getColumnMappingForHiveColumn(hiveColumnNames, col);
        itrs.add(toSetting(mapping, sc, binaryEncodedRow));
      }
    }

    if (LOG.isInfoEnabled()) {
      LOG.info("num iterators = " + itrs.size());
    }

    return itrs;
  }
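
  // Illustrative sketch (not in the original source) of the options toSetting() attaches below;
  // the option keys are real constants on PrimitiveComparisonFilter, the values are hypothetical:
  //   P_COMPARE_CLASS   -> org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare
  //   COMPARE_OPT_CLASS -> org.apache.hadoop.hive.accumulo.predicate.compare.Equal
  //   CONST_VAL         -> Base64-encoded bytes of the constant from the WHERE clause
  //   COLUMN            -> the serialized HiveAccumuloColumnMapping to filter on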

  /**
   * Create an IteratorSetting for the right qualifier, constant, CompareOp, and
   * PrimitiveComparison type.
   *
   * @param accumuloColumnMapping
   *          ColumnMapping to filter
   * @param sc
   *          IndexSearchCondition
   * @param binaryEncodedValues
   *          flag for binary encoded values
   * @return IteratorSetting
   * @throws SerDeException
   */
  public IteratorSetting toSetting(HiveAccumuloColumnMapping accumuloColumnMapping,
      IndexSearchCondition sc, boolean binaryEncodedValues) throws SerDeException {
    iteratorCount++;
    final IteratorSetting is = new IteratorSetting(iteratorCount,
        PrimitiveComparisonFilter.FILTER_PREFIX + iteratorCount, PrimitiveComparisonFilter.class);
    final String type = binaryEncodedValues ? sc.getColumnDesc().getTypeString()
        : ColumnEncoding.STRING.getName();
    final String comparisonOpStr = sc.getComparisonOp();

    PushdownTuple tuple;
    try {
      tuple = new PushdownTuple(sc, getPrimitiveComparison(type, sc),
          getCompareOp(comparisonOpStr, sc));
    } catch (NoSuchPrimitiveComparisonException e) {
      throw new SerDeException("No configured PrimitiveComparison class for " + type, e);
    } catch (NoSuchCompareOpException e) {
      throw new SerDeException("No configured CompareOp class for " + comparisonOpStr, e);
    }

    is.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS,
        tuple.getpCompare().getClass().getName());
    is.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS,
        tuple.getcOpt().getClass().getName());
    is.addOption(PrimitiveComparisonFilter.CONST_VAL,
        new String(Base64.encodeBase64(tuple.getConstVal())));
    is.addOption(PrimitiveComparisonFilter.COLUMN, accumuloColumnMapping.serialize());

    return is;
  }

  public ExprNodeDesc getExpression(Configuration conf) {
    String filteredExprSerialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (filteredExprSerialized == null) {
      return null;
    }

    return SerializationUtilities.deserializeExpression(filteredExprSerialized);
  }

  /**
   * @param conf
   *          Configuration
   * @return list of IndexSearchConditions from the filter expression.
   */
  public List<IndexSearchCondition> getSearchConditions(Configuration conf) {
    final List<IndexSearchCondition> sConditions = Lists.newArrayList();
    ExprNodeDesc filterExpr = getExpression(conf);
    if (null == filterExpr) {
      return sConditions;
    }

    IndexPredicateAnalyzer analyzer = newAnalyzer(conf);
    ExprNodeDesc residual = analyzer.analyzePredicate(filterExpr, sConditions);
    if (residual != null) {
      throw new RuntimeException("Unexpected residual predicate: " + residual.getExprString());
    }

    return sConditions;
  }

  /**
   * @param conf
   *          Configuration
   * @param desc
   *          predicate expression node.
   * @return DecomposedPredicate containing translated search conditions the analyzer can support.
   */
  public DecomposedPredicate decompose(Configuration conf, ExprNodeDesc desc) {
    IndexPredicateAnalyzer analyzer = newAnalyzer(conf);
    List<IndexSearchCondition> sConditions = new ArrayList<IndexSearchCondition>();
    ExprNodeDesc residualPredicate = analyzer.analyzePredicate(desc, sConditions);

    if (sConditions.size() == 0) {
      LOG.info("nothing to decompose. Returning");
      return null;
    }

    DecomposedPredicate decomposedPredicate = new DecomposedPredicate();
    decomposedPredicate.pushedPredicate = analyzer.translateSearchConditions(sConditions);
    decomposedPredicate.residualPredicate = (ExprNodeGenericFuncDesc) residualPredicate;

    return decomposedPredicate;
  }

  /**
   * Build an analyzer that allows comparison ops from the compareOps map, and all columns from
   * the table definition.
   */
  private IndexPredicateAnalyzer newAnalyzer(Configuration conf) {
    IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
    analyzer.clearAllowedColumnNames();

    for (String op : cOpKeyset()) {
      analyzer.addComparisonOp(op);
    }

    String[] hiveColumnNames = conf.getStrings(serdeConstants.LIST_COLUMNS);
    for (String col : hiveColumnNames) {
      analyzer.allowColumnName(col);
    }

    return analyzer;
  }
}