/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hive.metastore.hbase;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.hbase.PartitionKeyComparator.Operator;
import org.apache.hadoop.hive.metastore.parser.ExpressionTree;
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode;
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode;
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeVisitor;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
/**
* Utility functions for generating the HBase partition filtering plan representation
* from an ExpressionTree.
* Optimizations to be done -
* - Case where all partition keys are specified. Should use a get
*
* {@link PartitionFilterGenerator} is a visitor on the given filter expression tree. After
* walking the tree it produces the HBase execution plan represented by {@link FilterPlan}. See
* their javadocs for more details.
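*
* A minimal usage sketch (exprTree, dbName, tableName and partCols are assumed to be in scope;
* partCols is the table's {@code List<FieldSchema>} of partition keys):
* <pre>{@code
*   PlanResult result = HBaseFilterPlanUtil.getFilterPlan(exprTree, partCols);
*   for (ScanPlan sp : result.plan.getPlans()) {
*     byte[] startSuffix = sp.getStartRowSuffix(dbName, tableName, partCols);
*     byte[] endSuffix = sp.getEndRowSuffix(dbName, tableName, partCols);
*     Filter rowFilter = sp.getFilter(partCols);
*     // issue one HBase Scan per ScanPlan and take the union of the results
*   }
* }</pre>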
*/
class HBaseFilterPlanUtil {
/**
* Compare two byte arrays.
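*
* <p>For example, {@code compare(new byte[]{1, 2}, new byte[]{1, 2, 3})} returns -1
* (the arrays are equal up to ar1's length and ar2 is longer), while
* {@code compare(new byte[]{1, 3}, new byte[]{1, 2})} returns 1.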
*
* @param ar1
* first byte array
* @param ar2
* second byte array
* @return -1 if ar1 < ar2, 0 if equal, 1 if ar1 > ar2
*/
static int compare(byte[] ar1, byte[] ar2) {
// null check is not needed, nulls are not passed here
for (int i = 0; i < ar1.length; i++) {
if (i == ar2.length) {
return 1;
} else {
if (ar1[i] == ar2[i]) {
continue;
} else if (ar1[i] > ar2[i]) {
return 1;
} else {
return -1;
}
}
}
// ar1 and ar2 are equal up to ar1's length
if (ar1.length == ar2.length) {
return 0;
}
// ar2 has more bytes
return -1;
}
/**
* Represents the execution plan for HBase to find the set of partitions that
* match the given filter expression.
* If you have an AND or OR of two expressions, you can determine the FilterPlan for each
* child and then call lhs.and(rhs) or lhs.or(rhs) respectively
* to generate a new plan for the combined expression.
*
* The execution plan has one or more ScanPlan objects. To get the results, the union of the
* results of all ScanPlan objects needs to be taken.
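*
* A sketch of combining two already-built plans (the operand plans below are placeholders;
* real plans are produced by {@link PartitionFilterGenerator}):
* <pre>{@code
*   FilterPlan yearPlan = ...;   // plan for "year = 2015"
*   FilterPlan statePlan = ...;  // plan for "state = 'CA'"
*   FilterPlan combined = yearPlan.and(statePlan);
* }</pre>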
*/
public static abstract class FilterPlan {
abstract FilterPlan and(FilterPlan other);
abstract FilterPlan or(FilterPlan other);
abstract List<ScanPlan> getPlans();
@Override
public String toString() {
return getPlans().toString();
}
}
/**
* Represents a union/OR of single scan plans (ScanPlan).
*/
public static class MultiScanPlan extends FilterPlan {
final ImmutableList<ScanPlan> scanPlans;
public MultiScanPlan(List<ScanPlan> scanPlans){
this.scanPlans = ImmutableList.copyOf(scanPlans);
}
@Override
public FilterPlan and(FilterPlan other) {
// Convert to disjunctive normal form (DNF), ie OR of ANDs
// First get a new set of FilterPlans by doing an AND
// on each ScanPlan in this one with the other FilterPlan
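// e.g. (A OR B) AND C  =>  (A AND C) OR (B AND C)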
List<FilterPlan> newFPlans = new ArrayList<FilterPlan>();
for (ScanPlan splan : getPlans()) {
newFPlans.add(splan.and(other));
}
//now combine scanPlans in multiple new FilterPlans into one
// MultiScanPlan
List<ScanPlan> newScanPlans = new ArrayList<ScanPlan>();
for (FilterPlan fp : newFPlans) {
newScanPlans.addAll(fp.getPlans());
}
return new MultiScanPlan(newScanPlans);
}
@Override
public FilterPlan or(FilterPlan other) {
// just combine the ScanPlans
List<ScanPlan> newScanPlans = new ArrayList<ScanPlan>(this.getPlans());
newScanPlans.addAll(other.getPlans());
return new MultiScanPlan(newScanPlans);
}
@Override
public List<ScanPlan> getPlans() {
return scanPlans;
}
}
/**
* Represents a single HBase Scan API call.
*/
public static class ScanPlan extends FilterPlan {
public static class ScanMarker {
final String value;
/**
* If isInclusive is true, the marker value itself is part of the scan range.
* If it is false, the scan range starts immediately after this value
* (for a start marker) or ends immediately before it (for an end marker).
*/
final boolean isInclusive;
final String type;
ScanMarker(String value, boolean isInclusive, String type) {
this.value = value;
this.isInclusive = isInclusive;
this.type = type;
}
@Override
public String toString() {
return "ScanMarker [" + "value=" + value.toString() + ", isInclusive=" + isInclusive +
", type=" + type + "]";
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + value.hashCode();
result = prime * result + (isInclusive ? 1231 : 1237);
result = prime * result + type.hashCode();
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
ScanMarker other = (ScanMarker) obj;
if (!value.equals(other.value))
return false;
if (isInclusive != other.isInclusive)
return false;
if (!type.equals(other.type))
return false;
return true;
}
}
public static class ScanMarkerPair {
public ScanMarkerPair(ScanMarker startMarker, ScanMarker endMarker) {
this.startMarker = startMarker;
this.endMarker = endMarker;
}
ScanMarker startMarker;
ScanMarker endMarker;
}
// scan markers per partition key: partition key name -> (start marker, end marker)
Map<String, ScanMarkerPair> markers = new HashMap<String, ScanMarkerPair>();
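// predicates (LIKE, !=) that cannot become scan markers; evaluated by PartitionKeyComparator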
List<Operator> ops = new ArrayList<Operator>();
// Get the number of leading partition keys ("major" parts) that can be used in the scan range.
// For example, if the partition keys are (year, month, state):
// 1. year = 2015 and month >= 1 and month < 5
// year + month can be used in scan range, majorParts = 2
// 2. year = 2015 and state = 'CA'
// only year can be used in scan range, majorParts = 1
// 3. month = 10 and state = 'CA'
// nothing can be used in scan range, majorParts = 0
private int getMajorPartsCount(List<FieldSchema> parts) {
int majorPartsCount = 0;
while (majorPartsCount<parts.size() && markers.containsKey(parts.get(majorPartsCount).getName())) {
ScanMarkerPair pair = markers.get(parts.get(majorPartsCount).getName());
majorPartsCount++;
if (pair.startMarker!=null && pair.endMarker!=null && pair.startMarker.value.equals(pair
.endMarker.value) && pair.startMarker.isInclusive && pair.endMarker.isInclusive) {
// is equal
continue;
} else {
break;
}
}
return majorPartsCount;
}
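/**
* Builds the row-level {@link Filter} for partition keys that cannot be folded into the
* scan range (the non-"major" keys) and for the extra operators (LIKE, !=).
* @return the filter, or null when no row-level filtering is needed
*/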
public Filter getFilter(List<FieldSchema> parts) {
int majorPartsCount = getMajorPartsCount(parts);
Set<String> majorKeys = new HashSet<String>();
for (int i=0;i<majorPartsCount;i++) {
majorKeys.add(parts.get(i).getName());
}
List<String> names = HBaseUtils.getPartitionNames(parts);
List<PartitionKeyComparator.Range> ranges = new ArrayList<PartitionKeyComparator.Range>();
for (Map.Entry<String, ScanMarkerPair> entry : markers.entrySet()) {
if (names.contains(entry.getKey()) && !majorKeys.contains(entry.getKey())) {
PartitionKeyComparator.Mark startMark = null;
if (entry.getValue().startMarker != null) {
startMark = new PartitionKeyComparator.Mark(entry.getValue().startMarker.value,
entry.getValue().startMarker.isInclusive);
}
PartitionKeyComparator.Mark endMark = null;
if (entry.getValue().endMarker != null) {
endMark = new PartitionKeyComparator.Mark(entry.getValue().endMarker.value,
entry.getValue().endMarker.isInclusive);
}
PartitionKeyComparator.Range range = new PartitionKeyComparator.Range(
entry.getKey(), startMark, endMark);
ranges.add(range);
}
}
if (ranges.isEmpty() && ops.isEmpty()) {
return null;
} else {
return new RowFilter(CompareFilter.CompareOp.EQUAL, new PartitionKeyComparator(
StringUtils.join(names, ","), StringUtils.join(HBaseUtils.getPartitionKeyTypes(parts), ","),
ranges, ops));
}
}
public void setStartMarker(String keyName, String keyType, String start, boolean isInclusive) {
if (markers.containsKey(keyName)) {
markers.get(keyName).startMarker = new ScanMarker(start, isInclusive, keyType);
} else {
ScanMarkerPair marker = new ScanMarkerPair(new ScanMarker(start, isInclusive, keyType), null);
markers.put(keyName, marker);
}
}
public ScanMarker getStartMarker(String keyName) {
if (markers.containsKey(keyName)) {
return markers.get(keyName).startMarker;
} else {
return null;
}
}
public void setEndMarker(String keyName, String keyType, String end, boolean isInclusive) {
if (markers.containsKey(keyName)) {
markers.get(keyName).endMarker = new ScanMarker(end, isInclusive, keyType);
} else {
ScanMarkerPair marker = new ScanMarkerPair(null, new ScanMarker(end, isInclusive, keyType));
markers.put(keyName, marker);
}
}
public ScanMarker getEndMarker(String keyName) {
if (markers.containsKey(keyName)) {
return markers.get(keyName).endMarker;
} else {
return null;
}
}
@Override
public FilterPlan and(FilterPlan other) {
List<ScanPlan> newSPlans = new ArrayList<ScanPlan>();
for (ScanPlan otherSPlan : other.getPlans()) {
newSPlans.add(this.and(otherSPlan));
}
return new MultiScanPlan(newSPlans);
}
private ScanPlan and(ScanPlan other) {
// create combined FilterPlan based on existing lhs and rhs plan
ScanPlan newPlan = new ScanPlan();
newPlan.markers.putAll(markers);
for (String keyName : other.markers.keySet()) {
if (newPlan.markers.containsKey(keyName)) {
// create new scan start
ScanMarker greaterStartMarker = getComparedMarker(this.getStartMarker(keyName),
other.getStartMarker(keyName), true);
if (greaterStartMarker != null) {
newPlan.setStartMarker(keyName, greaterStartMarker.type, greaterStartMarker.value, greaterStartMarker.isInclusive);
}
// create new scan end
ScanMarker lesserEndMarker = getComparedMarker(this.getEndMarker(keyName), other.getEndMarker(keyName),
false);
if (lesserEndMarker != null) {
newPlan.setEndMarker(keyName, lesserEndMarker.type, lesserEndMarker.value, lesserEndMarker.isInclusive);
}
} else {
newPlan.markers.put(keyName, other.markers.get(keyName));
}
}
newPlan.ops.addAll(ops);
newPlan.ops.addAll(other.ops);
return newPlan;
}
/**
* Return the greater or lesser of the two markers; a null marker means "no restriction".
* @param lStartMarker left-hand marker, may be null
* @param rStartMarker right-hand marker, may be null
* @param getGreater if true return the greater marker, else return the lesser one
* @return the greater/lesser marker depending on the value of getGreater
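* For example, for two markers of type "int" with values "5" (inclusive) and "3" (exclusive),
* getGreater=true returns the "5" marker and getGreater=false returns the "3" marker.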
*/
@VisibleForTesting
static ScanMarker getComparedMarker(ScanMarker lStartMarker, ScanMarker rStartMarker,
boolean getGreater) {
// if one of the markers is null, just return the other
if(lStartMarker == null) {
return rStartMarker;
} else if (rStartMarker == null) {
return lStartMarker;
}
TypeInfo expectedType =
TypeInfoUtils.getTypeInfoFromTypeString(lStartMarker.type);
ObjectInspector outputOI =
TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
Converter lConverter = ObjectInspectorConverters.getConverter(
PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
Converter rConverter = ObjectInspectorConverters.getConverter(
PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
Comparable lValue = (Comparable)lConverter.convert(lStartMarker.value);
Comparable rValue = (Comparable)rConverter.convert(rStartMarker.value);
int compareRes = lValue.compareTo(rValue);
if (compareRes == 0) {
// values are equal, now compare the isInclusive flags
if (lStartMarker.isInclusive == rStartMarker.isInclusive) {
// actually equal, so return any one
return lStartMarker;
}
boolean isInclusive = true;
// the marker that does not include the current value is the greater one
if (getGreater) {
isInclusive = false;
}
// else
return new ScanMarker(lStartMarker.value, isInclusive, lStartMarker.type);
}
if (getGreater) {
return compareRes > 0 ? lStartMarker : rStartMarker;
}
// else
return compareRes < 0 ? lStartMarker : rStartMarker;
}
@Override
public FilterPlan or(FilterPlan other) {
List<ScanPlan> plans = new ArrayList<ScanPlan>(getPlans());
plans.addAll(other.getPlans());
return new MultiScanPlan(plans);
}
@Override
public List<ScanPlan> getPlans() {
return Arrays.asList(this);
}
/**
* @return row suffix - This is appended to db + table, to generate start row for the Scan
*/
public byte[] getStartRowSuffix(String dbName, String tableName, List<FieldSchema> parts) {
int majorPartsCount = getMajorPartsCount(parts);
List<String> majorPartTypes = new ArrayList<String>();
List<String> components = new ArrayList<String>();
boolean endPrefix = false;
for (int i=0;i<majorPartsCount;i++) {
majorPartTypes.add(parts.get(i).getType());
ScanMarker marker = markers.get(parts.get(i).getName()).startMarker;
if (marker != null) {
components.add(marker.value);
if (i==majorPartsCount-1) {
endPrefix = !marker.isInclusive;
}
} else {
components.add(null);
if (i==majorPartsCount-1) {
endPrefix = false;
}
}
}
byte[] bytes = HBaseUtils.buildPartitionKey(dbName, tableName, majorPartTypes, components, endPrefix);
return bytes;
}
/**
* @return row suffix - This is appended to db + table, to generate end row for the Scan
*/
public byte[] getEndRowSuffix(String dbName, String tableName, List<FieldSchema> parts) {
int majorPartsCount = getMajorPartsCount(parts);
List<String> majorPartTypes = new ArrayList<String>();
List<String> components = new ArrayList<String>();
boolean endPrefix = false;
for (int i=0;i<majorPartsCount;i++) {
majorPartTypes.add(parts.get(i).getType());
ScanMarker marker = markers.get(parts.get(i).getName()).endMarker;
if (marker != null) {
components.add(marker.value);
if (i==majorPartsCount-1) {
endPrefix = marker.isInclusive;
}
} else {
components.add(null);
if (i==majorPartsCount-1) {
endPrefix = true;
}
}
}
byte[] bytes = HBaseUtils.buildPartitionKey(dbName, tableName, majorPartTypes, components, endPrefix);
if (components.isEmpty()) {
bytes[bytes.length-1]++;
}
return bytes;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("ScanPlan:\n");
for (Map.Entry<String, ScanMarkerPair> entry : markers.entrySet()) {
sb.append("key=" + entry.getKey() + "[startMarker=" + entry.getValue().startMarker
+ ", endMarker=" + entry.getValue().endMarker + "]");
}
return sb.toString();
}
}
/**
* Visitor for ExpressionTree.
* It first generates the ScanPlan for the leaf nodes. The higher level nodes are
* either AND or OR operations. It then calls FilterPlan.and and FilterPlan.or with
* the child nodes to generate the plans for higher level nodes.
*/
@VisibleForTesting
static class PartitionFilterGenerator extends TreeVisitor {
private FilterPlan curPlan;
// this tells us if there is a condition that did not get included in the plan;
// such a condition is treated as if it evaluated to TRUE
private boolean hasUnsupportedCondition = false;
// Need to cache the left plans for the TreeNodes. Use IdentityHashMap here
// as we don't want to dedupe two TreeNodes that are otherwise considered equal
Map<TreeNode, FilterPlan> leftPlans = new IdentityHashMap<TreeNode, FilterPlan>();
// temporary holder for the right-hand side plan while combining an AND/OR node's children
private FilterPlan rPlan;
private Map<String, String> nameToType = new HashMap<String, String>();
public PartitionFilterGenerator(List<FieldSchema> parts) {
for (FieldSchema part : parts) {
nameToType.put(part.getName(), part.getType());
}
}
FilterPlan getPlan() {
return curPlan;
}
@Override
protected void beginTreeNode(TreeNode node) throws MetaException {
// reset the params
curPlan = rPlan = null;
}
@Override
protected void midTreeNode(TreeNode node) throws MetaException {
leftPlans.put(node, curPlan);
curPlan = null;
}
@Override
protected void endTreeNode(TreeNode node) throws MetaException {
rPlan = curPlan;
FilterPlan lPlan = leftPlans.get(node);
leftPlans.remove(node);
switch (node.getAndOr()) {
case AND:
curPlan = lPlan.and(rPlan);
break;
case OR:
curPlan = lPlan.or(rPlan);
break;
default:
throw new AssertionError("Unexpected logical operation " + node.getAndOr());
}
}
@Override
public void visit(LeafNode node) throws MetaException {
ScanPlan leafPlan = new ScanPlan();
curPlan = leafPlan;
// a condition on a partition column; if it is on a leading (major) key it may
// influence the start and end of the scan
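// e.g. "year >= 2015" becomes an inclusive start marker on key "year", while
// "state LIKE 'C%'" becomes a LIKE Operator that is later evaluated by PartitionKeyComparator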
final boolean INCLUSIVE = true;
switch (node.operator) {
case EQUALS:
leafPlan.setStartMarker(node.keyName, nameToType.get(node.keyName), node.value.toString(), INCLUSIVE);
leafPlan.setEndMarker(node.keyName, nameToType.get(node.keyName), node.value.toString(), INCLUSIVE);
break;
case GREATERTHAN:
leafPlan.setStartMarker(node.keyName, nameToType.get(node.keyName), node.value.toString(), !INCLUSIVE);
break;
case GREATERTHANOREQUALTO:
leafPlan.setStartMarker(node.keyName, nameToType.get(node.keyName), node.value.toString(), INCLUSIVE);
break;
case LESSTHAN:
leafPlan.setEndMarker(node.keyName, nameToType.get(node.keyName), node.value.toString(), !INCLUSIVE);
break;
case LESSTHANOREQUALTO:
leafPlan.setEndMarker(node.keyName, nameToType.get(node.keyName), node.value.toString(), INCLUSIVE);
break;
case LIKE:
leafPlan.ops.add(new Operator(Operator.Type.LIKE, node.keyName, node.value.toString()));
break;
case NOTEQUALS:
case NOTEQUALS2:
leafPlan.ops.add(new Operator(Operator.Type.NOTEQUALS, node.keyName, node.value.toString()));
break;
}
}
private boolean hasUnsupportedCondition() {
return hasUnsupportedCondition;
}
}
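/**
* Result of plan generation: the generated {@link FilterPlan} plus a flag that is true when
* some condition could not be included in the plan (such conditions are treated as TRUE, so
* the plan may return a superset of the matching partitions).
*/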
public static class PlanResult {
public final FilterPlan plan;
public final boolean hasUnsupportedCondition;
PlanResult(FilterPlan plan, boolean hasUnsupportedCondition) {
this.plan = plan;
this.hasUnsupportedCondition = hasUnsupportedCondition;
}
}
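/**
* Generate the HBase filter plan for the given filter expression.
* @param exprTree the parsed partition filter expression, may be null
* @param parts the partition keys of the table
* @return the plan together with a flag telling whether any condition could not be translated
* @throws MetaException if walking the expression tree fails
*/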
public static PlanResult getFilterPlan(ExpressionTree exprTree, List<FieldSchema> parts) throws MetaException {
if (exprTree == null) {
// TODO: if exprTree is null, we should do what ObjectStore does. See HIVE-10102
return new PlanResult(new ScanPlan(), true);
}
PartitionFilterGenerator pGenerator = new PartitionFilterGenerator(parts);
exprTree.accept(pGenerator);
return new PlanResult(pGenerator.getPlan(), pGenerator.hasUnsupportedCondition());
}
}