/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.broker.requesthandler; import com.google.common.base.Splitter; import com.linkedin.pinot.common.request.FilterOperator; import com.linkedin.pinot.common.utils.StringUtil; import com.linkedin.pinot.common.utils.request.FilterQueryTree; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; /** * Optimizer that collapses multiple OR clauses to IN clauses. For example, <code>a = 1 OR a = 2 OR a = * 3</code> gets turned to <code>a IN (1, 2, 3)</code>. */ public class MultipleOrEqualitiesToInClauseFilterQueryTreeOptimizer extends FilterQueryTreeOptimizer { @Override public FilterQueryTree optimize(FilterQueryOptimizerRequest request) { return optimize(request.getFilterQueryTree(), null); } private FilterQueryTree optimize(FilterQueryTree filterQueryTree, FilterQueryTree parent) { if (filterQueryTree.getOperator() == FilterOperator.OR) { Map<String, Set<String>> columnToValues = new HashMap<>(); List<FilterQueryTree> nonEqualityOperators = new ArrayList<>(); // Collect all equality/in values and non-equality operators boolean containsDuplicates = collectChildOperators(filterQueryTree, columnToValues, nonEqualityOperators); // If we have at least one column to return if (!columnToValues.isEmpty()) { // We can eliminate the OR node if there is only one column with multiple values if (columnToValues.size() == 1 && nonEqualityOperators.isEmpty()) { Map.Entry<String, Set<String>> columnAndValues = columnToValues.entrySet().iterator().next(); return buildFilterQueryTreeForColumnAndValues(columnAndValues); } // Check if we need to rebuild the predicate boolean rebuildRequired = isRebuildRequired(columnToValues, containsDuplicates); if (!rebuildRequired) { // No mutation needed, so just return the same tree return filterQueryTree; } else { // Rebuild the predicates return rebuildFilterPredicate(columnToValues, nonEqualityOperators); } } } else if (filterQueryTree.getChildren() != null){ // Optimize the child nodes, if any applyOptimizationToChildNodes(filterQueryTree); } return filterQueryTree; } private void applyOptimizationToChildNodes(FilterQueryTree filterQueryTree) { Iterator<FilterQueryTree> childTreeIterator = filterQueryTree.getChildren().iterator(); List<FilterQueryTree> childrenToAdd = null; while (childTreeIterator.hasNext()) { FilterQueryTree childQueryTree = childTreeIterator.next(); FilterQueryTree optimizedChildQueryTree = optimize(childQueryTree, filterQueryTree); if (childQueryTree != optimizedChildQueryTree) { childTreeIterator.remove(); if (childrenToAdd == null) { childrenToAdd = new ArrayList<>(); } childrenToAdd.add(optimizedChildQueryTree); } } if (childrenToAdd != null) { filterQueryTree.getChildren().addAll(childrenToAdd); } } private boolean collectChildOperators(FilterQueryTree filterQueryTree, Map<String, Set<String>> columnToValues, List<FilterQueryTree> nonEqualityOperators) { boolean containsDuplicates = false; for (FilterQueryTree childQueryTree : filterQueryTree.getChildren()) { if (childQueryTree.getOperator() == FilterOperator.EQUALITY || childQueryTree.getOperator() == FilterOperator.IN) { List<String> childValues = valueDoubleTabListToElements(childQueryTree.getValue()); if (!columnToValues.containsKey(childQueryTree.getColumn())) { TreeSet<String> value = new TreeSet<>(childValues); columnToValues.put(childQueryTree.getColumn(), value); if (!containsDuplicates && value.size() != childValues.size()) { containsDuplicates = true; } } else { Set<String> currentValues = columnToValues.get(childQueryTree.getColumn()); for (String childValue : childValues) { if (!containsDuplicates && currentValues.contains(childValue)) { containsDuplicates = true; } else { currentValues.add(childValue); } } } } else { nonEqualityOperators.add(childQueryTree); } } return containsDuplicates; } private boolean isRebuildRequired(Map<String, Set<String>> columnToValues, boolean containsDuplicates) { // We need to rebuild the predicate if there were duplicate values detected (eg. a = 1 OR a = 1) or if there is // more than one value for a column (eg. a = 1 OR a = 2) boolean rebuildRequired = containsDuplicates; if (!rebuildRequired) { for (Set<String> columnValues : columnToValues.values()) { if (1 < columnValues.size()) { rebuildRequired = true; break; } } } return rebuildRequired; } private FilterQueryTree rebuildFilterPredicate(Map<String, Set<String>> columnToValues, List<FilterQueryTree> nonEqualityOperators) { ArrayList<FilterQueryTree> newChildren = new ArrayList<>(); for (Map.Entry<String, Set<String>> columnAndValues : columnToValues.entrySet()) { newChildren.add(buildFilterQueryTreeForColumnAndValues(columnAndValues)); } newChildren.addAll(nonEqualityOperators); return new FilterQueryTree(null, null, FilterOperator.OR, newChildren); } private FilterQueryTree buildFilterQueryTreeForColumnAndValues(Map.Entry<String, Set<String>> columnAndValues) { // If there's only one value, turn it into an equality, otherwise turn it into an IN clause if (columnAndValues.getValue().size() == 1) { return new FilterQueryTree(columnAndValues.getKey(), elementListToDoubleTabSingletonList(columnAndValues.getValue()), FilterOperator.EQUALITY, null); } else { return new FilterQueryTree(columnAndValues.getKey(), elementListToDoubleTabSingletonList(columnAndValues.getValue()), FilterOperator.IN, null); } } private List<String> elementListToDoubleTabSingletonList(Collection<String> elementList) { return Collections.singletonList(StringUtil.join("\t\t", elementList.toArray(new String[elementList.size()]))); } private List<String> valueDoubleTabListToElements(List<String> doubleTabSeparatedElements) { Splitter valueSplitter = Splitter.on("\t\t"); List<String> valueElements = new ArrayList<>(); for (String value : doubleTabSeparatedElements) { valueElements.addAll(valueSplitter.splitToList(value)); } return valueElements; } private List<String> valueDoubleTabListToElements(String doubleTabSeparatedElements) { Splitter valueSplitter = Splitter.on("\t\t"); return valueSplitter.splitToList(doubleTabSeparatedElements); } }