/* * Copyright 2006-2012 Amazon Technologies, Inc. or its affiliates. * Amazon, Amazon.com and Carbonado are trademarks or registered trademarks * of Amazon Technologies, Inc. or its affiliates. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.amazon.carbonado.qe; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import com.amazon.carbonado.FetchException; import com.amazon.carbonado.RepositoryException; import com.amazon.carbonado.Storable; import com.amazon.carbonado.SupportException; import com.amazon.carbonado.filter.AndFilter; import com.amazon.carbonado.filter.ExistsFilter; import com.amazon.carbonado.filter.Filter; import com.amazon.carbonado.filter.OrFilter; import com.amazon.carbonado.filter.PropertyFilter; import com.amazon.carbonado.filter.Visitor; import com.amazon.carbonado.info.ChainedProperty; import com.amazon.carbonado.info.Direction; import com.amazon.carbonado.info.OrderedProperty; import com.amazon.carbonado.info.StorableIndex; import com.amazon.carbonado.info.StorableInfo; import com.amazon.carbonado.info.StorableIntrospector; import com.amazon.carbonado.info.StorableKey; /** * Analyzes a query specification and determines how it can be executed as a * union of smaller queries. If necessary, the UnionQueryAnalyzer will alter * the query slightly, imposing a total ordering. Internally, an {@link * IndexedQueryAnalyzer} is used for selecting the best indexes. * * <p>UnionQueryAnalyzer is sharable and thread-safe. An instance for a * particular Storable type can be cached, avoiding repeated construction * cost. In addition, the analyzer caches learned foreign indexes. * * @author Brian S O'Neill */ public class UnionQueryAnalyzer<S extends Storable> implements QueryExecutorFactory<S> { final IndexedQueryAnalyzer<S> mIndexAnalyzer; final RepositoryAccess mRepoAccess; /** * @param type type of storable being queried * @param access repository access for examing available indexes * @throws IllegalArgumentException if type or indexProvider is null */ public UnionQueryAnalyzer(Class<S> type, RepositoryAccess access) { mIndexAnalyzer = new IndexedQueryAnalyzer<S>(type, access); mRepoAccess = access; } public Class<S> getStorableType() { return mIndexAnalyzer.getStorableType(); } /** * @param filter optional filter which must be {@link Filter#isBound bound} * @param ordering optional properties which define desired ordering * @param hints optional query hints */ public Result analyze(Filter<S> filter, OrderingList<S> ordering, QueryHints hints) throws SupportException, RepositoryException { if (filter != null && !filter.isBound()) { throw new IllegalArgumentException("Filter must be bound"); } if (ordering == null) { ordering = OrderingList.emptyList(); } return buildResult(filter, ordering, hints); } /** * Returns an executor that handles the given query specification. * * @param filter optional filter which must be {@link Filter#isBound bound} * @param ordering optional properties which define desired ordering * @param hints optional query hints */ public QueryExecutor<S> executor(Filter<S> filter, OrderingList<S> ordering, QueryHints hints) throws RepositoryException { return analyze(filter, ordering, hints).createExecutor(); } /** * Splits the filter into sub-results, merges sub-results, and possibly * imposes a total ordering. */ private Result buildResult(Filter<S> filter, OrderingList<S> ordering, QueryHints hints) throws SupportException, RepositoryException { List<IndexedQueryAnalyzer<S>.Result> subResults; if (filter == null) { subResults = Collections .singletonList(mIndexAnalyzer.analyze(filter, ordering, hints)); } else { subResults = splitIntoSubResults(filter, ordering, hints); } if (subResults.size() <= 1) { // Total ordering not required. return new Result(subResults); } // If any orderings have an unspecified direction, switch to ASCENDING // or DESCENDING, depending on which is more popular. Then build new // sub-results. for (int pos = 0; pos < ordering.size(); pos++) { OrderedProperty<S> op = ordering.get(pos); if (op.getDirection() != Direction.UNSPECIFIED) { continue; } // Find out which direction is most popular for this property. Tally tally = new Tally(op.getChainedProperty()); for (IndexedQueryAnalyzer<S>.Result result : subResults) { tally.increment(findHandledDirection(result, op)); } ordering = ordering.replace(pos, op.direction(tally.getBestDirection())); // Re-calc with specified direction. Only do one property at a time // since one simple change might alter the query plan. subResults = splitIntoSubResults(filter, ordering, hints); if (subResults.size() <= 1) { // Total ordering no longer required. return new Result(subResults); } } // Gather all the keys available. As ordering properties touch key // properties, they are removed from all key sets. When a key set size // reaches zero, total ordering has been achieved. List<Set<ChainedProperty<S>>> keys = getKeys(); // Check if current ordering is total. for (OrderedProperty<S> op : ordering) { ChainedProperty<S> property = op.getChainedProperty(); if (pruneKeys(keys, property)) { // Found a key which is fully covered, indicating total ordering. return new Result(subResults, ordering); } } // Create a super key which contains all the properties required for // total ordering. The goal here is to append these properties to the // ordering in a fashion that takes advantage of each index's natural // ordering. This in turn should cause any sort operation to operate // over smaller groups. Smaller groups means smaller sort buffers. // Smaller sort buffers makes a merge sort happy. // Super key could be stored simply in a set, but a map makes it // convenient for tracking tallies. Map<ChainedProperty<S>, Tally> superKey = new LinkedHashMap<ChainedProperty<S>, Tally>(); for (Set<ChainedProperty<S>> key : keys) { for (ChainedProperty<S> property : key) { if (!superKey.containsKey(property)) { superKey.put(property, new Tally(property)); } } } // Keep looping until total ordering achieved. while (true) { // For each ordering score, iterate over the entire unused ordering // properties and select the next free property. If property is in // the super key increment a tally associated with property // direction. Choose the property with the best tally and augment // the orderings with it and create new sub-results. Remove the // property from the super key and the key set. If any key is now // fully covered, a total ordering has been achieved. for (IndexedQueryAnalyzer<S>.Result result : subResults) { OrderingScore<S> score = result.getCompositeScore().getOrderingScore(); OrderingList<S> unused = score.getUnusedOrdering(); if (unused.size() > 0) { for (OrderedProperty<S> prop : unused) { ChainedProperty<S> chainedProp = prop.getChainedProperty(); Tally tally = superKey.get(chainedProp); if (tally != null) { tally.increment(prop.getDirection()); } } } OrderingList<S> free = score.getFreeOrdering(); if (free.size() > 0) { OrderedProperty<S> prop = free.get(0); ChainedProperty<S> chainedProp = prop.getChainedProperty(); Tally tally = superKey.get(chainedProp); if (tally != null) { tally.increment(prop.getDirection()); } } } Tally best = bestTally(superKey.values()); ChainedProperty<S> bestProperty = best.getProperty(); // Now augment the orderings and create new sub-results. ordering = ordering.concat(OrderedProperty.get(bestProperty, best.getBestDirection())); subResults = splitIntoSubResults(filter, ordering, hints); if (subResults.size() <= 1) { // Total ordering no longer required. break; } // Remove property from super key and key set... superKey.remove(bestProperty); if (superKey.size() == 0) { break; } if (pruneKeys(keys, bestProperty)) { break; } // Clear the tallies for the next run. for (Tally tally : superKey.values()) { tally.clear(); } } return new Result(subResults, ordering); } /** * Returns a list of all primary and alternate keys, stripped of ordering. */ private List<Set<ChainedProperty<S>>> getKeys() throws SupportException, RepositoryException { StorableInfo<S> info = StorableIntrospector.examine(mIndexAnalyzer.getStorableType()); List<Set<ChainedProperty<S>>> keys = new ArrayList<Set<ChainedProperty<S>>>(); keys.add(stripOrdering(info.getPrimaryKey().getProperties())); for (StorableKey<S> altKey : info.getAlternateKeys()) { keys.add(stripOrdering(altKey.getProperties())); } // Also fold in all unique indexes, just in case they weren't reported // as actual keys. Collection<StorableIndex<S>> indexes = mRepoAccess.storageAccessFor(getStorableType()).getAllIndexes(); for (StorableIndex<S> index : indexes) { if (!index.isUnique()) { continue; } int propCount = index.getPropertyCount(); Set<ChainedProperty<S>> props = new LinkedHashSet<ChainedProperty<S>>(propCount); for (int i=0; i<propCount; i++) { props.add(index.getOrderedProperty(i).getChainedProperty()); } keys.add(props); } return keys; } private Set<ChainedProperty<S>> stripOrdering(Set<? extends OrderedProperty<S>> orderedProps) { Set<ChainedProperty<S>> props = new LinkedHashSet<ChainedProperty<S>>(orderedProps.size()); for (OrderedProperty<S> ordering : orderedProps) { props.add(ordering.getChainedProperty()); } return props; } /** * Removes the given property from all keys, returning true if any key has * zero properties as a result. */ private boolean pruneKeys(List<Set<ChainedProperty<S>>> keys, ChainedProperty<S> property) { boolean result = false; for (Set<ChainedProperty<S>> key : keys) { key.remove(property); if (key.size() == 0) { result = true; continue; } } return result; } private Tally bestTally(Iterable<Tally> tallies) { Tally best = null; for (Tally tally : tallies) { if (best == null || tally.compareTo(best) > 0) { best = tally; } } return best; } private Direction findHandledDirection(IndexedQueryAnalyzer<S>.Result result, OrderedProperty<S> unspecified) { ChainedProperty<S> chained = unspecified.getChainedProperty(); OrderingScore<S> score = result.getCompositeScore().getOrderingScore(); OrderingList<S> handled = score.getHandledOrdering(); for (OrderedProperty<S> property : handled) { if (chained.equals(property.getChainedProperty())) { return property.getDirection(); } } return Direction.UNSPECIFIED; } /** * Splits the filter into sub-results and possibly merges them. */ private List<IndexedQueryAnalyzer<S>.Result> splitIntoSubResults(Filter<S> filter, OrderingList<S> ordering, QueryHints hints) throws SupportException, RepositoryException { // Required for split to work. Filter<S> dnfFilter = filter.disjunctiveNormalForm(); Splitter splitter = new Splitter(ordering, hints); RepositoryException e = dnfFilter.accept(splitter, null); if (e != null) { throw e; } List<IndexedQueryAnalyzer<S>.Result> subResults = splitter.mSubResults; // Check if any sub-result handles nothing. If so, a full scan is the // best option for the entire query and all sub-results merge into a // single sub-result. Any sub-results which filter anything and contain // a join property in the filter are exempt from the merge. This is // because fewer joins are read than if a full scan is performed for // the entire query. The resulting union has both a full scan and an // index scan. IndexedQueryAnalyzer<S>.Result full = null; for (IndexedQueryAnalyzer<S>.Result result : subResults) { if (!result.handlesAnything()) { full = result; break; } if (!result.getCompositeScore().getFilteringScore().hasAnyMatches()) { if (full == null) { // This index is used only for its ordering, and it will be // tentatively selected as the "full scan". If a result is // found doesn't use an index for anything, then it becomes // the "full scan" index. full = result; } } } if (full == null) { // Okay, no full scan needed. return subResults; } List<IndexedQueryAnalyzer<S>.Result> mergedResults = new ArrayList<IndexedQueryAnalyzer<S>.Result>(); for (IndexedQueryAnalyzer<S>.Result result : subResults) { if (result == full) { // Add after everything has been merged into it. continue; } boolean exempt = result.getCompositeScore().getFilteringScore().hasAnyMatches(); if (exempt) { // Must also have a join in the filter to be exempt. List<PropertyFilter<S>> subFilters = PropertyFilterList.get(result.getFilter()); joinCheck: { for (PropertyFilter<S> subFilter : subFilters) { if (subFilter.getChainedProperty().getChainCount() > 0) { // A chain implies a join was followed, so result is exempt. break joinCheck; } } // No joins found, result is not exempt from merging into full scan. exempt = false; } } if (exempt) { mergedResults.add(result); } else { full = full.mergeRemainderFilter(result.getFilter()); } } if (mergedResults.size() == 0) { // Nothing was exempt. Rather than return a result with a dnf // filter, return full scan with a simpler reduced filter. full = full.withRemainderFilter(filter.reduce()); } mergedResults.add(full); return mergedResults; } public class Result { private final List<IndexedQueryAnalyzer<S>.Result> mSubResults; private final OrderingList<S> mTotalOrdering; Result(List<IndexedQueryAnalyzer<S>.Result> subResults) { this(subResults, null); } Result(List<IndexedQueryAnalyzer<S>.Result> subResults, OrderingList<S> totalOrdering) { if (subResults.size() < 1) { throw new IllegalArgumentException(); } mSubResults = Collections.unmodifiableList(subResults); mTotalOrdering = totalOrdering; } /** * Returns results for each sub-query to be executed in the union. If * only one result is returned, then no union needs to be performed. */ public List<IndexedQueryAnalyzer<S>.Result> getSubResults() { return mSubResults; } /** * Returns a total ordering, if one was imposed. Otherwise, null is returned. */ public OrderingList<S> getTotalOrdering() { return mTotalOrdering; } /** * Creates a QueryExecutor based on this result. */ public QueryExecutor<S> createExecutor() throws SupportException, FetchException, RepositoryException { List<IndexedQueryAnalyzer<S>.Result> subResults = getSubResults(); int size = subResults.size(); if (size == 1) { return subResults.get(0).createExecutor(); } List<QueryExecutor<S>> executors = new ArrayList<QueryExecutor<S>>(size); for (int i=0; i<size; i++) { executors.add(subResults.get(i).createExecutor()); } return new UnionQueryExecutor<S>(executors, mTotalOrdering); } } /** * Used to track which property direction is most popular. */ private class Tally implements Comparable<Tally> { private final ChainedProperty<S> mProperty; private int mAscendingCount; private int mDescendingCount; Tally(ChainedProperty<S> property) { mProperty = property; } ChainedProperty<S> getProperty() { return mProperty; } void increment(Direction dir) { switch (dir) { case UNSPECIFIED: mAscendingCount++; mDescendingCount++; break; case ASCENDING: mAscendingCount++; break; case DESCENDING: mDescendingCount++; break; } } /** * Only returns ASCENDING or DESCENDING. */ Direction getBestDirection() { if (mAscendingCount >= mDescendingCount) { return Direction.ASCENDING; } return Direction.DESCENDING; } int getBestCount() { if (mAscendingCount >= mDescendingCount) { return mAscendingCount; } return mDescendingCount; } void clear() { mAscendingCount = 0; mDescendingCount = 0; } /** * Returns -1 if this tally is worse. */ public int compareTo(Tally other) { int thisBest = getBestCount(); int otherBest = other.getBestCount(); if (thisBest < otherBest) { return -1; } if (thisBest > otherBest) { return 1; } return 0; } @Override public String toString() { return "Tally: {property=" + mProperty + ", asc=" + mAscendingCount + ", desc=" + mDescendingCount + '}'; } } /** * Analyzes a disjunctive normal filter into sub-results over filters that * only contain 'and' operations. */ private class Splitter extends Visitor<S, RepositoryException, Object> { private final OrderingList<S> mOrdering; private final QueryHints mHints; final List<IndexedQueryAnalyzer<S>.Result> mSubResults; Splitter(OrderingList<S> ordering, QueryHints hints) { mOrdering = ordering; mHints = hints; mSubResults = new ArrayList<IndexedQueryAnalyzer<S>.Result>(); } @Override public RepositoryException visit(OrFilter<S> filter, Object param) { try { Filter<S> left = filter.getLeftFilter(); if (!(left instanceof OrFilter)) { subAnalyze(left); } else { RepositoryException e = left.accept(this, param); if (e != null) { return e; } } Filter<S> right = filter.getRightFilter(); if (!(right instanceof OrFilter)) { subAnalyze(right); } else { RepositoryException e = right.accept(this, param); if (e != null) { return e; } } return null; } catch (RepositoryException e) { return e; } } // This method should only be called if root filter has no 'or' operators. @Override public RepositoryException visit(AndFilter<S> filter, Object param) { try { subAnalyze(filter); return null; } catch (RepositoryException e) { return e; } } // This method should only be called if root filter has no logical operators. @Override public RepositoryException visit(PropertyFilter<S> filter, Object param) { try { subAnalyze(filter); return null; } catch (RepositoryException e) { return e; } } // This method should only be called if root filter has no logical operators. @Override public RepositoryException visit(ExistsFilter<S> filter, Object param) { try { subAnalyze(filter); return null; } catch (RepositoryException e) { return e; } } private void subAnalyze(Filter<S> subFilter) throws SupportException, RepositoryException { IndexedQueryAnalyzer<S>.Result subResult = mIndexAnalyzer.analyze(subFilter, mOrdering, mHints); // Rather than blindly add to mSubResults, try to merge with // another result. This in turn reduces the number of cursors // needed by the union. int size = mSubResults.size(); for (int i=0; i<size; i++) { IndexedQueryAnalyzer<S>.Result existing = mSubResults.get(i); if (existing.canMergeRemainder(subResult)) { mSubResults.set(i, existing.mergeRemainder(subResult)); return; } } // Couldn't merge, so add a new entry. mSubResults.add(subResult); } } }