/**
 * Copyright (C) 2009-2013 FoundationDB, LLC
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package com.foundationdb.sql.optimizer.rule;

import com.foundationdb.sql.optimizer.plan.*;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;

/** Move WHERE clauses closer to their table origin.
 * This rule runs after joins of various sorts have been laid out, but
 * while they are still in data-flow order.
 *
 * Note: <i>prepone</i>, while not an American or British English
 * word, is the transparent opposite of <i>postpone</i>.
 */
// TODO: Something similar is needed to handle moving HAVING
// conditions on the group by fields across the aggregation boundary
// and WHERE conditions on subqueries (views) into the subquery
// itself. These need to run earlier to affect indexing. Not sure how
// to integrate all these. Maybe move everything earlier on and then
// recognize joins of such filtered tables as Joinable.
public class SelectPreponer extends BaseRule { private static final Logger logger = LoggerFactory.getLogger(SelectPreponer.class); @Override protected Logger getLogger() { return logger; } @Override public void apply(PlanContext plan) { TableOriginFinder finder = new TableOriginFinder(); finder.find(plan.getPlan()); Preponer preponer = new Preponer(); for (PlanNode origin : finder.getOrigins()) { preponer.addOrigin(origin); } preponer.moveDeferred(); } /** Find all the places where data starts, such as * <code>IndexScan</code> and <code><i>XxxLookup</i></code>. */ static class TableOriginFinder implements PlanVisitor, ExpressionVisitor { List<PlanNode> origins = new ArrayList<>(); public void find(PlanNode root) { root.accept(this); } public List<PlanNode> getOrigins() { return origins; } @Override public boolean visitEnter(PlanNode n) { return visit(n); } @Override public boolean visitLeave(PlanNode n) { return true; } @Override public boolean visit(PlanNode n) { if (n instanceof IndexScan) { origins.add(n); } else if (n instanceof TableLoader) { if (n instanceof BasePlanWithInput) { PlanNode input = ((BasePlanWithInput)n).getInput(); if (!((input instanceof TableLoader) || (input instanceof IndexScan))) { // Will put input in, so don't bother putting both in. origins.add(n); } } else { origins.add(n); } } return true; } @Override public boolean visitEnter(ExpressionNode n) { return visit(n); } @Override public boolean visitLeave(ExpressionNode n) { return true; } @Override public boolean visit(ExpressionNode n) { return true; } } /** Holds the state of a single side of a loop, which usually * means a group with its in-group joins. */ static class Loop { Map<TableSource,PlanNode> loaders; // Lookup operators. Map<ExpressionNode,PlanNode> indexColumns; // Individual columns of IndexScan. List<PlanNode> flattens; // Flatten & Product operators that do in-group join. Map<PlanNode,Set<TableSource>> flattened; // Tables that participate in those. 
public Loop() { loaders = new HashMap<>(); } public void setIndex(IndexScan index) { indexColumns = new HashMap<>(); for (int i = 0; i < index.getColumns().size(); i++) { ExpressionNode column = index.getColumns().get(i); if ((column != null) && index.isRecoverableAt(i)) { indexColumns.put(column, index); } } } public void addLoader(PlanNode loader) { for (TableSource table : ((TableLoader)loader).getTables()) { loaders.put(table, loader); } } /** Add a within-group join: Flatten or Product. */ public Set<TableSource> addFlattenOrProduct(PlanNode join) { if (flattens == null) flattens = new ArrayList<>(); flattens.add(join); // Might be able to place multi-table conditions after a flatten join, // so record what is available. if (flattened == null) flattened = new HashMap<>(); Set<TableSource> tables = new HashSet<>(loaders.keySet()); flattened.put(join, tables); return tables; } public void addFlatten(Flatten flatten) { // Limit to tables that are inner joined (and on the outer // side of outer joins.) Set<TableSource> inner = flatten.getInnerJoinedTables(); loaders.keySet().retainAll(inner); if (indexColumns != null) { Iterator<ExpressionNode> iter = indexColumns.keySet().iterator(); while (iter.hasNext()) { ExpressionNode expr = iter.next(); if (expr.isColumn() && !inner.contains(((ColumnExpression)expr).getTable())) iter.remove(); } } // A Flatten can get more tables than directly feed it when in a Product. // Really, it's the Lookup_Nested that gets them, but the // sources don't advertize that, since only one node is // allowed to introduce a table. addFlattenOrProduct(flatten).addAll(inner); } /** Merge another loop into this one. Although * <code>Product</code> starts with separate lookup operators, * it's a single loop for purposes of nesting. 
*/ public Loop merge(Loop other, PlanNode before) { loaders.putAll(other.loaders); if (indexColumns == null) indexColumns = other.indexColumns; else if (other.indexColumns != null) indexColumns.putAll(other.indexColumns); if (flattens == null) flattens = other.flattens; else if (other.flattens != null) { int i = -1; if (before != null) i = flattens.indexOf(before); if (i < 0) i = flattens.size(); for (PlanNode flatten : other.flattens) { if (flatten == before) break; flattens.add(i++, flatten); } } if (flattened == null) flattened = other.flattened; else if (other.flattened != null) { for (Map.Entry<PlanNode,Set<TableSource>> entry : other.flattened.entrySet()) { Set<TableSource> existing = flattened.get(entry.getKey()); if (existing != null) existing.addAll(entry.getValue()); else flattened.put(entry.getKey(), entry.getValue()); } } return this; } /** Does this loop have any interesting state? */ public boolean isEmpty() { return ((flattens == null) || (loaders.isEmpty() && ((indexColumns == null) || indexColumns.isEmpty()))); } /** Does this loop consist solely of an index? */ public boolean indexOnly() { return (loaders.isEmpty() && !((indexColumns == null) || indexColumns.isEmpty())); } } /** Move conditions as follows: * * Starting with index scans and lookup operators, trace * downstream, adding tables from additional such operators. When * we come to a <code>Product</code>, merge with any other * streams. When we come to a <code>MapJoin</code>, note the * traversal of its loops, which corresponds to bindings being * available to inner loops. 
* * When we finally come to a <code>Select</code>, move conditions from it down to * earlier operators:<ul> * <li>If the condition only uses columns from an index, right after the scan.</li> * <li>If the condition uses columns from a single table, right * after that table is looked up.</li> * <li>If the condition uses multiple tables in a single group, when they are joined * together by <code>Flatten</code> or <code>Product</code></li> * <li>Tables from outer loops using <code>MapJoin</code>, which are available to * the inner loop, can be ignored in the above.</li></ul> * * In general, nested loop handling needs to be deferred until all * the loops are recorded. */ static class Preponer { Map<Product,Loop> products; Map<Select,SelectConditions> selects; public Preponer() { } /** Starting at the given node, trace downstream until get to * some conditions or something we can't handle. */ public void addOrigin(PlanNode node) { Loop loop = new Loop(); boolean newLoop = true, hasMaps = false, hasProducts = false; PlanNode prev = null; if (node instanceof IndexScan) { loop.setIndex((IndexScan)node); prev = node; node = node.getOutput(); } while (node instanceof TableLoader) { loop.addLoader(node); prev = node; node = node.getOutput(); } while (true) { if (node instanceof Flatten) { // A Flatten takes a single stream of lookups. loop.addFlatten((Flatten)node); } else if (node instanceof Product) { Product product = (Product)node; if (newLoop) { // Always inner join at present, so no filtering // of sources. loop.addFlattenOrProduct(product); // A Product takes multiple streams, so we may // have seen this one before. 
if (products == null) products = new HashMap<>(); Loop oloop = products.get(product); if (oloop != null) { loop = oloop.merge(loop, product); newLoop = false; } else { products.put(product, loop); } } hasProducts = true; } else if (node instanceof MapJoin) { MapJoin map = (MapJoin)node; switch (map.getJoinType()) { case INNER: break; case LEFT: case SEMI: case ANTI: if (prev == map.getInner()) return; break; default: return; } hasMaps = true; } else if (node instanceof Select) { Select select = (Select)node; if (!select.getConditions().isEmpty()) { SelectConditions selectConditions = null; boolean newSelect = false; if (selects != null) selectConditions = selects.get(select); if (selectConditions == null) { selectConditions = new SelectConditions(select); newSelect = true; } if (!loop.isEmpty()) { // Try once right away to get single table conditions. selectConditions.moveConditions(loop); } if (select.getConditions().isEmpty()) { if (!newSelect) selects.remove(select); } else { if (hasMaps && newLoop) { selectConditions.addLoop(loop); } if (hasProducts || hasMaps) { // Need to defer until have all the contributors // to the Map joins. Enable reuse for // Product. if (selects == null) selects = new HashMap<>(); selects.put(select, selectConditions); } } } } else break; prev = node; node = node.getOutput(); } } public void moveDeferred() { if (selects != null) { for (SelectConditions swm : selects.values()) { if (swm.hasLoops()) { swm.moveConditions(null); } } } } } /** Holds what is known about inputs to a Select, which may come * from multiple <code>MapJoin</code> loops. */ static class SelectConditions { Select select; ConditionDependencyAnalyzer dependencies; // The loops that are joined up to feed the Select, added in visitor // order, meaning that tables from an earlier loop should be available as // bound variables to later / deeper ones. 
List<Loop> loops; public SelectConditions(Select select) { this.select = select; dependencies = new ConditionDependencyAnalyzer(select); } public void addLoop(Loop loop) { if (loops == null) loops = new ArrayList<>(); loops.add(loop); } public boolean hasLoops() { return (loops != null); } /** Try to move conditions from <code>Select</code>. * @param loop If non-null, have a straight path to these * conditions and know where tables came from. See what can * be moved back there. */ public void moveConditions(Loop loop) { Iterator<ConditionExpression> iter = select.getConditions().iterator(); while (iter.hasNext()) { ConditionExpression condition = iter.next(); ColumnSource singleTable = dependencies.analyze(condition); PlanNode moveTo = canMove(loop, singleTable); if ((moveTo != null) && (moveTo != select.getInput())) { moveCondition(condition, moveTo); iter.remove(); } } } /** Return where this condition can move. */ // TODO: Could move earlier after subset of joins by breaking apart Flatten. public PlanNode canMove(Loop loop, ColumnSource singleTable) { Set<TableSource> outerTables = null; if (loop == null) { // If the condition only references a single table, no // need to check outer bindings; it's wherever it is. if (singleTable == null) outerTables = new HashSet<>(); // Several nested loops: find the shallowest one that has everything. loop = findLoop(outerTables); if (loop == null) return null; } if (loop.indexColumns != null) { // Can check the index column(s) before it's used for lookup. PlanNode loader = getSingleIndexLoader(loop, outerTables); if (loader != null) return loader; } Set<ColumnSource> allTables = dependencies.getReferencedTables(); if ((singleTable == null) && (outerTables != null)) { // Might still narrow down to a single table within this loop. 
allTables.removeAll(outerTables); if (allTables.size() == 1) singleTable = allTables.iterator().next(); } if (singleTable != null) { return loop.loaders.get(singleTable); } if ((loop.flattens != null) && !allTables.isEmpty()) { flattens: for (PlanNode flatten : loop.flattens) { // Find the first (deepest) flatten that has all the tables we need. Set<TableSource> tables = loop.flattened.get(flatten); for (ColumnSource table : allTables) { if (!tables.contains(table)) continue flattens; } return flatten; } } return null; } /** Find the first loop that has enough to evaluate the condition. */ public Loop findLoop(Set<TableSource> outerTables) { for (Loop loop : loops) { if (loop.indexOnly()) { // If the map loop is just an index, have to // look at individual columns. Set<TableSource> maybeOuterTables = null; if (outerTables != null) // Even though index only has some columns, can exclude whole // tables for purposes of deeper loops. maybeOuterTables = new HashSet<>(); boolean allFound = true; for (ColumnExpression column : dependencies.getReferencedColumns()) { if (outerTables != null) { if (outerTables.contains(column.getTable())) continue; } if (loop.indexColumns.containsKey(column)) { if (maybeOuterTables != null) maybeOuterTables.add((TableSource)column.getTable()); } else { allFound = false; } } if (allFound) return loop; if (maybeOuterTables != null) outerTables.addAll(maybeOuterTables); } else { boolean allFound = true; for (ColumnSource referencedTable : dependencies.getReferencedTables()) { if (outerTables != null) { if (outerTables.contains(referencedTable)) continue; } if (!loop.loaders.containsKey(referencedTable)) { allFound = false; break; } } if (allFound) return loop; if (outerTables != null) // Not moving to this loop; its tables are then available. outerTables.addAll(loop.loaders.keySet()); } } return null; } /** If all the referenced columns come from the same index, return it. 
*/ public PlanNode getSingleIndexLoader(Loop loop, Set<TableSource> outerTables) { PlanNode single = null; for (ColumnExpression column : dependencies.getReferencedColumns()) { if (outerTables != null) { if (outerTables.contains(column.getTable())) continue; } PlanNode loader = loop.indexColumns.get(column); if (loader == null) return null; if (single == null) single = loader; else if (single != loader) return null; } return single; } /** Move the given condition to a Select that is right after the given node. */ public void moveCondition(ConditionExpression condition, PlanNode before) { Select select = null; PlanWithInput after = before.getOutput(); if (after instanceof Select) select = (Select)after; else { select = new Select(before, new ConditionList(1)); after.replaceInput(before, select); } select.getConditions().add(condition); } } }