SelectPreponer.java example

Explorer
sql-layer-master
/**
 * Copyright (C) 2009-2013 FoundationDB, LLC
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package com.foundationdb.sql.optimizer.rule;

import com.foundationdb.sql.optimizer.plan.*;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;

/** Move WHERE clauses closer to their table origin.
 * This rule runs after joins of various sorts has been laid out, but
 * before while they are still in data-flow order.
 *
 * Note: <i>prepone</i>, while not an American or British English
 * word, is the transparent opposite of <i>postpone</i>.
 */
// TODO: Something similar is needed to handle moving HAVING
// conditions on the group by fields across the aggregation boundary
// and WHERE conditions on subqueries (views) into the subquery
// itself. These need to run earlier to affect indexing. Not sure how
// to integrate all these. Maybe move everything earlier on and then
// recognize joins of such filtered tables as Joinable.
public class SelectPreponer extends BaseRule
{
    private static final Logger logger = LoggerFactory.getLogger(SelectPreponer.class);

    @Override
    protected Logger getLogger() {
        return logger;
    }

    @Override
    public void apply(PlanContext plan) {
        TableOriginFinder finder = new TableOriginFinder();
        finder.find(plan.getPlan());
        Preponer preponer = new Preponer();
        for (PlanNode origin : finder.getOrigins()) {
            preponer.addOrigin(origin);
        }
        preponer.moveDeferred();
    }
    
    /** Find all the places where data starts, such as
     * <code>IndexScan</code> and <code><i>XxxLookup</i></code>.
     */
    static class TableOriginFinder implements PlanVisitor, ExpressionVisitor {
        List<PlanNode> origins = new ArrayList<>();

        public void find(PlanNode root) {
            root.accept(this);
        }

        public List<PlanNode> getOrigins() {
            return origins;
        }

        @Override
        public boolean visitEnter(PlanNode n) {
            return visit(n);
        }

        @Override
        public boolean visitLeave(PlanNode n) {
            return true;
        }

        @Override
        public boolean visit(PlanNode n) {
            if (n instanceof IndexScan) {
                origins.add(n);
            }
            else if (n instanceof TableLoader) {
                if (n instanceof BasePlanWithInput) {
                    PlanNode input = ((BasePlanWithInput)n).getInput();
                    if (!((input instanceof TableLoader) ||
                          (input instanceof IndexScan))) {
                        // Will put input in, so don't bother putting both in.
                        origins.add(n);
                    }
                }
                else {
                    origins.add(n);
                }
            }
            return true;
        }

        @Override
        public boolean visitEnter(ExpressionNode n) {
            return visit(n);
        }

        @Override
        public boolean visitLeave(ExpressionNode n) {
            return true;
        }

        @Override
        public boolean visit(ExpressionNode n) {
            return true;
        }
    }

    /** Holds the state of a single side of a loop, which usually
     * means a group with its in-group joins.
     */
    static class Loop {
        Map<TableSource,PlanNode> loaders; // Lookup operators.
        Map<ExpressionNode,PlanNode> indexColumns; // Individual columns of IndexScan.
        List<PlanNode> flattens; // Flatten & Product operators that do in-group join.
        Map<PlanNode,Set<TableSource>> flattened; // Tables that participate in those.
        
        public Loop() {
            loaders = new HashMap<>();
        }

        public void setIndex(IndexScan index) {
            indexColumns = new HashMap<>();
            for (int i = 0; i < index.getColumns().size(); i++) {
                ExpressionNode column = index.getColumns().get(i);
                if ((column != null) && index.isRecoverableAt(i)) {
                    indexColumns.put(column, index);
                }
            }
        }

        public void addLoader(PlanNode loader) {
            for (TableSource table : ((TableLoader)loader).getTables()) {
                loaders.put(table, loader);
            }
        }

        /** Add a within-group join: Flatten or Product. */
        public Set<TableSource> addFlattenOrProduct(PlanNode join) {
            if (flattens == null)
                flattens = new ArrayList<>();
            flattens.add(join);

            // Might be able to place multi-table conditions after a flatten join,
            // so record what is available.
            if (flattened == null)
                flattened = new HashMap<>();
            Set<TableSource> tables = new HashSet<>(loaders.keySet());
            flattened.put(join, tables);
            return tables;
        }

        public void addFlatten(Flatten flatten) {
            // Limit to tables that are inner joined (and on the outer
            // side of outer joins.)
            Set<TableSource> inner = flatten.getInnerJoinedTables();
            loaders.keySet().retainAll(inner);
            if (indexColumns != null) {
                Iterator<ExpressionNode> iter = indexColumns.keySet().iterator();
                while (iter.hasNext()) {
                    ExpressionNode expr = iter.next();
                    if (expr.isColumn() && 
                        !inner.contains(((ColumnExpression)expr).getTable()))
                        iter.remove();
                }
            }
            // A Flatten can get more tables than directly feed it when in a Product.
            // Really, it's the Lookup_Nested that gets them, but the
            // sources don't advertize that, since only one node is
            // allowed to introduce a table.
            addFlattenOrProduct(flatten).addAll(inner);
        }

        /** Merge another loop into this one. Although
         * <code>Product</code> starts with separate lookup operators,
         * it's a single loop for purposes of nesting.
         */
        public Loop merge(Loop other, PlanNode before) {
            loaders.putAll(other.loaders);
            if (indexColumns == null)
                indexColumns = other.indexColumns;
            else if (other.indexColumns != null)
                indexColumns.putAll(other.indexColumns);
            if (flattens == null)
                flattens = other.flattens;
            else if (other.flattens != null) {
                int i = -1;
                if (before != null)
                    i = flattens.indexOf(before);
                if (i < 0)
                    i = flattens.size();
                for (PlanNode flatten : other.flattens) {
                    if (flatten == before) 
                        break;
                    flattens.add(i++, flatten);
                }
            }
            if (flattened == null)
                flattened = other.flattened;
            else if (other.flattened != null) {
                for (Map.Entry<PlanNode,Set<TableSource>> entry : other.flattened.entrySet()) {
                    Set<TableSource> existing = flattened.get(entry.getKey());
                    if (existing != null)
                        existing.addAll(entry.getValue());
                    else
                        flattened.put(entry.getKey(), entry.getValue());
                }
            }
            return this;
        }

        /** Does this loop have any interesting state? */
        public boolean isEmpty() {
            return ((flattens == null) ||
                    (loaders.isEmpty() &&
                     ((indexColumns == null) || indexColumns.isEmpty())));
        }

        /** Does this loop consist solely of an index? */
        public boolean indexOnly() {
            return (loaders.isEmpty() && 
                    !((indexColumns == null) || indexColumns.isEmpty()));
        }
    }

    /** Move conditions as follows:
     * 
     * Starting with index scans and lookup operators, trace
     * downstream, adding tables from additional such operators. When
     * we come to a <code>Product</code>, merge with any other
     * streams. When we come to a <code>MapJoin</code>, note the
     * traversal of its loops, which corresponds to bindings being
     * available to inner loops.
     *
     * When we finally come to a <code>Select</code>, move conditions from it down to
     * earlier operators:<ul>
     * <li>If the condition only uses columns from an index, right after the scan.</li>
     * <li>If the condition uses columns from a single table, right
     * after that table is looked up.</li>
     * <li>If the condition uses multiple tables in a single group, when they are joined
     * together by <code>Flatten</code> or <code>Product</code></li>
     * <li>Tables from outer loops using <code>MapJoin</code>, which are available to
     *  the inner loop, can be ignored in the above.</li></ul>
     * 
     * In general, nested loop handling needs to be deferred until all
     * the loops are recorded.
     */
    static class Preponer {
        Map<Product,Loop> products;
        Map<Select,SelectConditions> selects;
        
        public Preponer() {
        }

        /** Starting at the given node, trace downstream until get to
         * some conditions or something we can't handle. */
        public void addOrigin(PlanNode node) {
            Loop loop = new Loop();
            boolean newLoop = true, hasMaps = false, hasProducts = false;
            PlanNode prev = null;
            if (node instanceof IndexScan) {
                loop.setIndex((IndexScan)node);
                prev = node;
                node = node.getOutput();
            }
            while (node instanceof TableLoader) {
                loop.addLoader(node);
                prev = node;
                node = node.getOutput();
            }
            while (true) {
                if (node instanceof Flatten) {
                    // A Flatten takes a single stream of lookups.
                    loop.addFlatten((Flatten)node);
                }
                else if (node instanceof Product) {
                    Product product = (Product)node;
                    if (newLoop) {
                        // Always inner join at present, so no filtering
                        // of sources.
                        loop.addFlattenOrProduct(product);

                        // A Product takes multiple streams, so we may
                        // have seen this one before.
                        if (products == null)
                            products = new HashMap<>();
                        Loop oloop = products.get(product);
                        if (oloop != null) {
                            loop = oloop.merge(loop, product);
                            newLoop = false;
                        }
                        else {
                            products.put(product, loop);
                        }
                    }
                    hasProducts = true;
                }
                else if (node instanceof MapJoin) {
                    MapJoin map = (MapJoin)node;
                    switch (map.getJoinType()) {
                    case INNER:
                        break;
                    case LEFT:
                    case SEMI:
                    case ANTI:
                        if (prev == map.getInner())
                            return;
                        break;
                    default:
                        return;
                    }
                    hasMaps = true;
                }
                else if (node instanceof Select) {
                    Select select = (Select)node;
                    if (!select.getConditions().isEmpty()) {
                        SelectConditions selectConditions = null;
                        boolean newSelect = false;
                        if (selects != null)
                            selectConditions = selects.get(select);
                        if (selectConditions == null) {
                            selectConditions = new SelectConditions(select);
                            newSelect = true;
                        }
                        if (!loop.isEmpty()) {
                            // Try once right away to get single table conditions.
                            selectConditions.moveConditions(loop);
                        }
                        if (select.getConditions().isEmpty()) {
                            if (!newSelect)
                                selects.remove(select);
                        }
                        else {
                            if (hasMaps && newLoop) {
                                selectConditions.addLoop(loop);
                            }
                            if (hasProducts || hasMaps) {
                                // Need to defer until have all the contributors
                                // to the Map joins. Enable reuse for
                                // Product.
                                if (selects == null)
                                    selects = new HashMap<>();
                                selects.put(select, selectConditions);
                            }
                        }
                    }
                }
                else
                    break;
                prev = node;
                node = node.getOutput();
            }
        }

        public void moveDeferred() {
            if (selects != null) {
                for (SelectConditions swm : selects.values()) {
                    if (swm.hasLoops()) {
                        swm.moveConditions(null);
                    }
                }
            }
        }

    }

    /** Holds what is known about inputs to a Select, which may come
     * from multiple <code>MapJoin</code> loops. */
    static class SelectConditions {
        Select select;
        ConditionDependencyAnalyzer dependencies;
        // The loops that are joined up to feed the Select, added in visitor
        // order, meaning that tables from an earlier loop should be available as
        // bound variables to later / deeper ones.
        List<Loop> loops;

        public SelectConditions(Select select) {
            this.select = select;
            dependencies = new ConditionDependencyAnalyzer(select);
        }

        public void addLoop(Loop loop) {
            if (loops == null)
                loops = new ArrayList<>();
            loops.add(loop);
        }

        public boolean hasLoops() {
            return (loops != null);
        }
        
        /** Try to move conditions from <code>Select</code>.
         * @param loop If non-null, have a straight path to these
         * conditions and know where tables came from.  See what can
         * be moved back there.
         */
        public void moveConditions(Loop loop) {
            Iterator<ConditionExpression> iter = select.getConditions().iterator();
            while (iter.hasNext()) {
                ConditionExpression condition = iter.next();
                ColumnSource singleTable = dependencies.analyze(condition);
                PlanNode moveTo = canMove(loop, singleTable);
                if ((moveTo != null) && (moveTo != select.getInput())) {
                    moveCondition(condition, moveTo);
                    iter.remove();
                }
            }
        }
        
        /** Return where this condition can move. */
        // TODO: Could move earlier after subset of joins by breaking apart Flatten.
        public PlanNode canMove(Loop loop, ColumnSource singleTable) {
            Set<TableSource> outerTables = null;
            if (loop == null) {
                // If the condition only references a single table, no
                // need to check outer bindings; it's wherever it is.
                if (singleTable == null)
                    outerTables = new HashSet<>();
                // Several nested loops: find the shallowest one that has everything.
                loop = findLoop(outerTables);
                if (loop == null)
                    return null;
            }
            if (loop.indexColumns != null) {
                // Can check the index column(s) before it's used for lookup.
                PlanNode loader = getSingleIndexLoader(loop, outerTables);
                if (loader != null)
                    return loader;
            }
            Set<ColumnSource> allTables = dependencies.getReferencedTables();
            if ((singleTable == null) && (outerTables != null)) {
                // Might still narrow down to a single table within this loop.
                allTables.removeAll(outerTables);
                if (allTables.size() == 1)
                    singleTable = allTables.iterator().next();
            }
            if (singleTable != null) {
                return loop.loaders.get(singleTable);
            }
            if ((loop.flattens != null) && !allTables.isEmpty()) {
                flattens:
                for (PlanNode flatten : loop.flattens) {
                    // Find the first (deepest) flatten that has all the tables we need.
                    Set<TableSource> tables = loop.flattened.get(flatten);
                    for (ColumnSource table : allTables) {
                        if (!tables.contains(table))
                            continue flattens;
                    }
                    return flatten;
                }
            }
            return null;
        }

        /** Find the first loop that has enough to evaluate the condition. */
        public Loop findLoop(Set<TableSource> outerTables) {
            for (Loop loop : loops) {
                if (loop.indexOnly()) {
                    // If the map loop is just an index, have to
                    // look at individual columns.
                    Set<TableSource> maybeOuterTables = null;
                    if (outerTables != null)
                        // Even though index only has some columns, can exclude whole
                        // tables for purposes of deeper loops.
                        maybeOuterTables = new HashSet<>();
                    boolean allFound = true;
                    for (ColumnExpression column : dependencies.getReferencedColumns()) {
                        if (outerTables != null) {
                            if (outerTables.contains(column.getTable())) 
                                continue;
                        }
                        if (loop.indexColumns.containsKey(column)) {
                            if (maybeOuterTables != null)
                                maybeOuterTables.add((TableSource)column.getTable());
                        }
                        else {
                            allFound = false;
                        }
                    }
                    if (allFound)
                        return loop;
                    if (maybeOuterTables != null)
                        outerTables.addAll(maybeOuterTables);
                }
                else {
                    boolean allFound = true;
                    for (ColumnSource referencedTable : dependencies.getReferencedTables()) {
                        if (outerTables != null) {
                            if (outerTables.contains(referencedTable))
                                continue;
                        }
                        if (!loop.loaders.containsKey(referencedTable)) {
                            allFound = false;
                            break;
                        }
                    }
                    if (allFound)
                        return loop;
                    if (outerTables != null)
                        // Not moving to this loop; its tables are then available.
                        outerTables.addAll(loop.loaders.keySet());
                }
            }
            return null;
        }

        /** If all the referenced columns come from the same index, return it. */
        public PlanNode getSingleIndexLoader(Loop loop,
                                             Set<TableSource> outerTables) {
            PlanNode single = null;
            for (ColumnExpression column : dependencies.getReferencedColumns()) {
                if (outerTables != null) {
                    if (outerTables.contains(column.getTable())) 
                        continue;
                }
                PlanNode loader = loop.indexColumns.get(column);
                if (loader == null)
                    return null;
                if (single == null)
                    single = loader;
                else if (single != loader)
                    return null;
            }
            return single;
        }

        /** Move the given condition to a Select that is right after the given node. */
        public void moveCondition(ConditionExpression condition, PlanNode before) {
            Select select = null;
            PlanWithInput after = before.getOutput();
            if (after instanceof Select)
                select = (Select)after;
            else {
                select = new Select(before, new ConditionList(1));
                after.replaceInput(before, select);
            }
            select.getConditions().add(condition);
        }

    }

}