SelectSubPlanAssembler.java example

Explorer
h-store-master
/* This file is part of VoltDB.
 * Copyright (C) 2008-2010 VoltDB L.L.C.
 *
 * VoltDB is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * VoltDB is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with VoltDB.  If not, see <http://www.gnu.org/licenses/>.
 */

package org.voltdb.planner;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

import org.voltdb.catalog.Database;
import org.voltdb.catalog.Table;
import org.voltdb.expressions.AbstractExpression;
import org.voltdb.expressions.ExpressionUtil;
import org.voltdb.plannodes.AbstractPlanNode;
import org.voltdb.plannodes.IndexScanPlanNode;
import org.voltdb.plannodes.NestLoopIndexPlanNode;
import org.voltdb.plannodes.NestLoopPlanNode;
import org.voltdb.plannodes.ReceivePlanNode;
import org.voltdb.types.JoinType;

/**
 * For a select, delete or update plan, this class builds the part of the plan
 * which collects tuples from relations. Given the tables and the predicate
 * (and sometimes the output columns), this will build a plan that will output
 * matching tuples to a temp table. A delete, update or send plan node can then
 * be glued on top of it. In selects, aggregation and other projections are also
 * done on top of the result from this class.
 *
 */
public class SelectSubPlanAssembler extends SubPlanAssembler {

    /**
     * Queue of plans that have been generated but not yet handed out. It is
     * filled in batches by generateMorePlansForJoinOrder (one batch per join
     * order) and drained one plan at a time by nextPlan.
     */
    ArrayDeque<AbstractPlanNode> m_plans = new ArrayDeque<AbstractPlanNode>();

    /**
     * Queue of all candidate join orders (permutations of the statement's
     * tables), assembled by queueAllJoinOrders and consumed one at a time by
     * nextPlan.
     */
    ArrayDeque<Table[]> m_joinOrders = new ArrayDeque<Table[]>();

    /**
     * Builds the assembler and immediately queues every join order for the
     * statement's tables, so that nextPlan can start producing plans.
     *
     * @param context The planner context; passed through to the superclass.
     * @param db The catalog's Database object.
     * @param parsedStmt The parsed and dissected statement object describing the sql to execute.
     * @param singlePartition Does this statement access one or multiple partitions?
     * @param partitionCount Passed through to the superclass; presumably the number
     * of partitions the plan may run on (TODO confirm against SubPlanAssembler).
     */
    SelectSubPlanAssembler(PlannerContext context, Database db, AbstractParsedStmt parsedStmt,
                           boolean singlePartition, int partitionCount)
    {
        super(context, db, parsedStmt, singlePartition, partitionCount);
        // enumerate all table permutations up front; plans are built lazily from them
        queueAllJoinOrders();
    }

    /**
     * Enumerate every ordering of the tables referenced by the parsed statement
     * and store each ordering in the join-order deque.
     */
    private void queueAllJoinOrders() {
        // sanity check: an insert statement never carries a where clause
        assert !((m_parsedStmt instanceof ParsedInsertStmt) && (m_parsedStmt.where != null));
        // sanity check: only a select may reference more than one table
        assert (m_parsedStmt.tableList.size() <= 1) || (m_parsedStmt instanceof ParsedSelectStmt);

        // neither of these selection lists is expected to be populated here
        assert m_parsedStmt.multiTableSelectionList.size() == 0;
        assert m_parsedStmt.noTableSelectionList.size() == 0;

        final int tableCount = m_parsedStmt.tableList.size();

        // the candidates to permute, copied out of the parsed statement
        final Table[] candidates = new Table[tableCount];
        for (int idx = 0; idx < candidates.length; idx++) {
            candidates[idx] = m_parsedStmt.tableList.get(idx);
        }

        // scratch array that the recursion fills in slot by slot
        final Table[] scratch = new Table[tableCount];

        // start the recursive enumeration at the first slot
        queueSubJoinOrders(candidates, scratch, 0);
    }

    /**
     * Recursively enumerate all join orders (permutations) of the input tables
     * and append each completed ordering to the join-order deque.
     *
     * @param inputTables The tables to order.
     * @param outputTables Scratch array shared across the recursion; slots before
     * index {@code place} hold the tables already chosen. A copy of it is queued
     * whenever an ordering is complete.
     * @param place The slot currently being filled (all slots before it are fixed).
     */
    private void queueSubJoinOrders(Table[] inputTables, Table[] outputTables, int place) {

        // base case: every slot is filled, so snapshot the finished ordering
        if (place == inputTables.length) {
            m_joinOrders.add(outputTables.clone());
            return;
        }

        // recursive case: try each input table in the current slot
        for (int candidate = 0; candidate < outputTables.length; candidate++) {
            outputTables[place] = inputTables[candidate];

            // reject the candidate when a table with the same type name
            // (ignoring case) already sits in one of the fixed slots
            boolean alreadyPlaced = false;
            for (int fixed = 0; fixed < place && !alreadyPlaced; fixed++) {
                alreadyPlaced = outputTables[fixed].getTypeName()
                        .equalsIgnoreCase(outputTables[place].getTypeName());
            }

            if (!alreadyPlaced) {
                // this slot is settled for now; permute the remaining ones
                queueSubJoinOrders(inputTables, outputTables, place + 1);
            }
        }
    }

    /**
     * Hand out the next generated plan. When the plan deque is empty, join
     * orders are pulled off the join-order deque one by one and expanded into
     * plans until at least one plan exists or the join orders run out.
     *
     * @return The next plan, or null once every join order has been consumed
     * and no plans remain.
     */
    @Override
    protected AbstractPlanNode nextPlan() {

        // usually a single pass; a join order may yield no plans, in which
        // case the next one is tried
        while (m_plans.isEmpty()) {
            final Table[] order = m_joinOrders.poll();

            // out of join orders means out of plans
            if (order == null) {
                return null;
            }

            generateMorePlansForJoinOrder(order);
        }

        return m_plans.poll();
    }

    /**
     * Build one sub-plan-graph for every access path combination of the given
     * join order and append them all to the plan deque. Adding no plans does not
     * mean plan generation is finished; it only means this particular join order
     * produced no access path combinations.
     *
     * @param joinOrder An array of tables in the join order.
     */
    private void generateMorePlansForJoinOrder(Table[] joinOrder) {
        assert joinOrder != null;
        assert m_plans.isEmpty();

        // every way of picking one access path per table for this join order
        final ArrayList<AccessPath[]> combos = generateAllAccessPathCombinationsForJoinOrder(joinOrder);

        // one plan per combination
        for (int i = 0; i < combos.size(); i++) {
            m_plans.add(getSelectSubPlanForAccessPath(joinOrder, combos.get(i)));
        }
    }

    /**
     * Given a specific join order and access path set for that join order, construct the plan
     * that gives the right tuples. The plan is built recursively: the plan for the tail of the
     * join order is built first, then the head table is joined on top of it, using a
     * NestLoopIndexPlanNode when the head's access plan is an index scan and a NestLoopPlanNode
     * otherwise.
     *
     * @param joinOrder An array of tables in a specific join order.
     * @param accessPath An array of access paths that match with the input tables.
     * @return A completed plan-sub-graph that should match the correct tuples from the
     * correct tables.
     * @throws IllegalStateException if one of the index scan's expressions cannot be cloned.
     */
    private AbstractPlanNode getSelectSubPlanForAccessPath(Table[] joinOrder, AccessPath[] accessPath) {

        // recursive stopping condition:
        //
        // If there is one table to scan from, then just return its access plan
        if (joinOrder.length == 1)
            return getAccessPlanForTable(joinOrder[0], accessPath[0]);

        // recursive step:
        //
        // create copies of the tails of the joinOrder and accessPath arrays
        Table[] subJoinOrder = Arrays.copyOfRange(joinOrder, 1, joinOrder.length);
        AccessPath[] subAccessPath = Arrays.copyOfRange(accessPath, 1, accessPath.length);

        // recursively call this method to get the plan for the tail of the join order
        AbstractPlanNode subPlan = getSelectSubPlanForAccessPath(subJoinOrder, subAccessPath);

        // get all the clauses that join the applicable two tables
        ArrayList<AbstractExpression> joinClauses = accessPath[0].joinExprs;

        AbstractPlanNode nljAccessPlan = getAccessPlanForTable(joinOrder[0], accessPath[0]);

        /*
         * If the access plan for the table in the join order was for a
         * distributed table scan there will be a send/receive pair at the top.
         * The optimizations (nestloop, nestloopindex) that follow don't care
         * about the send/receive pair, so pop up the IndexScanPlanNode or
         * ScanPlanNode for them to work on.
         */
        boolean accessPlanIsSendReceive = false;
        AbstractPlanNode accessPlanTemp = nljAccessPlan;
        if (nljAccessPlan instanceof ReceivePlanNode) {
            accessPlanIsSendReceive = true;
            // receive -> send -> scan: detach the scan from the pair
            nljAccessPlan = nljAccessPlan.getChild(0).getChild(0);
            nljAccessPlan.clearParents();
        }

        AbstractPlanNode retval = null;
        if (nljAccessPlan instanceof IndexScanPlanNode) {
            NestLoopIndexPlanNode nlijNode = new NestLoopIndexPlanNode(m_context, PlanAssembler.getNextPlanNodeId());

            nlijNode.setJoinType(JoinType.INNER);

            IndexScanPlanNode innerNode = (IndexScanPlanNode) nljAccessPlan;

            //
            // Now we have to update the column references used by the inner node
            //
            subPlan.updateOutputColumns(m_db);
            final List<Integer> outputColumns = subPlan.getOutputColumnGUIDs();
            final int offset = outputColumns.size();

            // Each expression is cloned before its column indexes are rewritten so
            // that the expression object held by the access path is left untouched.
            // A failed clone aborts plan construction instead of silently rewriting
            // the original expression.
            if (innerNode.getPredicate() != null) {
                innerNode.setPredicate(cloneJoinExpression(innerNode.getPredicate()));
                ExpressionUtil.setAndOffsetColumnIndexes(
                        m_context,
                        innerNode.getPredicate(),
                        offset, joinOrder[0].getTypeName(),
                        outputColumns);
            }

            if (innerNode.getEndExpression() != null) {
                innerNode.setEndExpression(cloneJoinExpression(innerNode.getEndExpression()));
                ExpressionUtil.setAndOffsetColumnIndexes(
                        m_context, innerNode.getEndExpression(), offset, joinOrder[0].getTypeName(), outputColumns);
            }

            // replace every search key expression with a re-indexed clone
            ArrayList<AbstractExpression> searchKeyExpressions = new ArrayList<AbstractExpression>(innerNode.getSearchKeyExpressions());
            innerNode.getSearchKeyExpressions().clear();
            for (AbstractExpression searchKey : searchKeyExpressions) {
                AbstractExpression expr = cloneJoinExpression(searchKey);
                ExpressionUtil.setColumnIndexes(m_context, expr, outputColumns);
                innerNode.getSearchKeyExpressions().add(expr);
            }

            nlijNode.addInlinePlanNode(nljAccessPlan);

            // combine the tails plan graph with the new head node
            nlijNode.addAndLinkChild(subPlan);

            retval = nlijNode;
        }
        else {
            NestLoopPlanNode nljNode = new NestLoopPlanNode(m_context, PlanAssembler.getNextPlanNodeId());
            if ((joinClauses != null) && (joinClauses.size() > 0))
                nljNode.setPredicate(ExpressionUtil.combine(joinClauses));
            // NOTE(review): the index-join branch above uses JoinType.INNER while this
            // branch uses JoinType.LEFT; left unchanged here -- confirm the asymmetry
            // is intended.
            nljNode.setJoinType(JoinType.LEFT);

            // combine the tails plan graph with the new head node
            nljNode.addAndLinkChild(nljAccessPlan);

            nljNode.addAndLinkChild(subPlan);

            retval = nljNode;
        }

        /*
         * Now push back to the send receive pair that was squirreled away earlier.
         */
        if (accessPlanIsSendReceive) {
            accessPlanTemp.getChild(0).clearChildren();
            accessPlanTemp.getChild(0).addAndLinkChild(retval);
            retval = accessPlanTemp;
        }

        return retval;
    }

    /**
     * Clone an expression, converting any failure into an unchecked exception
     * that keeps the original cause.
     *
     * @param expr The expression to clone.
     * @return The clone produced by ExpressionUtil.clone.
     * @throws IllegalStateException if the clone fails.
     */
    private static AbstractExpression cloneJoinExpression(AbstractExpression expr) {
        try {
            return ExpressionUtil.clone(expr);
        } catch (Exception e) {
            throw new IllegalStateException("Unable to clone expression while building a join plan", e);
        }
    }

    /**
     * For each table in the array, ask getRelevantAccessPathsForTable for the
     * access paths of that table given the tables that follow it in the array,
     * and collect the answers in a map keyed by table.
     *
     * @param tables The array of tables we are computing paths for.
     * @return A map that contains a list of access paths for each table in the input array.
     */
    private HashMap<Table, ArrayList<AccessPath>> generateAccessPathsForEachTable(Table[] tables) {
        final HashMap<Table, ArrayList<AccessPath>> pathsByTable = new HashMap<Table, ArrayList<AccessPath>>();

        for (int pos = 0; pos < tables.length; pos++) {
            // the tables positioned after this one (empty for the last table)
            final Table[] laterTables = Arrays.copyOfRange(tables, pos + 1, tables.length);

            pathsByTable.put(tables[pos], getRelevantAccessPathsForTable(tables[pos], laterTables));
        }

        return pathsByTable;
    }

    /**
     * Given a join order, compute every combination of per-table access paths.
     * Each element of the result is an array with one slot per table of the join
     * order, holding the access path chosen for the table at that position. The
     * method recurses on the tail of the join order and cross-products the head
     * table's options with the tail's combinations.
     *
     * @param joinOrder The list of tables in this sub-select in a particular order.
     * @return The list of all possible complete access path assignments for the
     * join order.
     */
    private ArrayList<AccessPath[]> generateAllAccessPathCombinationsForJoinOrder(Table[] joinOrder){

        // options for every table of this (sub) join order; only the head's
        // entry is consulted at this level, the tail is handled by the recursion
        final HashMap<Table, ArrayList<AccessPath>> optionsByTable = generateAccessPathsForEachTable(joinOrder);

        final ArrayList<AccessPath[]> combos = new ArrayList<AccessPath[]>();

        // base case: a single table, so each of its access paths is a
        // complete combination by itself
        if (joinOrder.length == 1) {
            for (AccessPath only : optionsByTable.get(joinOrder[0])) {
                combos.add(new AccessPath[] { only });
            }
            return combos;
        }

        // recursive case: the join order has more than one table

        // everything except the head of the join order
        final Table[] tailOrder = Arrays.copyOfRange(joinOrder, 1, joinOrder.length);

        // all combinations for the tail
        final ArrayList<AccessPath[]> tailCombos = generateAllAccessPathCombinationsForJoinOrder(tailOrder);

        // prepend each head option to each tail combination
        for (AccessPath headPath : optionsByTable.get(joinOrder[0])) {
            for (AccessPath[] tailPaths : tailCombos) {
                assert tailPaths.length == tailOrder.length;

                final AccessPath[] full = new AccessPath[joinOrder.length];
                full[0] = headPath;
                System.arraycopy(tailPaths, 0, full, 1, tailPaths.length);
                combos.add(full);
            }
        }

        return combos;
    }

    /**
     * Decides whether scanning a table has to be distributed: that is the case
     * only when the statement is not single-partition and the table is not
     * replicated.
     *
     * @param table The table that may or may not require a distributed scan
     * @return true if the table requires a distributed scan, false otherwise
     */
    @Override
    protected boolean tableRequiresDistributedScan(Table table) {
        // a single-partition statement never needs a distributed scan
        if (m_singlePartition) {
            return false;
        }
        // otherwise only non-replicated tables do
        return !table.getIsreplicated();
    }

}