/* This file is part of VoltDB. * Copyright (C) 2008-2017 VoltDB Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with VoltDB. If not, see <http://www.gnu.org/licenses/>. */ package org.voltdb.planner; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.NavigableSet; import java.util.Set; import org.json_voltpatches.JSONException; import org.voltdb.VoltType; import org.voltdb.catalog.CatalogMap; import org.voltdb.catalog.Column; import org.voltdb.catalog.ColumnRef; import org.voltdb.catalog.Constraint; import org.voltdb.catalog.Database; import org.voltdb.catalog.Index; import org.voltdb.catalog.Table; import org.voltdb.expressions.AbstractExpression; import org.voltdb.expressions.AggregateExpression; import org.voltdb.expressions.ConstantValueExpression; import org.voltdb.expressions.ExpressionUtil; import org.voltdb.expressions.OperatorExpression; import org.voltdb.expressions.ParameterValueExpression; import org.voltdb.expressions.SelectSubqueryExpression; import org.voltdb.expressions.TupleAddressExpression; import org.voltdb.expressions.TupleValueExpression; import org.voltdb.expressions.WindowFunctionExpression; import org.voltdb.planner.microoptimizations.MicroOptimizationRunner; import org.voltdb.planner.parseinfo.BranchNode; import org.voltdb.planner.parseinfo.JoinNode; import org.voltdb.planner.parseinfo.StmtSubqueryScan; import org.voltdb.planner.parseinfo.StmtTableScan; import org.voltdb.plannodes.AbstractJoinPlanNode; import org.voltdb.plannodes.AbstractPlanNode; import org.voltdb.plannodes.AbstractReceivePlanNode; import org.voltdb.plannodes.AbstractScanPlanNode; import org.voltdb.plannodes.AggregatePlanNode; import org.voltdb.plannodes.DeletePlanNode; import org.voltdb.plannodes.HashAggregatePlanNode; import org.voltdb.plannodes.IndexScanPlanNode; import org.voltdb.plannodes.IndexSortablePlanNode; import org.voltdb.plannodes.IndexUseForOrderBy; import org.voltdb.plannodes.InsertPlanNode; import org.voltdb.plannodes.LimitPlanNode; import org.voltdb.plannodes.MaterializePlanNode; import org.voltdb.plannodes.MergeReceivePlanNode; import org.voltdb.plannodes.NestLoopPlanNode; import org.voltdb.plannodes.NodeSchema; import org.voltdb.plannodes.OrderByPlanNode; import org.voltdb.plannodes.PartialAggregatePlanNode; import org.voltdb.plannodes.ProjectionPlanNode; import org.voltdb.plannodes.ReceivePlanNode; import org.voltdb.plannodes.SchemaColumn; import org.voltdb.plannodes.SendPlanNode; import org.voltdb.plannodes.SeqScanPlanNode; import org.voltdb.plannodes.SwapTablesPlanNode; import org.voltdb.plannodes.UnionPlanNode; import org.voltdb.plannodes.UpdatePlanNode; import org.voltdb.plannodes.WindowFunctionPlanNode; import org.voltdb.types.ConstraintType; import org.voltdb.types.ExpressionType; import org.voltdb.types.IndexType; import org.voltdb.types.JoinType; import org.voltdb.types.PlanNodeType; 
import org.voltdb.types.SortDirectionType;
import org.voltdb.utils.CatalogUtil;

/**
 * The query planner accepts catalog data, SQL statements from the catalog, then
 * outputs a set of complete and correct query plans. It will output MANY plans
 * and some of them will be stupid. The best plan will be selected by computing
 * resource usage statistics for the plans, then using those statistics to
 * compute the cost of a specific plan. The plan with the lowest cost wins.
 */
public class PlanAssembler {

    // A convenience struct to accumulate results after parsing multiple statements
    private static class ParsedResultAccumulator {
        public final boolean m_orderIsDeterministic;
        public final boolean m_hasLimitOrOffset;
        public final String m_isContentDeterministic;

        public ParsedResultAccumulator(boolean orderIsDeterministic,
                boolean hasLimitOrOffset, String isContentDeterministic) {
            m_orderIsDeterministic = orderIsDeterministic;
            m_hasLimitOrOffset = hasLimitOrOffset;
            m_isContentDeterministic = isContentDeterministic;
        }
    }

    /** convenience pointer to the database object in the catalog */
    private final Database m_catalogDb;

    /** parsed statement for an insert */
    private ParsedInsertStmt m_parsedInsert = null;
    /** parsed statement for an update */
    private ParsedUpdateStmt m_parsedUpdate = null;
    /** parsed statement for a delete */
    private ParsedDeleteStmt m_parsedDelete = null;
    /** parsed statement for a swap */
    private ParsedSwapStmt m_parsedSwap = null;
    /** parsed statement for a select */
    private ParsedSelectStmt m_parsedSelect = null;
    /** parsed statement for a union */
    private ParsedUnionStmt m_parsedUnion = null;

    /** plan selector */
    private final PlanSelector m_planSelector;

    /** Describes the specified and inferred partition context. */
    private StatementPartitioning m_partitioning;

    /** Error message */
    private String m_recentErrorMsg;

    /**
     * Used to generate the table-touching parts of a plan. All join-order and
     * access path selection stuff is done by the SelectSubPlanAssembler.
     */
    private SubPlanAssembler m_subAssembler = null;

    /**
     * Flag set when the only expected plan for a statement has already been generated.
     */
    private boolean m_bestAndOnlyPlanWasGenerated = false;

    /**
     * @param catalogDb
     *            Catalog info about schema, metadata and procedures.
     * @param partitioning
     *            Describes the specified and inferred partition context.
     * @param planSelector
     *            Accumulates candidate plans and tracks the best one.
     */
    PlanAssembler(Database catalogDb, StatementPartitioning partitioning, PlanSelector planSelector) {
        m_catalogDb = catalogDb;
        m_partitioning = partitioning;
        m_planSelector = planSelector;
    }

    String getSQLText() {
        if (m_parsedDelete != null) {
            return m_parsedDelete.m_sql;
        }
        if (m_parsedInsert != null) {
            return m_parsedInsert.m_sql;
        }
        if (m_parsedUpdate != null) {
            return m_parsedUpdate.m_sql;
        }
        if (m_parsedSelect != null) {
            return m_parsedSelect.m_sql;
        }
        assert(false);
        return null;
    }

    /**
     * Return true if tableList includes at least one read-only materialized view
     * (a matview whose source table is not an export stream).
     */
    private boolean tableListIncludesReadOnlyView(List<Table> tableList) {
        NavigableSet<String> exportTables = CatalogUtil.getExportTableNames(m_catalogDb);
        for (Table table : tableList) {
            if (table.getMaterializer() != null &&
                    !exportTables.contains(table.getMaterializer().getTypeName())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Return true if tableList includes at least one export table.
*/ private boolean tableListIncludesExportOnly(List<Table> tableList) { // list of all export tables (assume uppercase) NavigableSet<String> exportTables = CatalogUtil.getExportTableNames(m_catalogDb); // this loop is O(number-of-joins * number-of-export-tables) // which seems acceptable if not great. Probably faster than // re-hashing the export only tables for faster lookup. for (Table table : tableList) { if (exportTables.contains(table.getTypeName())) { return true; } } return false; } private boolean isPartitionColumnInGroupbyList(List<ParsedColInfo> groupbyColumns) { assert(m_parsedSelect != null); if (groupbyColumns == null) { return false; } for (ParsedColInfo groupbyCol : groupbyColumns) { StmtTableScan scanTable = m_parsedSelect.getStmtTableScanByAlias(groupbyCol.tableAlias); // table alias may be from AbstractParsedStmt.TEMP_TABLE_NAME. if (scanTable != null && scanTable.getPartitioningColumns() != null) { for (SchemaColumn pcol : scanTable.getPartitioningColumns()) { if (pcol != null && pcol.getColumnName().equals(groupbyCol.columnName) ) { return true; } } } } return false; } private boolean canPushDownDistinctAggregation(AggregateExpression aggExpr) { assert(m_parsedSelect != null); assert(aggExpr != null); assert(aggExpr.isDistinct()); if ( aggExpr.getExpressionType() == ExpressionType.AGGREGATE_COUNT_STAR ) { return true; } AbstractExpression aggArg = aggExpr.getLeft(); // constant if (aggArg instanceof ConstantValueExpression || aggArg instanceof ParameterValueExpression) { return true; } if ( ! (aggArg instanceof TupleValueExpression)) { return false; } TupleValueExpression tve = (TupleValueExpression) aggArg; String tableAlias = tve.getTableAlias(); StmtTableScan scanTable = m_parsedSelect.getStmtTableScanByAlias(tableAlias); // table alias may be from AbstractParsedStmt.TEMP_TABLE_NAME. if (scanTable == null || scanTable.getPartitioningColumns() == null) { return false; } for (SchemaColumn pcol : scanTable.getPartitioningColumns()) { if (pcol != null && pcol.getColumnName().equals(tve.getColumnName()) ) { return true; } } return false; } /** * Clear any old state and get ready to plan a new plan. The next call to * getNextPlan() will return the first candidate plan for these parameters. * */ private void setupForNewPlans(AbstractParsedStmt parsedStmt) { m_bestAndOnlyPlanWasGenerated = false; m_partitioning.analyzeTablePartitioning(parsedStmt.allScans()); if (parsedStmt instanceof ParsedUnionStmt) { m_parsedUnion = (ParsedUnionStmt) parsedStmt; return; } if (parsedStmt instanceof ParsedSelectStmt) { if (tableListIncludesExportOnly(parsedStmt.m_tableList)) { throw new PlanningErrorException( "Illegal to read a stream."); } m_parsedSelect = (ParsedSelectStmt) parsedStmt; // Simplify the outer join if possible if (m_parsedSelect.m_joinTree instanceof BranchNode) { if (! m_parsedSelect.hasJoinOrder()) { simplifyOuterJoin((BranchNode)m_parsedSelect.m_joinTree); } // Convert RIGHT joins to the LEFT ones ((BranchNode)m_parsedSelect.m_joinTree).toLeftJoin(); } m_subAssembler = new SelectSubPlanAssembler(m_catalogDb, m_parsedSelect, m_partitioning); // Process the GROUP BY information, decide whether it is group by the partition column if (isPartitionColumnInGroupbyList(m_parsedSelect.groupByColumns())) { m_parsedSelect.setHasPartitionColumnInGroupby(); } if (isPartitionColumnInWindowedAggregatePartitionByList()) { m_parsedSelect.setHasPartitionColumnInWindowedAggregate(); } // FIXME: is the following scheme/comment obsolete? 
            // FIXME: turn it on when we are able to push down DISTINCT
            // if (isPartitionColumnInGroupbyList(m_parsedSelect.m_distinctGroupByColumns)) {
            //     m_parsedSelect.setHasPartitionColumnInDistinctGroupby();
            // }
            return;
        }

        // @TODO
        // Need to use StmtTableScan instead
        // check that no modification happens to views
        if (tableListIncludesReadOnlyView(parsedStmt.m_tableList)) {
            throw new PlanningErrorException("Illegal to modify a materialized view.");
        }

        m_partitioning.setIsDML();

        // Check that only multi-partition writes are made to replicated tables.
        // figure out which table we're updating/deleting
        if (parsedStmt instanceof ParsedSwapStmt) {
            assert (parsedStmt.m_tableList.size() == 2);
            if (tableListIncludesExportOnly(parsedStmt.m_tableList)) {
                throw new PlanningErrorException("Illegal to swap a stream.");
            }
            m_parsedSwap = (ParsedSwapStmt) parsedStmt;
            return;
        }

        Table targetTable = parsedStmt.m_tableList.get(0);
        if (targetTable.getIsreplicated()) {
            if (m_partitioning.wasSpecifiedAsSingle()
                    && !m_partitioning.isReplicatedDmlToRunOnAllPartitions()) {
                String msg = "Trying to write to replicated table '"
                        + targetTable.getTypeName() + "' in a single-partition procedure.";
                throw new PlanningErrorException(msg);
            }
        }
        else if (m_partitioning.wasSpecifiedAsSingle() == false) {
            m_partitioning.setPartitioningColumnForDML(targetTable.getPartitioncolumn());
        }

        if (parsedStmt instanceof ParsedInsertStmt) {
            m_parsedInsert = (ParsedInsertStmt) parsedStmt;
            // The currently handled inserts are too simple to even require a subplan assembler. So, done.
            return;
        }

        if (parsedStmt instanceof ParsedUpdateStmt) {
            if (tableListIncludesExportOnly(parsedStmt.m_tableList)) {
                throw new PlanningErrorException("Illegal to update a stream.");
            }
            m_parsedUpdate = (ParsedUpdateStmt) parsedStmt;
        }
        else if (parsedStmt instanceof ParsedDeleteStmt) {
            if (tableListIncludesExportOnly(parsedStmt.m_tableList)) {
                throw new PlanningErrorException("Illegal to delete from a stream.");
            }
            m_parsedDelete = (ParsedDeleteStmt) parsedStmt;
        }
        else {
            throw new RuntimeException("Unknown subclass of AbstractParsedStmt.");
        }

        if ( ! m_partitioning.wasSpecifiedAsSingle()) {
            //TODO: When updates and deletes can contain joins, this step may have to be
            // deferred so that the valueEquivalence set can be analyzed per join order.
            // This appears to be an unfortunate side effect of how the HSQL interface
            // misleadingly organizes the placement of join/where filters on the statement tree.
            // This throws off the accounting of equivalence join filters until they can be
            // normalized in analyzeJoinFilters, but that normalization process happens on a
            // per-join-order basis, and so must this analysis.
            HashMap<AbstractExpression, Set<AbstractExpression>> valueEquivalence =
                    parsedStmt.analyzeValueEquivalence();
            Collection<StmtTableScan> scans = parsedStmt.allScans();
            m_partitioning.analyzeForMultiPartitionAccess(scans, valueEquivalence);
        }
        m_subAssembler = new WriterSubPlanAssembler(m_catalogDb, parsedStmt, m_partitioning);
    }

    private boolean isPartitionColumnInWindowedAggregatePartitionByList() {
        assert (m_parsedSelect != null);
        return (m_parsedSelect.isPartitionColumnInWindowedAggregatePartitionByList());
    }

    private static void failIfNonDeterministicDml(AbstractParsedStmt parsedStmt, CompiledPlan plan) {
        // If we have content non-determinism on DML, then fail planning.
        // This can happen if:
        //   INSERT INTO ... SELECT ... where the select statement has a limit on unordered data.
        //   UPSERT INTO ... SELECT has the same issue, but no limit is required because
        //     order may determine which rows are updated and which are inserted.
        //   DELETE ... ORDER BY <n> LIMIT <n> also has this issue.
        // UPDATE doesn't have this issue yet (but having ORDER BY and LIMIT there doesn't seem out
        // of the question).
        // When subqueries in WHERE clauses of DML are allowed, we will need to make sure the
        // subqueries are content-deterministic too.
        if (plan == null || plan.isReadOnly()) {
            return;
        }

        boolean contentDeterministic = plan.isContentDeterministic();
        if (parsedStmt instanceof ParsedInsertStmt
                && !(plan.isOrderDeterministic() && contentDeterministic)) {
            ParsedInsertStmt parsedInsert = (ParsedInsertStmt)parsedStmt;
            boolean targetHasLimitRowsTrigger = parsedInsert.targetTableHasLimitRowsTrigger();
            String contentDeterministicMsg = "";
            if (!contentDeterministic) {
                contentDeterministicMsg = " " + plan.nondeterminismDetail();
            }

            if (parsedStmt.m_isUpsert) {
                throw new PlanningErrorException(
                        "UPSERT statement manipulates data in a non-deterministic way. "
                        + "Adding an ORDER BY clause to UPSERT INTO ... SELECT may address this issue."
                        + contentDeterministicMsg);
            }

            if (targetHasLimitRowsTrigger) {
                throw new PlanningErrorException(
                        "Order of rows produced by SELECT statement in INSERT INTO ... SELECT is "
                        + "non-deterministic. Since the table being inserted into has a row limit "
                        + "trigger, the SELECT output must be ordered. Add an ORDER BY clause "
                        + "to address this issue."
                        + contentDeterministicMsg);
            }

            if (plan.hasLimitOrOffset()) {
                throw new PlanningErrorException(
                        "INSERT statement manipulates data in a content non-deterministic way. "
                        + "Adding an ORDER BY clause to INSERT INTO ... SELECT may address this issue."
                        + contentDeterministicMsg);
            }

            if (!contentDeterministic) {
                throw new PlanningErrorException(
                        "INSERT statement manipulates data in a non-deterministic way."
                        + contentDeterministicMsg);
            }
        }

        if (parsedStmt instanceof ParsedDeleteStmt
                && !((ParsedDeleteStmt)parsedStmt).sideEffectsAreDeterministic()) {
            throw new PlanningErrorException(
                    "DELETE statement manipulates data in a non-deterministic way. This may happen "
                    + "when the DELETE has an ORDER BY clause with a LIMIT, but the order is not "
                    + "well-defined.");
        }
    }

    static String IN_EXISTS_SCALAR_ERROR_MESSAGE = "Subquery expressions are only supported for "
            + "single partition procedures and AdHoc queries referencing only replicated tables.";

    /**
     * Generate the best cost plan for the current SQL statement context.
     *
     * @param parsedStmt Current SQL statement to generate plan for
     * @return The best cost plan or null.
     */
    CompiledPlan getBestCostPlan(AbstractParsedStmt parsedStmt) {
        // parse any subqueries that the statement contains
        List<StmtSubqueryScan> subqueryNodes = parsedStmt.getSubqueryScans();
        ParsedResultAccumulator fromSubqueryResult = null;
        if (! subqueryNodes.isEmpty()) {
            fromSubqueryResult = getBestCostPlanForFromSubQueries(subqueryNodes);
            if (fromSubqueryResult == null) {
                // There was at least one sub-query and we should have a compiled plan for it
                return null;
            }
        }

        // Get the best plans for the expression subqueries ( IN/EXISTS (SELECT...) )
        Set<AbstractExpression> subqueryExprs = parsedStmt.findSubquerySubexpressions();
        if ( ! subqueryExprs.isEmpty() ) {
            // guards against IN/EXISTS/Scalar subqueries
            if ( ! m_partitioning.wasSpecifiedAsSingle() ) {
                // Don't allow partitioned tables in subqueries.
// This restriction stems from the lack of confidence that the // planner can reliably identify all cases of adequate and // inadequate partition key join criteria across different // levels of correlated subqueries. for (AbstractExpression e : subqueryExprs) { assert(e instanceof SelectSubqueryExpression); SelectSubqueryExpression subExpr = (SelectSubqueryExpression)e; if (! subExpr.getSubqueryScan().getIsReplicated()) { m_recentErrorMsg = IN_EXISTS_SCALAR_ERROR_MESSAGE; return null; } } } if (!getBestCostPlanForExpressionSubQueries(subqueryExprs)) { // There was at least one sub-query and we should have a compiled plan for it return null; } } // set up the plan assembler for this statement setupForNewPlans(parsedStmt); // get ready to find the plan with minimal cost CompiledPlan rawplan = null; // loop over all possible plans while (true) { rawplan = getNextPlan(); // stop this while loop when no more plans are generated if (rawplan == null) { break; } // Update the best cost plan so far m_planSelector.considerCandidatePlan(rawplan, parsedStmt); } CompiledPlan retval = m_planSelector.m_bestPlan; if (retval == null) { return null; } if (fromSubqueryResult != null) { // Calculate the combined state of determinism for the parent and child statements boolean orderIsDeterministic = retval.isOrderDeterministic(); String contentDeterminismDetail = fromSubqueryResult.m_isContentDeterministic; if (orderIsDeterministic && ! fromSubqueryResult.m_orderIsDeterministic) { //TODO: this reliance on the vague isOrderDeterministicInSpiteOfUnorderedSubqueries test // is subject to false negatives for determinism. It misses the subtlety of parent // queries that surgically add orderings for specific "key" columns of a subquery result // or a subquery-based join for an effectively deterministic result. // The first step towards repairing this would involve detecting deterministic and // non-deterministic subquery results IN CONTEXT where they are scanned in the parent // query, so that the parent query can ensure that ALL the columns from a // non-deterministic subquery are later sorted. // The next step would be to extend the model for "subquery scans" // to identify dependencies / uniqueness constraints in subquery results // that can be exploited to impose determinism with fewer parent order by columns // -- like just the keys. orderIsDeterministic = parsedStmt.isOrderDeterministicInSpiteOfUnorderedSubqueries(); } boolean hasLimitOrOffset = fromSubqueryResult.m_hasLimitOrOffset || retval.hasLimitOrOffset(); retval.statementGuaranteesDeterminism(hasLimitOrOffset, orderIsDeterministic, contentDeterminismDetail); // Need to re-attach the sub-queries plans to the best parent plan. The same best plan for each // sub-query is reused with all parent candidate plans and needs to be reconnected with // the final best parent plan retval.rootPlanGraph = connectChildrenBestPlans(retval.rootPlanGraph); } /* * Find out if the query is inherently content deterministic and * remember it. */ String contentDeterminismMessage = parsedStmt.getContentDeterminismMessage(); if (contentDeterminismMessage != null) { retval.setNondeterminismDetail(contentDeterminismMessage); } failIfNonDeterministicDml(parsedStmt, retval); if (m_partitioning != null) { retval.setStatementPartitioning(m_partitioning); } return retval; } /** * Output the best cost plan. * */ void finalizeBestCostPlan() { m_planSelector.finalizeOutput(); } /** * Generate best cost plans for a list of FROM sub-queries. 
     * @param subqueryNodes - list of FROM sub-queries.
     * @return ParsedResultAccumulator
     */
    private ParsedResultAccumulator getBestCostPlanForFromSubQueries(List<StmtSubqueryScan> subqueryNodes) {
        int nextPlanId = m_planSelector.m_planId;
        boolean orderIsDeterministic = true;
        boolean hasSignificantOffsetOrLimit = false;
        String isContentDeterministic = null;
        for (StmtSubqueryScan subqueryScan : subqueryNodes) {
            nextPlanId = planForParsedSubquery(subqueryScan, nextPlanId);
            CompiledPlan subqueryBestPlan = subqueryScan.getBestCostPlan();
            if (subqueryBestPlan == null) {
                throw new PlanningErrorException(m_recentErrorMsg);
            }
            orderIsDeterministic &= subqueryBestPlan.isOrderDeterministic();
            // Remember the first content non-determinism detail we find.
            if (isContentDeterministic == null && !subqueryBestPlan.isContentDeterministic()) {
                isContentDeterministic = subqueryBestPlan.nondeterminismDetail();
            }
            // Offsets or limits in subqueries are only significant (only affect content determinism)
            // when they apply to un-ordered subquery contents.
            hasSignificantOffsetOrLimit |=
                    (( ! subqueryBestPlan.isOrderDeterministic() ) && subqueryBestPlan.hasLimitOrOffset());
        }

        // need to reset plan id for the entire SQL
        m_planSelector.m_planId = nextPlanId;

        return new ParsedResultAccumulator(orderIsDeterministic,
                hasSignificantOffsetOrLimit, isContentDeterministic);
    }

    /**
     * Generate best cost plans for each Subquery expression from the list
     * @param subqueryExprs - list of subquery expressions
     * @return true if a best plan was generated for each subquery, false otherwise
     */
    private boolean getBestCostPlanForExpressionSubQueries(Set<AbstractExpression> subqueryExprs) {
        int nextPlanId = m_planSelector.m_planId;

        for (AbstractExpression expr : subqueryExprs) {
            assert(expr instanceof SelectSubqueryExpression);
            if (!(expr instanceof SelectSubqueryExpression)) {
                continue; // DEAD CODE?
            }
            SelectSubqueryExpression subqueryExpr = (SelectSubqueryExpression) expr;
            StmtSubqueryScan subqueryScan = subqueryExpr.getSubqueryScan();
            nextPlanId = planForParsedSubquery(subqueryScan, nextPlanId);
            CompiledPlan bestPlan = subqueryScan.getBestCostPlan();
            if (bestPlan == null) {
                return false;
            }
            subqueryExpr.setSubqueryNode(bestPlan.rootPlanGraph);
            // The subquery plan must not contain Receive/Send nodes because it will be executed
            // multiple times during the parent statement execution.
            if (bestPlan.rootPlanGraph.hasAnyNodeOfType(PlanNodeType.SEND)) {
                // fail the whole plan
                m_recentErrorMsg = IN_EXISTS_SCALAR_ERROR_MESSAGE;
                return false;
            }
        }
        // need to reset plan id for the entire SQL
        m_planSelector.m_planId = nextPlanId;
        return true;
    }

    /**
     * Generate a unique and correct plan for the current SQL statement context.
     * This method gets called repeatedly until it returns null, meaning there
     * are no more plans.
     *
     * @return A not-previously returned query plan or null if no more
     *         computable plans.
     */
    private CompiledPlan getNextPlan() {
        CompiledPlan retval;
        AbstractParsedStmt nextStmt = null;
        if (m_parsedSelect != null) {
            nextStmt = m_parsedSelect;
            retval = getNextSelectPlan();
        }
        else if (m_parsedInsert != null) {
            nextStmt = m_parsedInsert;
            retval = getNextInsertPlan();
        }
        else if (m_parsedDelete != null) {
            nextStmt = m_parsedDelete;
            retval = getNextDeletePlan();
            // note that for replicated tables, multi-fragment plans
            // need to divide the result by the number of partitions
        }
        else if (m_parsedUpdate != null) {
            nextStmt = m_parsedUpdate;
            retval = getNextUpdatePlan();
        }
        else if (m_parsedUnion != null) {
            nextStmt = m_parsedUnion;
            retval = getNextUnionPlan();
        }
        else if (m_parsedSwap != null) {
            nextStmt = m_parsedSwap;
            retval = getNextSwapPlan();
        }
        else {
            throw new RuntimeException(
                    "setupForNewPlans encountered unsupported statement type.");
        }

        if (retval == null || retval.rootPlanGraph == null) {
            return null;
        }

        assert (nextStmt != null);
        retval.parameters = nextStmt.getParameters();
        return retval;
    }

    /**
     * This is a UNION specific method. Generate a unique and correct plan
     * for the current SQL UNION statement by building the best plans for each individual statement
     * within the UNION.
     *
     * @return A union plan or null.
     */
    private CompiledPlan getNextUnionPlan() {
        String isContentDeterministic = null;
        // Since only the one "best" plan is considered,
        // this method should be called only once.
        if (m_bestAndOnlyPlanWasGenerated) {
            return null;
        }
        m_bestAndOnlyPlanWasGenerated = true;
        // Simply return a union plan node with a corresponding union type set
        AbstractPlanNode subUnionRoot = new UnionPlanNode(m_parsedUnion.m_unionType);
        m_recentErrorMsg = null;

        ArrayList<CompiledPlan> childrenPlans = new ArrayList<>();
        StatementPartitioning commonPartitioning = null;

        // Build best plans for the children first
        int planId = 0;
        for (AbstractParsedStmt parsedChildStmt : m_parsedUnion.m_children) {
            StatementPartitioning partitioning = (StatementPartitioning)m_partitioning.clone();
            PlanSelector planSelector = (PlanSelector) m_planSelector.clone();
            planSelector.m_planId = planId;
            PlanAssembler assembler = new PlanAssembler(m_catalogDb, partitioning, planSelector);
            CompiledPlan bestChildPlan = assembler.getBestCostPlan(parsedChildStmt);
            partitioning = assembler.m_partitioning;

            // make sure we got a winner
            if (bestChildPlan == null) {
                m_recentErrorMsg = assembler.getErrorMessage();
                if (m_recentErrorMsg == null) {
                    m_recentErrorMsg = "Unable to plan for statement. Error unknown.";
                }
                return null;
            }
            childrenPlans.add(bestChildPlan);
            // Remember the content non-determinism message for the
            // first non-deterministic child we find.
            if (isContentDeterministic == null && !bestChildPlan.isContentDeterministic()) {
                isContentDeterministic = bestChildPlan.nondeterminismDetail();
            }

            // Make sure that next child's plans won't override current ones.
            planId = planSelector.m_planId;

            // Decide whether child statements' partitioning is compatible.
            if (commonPartitioning == null) {
                commonPartitioning = partitioning;
                continue;
            }

            AbstractExpression statementPartitionExpression = partitioning.singlePartitioningExpression();
            if (commonPartitioning.requiresTwoFragments()) {
                if (partitioning.requiresTwoFragments() || statementPartitionExpression != null) {
                    // If two child statements need to use a second fragment,
                    // it can't currently be a two-fragment plan.
                    // The coordinator expects a single-table result from each partition.
// Also, currently the coordinator of a two-fragment plan is not allowed to // target a particular partition, so neither can the union of the coordinator // and a statement that wants to run single-partition. throw new PlanningErrorException( "Statements are too complex in set operation using multiple partitioned tables."); } // the new statement is apparently a replicated read and has no effect on partitioning continue; } AbstractExpression commonPartitionExpression = commonPartitioning.singlePartitioningExpression(); if (commonPartitionExpression == null) { // the prior statement(s) were apparently replicated reads // and have no effect on partitioning commonPartitioning = partitioning; continue; } if (partitioning.requiresTwoFragments()) { // Again, currently the coordinator of a two-fragment plan is not allowed to // target a particular partition, so neither can the union of the coordinator // and a statement that wants to run single-partition. throw new PlanningErrorException( "Statements are too complex in set operation using multiple partitioned tables."); } if (statementPartitionExpression == null) { // the new statement is apparently a replicated read and has no effect on partitioning continue; } if ( ! commonPartitionExpression.equals(statementPartitionExpression)) { throw new PlanningErrorException( "Statements use conflicting partitioned table filters in set operation or sub-query."); } } if (commonPartitioning != null) { m_partitioning = commonPartitioning; } // need to reset plan id for the entire UNION m_planSelector.m_planId = planId; // Add and link children plans for (CompiledPlan selectPlan : childrenPlans) { subUnionRoot.addAndLinkChild(selectPlan.rootPlanGraph); } // order by if (m_parsedUnion.hasOrderByColumns()) { subUnionRoot = handleOrderBy(m_parsedUnion, subUnionRoot); } // limit/offset if (m_parsedUnion.hasLimitOrOffset()) { subUnionRoot = handleUnionLimitOperator(subUnionRoot); } CompiledPlan retval = new CompiledPlan(); retval.rootPlanGraph = subUnionRoot; retval.setReadOnly(true); retval.sql = m_planSelector.m_sql; boolean orderIsDeterministic = m_parsedUnion.isOrderDeterministic(); boolean hasLimitOrOffset = m_parsedUnion.hasLimitOrOffset(); retval.statementGuaranteesDeterminism(hasLimitOrOffset, orderIsDeterministic, isContentDeterministic); // compute the cost - total of all children retval.cost = 0.0; for (CompiledPlan bestChildPlan : childrenPlans) { retval.cost += bestChildPlan.cost; } return retval; } private int planForParsedSubquery(StmtSubqueryScan subqueryScan, int planId) { AbstractParsedStmt subQuery = subqueryScan.getSubqueryStmt(); assert(subQuery != null); PlanSelector planSelector = (PlanSelector) m_planSelector.clone(); planSelector.m_planId = planId; StatementPartitioning currentPartitioning = (StatementPartitioning)m_partitioning.clone(); PlanAssembler assembler = new PlanAssembler(m_catalogDb, currentPartitioning, planSelector); CompiledPlan compiledPlan = assembler.getBestCostPlan(subQuery); // make sure we got a winner if (compiledPlan == null) { String tbAlias = subqueryScan.getTableAlias(); m_recentErrorMsg = "Subquery statement for table " + tbAlias + " has error: " + assembler.getErrorMessage(); return planSelector.m_planId; } subqueryScan.setSubqueriesPartitioning(currentPartitioning); // Remove the coordinator send/receive pair. // It will be added later for the whole plan. //TODO: It may make more sense to plan ahead and not generate the send/receive pair // at all for subquery contexts where it is not needed. 
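        // Illustrative (assumed) example, not from the original comments: for a
        // parent statement like
        //     SELECT T.C1 FROM (SELECT C1 FROM R) T WHERE T.C1 > 0;
        // the subquery's best plan may end in its own coordinator Send/Receive
        // pair; stripping that pair here lets the parent statement place a single
        // pair at the top of the whole plan instead of stacking coordinator
        // fragments.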
        if (subqueryScan.canRunInOneFragment()) {
            // The MergeReceivePlanNode always has an inline ORDER BY node and may have
            // LIMIT/OFFSET and aggregation node(s). Removing the MergeReceivePlanNode will
            // also remove its inline node(s) which may produce an invalid access plan.
            // For example,
            //     SELECT TC1 FROM (SELECT C1 AS TC1 FROM P ORDER BY C1) PT LIMIT 4;
            // where P is partitioned and C1 is a non-partitioned index column.
            // Removing the subquery MergeReceivePlanNode and its ORDER BY node results
            // in an invalid access plan, because the subquery result order is significant
            // in this case.
            // The concern with generally keeping the (Merge)Receive node in the subquery is
            // that it would needlessly generate more-than-2-fragment plans in cases
            // where 2 fragments could have done the job.
            if ( ! compiledPlan.rootPlanGraph.hasAnyNodeOfClass(MergeReceivePlanNode.class)) {
                compiledPlan.rootPlanGraph = removeCoordinatorSendReceivePair(compiledPlan.rootPlanGraph);
            }
        }

        subqueryScan.setBestCostPlan(compiledPlan);
        return planSelector.m_planId;
    }

    /**
     * Remove the coordinator send/receive pair, if any, from the graph.
     *
     * @param root the complete plan node.
     * @return the plan without the send/receive pair.
     */
    public static AbstractPlanNode removeCoordinatorSendReceivePair(AbstractPlanNode root) {
        assert(root != null);
        return removeCoordinatorSendReceivePairRecursive(root, root);
    }

    private static AbstractPlanNode removeCoordinatorSendReceivePairRecursive(
            AbstractPlanNode root, AbstractPlanNode current) {
        if (current instanceof AbstractReceivePlanNode) {
            assert(current.getChildCount() == 1);

            AbstractPlanNode child = current.getChild(0);
            assert(child instanceof SendPlanNode);

            assert(child.getChildCount() == 1);
            child = child.getChild(0);
            child.clearParents();
            if (current == root) {
                return child;
            }
            assert(current.getParentCount() == 1);
            AbstractPlanNode parent = current.getParent(0);
            parent.unlinkChild(current);
            parent.addAndLinkChild(child);
            return root;
        }

        if (current.getChildCount() == 1) {
            // This is still a coordinator node
            return removeCoordinatorSendReceivePairRecursive(root, current.getChild(0));
        }

        // We have hit a multi-child plan node -- a nestloop join or a union.
        // Can we really assume that there is no send/receive below this point?
        // TODO: It seems to me (--paul) that for a replicated-to-partitioned
        // left outer join, we should be following the second (partitioned)
        // child node of a nestloop join.
        // I'm not sure what the correct behavior is for a union.
        return root;
    }

    /**
     * For each Subquery node in the plan tree attach the subquery plan to the parent node.
     * @param parentPlan the initial plan
     * @return A complete plan tree for the entire SQL.
*/ private AbstractPlanNode connectChildrenBestPlans(AbstractPlanNode parentPlan) { if (parentPlan instanceof AbstractScanPlanNode) { AbstractScanPlanNode scanNode = (AbstractScanPlanNode) parentPlan; StmtTableScan tableScan = scanNode.getTableScan(); if (tableScan instanceof StmtSubqueryScan) { CompiledPlan bestCostPlan = ((StmtSubqueryScan)tableScan).getBestCostPlan(); assert (bestCostPlan != null); AbstractPlanNode subQueryRoot = bestCostPlan.rootPlanGraph; subQueryRoot.disconnectParents(); scanNode.clearChildren(); scanNode.addAndLinkChild(subQueryRoot); } } else { for (int i = 0; i < parentPlan.getChildCount(); ++i) { connectChildrenBestPlans(parentPlan.getChild(i)); } } return parentPlan; } private CompiledPlan getNextSelectPlan() { assert (m_subAssembler != null); // A matview reaggregation template plan may have been initialized // with a post-predicate expression moved from the statement's // join tree prior to any subquery planning. // Since normally subquery planning is driven from the join tree, // any subqueries that are moved out of the join tree would need // to be planned separately. // This planning would need to be done prior to calling // m_subAssembler.nextPlan() // because it can have query partitioning implications. // Under the current query limitations, the partitioning implications // are very simple -- subqueries are not allowed in multipartition // queries against partitioned data, so detection of a subquery in // the same query as a matview reaggregation can just return an error, // without any need for subquery planning here. HashAggregatePlanNode reAggNode = null; HashAggregatePlanNode mvReAggTemplate = m_parsedSelect.m_mvFixInfo.getReAggregationPlanNode(); if (mvReAggTemplate != null) { reAggNode = new HashAggregatePlanNode(mvReAggTemplate); AbstractExpression postPredicate = reAggNode.getPostPredicate(); if (postPredicate != null && postPredicate.hasSubquerySubexpression()) { // For now, this is just a special case violation of the limitation on // use of subquery expressions in MP queries on partitioned data. // That special case was going undetected when we didn't flag it here. m_recentErrorMsg = IN_EXISTS_SCALAR_ERROR_MESSAGE; return null; } // // Something more along these lines would have to be enabled // // to allow expression subqueries to be used in multi-partition // // matview queries. // if (!getBestCostPlanForExpressionSubQueries(subqueryExprs)) { // // There was at least one sub-query and we should have a compiled plan for it // return null; // } } AbstractPlanNode subSelectRoot = m_subAssembler.nextPlan(); if (subSelectRoot == null) { m_recentErrorMsg = m_subAssembler.m_recentErrorMsg; return null; } AbstractPlanNode root = subSelectRoot; boolean mvFixNeedsProjection = false; /* * If the access plan for the table in the join order was for a * distributed table scan there must be a send/receive pair at the top * EXCEPT for the special outer join case in which a replicated table * was on the OUTER side of an outer join across from the (joined) scan * of the partitioned table(s) (all of them) in the query. In that case, * the one required send/receive pair is already in the plan below the * inner side of a NestLoop join. 
         */
        if (m_partitioning.requiresTwoFragments()) {
            boolean mvFixInfoCoordinatorNeeded = true;
            boolean mvFixInfoEdgeCaseOuterJoin = false;

            ArrayList<AbstractPlanNode> receivers = root.findAllNodesOfClass(AbstractReceivePlanNode.class);
            if (receivers.size() == 1) {
                // The subplan SHOULD be good to go, but just make sure that it doesn't
                // scan a partitioned table except under the ReceivePlanNode that was just found.

                // Edge cases: left outer join with replicated table.
                if (m_parsedSelect.m_mvFixInfo.needed()) {
                    mvFixInfoCoordinatorNeeded = false;
                    AbstractPlanNode receiveNode = receivers.get(0);
                    if (receiveNode.getParent(0) instanceof NestLoopPlanNode) {
                        if (subSelectRoot.hasInlinedIndexScanOfTable(m_parsedSelect.m_mvFixInfo.getMVTableName())) {
                            return getNextSelectPlan();
                        }

                        List<AbstractPlanNode> nljs = receiveNode.findAllNodesOfType(PlanNodeType.NESTLOOP);
                        List<AbstractPlanNode> nlijs = receiveNode.findAllNodesOfType(PlanNodeType.NESTLOOPINDEX);

                        // outer join edge case does not have any join plan node under receive node.
                        // This is like a single table case.
                        if (nljs.size() + nlijs.size() == 0) {
                            mvFixInfoEdgeCaseOuterJoin = true;
                        }
                        root = handleMVBasedMultiPartQuery(reAggNode, root, mvFixInfoEdgeCaseOuterJoin);
                    }
                }
            }
            else {
                if (receivers.size() > 0) {
                    throw new PlanningErrorException(
                            "This special case join between an outer replicated table and " +
                            "an inner partitioned table is too complex and is not supported.");
                }
                root = SubPlanAssembler.addSendReceivePair(root);
                // Root is a receive node here.
                assert(root instanceof ReceivePlanNode);

                if (m_parsedSelect.mayNeedAvgPushdown()) {
                    m_parsedSelect.switchOptimalSuiteForAvgPushdown();
                }
                if (m_parsedSelect.m_tableList.size() > 1 && m_parsedSelect.m_mvFixInfo.needed()
                        && subSelectRoot.hasInlinedIndexScanOfTable(m_parsedSelect.m_mvFixInfo.getMVTableName())) {
                    // A join query on a partitioned materialized view needs re-aggregation
                    // work on the coordinator. An index scan on the MV table cannot be
                    // supported here, so neither can an inlined index scan under a
                    // nested-loop index join.
                    return getNextSelectPlan();
                }
            }

            root = handleAggregationOperators(root);

            // Process the re-aggregate plan node and insert it into the plan.
            if (m_parsedSelect.m_mvFixInfo.needed() && mvFixInfoCoordinatorNeeded) {
                AbstractPlanNode tmpRoot = root;
                root = handleMVBasedMultiPartQuery(reAggNode, root, mvFixInfoEdgeCaseOuterJoin);
                if (root != tmpRoot) {
                    mvFixNeedsProjection = true;
                }
            }
        }
        else {
            /*
             * There is no receive node and root is a single partition plan.
             */
            // If there is no receive plan node and no distributed plan has been generated,
            // the fix set for MV is not needed.
            m_parsedSelect.m_mvFixInfo.setNeeded(false);
            root = handleAggregationOperators(root);
        }

        // If we have a windowed expression in the display list we want to
        // add a PartitionByPlanNode here.
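        // An illustrative (assumed) example: a query like
        //     SELECT A, RANK() OVER (PARTITION BY A ORDER BY B) FROM T;
        // takes this branch; handleWindowedOperators() places the
        // WindowFunctionPlanNode (with whatever ordering it needs) above the scan.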
        if (m_parsedSelect.hasWindowFunctionExpression()) {
            root = handleWindowedOperators(root);
        }

        if (m_parsedSelect.hasOrderByColumns()) {
            root = handleOrderBy(m_parsedSelect, root);
            if (m_parsedSelect.isComplexOrderBy() && root instanceof OrderByPlanNode) {
                AbstractPlanNode child = root.getChild(0);
                AbstractPlanNode grandChild = child.getChild(0);

                // swap the ORDER BY and complex aggregate Projection node
                if (child instanceof ProjectionPlanNode) {
                    root.unlinkChild(child);
                    child.unlinkChild(grandChild);

                    child.addAndLinkChild(root);
                    root.addAndLinkChild(grandChild);

                    // update the new root
                    root = child;
                }
                else if (m_parsedSelect.hasDistinctWithGroupBy() &&
                        child.getPlanNodeType() == PlanNodeType.HASHAGGREGATE &&
                        grandChild.getPlanNodeType() == PlanNodeType.PROJECTION) {
                    AbstractPlanNode grandGrandChild = grandChild.getChild(0);

                    child.clearParents();
                    root.clearChildren();
                    grandGrandChild.clearParents();
                    grandChild.clearChildren();

                    grandChild.addAndLinkChild(root);
                    root.addAndLinkChild(grandGrandChild);

                    root = child;
                }
            }
        }

        // Add a project node if we need one. Some types of nodes can have their
        // own inline projection nodes, while others need an out-of-line projection
        // node.
        if (mvFixNeedsProjection || needProjectionNode(root)) {
            root = addProjection(root);
        }

        if (m_parsedSelect.hasLimitOrOffset()) {
            root = handleSelectLimitOperator(root);
        }

        CompiledPlan plan = new CompiledPlan();
        plan.rootPlanGraph = root;
        plan.setReadOnly(true);
        boolean orderIsDeterministic = m_parsedSelect.isOrderDeterministic();
        boolean hasLimitOrOffset = m_parsedSelect.hasLimitOrOffset();
        String contentDeterminismMessage = m_parsedSelect.getContentDeterminismMessage();
        plan.statementGuaranteesDeterminism(hasLimitOrOffset, orderIsDeterministic, contentDeterminismMessage);

        // Apply the micro-optimizations:
        // LIMIT push down, Table count / Counting Index, Optimized Min/Max
        MicroOptimizationRunner.applyAll(plan, m_parsedSelect);

        return plan;
    }

    /**
     * Return true if the plan referenced by root node needs a
     * projection node appended to the top.
     *
     * This method does a lot of "if this node is an
     * instance of this class.... else if this node is an
     * instance of this other class..." Perhaps it could be replaced
     * by a virtual method on AbstractPlanNode?
     *
     * @param root The root node of a plan
     * @return true if a project node is required
     */
    private boolean needProjectionNode(AbstractPlanNode root) {
        if (!root.planNodeClassNeedsProjectionNode()) {
            return false;
        }

        // If there is a complex group by at this point, the display columns
        // contain all the order by columns, so another projection node is not
        // required on top of the sort node.
        // If there is a complex aggregation case, the projection plan node is already added
        // right above the group by plan node. In the future, we may inline that projection node.
        if (m_parsedSelect.hasComplexGroupby() || m_parsedSelect.hasComplexAgg()) {
            return false;
        }

        if (root instanceof AbstractReceivePlanNode &&
                m_parsedSelect.hasPartitionColumnInGroupby()) {
            // The top aggregate has been removed; its schema is exactly the same as
            // its local aggregate node.
return false; } return true; } // ENG-4909 Bug: currently disable NESTLOOPINDEX plan for IN private static boolean disableNestedLoopIndexJoinForInComparison (AbstractPlanNode root, AbstractParsedStmt parsedStmt) { if (root.getPlanNodeType() == PlanNodeType.NESTLOOPINDEX) { assert(parsedStmt != null); return true; } return false; } /** Returns true if this DELETE can be executed in the EE as a truncate operation */ static private boolean deleteIsTruncate(ParsedDeleteStmt stmt, AbstractPlanNode plan) { if (!(plan instanceof SeqScanPlanNode)) { return false; } // Assume all index scans have filters in this context, so only consider seq scans. SeqScanPlanNode seqScanNode = (SeqScanPlanNode)plan; if (seqScanNode.getPredicate() != null) { return false; } if (stmt.hasLimitOrOffset()) { return false; } return true; } private CompiledPlan getNextDeletePlan() { assert (m_subAssembler != null); // figure out which table we're deleting from assert (m_parsedDelete.m_tableList.size() == 1); Table targetTable = m_parsedDelete.m_tableList.get(0); AbstractPlanNode subSelectRoot = m_subAssembler.nextPlan(); if (subSelectRoot == null) { return null; } // ENG-4909 Bug: currently disable NESTLOOPINDEX plan for IN if (disableNestedLoopIndexJoinForInComparison(subSelectRoot, m_parsedDelete)) { // Recursion here, now that subAssembler.nextPlan() has been called, // simply jumps ahead to the next plan (if any). return getNextDeletePlan(); } boolean isSinglePartitionPlan = m_partitioning.wasSpecifiedAsSingle() || m_partitioning.isInferredSingle(); // generate the delete node with the right target table DeletePlanNode deleteNode = new DeletePlanNode(); deleteNode.setTargetTableName(targetTable.getTypeName()); assert(subSelectRoot instanceof AbstractScanPlanNode); // If the scan matches all rows, we can throw away the scan // nodes and use a truncate delete node. if (deleteIsTruncate(m_parsedDelete, subSelectRoot)) { deleteNode.setTruncate(true); } else { // User may have specified an ORDER BY ... LIMIT clause if (m_parsedDelete.orderByColumns().size() > 0 && !isSinglePartitionPlan && !targetTable.getIsreplicated()) { throw new PlanningErrorException( "DELETE statements affecting partitioned tables must " + "be able to execute on one partition " + "when ORDER BY and LIMIT or OFFSET clauses " + "are present."); } boolean needsOrderByNode = isOrderByNodeRequired(m_parsedDelete, subSelectRoot); AbstractExpression addressExpr = new TupleAddressExpression(); NodeSchema proj_schema = new NodeSchema(); // This planner-created column is magic. 
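            // (An added best-effort explanation: the TupleAddressExpression column
            // below carries each matching row's storage address, which the delete
            // executor uses to locate the exact rows to remove without re-running
            // the scan's predicate.)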
proj_schema.addColumn( AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "tuple_address", "tuple_address", addressExpr); if (needsOrderByNode) { // Projection will need to pass the sort keys to the order by node for (ParsedColInfo col : m_parsedDelete.orderByColumns()) { proj_schema.addColumn(col.asSchemaColumn()); } } ProjectionPlanNode projectionNode = new ProjectionPlanNode(proj_schema); subSelectRoot.addInlinePlanNode(projectionNode); AbstractPlanNode root = subSelectRoot; if (needsOrderByNode) { OrderByPlanNode ob = buildOrderByPlanNode(m_parsedDelete.orderByColumns()); ob.addAndLinkChild(root); root = ob; } if (m_parsedDelete.hasLimitOrOffset()) { assert(m_parsedDelete.orderByColumns().size() > 0); root.addInlinePlanNode(m_parsedDelete.limitPlanNode()); } deleteNode.addAndLinkChild(root); } CompiledPlan plan = new CompiledPlan(); plan.setReadOnly(false); // check non-determinism status // treat this as deterministic for reporting purposes: // delete statements produce just one row that is the // number of rows affected boolean orderIsDeterministic = true; boolean hasLimitOrOffset = m_parsedDelete.hasLimitOrOffset(); // The delete statement cannot be inherently content non-deterministic. // So, the last parameter is always null. plan.statementGuaranteesDeterminism(hasLimitOrOffset, orderIsDeterministic, null); if (isSinglePartitionPlan) { plan.rootPlanGraph = deleteNode; return plan; } // Add a compensating sum of modified tuple counts or a limit 1 // AND a send on top of the union-like receive node. boolean isReplicated = targetTable.getIsreplicated(); plan.rootPlanGraph = addCoordinatorToDMLNode(deleteNode, isReplicated); return plan; } /** * Get the next (only) plan for a VoltDB SWAP TABLE statement. * These are pretty simple and will only generate a single plan. * * @return The next (only) plan for a given SWAP TABLE statement, then null. */ private CompiledPlan getNextSwapPlan() { // there's really only one way to do a swap, so just // plan it the right way once, then return null after that if (m_bestAndOnlyPlanWasGenerated) { return null; } m_bestAndOnlyPlanWasGenerated = true; // figure out which tables we're swapping assert (m_parsedSwap.m_tableList.size() == 2); Table theTable = m_parsedSwap.m_tableList.get(0); Table otherTable = m_parsedSwap.m_tableList.get(1); CompiledPlan retval = new CompiledPlan(); retval.setReadOnly(false); // the root of the SWAP TABLE plan is always a SwapPlanNode SwapTablesPlanNode swapNode = new SwapTablesPlanNode(); swapNode.initializeSwapTablesPlanNode(theTable, otherTable); // SWAP commands are only run single-partition when invoked from // an explicitly declared single-partition stored procedure. if (m_partitioning.wasSpecifiedAsSingle()) { retval.rootPlanGraph = swapNode; return retval; } // Add a compensating sum of modified tuple counts or a limit 1 // AND a send on top of the union-like receive node. boolean isReplicated = theTable.getIsreplicated(); retval.rootPlanGraph = addCoordinatorToDMLNode(swapNode, isReplicated); return retval; } private CompiledPlan getNextUpdatePlan() { assert (m_subAssembler != null); AbstractPlanNode subSelectRoot = m_subAssembler.nextPlan(); if (subSelectRoot == null) { return null; } if (disableNestedLoopIndexJoinForInComparison(subSelectRoot, m_parsedUpdate)) { // Recursion here, now that subAssembler.nextPlan() has been called, // simply jumps ahead to the next plan (if any). 
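            // (A hedged clarification: each m_subAssembler.nextPlan() call yields a
            // different join order / access path, so this tail call simply discards
            // the rejected candidate and evaluates the next one.)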
return getNextUpdatePlan(); } UpdatePlanNode updateNode = new UpdatePlanNode(); //FIXME: does this assert need to be relaxed in the face of non-from-clause subquery support? // It was not in Mike A's original branch. assert (m_parsedUpdate.m_tableList.size() == 1); Table targetTable = m_parsedUpdate.m_tableList.get(0); updateNode.setTargetTableName(targetTable.getTypeName()); // set this to false until proven otherwise updateNode.setUpdateIndexes(false); TupleAddressExpression tae = new TupleAddressExpression(); NodeSchema proj_schema = new NodeSchema(); // This planner-generated column is magic. proj_schema.addColumn( AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "tuple_address", "tuple_address", tae); // get the set of columns affected by indexes Set<String> affectedColumns = getIndexedColumnSetForTable(targetTable); // add the output columns we need to the projection // // Right now, the EE is going to use the original column names // and compare these to the persistent table column names in the // update executor in order to figure out which table columns get // updated. We'll associate the actual values with VOLT_TEMP_TABLE // to avoid any false schema/column matches with the actual table. for (Entry<Column, AbstractExpression> colEntry : m_parsedUpdate.columns.entrySet()) { Column col = colEntry.getKey(); String colName = col.getTypeName(); AbstractExpression expr = colEntry.getValue(); expr.setInBytes(colEntry.getKey().getInbytes()); proj_schema.addColumn( AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, colName, colName, expr); // check if this column is an indexed column if (affectedColumns.contains(colName)) { updateNode.setUpdateIndexes(true); } } ProjectionPlanNode projectionNode = new ProjectionPlanNode(proj_schema); // add the projection inline (TODO: this will break if more than one // layer is below this) // // When we inline this projection into the scan, we're going // to overwrite any original projection that we might have inlined // in order to simply cull the columns from the persistent table. assert(subSelectRoot instanceof AbstractScanPlanNode); subSelectRoot.addInlinePlanNode(projectionNode); // connect the nodes to build the graph updateNode.addAndLinkChild(subSelectRoot); CompiledPlan retval = new CompiledPlan(); retval.setReadOnly (false); if (targetTable.getIsreplicated()) { retval.replicatedTableDML = true; } //FIXME: This assumption was only safe when we didn't support updates // w/ possibly non-deterministic subqueries. // Is there some way to integrate a "subquery determinism" check here? // because we didn't support updates with limits, either. // Since the update cannot be inherently non-deterministic, there is // no message, and the last parameter is null. retval.statementGuaranteesDeterminism(false, true, null); if (m_partitioning.wasSpecifiedAsSingle() || m_partitioning.isInferredSingle()) { retval.rootPlanGraph = updateNode; return retval; } // Send the local result counts to the coordinator. // Add a compensating sum of modified tuple counts or a limit 1 // AND a send on top of the union-like receive node. 
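        // Sketch of the resulting coordinator fragment (an assumed shape, for
        // illustration only):
        //     Send <- SUM(modified_tuples) <- Receive <- [distributed DML fragment]
        // For a replicated target, a LIMIT 1 node replaces the SUM, since every
        // partition reports the same modified-tuple count.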
        boolean isReplicated = targetTable.getIsreplicated();
        retval.rootPlanGraph = addCoordinatorToDMLNode(updateNode, isReplicated);
        return retval;
    }

    private static AbstractExpression castExprIfNeeded(
            AbstractExpression expr, Column column) {
        if (expr.getValueType().getValue() != column.getType() ||
                expr.getValueSize() != column.getSize()) {
            expr = new OperatorExpression(ExpressionType.OPERATOR_CAST, expr, null);
            expr.setValueType(VoltType.get((byte) column.getType()));
            // We don't really support parameterized casting, such as specifically to "VARCHAR(3)"
            // vs. just VARCHAR, but set the size parameter anyway in this case to make sure that
            // the tuple that gets the result of the cast can be properly formatted as inline.
            // A too-wide value survives the cast (to generic VARCHAR of any length) but the
            // attempt to cache the result in the inline temp tuple storage will throw an early
            // runtime error on behalf of the target table column.
            // The important thing here is to leave the formatting hint in the output schema that
            // drives the temp tuple layout.
            expr.setValueSize(column.getSize());
        }
        return expr;
    }

    /**
     * Get the next (only) plan for a SQL insertion. Inserts are pretty simple
     * and this will only generate a single plan.
     *
     * @return The next (only) plan for a given insert statement, then null.
     */
    private CompiledPlan getNextInsertPlan() {
        // there's really only one way to do an insert, so just
        // do it the right way once, then return null after that
        if (m_bestAndOnlyPlanWasGenerated) {
            return null;
        }
        m_bestAndOnlyPlanWasGenerated = true;

        // The child of the insert node produces rows containing values
        // from one of
        // - A VALUES clause. In this case the child node is a MaterializeNode
        // - a SELECT statement as in "INSERT INTO ... SELECT ...". In this case
        //   the child node is the root of an arbitrary subplan.

        // figure out which table we're inserting into
        assert (m_parsedInsert.m_tableList.size() == 1);
        Table targetTable = m_parsedInsert.m_tableList.get(0);
        StmtSubqueryScan subquery = m_parsedInsert.getSubqueryScan();
        CompiledPlan retval = null;
        String isContentDeterministic = null;
        if (subquery != null) {
            isContentDeterministic = subquery.calculateContentDeterminismMessage();
            if (subquery.getBestCostPlan() == null) {
                // Seems like this should really be caught earlier
                // in getBestCostPlan, above.
                throw new PlanningErrorException("INSERT INTO ... SELECT subquery could not be planned: "
                        + m_recentErrorMsg);
            }

            boolean targetIsExportTable = tableListIncludesExportOnly(m_parsedInsert.m_tableList);
            InsertSubPlanAssembler subPlanAssembler =
                    new InsertSubPlanAssembler(m_catalogDb, m_parsedInsert, m_partitioning,
                            targetIsExportTable);
            AbstractPlanNode subplan = subPlanAssembler.nextPlan();
            if (subplan == null) {
                throw new PlanningErrorException(subPlanAssembler.m_recentErrorMsg);
            }
            assert(m_partitioning.isJoinValid());

            // Use the subquery's plan as the basis for the insert plan.
            retval = subquery.getBestCostPlan();
        }
        else {
            retval = new CompiledPlan();
        }
        retval.setReadOnly(false);

        // Iterate over each column in the table we're inserting into:
        // - Make sure we're supplying values for columns that require it.
        //   For a normal INSERT, these are the usual non-nullable values that
        //   don't have a default value.
        //   For an UPSERT, the (only) required values are the primary key
        //   components. Other required values can be supplied from the
        //   existing row in "UPDATE mode".
        //   If some other value is required
        //   for an INSERT, UPSERT's "INSERT mode" will throw a runtime
        //   constraint violation as the INSERT operation tries to set the
        //   non-nullable column to null.
        // - Set partitioning expressions for VALUES (...) case.
        //   TODO: it would be good someday to do the same kind of processing
        //   for the INSERT ... SELECT ... case, by analyzing the subquery.
        if (m_parsedInsert.m_isUpsert) {
            boolean hasPrimaryKey = false;
            for (Constraint constraint : targetTable.getConstraints()) {
                if (constraint.getType() != ConstraintType.PRIMARY_KEY.getValue()) {
                    continue;
                }
                hasPrimaryKey = true;
                for (ColumnRef colRef : constraint.getIndex().getColumns()) {
                    int primary = colRef.getColumn().getIndex();
                    // Every primary key column must be supplied explicitly,
                    // so reset this flag for each key column checked.
                    boolean targetsPrimaryKey = false;
                    for (Column targetCol : m_parsedInsert.m_columns.keySet()) {
                        if (targetCol.getIndex() == primary) {
                            targetsPrimaryKey = true;
                            break;
                        }
                    }
                    if (! targetsPrimaryKey) {
                        throw new PlanningErrorException("UPSERT on table \"" +
                                targetTable.getTypeName() +
                                "\" must specify a value for primary key \"" +
                                colRef.getColumn().getTypeName() + "\".");
                    }
                }
            }
            if (! hasPrimaryKey) {
                throw new PlanningErrorException("UPSERT is not allowed on table \"" +
                        targetTable.getTypeName() + "\" that has no primary key.");
            }
        }

        CatalogMap<Column> targetTableColumns = targetTable.getColumns();
        for (Column col : targetTableColumns) {
            boolean needsValue = (!m_parsedInsert.m_isUpsert) &&
                    (col.getNullable() == false) && (col.getDefaulttype() == 0);
            if (needsValue && !m_parsedInsert.m_columns.containsKey(col)) {
                // This check could be done during parsing?
                throw new PlanningErrorException("Column " + col.getName()
                        + " has no default and is not nullable.");
            }

            // hint that this statement can be executed SP.
            if (col.equals(m_partitioning.getPartitionColForDML()) && subquery == null) {
                // When AdHoc insert-into-select is supported, we'll need to be able to infer
                // partitioning of the sub-select
                AbstractExpression expr = m_parsedInsert.getExpressionForPartitioning(col);
                String fullColumnName = targetTable.getTypeName() + "." + col.getTypeName();
                m_partitioning.addPartitioningExpression(fullColumnName, expr, expr.getValueType());
            }
        }

        NodeSchema matSchema = null;
        if (subquery == null) {
            matSchema = new NodeSchema();
        }

        int[] fieldMap = new int[m_parsedInsert.m_columns.size()];
        int i = 0;

        // The insert statement's set of columns is contained in a LinkedHashMap,
        // meaning that we'll iterate over the columns here in the order that the user
        // specified them in the original SQL. (If the statement didn't specify any
        // columns, then all the columns will be in the map in schema order.)
        // - Build the field map, used by insert executor to build tuple to execute
        // - For VALUES(...) insert statements, build the materialize node's schema
        for (Map.Entry<Column, AbstractExpression> e : m_parsedInsert.m_columns.entrySet()) {
            Column col = e.getKey();
            fieldMap[i] = col.getIndex();

            if (matSchema != null) {
                AbstractExpression valExpr = e.getValue();
                valExpr.setInBytes(col.getInbytes());

                // Patch over any mismatched expressions with an explicit cast.
                // Most impossible-to-cast type combinations should have already been caught by the
                // parser, but there are also runtime checks in the casting code
                // -- such as for out of range values.
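                // Hypothetical example (table and column names assumed): with
                // int_col declared INTEGER,
                //     INSERT INTO T (int_col) VALUES (40 + 2);
                // a BIGINT-typed value expression would be wrapped in an explicit
                // CAST to INTEGER here so the materialized temp tuple matches the
                // target column's type and size exactly.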
valExpr = castExprIfNeeded(valExpr, col); matSchema.addColumn( AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, col.getTypeName(), col.getTypeName(), valExpr); } i++; } // the root of the insert plan is always an InsertPlanNode InsertPlanNode insertNode = new InsertPlanNode(); insertNode.setTargetTableName(targetTable.getTypeName()); if (subquery != null) { insertNode.setSourceIsPartitioned(! subquery.getIsReplicated()); } // The field map tells the insert node // where to put values produced by child into the row to be inserted. insertNode.setFieldMap(fieldMap); if (matSchema != null) { MaterializePlanNode matNode = new MaterializePlanNode(matSchema); // connect the insert and the materialize nodes together insertNode.addAndLinkChild(matNode); retval.statementGuaranteesDeterminism(false, true, isContentDeterministic); } else { insertNode.addAndLinkChild(retval.rootPlanGraph); } if (m_partitioning.wasSpecifiedAsSingle() || m_partitioning.isInferredSingle()) { insertNode.setMultiPartition(false); retval.rootPlanGraph = insertNode; return retval; } insertNode.setMultiPartition(true); // Add a compensating sum of modified tuple counts or a limit 1 // AND a send on top of a union-like receive node. boolean isReplicated = targetTable.getIsreplicated(); retval.rootPlanGraph = addCoordinatorToDMLNode(insertNode, isReplicated); return retval; } /** * Add a receive node, a sum or limit node, and a send node to the given DML node. * If the DML target is a replicated table, it will add a limit node; * otherwise it adds a sum node. * * @param dmlRoot * @param isReplicated Whether or not the target table is a replicated table. * @return */ private static AbstractPlanNode addCoordinatorToDMLNode( AbstractPlanNode dmlRoot, boolean isReplicated) { dmlRoot = SubPlanAssembler.addSendReceivePair(dmlRoot); AbstractPlanNode sumOrLimitNode; if (isReplicated) { // Replicated table DML result doesn't need to be summed. All partitions should // modify the same number of tuples in a replicated table, so just pick the result from // any partition. LimitPlanNode limitNode = new LimitPlanNode(); sumOrLimitNode = limitNode; limitNode.setLimit(1); } else { // create the nodes being pushed on top of dmlRoot. AggregatePlanNode countNode = new AggregatePlanNode(); sumOrLimitNode = countNode; // configure the count aggregate (sum) node to produce a single // output column containing the result of the sum. // Create a TVE that should match the tuple count input column // This TVE is magic. // really really need to make this less hard-wired TupleValueExpression count_tve = new TupleValueExpression( AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "modified_tuples", "modified_tuples", 0); count_tve.setValueType(VoltType.BIGINT); count_tve.setValueSize(VoltType.BIGINT.getLengthInBytesForFixedTypes()); countNode.addAggregate(ExpressionType.AGGREGATE_SUM, false, 0, count_tve); // The output column. Not really based on a TVE (it is really the // count expression represented by the count configured above). But // this is sufficient for now. This looks identical to the above // TVE but it's logically different so we'll create a fresh one.
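// For orientation, the coordinator fragment assembled by this method ends up // shaped roughly as (sketch): SEND <- SUM(modified_tuples) <- RECEIVE <- dmlRoot // for a partitioned target, or SEND <- LIMIT 1 <- RECEIVE <- dmlRoot for a replicated one.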
TupleValueExpression tve = new TupleValueExpression( AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "modified_tuples", "modified_tuples", 0); tve.setValueType(VoltType.BIGINT); tve.setValueSize(VoltType.BIGINT.getLengthInBytesForFixedTypes()); NodeSchema count_schema = new NodeSchema(); count_schema.addColumn( AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "modified_tuples", "modified_tuples", tve); countNode.setOutputSchema(count_schema); } // connect the nodes to build the graph sumOrLimitNode.addAndLinkChild(dmlRoot); SendPlanNode sendNode = new SendPlanNode(); sendNode.addAndLinkChild(sumOrLimitNode); return sendNode; } /** * Given a relatively complete plan-sub-graph, apply a trivial projection * (filter) to it. If the root node can embed the projection do so. If not, * add a new projection node. * * @param rootNode * The root of the plan-sub-graph to add the projection to. * @return The new root of the plan-sub-graph (might be the same as the * input). */ private AbstractPlanNode addProjection(AbstractPlanNode rootNode) { assert (m_parsedSelect != null); assert (m_parsedSelect.m_displayColumns != null); // Build the output schema for the projection based on the display columns NodeSchema proj_schema = m_parsedSelect.getFinalProjectionSchema(); for (SchemaColumn col : proj_schema.getColumns()) { // Adjust the differentiator fields of TVEs, since they need to // reflect the inlined projection node in scan nodes. AbstractExpression colExpr = col.getExpression(); Collection<TupleValueExpression> allTves = ExpressionUtil.getTupleValueExpressions(colExpr); for (TupleValueExpression tve : allTves) { if ( ! tve.needsDifferentiation()) { // PartitionByPlanNode and a following OrderByPlanNode // can have an internally generated RANK column. // These do not need to have their differentiator updated, // since it's only used for disambiguation in some // combinations of "SELECT *" and subqueries. // In fact attempting to adjust this special column will // cause failed assertions. The tve for this expression // will be marked as not needing differentiation, // so we just ignore it here. continue; } rootNode.adjustDifferentiatorField(tve); } } ProjectionPlanNode projectionNode = new ProjectionPlanNode(); projectionNode.setOutputSchemaWithoutClone(proj_schema); // If the projection can be done inline, then add the // projection node inline. Even if the rootNode is a // scan, if we have a windowed expression we need to // add it out of line. if (rootNode instanceof AbstractScanPlanNode) { rootNode.addInlinePlanNode(projectionNode); return rootNode; } projectionNode.addAndLinkChild(rootNode); return projectionNode; } /** Given a list of ORDER BY columns, construct and return an OrderByPlanNode. */ private static OrderByPlanNode buildOrderByPlanNode(List<ParsedColInfo> cols) { OrderByPlanNode n = new OrderByPlanNode(); for (ParsedColInfo col : cols) { n.addSort(col.expression, col.ascending ? SortDirectionType.ASC : SortDirectionType.DESC); } return n; } /** * Determine if an OrderByPlanNode is needed. This may return false if the * statement has no ORDER BY clause, or if the subtree is already producing * rows in the correct order. Note that a hash or partial aggregate node will * cause this to return true, since it invalidates the index ordering, while * a serial aggregate node does not.
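* For example (illustrative, assuming an index on T(a)): "SELECT * FROM T ORDER BY a" * needs no OrderByPlanNode when the index scan already reports a valid sort direction, * while "SELECT * FROM T ORDER BY b" does.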
* * @param parsedStmt The statement whose plan may need an OrderByPlanNode * @param root The subtree which may need its output tuples ordered * @return true if the plan needs an OrderByPlanNode, false otherwise */ private static boolean isOrderByNodeRequired(AbstractParsedStmt parsedStmt, AbstractPlanNode root) { // Only sort when the statement has an ORDER BY. if ( ! parsedStmt.hasOrderByColumns()) { return false; } // Skip the explicit ORDER BY plan step if an IndexScan is already providing the equivalent ordering. // Note that even tree index scans that produce values in their own "key order" only report // their sort direction != SortDirectionType.INVALID // when they enforce an ordering equivalent to the one requested in the ORDER BY // or window function clause. Even an intervening non-hash aggregate will not interfere // in this optimization. // Is there a window function between the root and the // scan or join nodes? Also, does this window function // use the index? int numberWindowFunctions = 0; int numberReceiveNodes = 0; int numberHashAggregates = 0; // EE keeps the insertion ORDER so that ORDER BY could apply before DISTINCT. // However, this probably is not optimal if there are low cardinality results. // Again, we have to replace the TVEs for ORDER BY clause for these cases in planning. // // Find the scan or join node. AbstractPlanNode probe; for (probe = root; ! ((probe instanceof AbstractJoinPlanNode) || (probe instanceof AbstractScanPlanNode)) && (probe != null); probe = (probe.getChildCount() > 0) ? probe.getChild(0) : null) { // Count the number of window functions between the // root and the join/scan node. Note that we know we // have a statement level order by (SLOB) here. If the SLOB // can use the index for ordering the scan or join node, // we will have recorded it in the scan or join node. if (probe.getPlanNodeType() == PlanNodeType.WINDOWFUNCTION) { numberWindowFunctions += 1; } // Also, see if there are receive nodes. We need to // generate an ORDERBY node if there are RECEIVE nodes, // because the RECEIVE->MERGERECEIVE microoptimization // needs them. if (probe.getPlanNodeType() == PlanNodeType.RECEIVE) { numberReceiveNodes += 1; } // Finally, count the number of non-serial aggregate // nodes. A hash or partial aggregate operation invalidates // the ordering, but a serial aggregation does not. if ((probe.getPlanNodeType() == PlanNodeType.HASHAGGREGATE) || (probe.getPlanNodeType() == PlanNodeType.PARTIALAGGREGATE)) { numberHashAggregates += 1; } } if (probe == null) { // No idea what happened here. We can't find a // scan or join node at all. This seems unlikely // to be right. Maybe this should be an assert? return true; } // // o If the SLOB cannot use the index, then we // need an order by node always. // o If there are zero window functions, then // - If the SLOB cannot use the index then we // need an order by node. // - If the SLOB can use the index, then // = If the statement is a single fragment // statement then we don't need an order by // node. // = If the statement is a two fragment // statement then we need an order by node. // This is because we will convert the RECEIVE // node into a MERGERECEIVE node in the // microoptimizer, and the MERGERECEIVE // node needs an inline order by node to do // the merge. // o If there is only one window function, then // - If the window function does not use the index // then we always need an order by node.
// - If the window function can use the index but // the SLOB can't use the index, then we need an // order by node. // - If both the SLOB and the window function can // use the index, then we don't need an order // by, no matter how many fragments this statement // has. This is because any RECEIVE node will be // a descendent of the window function node. So // the RECEIVE to MERGERECEIVE conversion happens // in the window function and not the order by. // o If there is more than one window function then // we always need an order by node. The second // window function will invalidate the ordering of // the first one. (Actually, if the SLOB order is // compatible with the last window function then // the situation is like the one-window function // below.) // if ( ! (probe instanceof IndexSortablePlanNode)) { return true; } IndexUseForOrderBy indexUse = ((IndexSortablePlanNode)probe).indexUse(); if (indexUse.getSortOrderFromIndexScan() == SortDirectionType.INVALID) { return true; } // Hash aggregates and partial aggregates // invalidate the index ordering. So, we will need // an ORDERBY node. if (numberHashAggregates > 0) { return true; } if ( numberWindowFunctions == 0 ) { if ( indexUse.getWindowFunctionUsesIndex() == SubPlanAssembler.NO_INDEX_USE ) { return true; } assert( indexUse.getWindowFunctionUsesIndex() == SubPlanAssembler.STATEMENT_LEVEL_ORDER_BY_INDEX ); // Return true for MP (numberReceiveNodes > 0) and // false for SP (numberReceiveNodes == 0); return numberReceiveNodes > 0; } if (numberWindowFunctions == 1) { // If the WF uses the index then getWindowFunctionUsesIndex() // will return 0. if ( ( indexUse.getWindowFunctionUsesIndex() != 0 ) || ( ! indexUse.isWindowFunctionCompatibleWithOrderBy() ) ) { return true; } // Both the WF and the SLOB can use the index. Since the // window function will have the order by node, the SLOB // does not need one. So this is a false. return false; } // This can actually never happen now, // because we only support one window function. return true; } /** * Create an order by node as required by the statement and make it a parent of root. * @param parsedStmt Parsed statement, for context * @param root The root of the plan needing ordering * @return new orderByNode (the new root) or the original root if no orderByNode was required. */ private static AbstractPlanNode handleOrderBy(AbstractParsedStmt parsedStmt, AbstractPlanNode root) { assert (parsedStmt instanceof ParsedSelectStmt || parsedStmt instanceof ParsedUnionStmt || parsedStmt instanceof ParsedDeleteStmt); if (! isOrderByNodeRequired(parsedStmt, root)) { return root; } OrderByPlanNode orderByNode = buildOrderByPlanNode(parsedStmt.orderByColumns()); orderByNode.addAndLinkChild(root); return orderByNode; } /** * Add a limit, pushed-down if possible, and return the new root. * @param root top of the original plan * @return new plan's root node */ private AbstractPlanNode handleSelectLimitOperator(AbstractPlanNode root) { // The coordinator's top limit graph fragment for a MP plan. // If planning "order by ... limit", getNextSelectPlan() // will have already added an order by to the coordinator frag. // This is the only limit node in a SP plan LimitPlanNode topLimit = m_parsedSelect.getLimitNodeTop(); assert(topLimit != null); /* * TODO: allow push down limit with distinct (select distinct C from T limit 5) * , DISTINCT in aggregates and DISTINCT PUSH DOWN with partition column included. 
*/ AbstractPlanNode sendNode = null; // Whether or not we can push the limit node down boolean canPushDown = ! m_parsedSelect.hasDistinctWithGroupBy(); if (canPushDown) { sendNode = checkLimitPushDownViability(root); if (sendNode == null) { canPushDown = false; } else { canPushDown = m_parsedSelect.getCanPushdownLimit(); } } if (m_parsedSelect.m_mvFixInfo.needed()) { // Do not push down limit for mv based distributed query. canPushDown = false; } /* * Push down the limit plan node when possible even if offset is set. If * the plan is for a partitioned table, do the push down. Otherwise, * there is no need to do the push down work; the limit plan node will * be run in the partition. */ if (canPushDown) { /* * For partitioned table, the pushed-down limit plan node has a limit based * on the combined limit and offset, which may require an expression if either of these * was not a hard-coded constant and didn't get parameterized. * The top level limit plan node remains the same, with the original limit and offset values. */ LimitPlanNode distLimit = m_parsedSelect.getLimitNodeDist(); // Disconnect the distributed parts of the plan below the SEND node AbstractPlanNode distributedPlan = sendNode.getChild(0); distributedPlan.clearParents(); sendNode.clearChildren(); // If the distributed limit must be performed on ordered input, // ensure the order of the data on each partition. if (m_parsedSelect.hasOrderByColumns()) { distributedPlan = handleOrderBy(m_parsedSelect, distributedPlan); } if (isInlineLimitPlanNodePossible(distributedPlan)) { // Inline the distributed limit. distributedPlan.addInlinePlanNode(distLimit); sendNode.addAndLinkChild(distributedPlan); } else { distLimit.addAndLinkChild(distributedPlan); // Add the distributed work back to the plan sendNode.addAndLinkChild(distLimit); } } // In future, inline LIMIT for join, Receive. Then we do not need // to distinguish the order by node. return inlineLimitOperator(root, topLimit); } /** * Add a limit, and return the new root. * @param root top of the original plan * @return new plan's root node */ private AbstractPlanNode handleUnionLimitOperator(AbstractPlanNode root) { // The coordinator's top limit graph fragment for a MP plan. // If planning "order by ... limit", getNextUnionPlan() // will have already added an order by to the coordinator frag. // This is the only limit node in a SP plan LimitPlanNode topLimit = m_parsedUnion.getLimitNodeTop(); assert(topLimit != null); return inlineLimitOperator(root, topLimit); } /** * Inline Limit plan node if possible * @param root * @param topLimit * @return */ private AbstractPlanNode inlineLimitOperator(AbstractPlanNode root, LimitPlanNode topLimit) { if (isInlineLimitPlanNodePossible(root)) { root.addInlinePlanNode(topLimit); } else if (root instanceof ProjectionPlanNode && isInlineLimitPlanNodePossible(root.getChild(0)) ) { // In future, inline this projection node for OrderBy and Aggregate. // Then we could delete this ELSE IF block.
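// (Sketch of the shape handled by this branch: PROJECTION <- ORDERBY/AGG, // where the limit gets inlined one level below the projection. The final // else below falls back to a standalone LimitPlanNode above root.)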
root.getChild(0).addInlinePlanNode(topLimit); } else { topLimit.addAndLinkChild(root); root = topLimit; } return root; } /** * An inline limit plan node can be applied to an ORDER BY node * or a serial aggregation node * @param pn * @return */ static private boolean isInlineLimitPlanNodePossible(AbstractPlanNode pn) { if (pn instanceof OrderByPlanNode || pn.getPlanNodeType() == PlanNodeType.AGGREGATE) { return true; } return false; } private AbstractPlanNode handleMVBasedMultiPartQuery( HashAggregatePlanNode reAggNode, AbstractPlanNode root, boolean edgeCaseOuterJoin) { MaterializedViewFixInfo mvFixInfo = m_parsedSelect.m_mvFixInfo; AbstractPlanNode receiveNode = root; AbstractPlanNode reAggParent = null; // Find receive plan node and insert the constructed // re-aggregation plan node. if (root instanceof AbstractReceivePlanNode) { root = reAggNode; } else { List<AbstractPlanNode> recList = root.findAllNodesOfClass(AbstractReceivePlanNode.class); assert(recList.size() == 1); receiveNode = recList.get(0); reAggParent = receiveNode.getParent(0); boolean result = reAggParent.replaceChild(receiveNode, reAggNode); assert(result); } reAggNode.addAndLinkChild(receiveNode); reAggNode.m_isCoordinatingAggregator = true; assert(receiveNode instanceof ReceivePlanNode); AbstractPlanNode sendNode = receiveNode.getChild(0); assert(sendNode instanceof SendPlanNode); AbstractPlanNode sendNodeChild = sendNode.getChild(0); HashAggregatePlanNode reAggNodeForReplace = null; if (m_parsedSelect.m_tableList.size() > 1 && !edgeCaseOuterJoin) { reAggNodeForReplace = reAggNode; } boolean find = mvFixInfo.processScanNodeWithReAggNode(sendNode, reAggNodeForReplace); assert(find); // If it is a normal joined query, replace the node under the // receive node with materialized view scan node. if (m_parsedSelect.m_tableList.size() > 1 && !edgeCaseOuterJoin) { AbstractPlanNode joinNode = sendNodeChild; // No agg, limit pushed down at this point. assert(joinNode instanceof AbstractJoinPlanNode); // Fix the node after Re-aggregation node. joinNode.clearParents(); assert(mvFixInfo.m_scanNode != null); mvFixInfo.m_scanNode.clearParents(); // replace joinNode with MV scan node on each partition. sendNode.clearChildren(); sendNode.addAndLinkChild(mvFixInfo.m_scanNode); // If reAggNode has parent node before we put it under join node, // its parent will be the parent of the new join node. Update the root node. if (reAggParent != null) { reAggParent.replaceChild(reAggNode, joinNode); root = reAggParent; } else { root = joinNode; } } return root; } private static class IndexGroupByInfo { boolean m_multiPartition = false; List<Integer> m_coveredGroupByColumns; boolean m_canBeFullySerialized = false; AbstractPlanNode m_indexAccess = null; boolean isChangedToSerialAggregate() { return m_canBeFullySerialized && m_indexAccess != null; } boolean isChangedToPartialAggregate() { return !m_canBeFullySerialized && m_indexAccess != null; } boolean needHashAggregator(AbstractPlanNode root, ParsedSelectStmt parsedSelect) { // A hash is required to build up per-group aggregates in parallel, // as opposed to when there is only one aggregation over the entire table OR when the // per-group aggregates are being built serially from the ordered output // of an index scan. // Currently, an index scan only claims to have a sort direction when its output // matches the order demanded by the ORDER BY clause. if (! parsedSelect.isGrouped()) { return false; } if (isChangedToSerialAggregate() && !
m_multiPartition) { return false; } boolean predeterminedOrdering = false; if (root instanceof IndexScanPlanNode) { if (((IndexScanPlanNode)root).getSortDirection() != SortDirectionType.INVALID) { predeterminedOrdering = true; } } else if (root instanceof AbstractJoinPlanNode) { if (((AbstractJoinPlanNode)root).getSortDirection() != SortDirectionType.INVALID) { predeterminedOrdering = true; } } if (predeterminedOrdering) { // The ordering predetermined by indexed access is known // to cover (at least) the ORDER BY columns. // Yet, any additional non-ORDER-BY columns in the GROUP BY // clause will need partial aggregation. if (parsedSelect.groupByIsAnOrderByPermutation()) { return false; } } return true; } } private static AbstractPlanNode findSeqScanCandidateForGroupBy( AbstractPlanNode candidate) { if (candidate.getPlanNodeType() == PlanNodeType.SEQSCAN && ! candidate.isSubQuery()) { // scan on sub-query does not support index, early exit here // In future, support sub-query edge cases. return candidate; } // For join node, find outer sequential scan plan node if (candidate.getPlanNodeType() == PlanNodeType.NESTLOOP) { assert(candidate.getChildCount() == 2); return findSeqScanCandidateForGroupBy(candidate.getChild(0)); } if (candidate.getPlanNodeType() == PlanNodeType.NESTLOOPINDEX) { return findSeqScanCandidateForGroupBy(candidate.getChild(0)); } return null; } /** * For a seqscan feeding a GROUP BY, consider substituting an IndexScan * that pre-sorts by the GROUP BY keys. * If a candidate is already an indexscan, * simply calculate GROUP BY column coverage * * @param candidate * @param gbInfo * @return true when planner can switch to index scan * from a sequential scan, and when the index scan * has no parent plan node or the candidate is already * an indexscan and covers all or some GROUP BY columns */ private boolean switchToIndexScanForGroupBy(AbstractPlanNode candidate, IndexGroupByInfo gbInfo) { if (! m_parsedSelect.isGrouped()) { return false; } if (candidate instanceof IndexScanPlanNode) { calculateIndexGroupByInfo((IndexScanPlanNode) candidate, gbInfo); if (gbInfo.m_coveredGroupByColumns != null && !gbInfo.m_coveredGroupByColumns.isEmpty()) { // The candidate index does cover all or some // of the GROUP BY columns and can be serialized gbInfo.m_indexAccess = candidate; return true; } return false; } AbstractPlanNode sourceSeqScan = findSeqScanCandidateForGroupBy(candidate); if (sourceSeqScan == null) { return false; } assert(sourceSeqScan instanceof SeqScanPlanNode); AbstractPlanNode parent = null; if (sourceSeqScan.getParentCount() > 0) { parent = sourceSeqScan.getParent(0); } AbstractPlanNode indexAccess = indexAccessForGroupByExprs( (SeqScanPlanNode)sourceSeqScan, gbInfo); if (indexAccess.getPlanNodeType() != PlanNodeType.INDEXSCAN) { // does not find proper index to replace sequential scan return false; } gbInfo.m_indexAccess = indexAccess; if (parent != null) { // have a parent and would like to replace // the sequential scan with an index scan indexAccess.clearParents(); // For two children join node, index 0 is its outer side parent.replaceChild(0, indexAccess); return false; } // parent is null and switched to index scan from sequential scan return true; } /** * Create nodes for windowed operations. * * @param root * @return */ private AbstractPlanNode handleWindowedOperators(AbstractPlanNode root) { // Get the windowed expression. We need to set its output // schema from the display list. 
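// An illustrative statement that reaches this path (hypothetical schema): // SELECT a, RANK() OVER (PARTITION BY b ORDER BY c) FROM t; // Only one window function per statement is supported at this point, hence the get(0) below.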
WindowFunctionExpression winExpr = m_parsedSelect.getWindowFunctionExpressions().get(0); assert(winExpr != null); // This will set the output schema to contain the // windowed schema column only. In generateOutputSchema // we will add the input columns. WindowFunctionPlanNode pnode = new WindowFunctionPlanNode(); pnode.setWindowFunctionExpression(winExpr); // We always need an order by plan node, even if the sort // is optimized away by an index. This may be turned // into an inline order by in a MergeReceivePlanNode. IndexUseForOrderBy scanNode = findScanNodeForWindowFunction(root); AbstractPlanNode cnode = null; int winfunc = (scanNode == null) ? SubPlanAssembler.NO_INDEX_USE : scanNode.getWindowFunctionUsesIndex(); // If we have an index which is compatible with the statement // level order by, and we have a window function which can't // use the index, we have to ignore the statement level order by // index use. We will need to order the input according to the // window function first, and that will in general invalidate the // statement level order by ordering. if ((SubPlanAssembler.STATEMENT_LEVEL_ORDER_BY_INDEX == winfunc) || (SubPlanAssembler.NO_INDEX_USE == winfunc)) { // No index. Calculate the expression order here and stuff it into // the order by node. Note that if we support more than one window // function this would be the case when scanNode.getWindowFunctionUsesIndex() // returns a window function number which is different from the number // of winExpr. List<AbstractExpression> partitionByExpressions = winExpr.getPartitionByExpressions(); // If the order by expression list contains a partition by expression then // we won't have to sort by it twice. We sort by the partition by expressions // first, and we don't care what order we sort by them. So, find the // sort direction in the order by list and use that in the partition by // list, and then mark that it was deleted in the order by // list. // // We choose to make this dontsort rather than dosort because the // Java default value for boolean is false, and we want to sort by // default. boolean dontsort[] = new boolean[winExpr.getOrderbySize()]; List<AbstractExpression> orderByExpressions = winExpr.getOrderByExpressions(); List<SortDirectionType> orderByDirections = winExpr.getOrderByDirections(); OrderByPlanNode onode = new OrderByPlanNode(); for (int idx = 0; idx < winExpr.getPartitionbySize(); ++idx) { SortDirectionType pdir = SortDirectionType.ASC; AbstractExpression partitionByExpression = partitionByExpressions.get(idx); int sidx = winExpr.getSortIndexOfOrderByExpression(partitionByExpression); if (0 <= sidx) { pdir = orderByDirections.get(sidx); dontsort[sidx] = true; } onode.addSort(partitionByExpression, pdir); } for (int idx = 0; idx < winExpr.getOrderbySize(); ++idx) { if (!dontsort[idx]) { AbstractExpression orderByExpr = orderByExpressions.get(idx); SortDirectionType orderByDir = orderByDirections.get(idx); onode.addSort(orderByExpr, orderByDir); } } onode.addAndLinkChild(root); cnode = onode; } else { assert(scanNode != null); // This means the index is good for this window function. // If this is an MP statement we still need to generate the // order by node, because we may need to turn it into an // inline order by node of a MergeReceive node.
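// (The 0 checked below is the index of the one-and-only supported window // function; SubPlanAssembler.NO_INDEX_USE and // SubPlanAssembler.STATEMENT_LEVEL_ORDER_BY_INDEX are the sentinel values // handled in the branch above.)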
assert( 0 == scanNode.getWindowFunctionUsesIndex() ); if (m_partitioning.requiresTwoFragments()) { OrderByPlanNode onode = new OrderByPlanNode(); SortDirectionType dir = scanNode.getSortOrderFromIndexScan(); assert(dir != SortDirectionType.INVALID); // This was created when the index was determined. // We cached it in the scan node. List<AbstractExpression> orderExprs = scanNode.getFinalExpressionOrderFromIndexScan(); assert(orderExprs != null); for (AbstractExpression ae : orderExprs) { onode.addSort(ae, dir); } // Link in the OrderByNode. onode.addAndLinkChild(root); cnode = onode; } else { // Don't create and link in the order by node. cnode = root; } } pnode.addAndLinkChild(cnode); return pnode; } private IndexUseForOrderBy findScanNodeForWindowFunction(AbstractPlanNode root) { while (root != null) { if (root instanceof IndexSortablePlanNode) { return ((IndexSortablePlanNode) root).indexUse(); } // Any other kind of scan or join plan // node cannot have a useful index. if ((root instanceof AbstractScanPlanNode) || (root instanceof AbstractJoinPlanNode)) { return null; } if (root.getChildCount() == 0) { break; } root = root.getChild(0); } return null; } private AbstractPlanNode handleAggregationOperators(AbstractPlanNode root) { /* Check if any aggregate expressions are present */ /* * "Select A from T group by A" is grouped but has no aggregate operator * expressions. Catch that case by checking the grouped flag */ if (m_parsedSelect.hasAggregateOrGroupby()) { AggregatePlanNode aggNode = null; AggregatePlanNode topAggNode = null; // i.e., on the coordinator IndexGroupByInfo gbInfo = new IndexGroupByInfo(); if (root instanceof AbstractReceivePlanNode) { // do not apply index scan for serial/partial aggregation // for distinct that does not group by partition column if ( ! m_parsedSelect.hasAggregateDistinct() || m_parsedSelect.hasPartitionColumnInGroupby()) { AbstractPlanNode candidate = root.getChild(0).getChild(0); gbInfo.m_multiPartition = true; switchToIndexScanForGroupBy(candidate, gbInfo); } } else if (switchToIndexScanForGroupBy(root, gbInfo)) { root = gbInfo.m_indexAccess; } boolean needHashAgg = gbInfo.needHashAggregator(root, m_parsedSelect); // Construct the aggregate nodes if (needHashAgg) { if ( m_parsedSelect.m_mvFixInfo.needed() ) { // TODO: may optimize this edge case in future aggNode = new HashAggregatePlanNode(); } else { if (gbInfo.isChangedToSerialAggregate()) { assert(root instanceof ReceivePlanNode); aggNode = new AggregatePlanNode(); } else if (gbInfo.isChangedToPartialAggregate()) { aggNode = new PartialAggregatePlanNode(gbInfo.m_coveredGroupByColumns); } else { aggNode = new HashAggregatePlanNode(); } topAggNode = new HashAggregatePlanNode(); } } else { aggNode = new AggregatePlanNode(); if ( ! 
m_parsedSelect.m_mvFixInfo.needed()) { topAggNode = new AggregatePlanNode(); } } NodeSchema agg_schema = new NodeSchema(); NodeSchema top_agg_schema = new NodeSchema(); for ( int outputColumnIndex = 0; outputColumnIndex < m_parsedSelect.m_aggResultColumns.size(); outputColumnIndex += 1) { ParsedColInfo col = m_parsedSelect.m_aggResultColumns.get(outputColumnIndex); AbstractExpression rootExpr = col.expression; AbstractExpression agg_input_expr = null; SchemaColumn schema_col = null; SchemaColumn top_schema_col = null; if (rootExpr instanceof AggregateExpression) { ExpressionType agg_expression_type = rootExpr.getExpressionType(); agg_input_expr = rootExpr.getLeft(); // A bit of a hack: ProjectionNodes after the // aggregate node need the output columns here to // contain TupleValueExpressions (effectively on a temp table). // So we construct one based on the output of the // aggregate expression, the column alias provided by HSQL, // and the offset into the output table schema for the // aggregate node that we're computing. // Oh, oh, it's magic, you know.. TupleValueExpression tve = new TupleValueExpression( AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, rootExpr, outputColumnIndex); tve.setDifferentiator(col.differentiator); boolean is_distinct = ((AggregateExpression)rootExpr).isDistinct(); aggNode.addAggregate(agg_expression_type, is_distinct, outputColumnIndex, agg_input_expr); schema_col = new SchemaColumn( AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, tve, outputColumnIndex); top_schema_col = new SchemaColumn( AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, tve, outputColumnIndex); /* * Special case count(*), count(), sum(), min() and max() to * push them down to each partition. It will do the * push-down if the select columns only contain the listed * aggregate operators and other group-by columns. If the * select columns include any other aggregates, it will not * do the push-down. - nshi */ if (topAggNode != null) { ExpressionType top_expression_type = agg_expression_type; /* * For count(*), count() and sum(), the pushed-down * aggregate node doesn't change. An extra sum() * aggregate node is added to the coordinator to sum up * the numbers from all the partitions. The input schema * and the output schema of the sum() aggregate node are * the same as the output schema of the push-down * aggregate node. * * If DISTINCT is specified, don't do push-down for * count() and sum() when not grouping by the partition column. * An exception is when the aggregation arguments are the * partition column (ENG-4980). */ if (agg_expression_type == ExpressionType.AGGREGATE_COUNT_STAR || agg_expression_type == ExpressionType.AGGREGATE_COUNT || agg_expression_type == ExpressionType.AGGREGATE_SUM) { if (is_distinct && ! (m_parsedSelect.hasPartitionColumnInGroupby() || canPushDownDistinctAggregation((AggregateExpression)rootExpr) ) ) { topAggNode = null; } else { // for aggregate distinct when group by // partition column, the top aggregate node // will be dropped later, thus there is no // effect to assign the top_expression_type. top_expression_type = ExpressionType.AGGREGATE_SUM; } } /* * For min() and max(), the pushed-down aggregate node * doesn't change. An extra aggregate node of the same * type is added to the coordinator. The input schema * and the output schema of the top aggregate node are * the same as the output schema of the pushed-down * aggregate node.
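* For example (illustrative): MAX(c) is pushed down as a per-partition * MAX with a coordinator MAX over the partial results, whereas AVG has * no such single-function split and disables the top node below.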
* * APPROX_COUNT_DISTINCT can be similarly pushed down, but * must be split into two different functions, which is * done later, from pushDownAggregate(). */ else if (agg_expression_type != ExpressionType.AGGREGATE_MIN && agg_expression_type != ExpressionType.AGGREGATE_MAX && agg_expression_type != ExpressionType.AGGREGATE_APPROX_COUNT_DISTINCT) { /* * Unsupported aggregate for push-down (AVG for example). */ topAggNode = null; } if (topAggNode != null) { /* * Input column of the top aggregate node is the * output column of the push-down aggregate node */ boolean topDistinctFalse = false; topAggNode.addAggregate(top_expression_type, topDistinctFalse, outputColumnIndex, tve); } }// end if we have a top agg node } else { // All complex aggregations have been simplified; // cases like "MAX(counter)+1" or "MAX(col)/MIN(col)" // have already been broken down. assert( ! rootExpr.hasAnySubexpressionOfClass(AggregateExpression.class)); /* * These columns are the pass through columns that are not being * aggregated on. These are the ones from the SELECT list. They * MUST already exist in the child node's output. Find them and * add them to the aggregate's output. */ schema_col = new SchemaColumn( col.tableName, col.tableAlias, col.columnName, col.alias, col.expression, outputColumnIndex); AbstractExpression topExpr = null; if (col.groupBy) { topExpr = m_parsedSelect.m_groupByExpressions.get(col.alias); } else { topExpr = col.expression; } top_schema_col = new SchemaColumn( col.tableName, col.tableAlias, col.columnName, col.alias, topExpr, outputColumnIndex); } agg_schema.addColumn(schema_col); top_agg_schema.addColumn(top_schema_col); }// end for each ParsedColInfo in m_aggResultColumns for (ParsedColInfo col : m_parsedSelect.groupByColumns()) { aggNode.addGroupByExpression(col.expression); if (topAggNode != null) { topAggNode.addGroupByExpression(m_parsedSelect.m_groupByExpressions.get(col.alias)); } } aggNode.setOutputSchema(agg_schema); if (topAggNode != null) { if (m_parsedSelect.hasComplexGroupby()) { topAggNode.setOutputSchema(top_agg_schema); } else { topAggNode.setOutputSchema(agg_schema); } } // Never push down aggregation for MV fix case. root = pushDownAggregate(root, aggNode, topAggNode, m_parsedSelect); } return handleDistinctWithGroupby(root); } // Sets IndexGroupByInfo for an IndexScan private void calculateIndexGroupByInfo(IndexScanPlanNode root, IndexGroupByInfo gbInfo) { String fromTableAlias = root.getTargetTableAlias(); assert(fromTableAlias != null); Index index = root.getCatalogIndex(); if ( !
IndexType.isScannable(index.getType())) { return; } ArrayList<AbstractExpression> bindings = new ArrayList<>(); gbInfo.m_coveredGroupByColumns = calculateGroupbyColumnsCovered( index, fromTableAlias, bindings); gbInfo.m_canBeFullySerialized = (gbInfo.m_coveredGroupByColumns.size() == m_parsedSelect.groupByColumns().size()); } // Turn sequential scan to index scan for group by if possible private AbstractPlanNode indexAccessForGroupByExprs(SeqScanPlanNode root, IndexGroupByInfo gbInfo) { if (root.isSubQuery()) { // sub-query edge case will not be handled now return root; } String fromTableAlias = root.getTargetTableAlias(); assert(fromTableAlias != null); List<ParsedColInfo> groupBys = m_parsedSelect.groupByColumns(); Table targetTable = m_catalogDb.getTables().get(root.getTargetTableName()); assert(targetTable != null); CatalogMap<Index> allIndexes = targetTable.getIndexes(); List<Integer> maxCoveredGroupByColumns = new ArrayList<>(); ArrayList<AbstractExpression> maxCoveredBindings = null; Index pickedUpIndex = null; boolean foundAllGroupByCoveredIndex = false; for (Index index : allIndexes) { if ( ! IndexType.isScannable(index.getType())) { continue; } if ( ! index.getPredicatejson().isEmpty()) { // do not try to look at Partial/Sparse index continue; } ArrayList<AbstractExpression> bindings = new ArrayList<>(); List<Integer> coveredGroupByColumns = calculateGroupbyColumnsCovered( index, fromTableAlias, bindings); if (coveredGroupByColumns.size() > maxCoveredGroupByColumns.size()) { maxCoveredGroupByColumns = coveredGroupByColumns; pickedUpIndex = index; maxCoveredBindings = bindings; if (maxCoveredGroupByColumns.size() == groupBys.size()) { foundAllGroupByCoveredIndex = true; break; } } } if (pickedUpIndex == null) { return root; } IndexScanPlanNode indexScanNode = new IndexScanPlanNode( root, null, pickedUpIndex, SortDirectionType.INVALID); indexScanNode.setForGroupingOnly(); indexScanNode.setBindings(maxCoveredBindings); gbInfo.m_coveredGroupByColumns = maxCoveredGroupByColumns; gbInfo.m_canBeFullySerialized = foundAllGroupByCoveredIndex; return indexScanNode; } private List<Integer> calculateGroupbyColumnsCovered(Index index, String fromTableAlias, List<AbstractExpression> bindings) { List<Integer> coveredGroupByColumns = new ArrayList<>(); List<ParsedColInfo> groupBys = m_parsedSelect.groupByColumns(); String exprsjson = index.getExpressionsjson(); if (exprsjson.isEmpty()) { List<ColumnRef> indexedColRefs = CatalogUtil.getSortedCatalogItems(index.getColumns(), "index"); for (int j = 0; j < indexedColRefs.size(); j++) { String indexColumnName = indexedColRefs.get(j).getColumn().getName(); // ignore order of keys in GROUP BY expr int ithCovered = 0; boolean foundPrefixedColumn = false; for (; ithCovered < groupBys.size(); ithCovered++) { AbstractExpression gbExpr = groupBys.get(ithCovered).expression; if ( ! (gbExpr instanceof TupleValueExpression)) { continue; } TupleValueExpression gbTVE = (TupleValueExpression) gbExpr; // TVE column index has not been resolved currently if (fromTableAlias.equals(gbTVE.getTableAlias()) && indexColumnName.equals(gbTVE.getColumnName())) { foundPrefixedColumn = true; break; } } if ( ! 
foundPrefixedColumn) { // no prefix match any more break; } coveredGroupByColumns.add(ithCovered); if (coveredGroupByColumns.size() == groupBys.size()) { // covered all group by columns already break; } } } else { StmtTableScan fromTableScan = m_parsedSelect.getStmtTableScanByAlias(fromTableAlias); // either pure expression index or mix of expressions and simple columns List<AbstractExpression> indexedExprs = null; try { indexedExprs = AbstractExpression.fromJSONArrayString(exprsjson, fromTableScan); } catch (JSONException e) { e.printStackTrace(); // This case sounds impossible return coveredGroupByColumns; } for (AbstractExpression indexExpr : indexedExprs) { // ignore order of keys in GROUP BY expr List<AbstractExpression> binding = null; for (int ithCovered = 0; ithCovered < groupBys.size(); ithCovered++) { AbstractExpression gbExpr = groupBys.get(ithCovered).expression; binding = gbExpr.bindingToIndexedExpression(indexExpr); if (binding != null) { bindings.addAll(binding); coveredGroupByColumns.add(ithCovered); break; } } // no prefix match any more or covered all group by columns already if (binding == null || coveredGroupByColumns.size() == groupBys.size()) { break; } } } return coveredGroupByColumns; } /** * This function is called once it's been determined that we can push down * an aggregation plan node. * * If an APPROX_COUNT_DISTINCT aggregate is distributed, then we need to * convert the distributed aggregate function to VALS_TO_HYPERLOGLOG, * and the coordinating aggregate function to HYPERLOGLOGS_TO_CARD. * * @param distNode The aggregate node executed on each partition * @param coordNode The aggregate node executed on the coordinator */ private static void fixDistributedApproxCountDistinct( AggregatePlanNode distNode, AggregatePlanNode coordNode) { assert (distNode != null); assert (coordNode != null); // Patch up any APPROX_COUNT_DISTINCT on the distributed node. List<ExpressionType> distAggTypes = distNode.getAggregateTypes(); boolean hasApproxCountDistinct = false; for (int i = 0; i < distAggTypes.size(); ++i) { ExpressionType et = distAggTypes.get(i); if (et == ExpressionType.AGGREGATE_APPROX_COUNT_DISTINCT) { hasApproxCountDistinct = true; distNode.updateAggregate(i, ExpressionType.AGGREGATE_VALS_TO_HYPERLOGLOG); } } if (hasApproxCountDistinct) { // Now, patch up any APPROX_COUNT_DISTINCT on the coordinating node. List<ExpressionType> coordAggTypes = coordNode.getAggregateTypes(); for (int i = 0; i < coordAggTypes.size(); ++i) { ExpressionType et = coordAggTypes.get(i); if (et == ExpressionType.AGGREGATE_APPROX_COUNT_DISTINCT) { coordNode.updateAggregate(i, ExpressionType.AGGREGATE_HYPERLOGLOGS_TO_CARD); } } } } /** * Push the given aggregate if the plan is distributed, then add the * coordinator node on top of the send/receive pair. If the plan * is not distributed, or coordNode is not provided, the distNode * is added at the top of the plan. * * Note: this works in part because the push-down node is also an acceptable * top level node if the plan is not distributed. This wouldn't be true * if we started pushing down something like (sum, count) to calculate * a distributed average. (We already do something like this for * APPROX_COUNT_DISTINCT, which must be split into two different functions * for the pushed-down case.) * * @param root * The root node * @param distNode * The node to push down * @param coordNode [may be null] * The top node to put on top of the send/receive pair after * push-down. If this is null, no push-down will be performed. 
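* For example (sketch): a pushed-down SUM over a partitioned table * becomes SUM(coordinator) <- RECEIVE <- SEND <- SUM(partition) <- scan, * where the coordinator sums the per-partition partial sums.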
* @return The new root node. */ private static AbstractPlanNode pushDownAggregate(AbstractPlanNode root, AggregatePlanNode distNode, AggregatePlanNode coordNode, ParsedSelectStmt selectStmt) { AggregatePlanNode rootAggNode; // remember that coordinating aggregation has a pushed-down // counterpart deeper in the plan. this allows other operators // to be pushed down past the receive as well. if (coordNode != null) { coordNode.m_isCoordinatingAggregator = true; } /* * Push this node down to partition if it's distributed. First remove * the send/receive pair, add the node, then put the send/receive pair * back on top of the node, followed by another top node at the * coordinator. */ if (coordNode != null && root instanceof ReceivePlanNode) { AbstractPlanNode accessPlanTemp = root; root = accessPlanTemp.getChild(0).getChild(0); root.clearParents(); accessPlanTemp.getChild(0).clearChildren(); distNode.addAndLinkChild(root); if (selectStmt.hasPartitionColumnInGroupby()) { // Set post predicate for final distributed Aggregation node distNode.setPostPredicate(selectStmt.getHavingPredicate()); // Edge case: GROUP BY clause contains the partition column // No related GROUP BY or even Re-agg will apply on coordinator // Projection plan node can just be pushed down also except for // a very edge ORDER BY case. if (selectStmt.isComplexOrderBy()) { // Put the send/receive pair back into place accessPlanTemp.getChild(0).addAndLinkChild(distNode); root = processComplexAggProjectionNode(selectStmt, accessPlanTemp); return root; } root = processComplexAggProjectionNode(selectStmt, distNode); // Put the send/receive pair back into place accessPlanTemp.getChild(0).addAndLinkChild(root); return accessPlanTemp; } // Without including partition column in GROUP BY clause, // there has to be a top GROUP BY plan node on coordinator. // // Now that we're certain the aggregate will be pushed down // (no turning back now!), fix any APPROX_COUNT_DISTINCT aggregates. fixDistributedApproxCountDistinct(distNode, coordNode); // Put the send/receive pair back into place accessPlanTemp.getChild(0).addAndLinkChild(distNode); // Add the top node coordNode.addAndLinkChild(accessPlanTemp); rootAggNode = coordNode; } else { distNode.addAndLinkChild(root); rootAggNode = distNode; } // Set post predicate for final Aggregation node. rootAggNode.setPostPredicate(selectStmt.getHavingPredicate()); root = processComplexAggProjectionNode(selectStmt, rootAggNode); return root; } private static AbstractPlanNode processComplexAggProjectionNode( ParsedSelectStmt selectStmt, AbstractPlanNode root) { if ( ! selectStmt.hasComplexAgg()) { return root; } ProjectionPlanNode proj = new ProjectionPlanNode(selectStmt.getFinalProjectionSchema()); proj.addAndLinkChild(root); return proj; } /** * Check if we can push the limit node down. * * Return a mid-plan send node, if one exists and can host a * distributed limit node. * There is guaranteed to be at most a single receive/send pair. * Abort the search if a node that a "limit" can't be pushed past * is found before its receive node. * * Can only push past: * * coordinatingAggregator: a distributed aggregator * a copy of which has already been pushed down. * Distributing a LIMIT to just above that aggregator is correct. * (I've got some doubts that this is correct??? --paul) * * * order by: if the plan requires a sort, getNextSelectPlan() * will have already added an ORDER BY. * A distributed LIMIT will be added above a copy * of that ORDER BY node. 
* * projection: these have no effect on the application of limits. * * @param root * @return If we can push the limit down, the send plan node is returned. * Otherwise null -- when the plan is single-partition or when * its "coordinator" part contains a push-blocking node type. */ protected AbstractPlanNode checkLimitPushDownViability( AbstractPlanNode root) { AbstractPlanNode receiveNode = root; List<ParsedColInfo> orderBys = m_parsedSelect.orderByColumns(); boolean orderByCoversAllGroupBy = m_parsedSelect.groupByIsAnOrderByPermutation(); while ( ! (receiveNode instanceof ReceivePlanNode)) { // Limitation: can only push past some nodes (see above comment) // Delete the aggregate node case to handle ENG-6485; // in other words, we don't push down when meeting an aggregate node // TODO: We might want to optimize/push down "limit" for some cases if ( ! (receiveNode instanceof OrderByPlanNode) && ! (receiveNode instanceof ProjectionPlanNode) && ! isValidAggregateNodeForLimitPushdown(receiveNode, orderBys, orderByCoversAllGroupBy) ) { return null; } if (receiveNode instanceof OrderByPlanNode) { // if grouping by the partition key, // limit can still push down if ordered by aggregate values. if (! m_parsedSelect.hasPartitionColumnInGroupby() && isOrderByAggregationValue(m_parsedSelect.orderByColumns())) { return null; } } // Traverse... if (receiveNode.getChildCount() == 0) { return null; } // nothing that allows pushing past has multiple inputs assert(receiveNode.getChildCount() == 1); receiveNode = receiveNode.getChild(0); } return receiveNode.getChild(0); } private static boolean isOrderByAggregationValue(List<ParsedColInfo> orderBys) { for (ParsedColInfo col : orderBys) { AbstractExpression rootExpr = col.expression; // Fix ENG-3487: can't usually push down limits // when results are ordered by aggregate values. for (AbstractExpression tve : rootExpr.findAllTupleValueSubexpressions()) { if (((TupleValueExpression) tve).hasAggregate()) { return true; } } } return false; } private static boolean isValidAggregateNodeForLimitPushdown( AbstractPlanNode aggregateNode, List<ParsedColInfo> orderBys, boolean orderByCoversAllGroupBy) { if (aggregateNode instanceof AggregatePlanNode == false) { return false; } if (aggregateNode.getParentCount() == 0) { return false; } // Limitation: can only push past coordinating aggregation nodes if ( ! ((AggregatePlanNode)aggregateNode).m_isCoordinatingAggregator) { return false; } AbstractPlanNode parent = aggregateNode.getParent(0); AbstractPlanNode orderByNode = null; if (parent instanceof OrderByPlanNode) { orderByNode = parent; } else if ( parent instanceof ProjectionPlanNode && parent.getParentCount() > 0 && parent.getParent(0) instanceof OrderByPlanNode) { // Xin really wants inline project with aggregation orderByNode = parent.getParent(0); } if (orderByNode == null) { // When an aggregate without order by and group by columns // does not contain the partition column, // the limit should not be pushed down. return false; } if (( ! orderByCoversAllGroupBy) || isOrderByAggregationValue(orderBys)) { return false; } return true; } /** * Handle DISTINCT with GROUP BY if it is not redundant with the * aggregation/grouping. * DISTINCT is basically rewritten with GROUP BY to benefit from * all kinds of GROUP BY optimizations. * Trivial case DISTINCT in a statement with no GROUP BY has been * rewritten very early at query parsing time.
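* (For example, "SELECT DISTINCT a FROM t" is simply treated as * "SELECT a FROM t GROUP BY a" by that early rewrite.)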
* In the non-trivial case, where an existing GROUP BY column is NOT * in the select list, DISTINCT can be implemented via a final aggregation * (never pushed down) added to the top of the plan. * @param root can be an aggregate plan node or projection plan node * @return */ private AbstractPlanNode handleDistinctWithGroupby(AbstractPlanNode root) { if (! m_parsedSelect.hasDistinctWithGroupBy()) { return root; } assert(m_parsedSelect.isGrouped()); // DISTINCT is redundant with GROUP BY IFF // all of the grouping columns are present in the display columns. if (m_parsedSelect.displayColumnsContainAllGroupByColumns()) { return root; } // The non-complex aggregation cases have already been handled by now assert(m_parsedSelect.hasComplexAgg()); AggregatePlanNode distinctAggNode = new HashAggregatePlanNode(); distinctAggNode.setOutputSchema(m_parsedSelect.getDistinctProjectionSchema()); for (ParsedColInfo col : m_parsedSelect.distinctGroupByColumns()) { distinctAggNode.addGroupByExpression(col.expression); } // TODO(xin): push down the DISTINCT for certain cases // Ticket: ENG-7360 /* boolean pushedDown = false; boolean canPushdownDistinctAgg = m_parsedSelect.hasPartitionColumnInDistinctGroupby(); // // disable pushdown, DISTINCT push down turns out complex // canPushdownDistinctAgg = false; if (canPushdownDistinctAgg && !m_parsedSelect.m_mvFixInfo.needed()) { assert(m_parsedSelect.hasPartitionColumnInGroupby()); AbstractPlanNode receive = root; if (receive instanceof ReceivePlanNode) { // Temporarily strip send/receive pair AbstractPlanNode distNode = receive.getChild(0).getChild(0); receive.getChild(0).unlinkChild(distNode); distinctAggNode.addAndLinkChild(distNode); receive.getChild(0).addAndLinkChild(distinctAggNode); pushedDown = true; } }*/ distinctAggNode.addAndLinkChild(root); root = distinctAggNode; return root; } /** * Get the unique set of names of all columns that are part of an index on * the given table. * * @param table * The table to build the list of index-affected columns with. * @return The set of column names affected by indexes with duplicates * removed. */ private static Set<String> getIndexedColumnSetForTable(Table table) { HashSet<String> columns = new HashSet<>(); for (Index index : table.getIndexes()) { for (ColumnRef colRef : index.getColumns()) { columns.add(colRef.getColumn().getTypeName()); } } return columns; } String getErrorMessage() { return m_recentErrorMsg; } /** * Outer join simplification using null rejection. * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.43.2531 * Outerjoin Simplification and Reordering for Query Optimization * by Cesar A. Galindo-Legaria, Arnon Rosenthal * Algorithm: * Traverse the join tree top-down: * For each join node n1 do: * For each expression expr (join and where) at the node n1 * For each join node n2 descended from n1 do: * If expr rejects nulls introduced by n2 inner table, then * - convert LEFT OUTER n2 to an INNER join. * - convert FULL OUTER n2 to RIGHT OUTER join * If expr rejects nulls introduced by n2 outer table, then * - convert RIGHT OUTER n2 to an INNER join.
* - convert FULL OUTER n2 to LEFT OUTER join */ private static void simplifyOuterJoin(BranchNode joinTree) { assert(joinTree != null); List<AbstractExpression> exprs = new ArrayList<>(); JoinNode leftNode = joinTree.getLeftNode(); JoinNode rightNode = joinTree.getRightNode(); // For the top level node only, // WHERE expressions need to be evaluated for NULL-rejection if (leftNode.getWhereExpression() != null) { exprs.add(leftNode.getWhereExpression()); } if (rightNode.getWhereExpression() != null) { exprs.add(rightNode.getWhereExpression()); } simplifyOuterJoinRecursively(joinTree, exprs); } private static void simplifyOuterJoinRecursively(BranchNode joinNode, List<AbstractExpression> exprs) { assert (joinNode != null); JoinNode leftNode = joinNode.getLeftNode(); JoinNode rightNode = joinNode.getRightNode(); if (joinNode.getJoinType() == JoinType.LEFT) { // Get all the inner tables underneath this node and // see if the expression is NULL-rejecting for any of them if (isNullRejecting(rightNode.generateTableJoinOrder(), exprs)) { joinNode.setJoinType(JoinType.INNER); } } else if (joinNode.getJoinType() == JoinType.RIGHT) { // Get all the outer tables underneath this node and // see if the expression is NULL-rejecting for any of them if (isNullRejecting(leftNode.generateTableJoinOrder(), exprs)) { joinNode.setJoinType(JoinType.INNER); } } else if (joinNode.getJoinType() == JoinType.FULL) { // Get all the outer tables underneath this node and // see if the expression is NULL-rejecting for any of them if (isNullRejecting(leftNode.generateTableJoinOrder(), exprs)) { joinNode.setJoinType(JoinType.LEFT); } // Get all the inner tables underneath this node and // see if the expression is NULL-rejecting for any of them if (isNullRejecting(rightNode.generateTableJoinOrder(), exprs)) { if (JoinType.FULL == joinNode.getJoinType()) { joinNode.setJoinType(JoinType.RIGHT); } else { // LEFT join was just removed joinNode.setJoinType(JoinType.INNER); } } } // Now add this node expression to the list and descend. // The WHERE expressions can be combined with the input list // because they simplify both inner and outer nodes. if (leftNode.getWhereExpression() != null) { exprs.add(leftNode.getWhereExpression()); } if (rightNode.getWhereExpression() != null) { exprs.add(rightNode.getWhereExpression()); } // The JOIN expressions (ON) are only applicable // to the INNER node of an outer join. 
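// Illustrative example: for "t1 LEFT JOIN t2 ON t1.x = t2.x", the ON // expression may be used to simplify joins under t2 (the inner side), // but not under t1 (the outer side), since ON conditions never filter // outer-side rows.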
List<AbstractExpression> exprsForInnerNode = new ArrayList<>(exprs); if (leftNode.getJoinExpression() != null) { exprsForInnerNode.add(leftNode.getJoinExpression()); } if (rightNode.getJoinExpression() != null) { exprsForInnerNode.add(rightNode.getJoinExpression()); } List<AbstractExpression> leftNodeExprs; List<AbstractExpression> rightNodeExprs; switch (joinNode.getJoinType()) { case INNER: leftNodeExprs = exprsForInnerNode; rightNodeExprs = exprsForInnerNode; break; case LEFT: leftNodeExprs = exprs; rightNodeExprs = exprsForInnerNode; break; case RIGHT: leftNodeExprs = exprsForInnerNode; rightNodeExprs = exprs; break; case FULL: leftNodeExprs = exprs; rightNodeExprs = exprs; break; default: // shouldn't get here leftNodeExprs = null; rightNodeExprs = null; assert(false); } if (leftNode instanceof BranchNode) { simplifyOuterJoinRecursively((BranchNode)leftNode, leftNodeExprs); } if (rightNode instanceof BranchNode) { simplifyOuterJoinRecursively((BranchNode)rightNode, rightNodeExprs); } } /** * Verify if an expression from the input list is NULL-rejecting * for any of the tables from the list * @param tableAliases list of tables * @param exprs list of expressions * @return TRUE if there is a NULL-rejecting expression */ private static boolean isNullRejecting(Collection<String> tableAliases, List<AbstractExpression> exprs) { for (AbstractExpression expr : exprs) { for (String tableAlias : tableAliases) { if (ExpressionUtil.isNullRejectingExpression(expr, tableAlias)) { // We are done at this level return true; } } } return false; } }