QueryPlanner.java example

Explorer
voltdb-master
/* This file is part of VoltDB.
 * Copyright (C) 2008-2017 VoltDB Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with VoltDB.  If not, see <http://www.gnu.org/licenses/>.
 */

package org.voltdb.planner;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.hsqldb_voltpatches.HSQLInterface;
import org.hsqldb_voltpatches.HSQLInterface.HSQLParseException;
import org.hsqldb_voltpatches.VoltXMLElement;
import org.voltdb.ParameterSet;
import org.voltdb.VoltType;
import org.voltdb.catalog.Constraint;
import org.voltdb.catalog.Database;
import org.voltdb.catalog.Table;
import org.voltdb.compiler.DatabaseEstimates;
import org.voltdb.compiler.DeterminismMode;
import org.voltdb.compiler.ScalarValueHints;
import org.voltdb.plannodes.AbstractPlanNode;
import org.voltdb.plannodes.AbstractReceivePlanNode;
import org.voltdb.plannodes.InsertPlanNode;
import org.voltdb.plannodes.SchemaColumn;
import org.voltdb.plannodes.SendPlanNode;
import org.voltdb.types.ConstraintType;
import org.voltdb.types.PlanNodeType;

/**
 * The query planner accepts catalog data, SQL statements from the catalog, then
 * outputs the plan with the lowest cost according to the cost model.
 *
 */
public class QueryPlanner {
    String m_sql;
    String m_stmtName;
    String m_procName;
    HSQLInterface m_HSQL;
    DatabaseEstimates m_estimates;
    Database m_db;
    String m_recentErrorMsg;
    StatementPartitioning m_partitioning;
    int m_maxTablesPerJoin;
    AbstractCostModel m_costModel;
    ScalarValueHints[] m_paramHints;
    String m_joinOrder;
    DeterminismMode m_detMode;
    PlanSelector m_planSelector;
    boolean m_isUpsert;

    // generated by parse(..)
    VoltXMLElement m_xmlSQL = null;
    ParameterizationInfo m_paramzInfo = null;

    // generated by parameterize(...)
    int m_adhocUserParamsCount = 0;

    // generated by plan(...)
    boolean m_hasExceptionWhenParameterized = false;

    static boolean m_debuggingStaticModeToRetryOnError = true;

    public static String UPSERT_TAG = "isUpsert";

    /**
     * Initialize planner with physical schema info and a reference to HSQLDB parser.
     *
     * @param sql Literal SQL statement to parse
     * @param stmtName The name of the statement for logging/debugging
     * @param procName The name of the proc for logging/debugging
     * @param catalogDb Catalog info about schema, metadata and procedures.
     * @param partitioning Describes the specified and inferred partition context.
     * @param HSQL HSQLInterface pointer used for parsing SQL into XML.
     * @param estimates
     * @param suppressDebugOutput
     * @param maxTablesPerJoin
     * @param costModel The current cost model to evaluate plans with.
     * @param paramHints
     * @param joinOrder
     */
    public QueryPlanner(String sql,
                        String stmtName,
                        String procName,
                        Database catalogDb,
                        StatementPartitioning partitioning,
                        HSQLInterface HSQL,
                        DatabaseEstimates estimates,
                        boolean suppressDebugOutput,
                        int maxTablesPerJoin,
                        AbstractCostModel costModel,
                        ScalarValueHints[] paramHints,
                        String joinOrder,
                        DeterminismMode detMode) {
        assert(sql != null);
        assert(stmtName != null);
        assert(procName != null);
        assert(HSQL != null);
        assert(catalogDb != null);
        assert(costModel != null);
        assert(detMode != null);

        m_sql = sql;
        m_stmtName = stmtName;
        m_procName = procName;
        m_HSQL = HSQL;
        m_db = catalogDb;
        m_estimates = estimates;
        m_partitioning = partitioning;
        m_maxTablesPerJoin = maxTablesPerJoin;
        m_costModel = costModel;
        m_paramHints = paramHints;
        m_joinOrder = joinOrder;
        m_detMode = detMode;
        m_planSelector = new PlanSelector(m_estimates, m_stmtName,
                m_procName, m_sql, m_costModel, m_paramHints, m_detMode,
                suppressDebugOutput);
        m_isUpsert = false;
    }

    /**
     * Parse a SQL literal statement into an unplanned, intermediate representation.
     * This is normally followed by a call to
     * {@link this#plan(AbstractCostModel, String, String, String, String, int, ScalarValueHints[]) },
     * but splitting these two affords an opportunity to check a cache for a plan matching
     * the auto-parameterized parsed statement.
     */
    public void parse() throws PlanningErrorException {
        // reset any error message
        m_recentErrorMsg = null;

        // Reset plan node ids to start at 1 for this plan
        AbstractPlanNode.resetPlanNodeIds();

        // Fast track the parsing for the pseudo-statement generated by the
        // @SwapTables system stored procedure.
        // If this functionality turns out to fall within the behavior of a
        // more general SWAP TABLE statement supported in a (future) SQL
        // parser, the system stored procedure can fall back to using it.
        // Extending the HSQL parser NOW to support a "SWAP TABLE" statement
        // JUST for this special case, but keeping that support disabled for
        // the normal @AdHoc and compiled stored procedures code paths would
        // be overkill at this point.
        // So we settle for this early return of a "forged" parser result.
        if (m_sql.startsWith("@SwapTables ")) {
            String[] swapTableArgs = m_sql.split(" ");
            m_xmlSQL = forgeVoltXMLForSwapTables(
                    swapTableArgs[1], swapTableArgs[2]);
            m_planSelector.outputCompiledStatement(m_xmlSQL);
            return;
        }

        // determine the type of the query
        //
        // (Hmmm...  seems like this pre-processing of the SQL text
        // and subsequent placement of UPSERT_TAG should be pushed down
        // into getXMLCompiledStatement)
        m_sql = m_sql.trim();
        if (m_sql.length() > 6 && m_sql.substring(0,6).toUpperCase().startsWith("UPSERT")) { // ENG-7395
            m_isUpsert = true;
            m_sql = "INSERT" + m_sql.substring(6);
        }

        // use HSQLDB to get XML that describes the semantics of the statement
        // this is much easier to parse than SQL and is checked against the catalog
        try {
            m_xmlSQL = m_HSQL.getXMLCompiledStatement(m_sql);
            //* enable to debug */ System.out.println("DEBUG: HSQL parsed:" + m_xmlSQL);
        }
        catch (HSQLParseException e) {
            // XXXLOG probably want a real log message here
            throw new PlanningErrorException(e.getMessage());
        }

        if (m_isUpsert) {
            assert(m_xmlSQL.name.equalsIgnoreCase("INSERT"));
            // for AdHoc cache distinguish purpose which is based on the XML
            m_xmlSQL.attributes.put(UPSERT_TAG, "true");
        }

        m_planSelector.outputCompiledStatement(m_xmlSQL);
    }

    // Generate a Volt XML tree for a hypothetical SWAP TABLE statement.
    // This can take any form that makes it easy for the planner to turn
    // it into an AbstractParsedStmt and then an AbstractPlanNode (tree).
    private VoltXMLElement forgeVoltXMLForSwapTables(
            String theTable, String otherTable) {
        if (theTable.equalsIgnoreCase(otherTable)) {
            throw new PlanningErrorException("Can not swap table \"" + theTable + "\" with itself.");
        }
        VoltXMLElement result = new VoltXMLElement("swap");
        result.attributes.put("thetable", theTable.toUpperCase());
        result.attributes.put("othertable", otherTable.toUpperCase());
        return result;
    }

    /**
     * This method behaves similarly to parse(), but allows the caller to pass in XML
     * to avoid re-parsing SQL text that has already gone through HSQL.
     *
     * @param  xmlSql  XML produced by previous invocation of HSQL
     * */
    public void parseFromXml(VoltXMLElement xmlSQL) {
        m_recentErrorMsg = null;
        m_xmlSQL = xmlSQL;
        if (m_xmlSQL.attributes.containsKey(UPSERT_TAG)) {
            m_isUpsert = true;
        }

        m_planSelector.outputCompiledStatement(m_xmlSQL);
    }

    /**
     * Auto-parameterize all of the literals in the parsed SQL statement.
     *
     * @return An opaque token representing the parsed statement with (possibly) parameterization.
     */
    public String parameterize() {
        m_paramzInfo = ParameterizationInfo.parameterize(m_xmlSQL);

        Set<Integer> paramIds = new HashSet<Integer>();
        ParameterizationInfo.findUserParametersRecursively(m_xmlSQL, paramIds);
        m_adhocUserParamsCount = paramIds.size();

        // skip plans with pre-existing parameters and plans that don't parameterize
        // assume a user knows how to cache/optimize these
        if (m_paramzInfo != null) {
            // if requested output the second version of the parsed plan
            m_planSelector.outputParameterizedCompiledStatement(m_paramzInfo.parameterizedXmlSQL);
            return m_paramzInfo.parameterizedXmlSQL.toMinString();
        }

        // fallback when parameterization is
        return m_xmlSQL.toMinString();
    }

    public String[] extractedParamLiteralValues() {
        if (m_paramzInfo == null) {
            return null;
        }
        return m_paramzInfo.paramLiteralValues;
    }

    public ParameterSet extractedParamValues(VoltType[] parameterTypes) throws Exception {
        if (m_paramzInfo == null) {
            return null;
        }
        return m_paramzInfo.extractedParamValues(parameterTypes);
    }

    /**
     * Get the best plan for the SQL statement given, assuming the given costModel.
     *
     * @return The best plan found for the SQL statement.
     * @throws PlanningErrorException on failure.
     */
    public CompiledPlan plan() throws PlanningErrorException {
        // reset any error message
        m_recentErrorMsg = null;

        // what's going to happen next:
        //  If a parameterized statement exists, try to make a plan with it
        //  On success return the plan.
        //  On failure, try the plan again without parameterization

        if (m_paramzInfo != null) {
            try {
                // compile the plan with new parameters
                CompiledPlan plan = compileFromXML(m_paramzInfo.parameterizedXmlSQL,
                                                   m_paramzInfo.paramLiteralValues);
                if (plan != null) {
                    if (m_isUpsert) {
                        replacePlanForUpsert(plan);
                    }
                    if (plan.extractParamValues(m_paramzInfo)) {
                        return plan;
                    }
                }
                else if (m_debuggingStaticModeToRetryOnError) {
                    compileFromXML(m_paramzInfo.parameterizedXmlSQL,
                                   m_paramzInfo.paramLiteralValues);
                }
                // fall through to try replan without parameterization.
            }
            catch (Exception | StackOverflowError e) {
                // ignore any errors planning with parameters
                // fall through to re-planning without them
                m_hasExceptionWhenParameterized = true;

                // note, expect real planning errors ignored here to be thrown again below
                m_recentErrorMsg = null;
                m_partitioning.resetAnalysisState();
            }
        }

        // if parameterization isn't requested or if it failed, plan here
        CompiledPlan plan = compileFromXML(m_xmlSQL, null);
        if (plan == null) {
            if (m_debuggingStaticModeToRetryOnError) {
                plan = compileFromXML(m_xmlSQL, null);
            }
            throw new PlanningErrorException(m_recentErrorMsg);
        }

        if (m_isUpsert) {
            replacePlanForUpsert(plan);
        }
        return plan;
    }

    private static void replacePlanForUpsert (CompiledPlan plan) {
        plan.rootPlanGraph = replaceInsertPlanNodeWithUpsert(plan.rootPlanGraph);
        plan.subPlanGraph  = replaceInsertPlanNodeWithUpsert(plan.subPlanGraph);

        if (plan.explainedPlan != null) {
            plan.explainedPlan = plan.explainedPlan.replace("INSERT", "UPSERT");
        }
    }

    /**
     * @return Was this statement planned with auto-parameterization?
     */
    public boolean compiledAsParameterizedPlan() {
        return m_paramzInfo != null;
    }

    public int getAdhocUserParamsCount() {
        return m_adhocUserParamsCount;
    }

    public boolean wasBadPameterized() {
        return m_hasExceptionWhenParameterized;
    }

    /**
     * Find the best plan given the VoltXMLElement.  By best here we mean the plan
     * which is scored the best according to our plan metric scoring.  The plan
     * metric scoring takes into account join order and index use, but it does
     * not take into account the output schema.  Consequently, we don't compute the
     * output schema for the plan nodes until after the best plan is discovered.
     *
     * The order here is:
     * <ol>
     * <li>
     *   Parse the VoltXMLElement to create an AbstractParsedStatement.  This has
     *   a second effect of loading lists of join orders and access paths for planning.
     *   For us, and access path is a way of scanning something scannable.  It's a generalization
     *   of the notion of scanning a table or an index.
     * </li>
     * <li>
     *   Create a PlanAssembler, and ask it for the best cost plan.  This uses the
     *   side data created by the parser in the previous step.
     * </li>
     * <li>
     *   If the plan is read only, slap a SendPlanNode on the front.  Presumably
     *   an insert, delete or upsert will have added the SendPlanNode into the plan node tree already.
     * </li>
     * <li>
     *   Compute the output schema.  This computes the output schema for each
     *   node recursively, using a node specific method.
     * </li>
     * <li>
     *   Resolve the column indices.  This makes sure that the indices of all
     *   TVEs in the output columns refer to the right input columns.
     * </li>
     * <li>
     *   Do some final cleaning up and verifying of the plan.  For example,
     *   We renumber the nodes starting at 1.
     * </li>
     * </ol>
     *
     * @param xmlSQL
     * @param paramValues
     * @return
     */
    private CompiledPlan compileFromXML(VoltXMLElement xmlSQL, String[] paramValues) {
        // Get a parsed statement from the xml
        // The callers of compilePlan are ready to catch any exceptions thrown here.
        AbstractParsedStmt parsedStmt = AbstractParsedStmt.parse(m_sql, xmlSQL, paramValues, m_db, m_joinOrder);
        if (parsedStmt == null) {
            m_recentErrorMsg = "Failed to parse SQL statement: " + getOriginalSql();
            return null;
        }

        if (m_isUpsert) {
            // no insert/upsert with joins
            if (parsedStmt.m_tableList.size() != 1) {
                m_recentErrorMsg = "UPSERT is supported only with one single table: " + getOriginalSql();
                return null;
            }

            Table tb = parsedStmt.m_tableList.get(0);
            Constraint pkey = null;
            for (Constraint ct: tb.getConstraints()) {
                if (ct.getType() == ConstraintType.PRIMARY_KEY.getValue()) {
                    pkey = ct;
                    break;
                }
            }

            if (pkey == null) {
                m_recentErrorMsg = "Unsupported UPSERT table without primary key: " + getOriginalSql();
                return null;
            }
        }

        m_planSelector.outputParsedStatement(parsedStmt);

        // Init Assembler. Each plan assembler requires a new instance of the PlanSelector
        // to keep track of the best plan
        PlanAssembler assembler = new PlanAssembler(m_db, m_partitioning,
                (PlanSelector) m_planSelector.clone());
        // find the plan with minimal cost
        CompiledPlan bestPlan = assembler.getBestCostPlan(parsedStmt);

        // This processing of bestPlan outside/after getBestCostPlan
        // allows getBestCostPlan to be called both here and
        // in PlanAssembler.getNextUnion on each branch of a union.

        // make sure we got a winner
        if (bestPlan == null) {
            if (m_debuggingStaticModeToRetryOnError) {
                assembler.getBestCostPlan(parsedStmt);
            }
            m_recentErrorMsg = assembler.getErrorMessage();
            if (m_recentErrorMsg == null) {
                m_recentErrorMsg = "Unable to plan for statement. Error unknown.";
            }
            return null;
        }

        if (bestPlan.isReadOnly()) {
            SendPlanNode sendNode = new SendPlanNode();
            // connect the nodes to build the graph
            sendNode.addAndLinkChild(bestPlan.rootPlanGraph);
            // this plan is final, generate schema and resolve all the column index references
            bestPlan.rootPlanGraph = sendNode;
        }

        // Execute the generateOutputSchema and resolveColumnIndexes once for the best plan
        bestPlan.rootPlanGraph.generateOutputSchema(m_db);
        bestPlan.rootPlanGraph.resolveColumnIndexes();

        if (parsedStmt instanceof ParsedSelectStmt) {
            List<SchemaColumn> columns = bestPlan.rootPlanGraph.getOutputSchema().getColumns();
            ((ParsedSelectStmt)parsedStmt).checkPlanColumnMatch(columns);
        }

        // Output the best plan debug info
        assembler.finalizeBestCostPlan();

        // reset all the plan node ids for a given plan
        // this makes the ids deterministic
        bestPlan.resetPlanNodeIds(1);

        // split up the plan everywhere we see send/receive into multiple plan fragments
        List<AbstractPlanNode> receives = bestPlan.rootPlanGraph.findAllNodesOfClass(AbstractReceivePlanNode.class);
        if (receives.size() > 1) {
            // Have too many receive node for two fragment plan limit
            m_recentErrorMsg = "This join of multiple partitioned tables is too complex. "
                    + "Consider simplifying its subqueries: " + getOriginalSql();
            return null;
        }

        /*/ enable for debug ...
        if (receives.size() > 1) {
            System.out.println(plan.rootPlanGraph.toExplainPlanString());
        }
        // ... enable for debug */
        if (receives.size() == 1) {
            AbstractReceivePlanNode recvNode = (AbstractReceivePlanNode) receives.get(0);
            fragmentize(bestPlan, recvNode);
        }

        return bestPlan;
    }

    private static void fragmentize(CompiledPlan plan, AbstractReceivePlanNode recvNode) {
        assert(recvNode.getChildCount() == 1);
        AbstractPlanNode childNode = recvNode.getChild(0);
        assert(childNode instanceof SendPlanNode);
        SendPlanNode sendNode = (SendPlanNode) childNode;

        // disconnect the send and receive nodes
        sendNode.clearParents();
        recvNode.clearChildren();

        plan.subPlanGraph = sendNode;
        return;
    }

    public static AbstractPlanNode replaceInsertPlanNodeWithUpsert(AbstractPlanNode root) {
        if (root == null) {
            return null;
        }

        List<AbstractPlanNode> inserts = root.findAllNodesOfType(PlanNodeType.INSERT);
        if (inserts.size() == 1) {
            InsertPlanNode insertNode = (InsertPlanNode)inserts.get(0);
            insertNode.setUpsert(true);
        }

        return root;
    }

    private String getOriginalSql() {
        if (! m_isUpsert) {
            return m_sql;
        }

        return "UPSERT" + m_sql.substring(6);
    }
}