/* This file is part of VoltDB. * Copyright (C) 2008-2010 VoltDB L.L.C. * * VoltDB is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * VoltDB is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with VoltDB. If not, see <http://www.gnu.org/licenses/>. */ package org.voltdb.planner; import java.io.PrintStream; import java.util.HashMap; import java.util.List; import java.util.Map.Entry; import java.util.Set; import org.apache.log4j.Logger; import org.hsqldb.HSQLInterface; import org.hsqldb.HSQLInterface.HSQLParseException; import org.json.JSONException; import org.json.JSONObject; import org.voltdb.catalog.Cluster; import org.voltdb.catalog.Database; import org.voltdb.compiler.DatabaseEstimates; import org.voltdb.compiler.ScalarValueHints; import org.voltdb.planner.microoptimizations.MicroOptimizationRunner; import org.voltdb.plannodes.AbstractPlanNode; import org.voltdb.plannodes.PlanNodeList; import org.voltdb.utils.BuildDirectoryUtils; import edu.brown.logging.LoggerUtil.LoggerBoolean; /** * The query planner accepts catalog data, SQL statements from the catalog, then * outputs the plan with the lowest cost according to the cost model. 
 *
 */
public class QueryPlanner {
    private static final Logger LOG = Logger.getLogger(QueryPlanner.class);
    private static final LoggerBoolean debug = new LoggerBoolean();
    private static final LoggerBoolean trace = new LoggerBoolean();

    // One-shot flag so the TPC-E BrokerVolume "breakout" warning in compilePlan()
    // is only logged once per JVM. NOTE(review): static, non-volatile, and
    // unsynchronized -- presumably planning runs single-threaded; confirm.
    private static boolean TPCE_WARNING = false;

    // Generates candidate plans for each parsed statement.
    PlanAssembler m_assembler;
    // HSQLDB frontend used to compile SQL text into an XML parse tree.
    HSQLInterface m_HSQL;
    // Table/cluster size estimates fed into per-plan cost statistics.
    DatabaseEstimates m_estimates;
    // Catalog info about the physical layout of the cluster.
    Cluster m_cluster;
    // Catalog info about schema, metadata and procedures.
    Database m_db;
    // Message for the most recent planning failure (null when the last call succeeded).
    String m_recentErrorMsg;
    // Exception behind the most recent planning failure, when one was thrown.
    Throwable m_recentError;
    // Whether the plan assembler should allocate globally-unique plan node ids.
    boolean m_useGlobalIds;
    // When true, suppress all the debug-output files written during planning.
    boolean m_quietPlanner;
    // Shared context that owns the plan-column guid registry.
    final PlannerContext m_context;

    /**
     * Initialize planner with physical schema info and a reference to HSQLDB parser.
     *
     * @param catalogCluster Catalog info about the physical layout of the cluster.
     * @param catalogDb Catalog info about schema, metadata and procedures.
     * @param HSQL HSQLInterface pointer used for parsing SQL into XML.
     * @param estimates Database size estimates used when costing candidate plans.
     * @param useGlobalIds Whether the assembler should use globally-unique plan node ids.
     * @param suppressDebugOutput When true, skip writing per-statement debug files to disk.
     */
    public QueryPlanner(Cluster catalogCluster, Database catalogDb,
                        HSQLInterface HSQL, DatabaseEstimates estimates,
                        boolean useGlobalIds, boolean suppressDebugOutput) {
        assert(HSQL != null);
        assert(catalogCluster != null);
        assert(catalogDb != null);

        m_HSQL = HSQL;
        // PAVLO: We have to use the global singleton in order to get the same
        // guids across queries (rather than a per-planner "new PlannerContext()").
        m_context = PlannerContext.singleton(); // new PlannerContext();
        m_assembler = new PlanAssembler(m_context, catalogCluster, catalogDb);
        m_db = catalogDb;
        m_cluster = catalogCluster;
        m_estimates = estimates;
        m_useGlobalIds = useGlobalIds;
        m_quietPlanner = suppressDebugOutput;
    }

    /**
     * Get the best plan for the SQL statement given, assuming the given costModel.
     *
     * @param costModel The current cost model to evaluate plans with.
     * @param sql SQL stmt text to be planned.
     * @param stmtName The name of the sql statement to be planned.
     * @param procName The name of the procedure containing the sql statement to be planned.
     * @param singlePartition Is the stmt single-partition?
     * @param paramHints Optional scalar hints for parameter values used when estimating costs.
     * @return The best plan found for the SQL statement or null if none can be found
     *         (in which case {@link #getErrorMessage()} describes the failure).
     */
    public CompiledPlan compilePlan(AbstractCostModel costModel,
                                    String sql,
                                    String stmtName,
                                    String procName,
                                    boolean singlePartition,
                                    ScalarValueHints[] paramHints) {
        assert(costModel != null);
        assert(sql != null);
        assert(stmtName != null);
        assert(procName != null);

        // reset any error message
        m_recentErrorMsg = null;

        // set the usage of global ids in the plan assembler
        // NOTE(review): this is a static (process-wide) setting, not per-planner.
        PlanAssembler.setUseGlobalIds(m_useGlobalIds);

        // use HSQLDB to get XML that describes the semantics of the statement
        // this is much easier to parse than SQL and is checked against the catalog
        String xmlSQL = null;
        try {
            xmlSQL = m_HSQL.getXMLCompiledStatement(sql);
        } catch (HSQLParseException e) {
            if (debug.val)
                LOG.warn(String.format("Failed to retrieve compiled XML for %s.%s\n%s", procName, stmtName, sql));
            m_recentErrorMsg = e.getMessage();
            return null;
        }

        if (!m_quietPlanner) {
            // output the xml from hsql to disk for debugging
            PrintStream xmlDebugOut =
                BuildDirectoryUtils.getDebugOutputPrintStream("statement-hsql-xml", procName + "_" + stmtName + ".xml");
            xmlDebugOut.println(xmlSQL);
            xmlDebugOut.close();
        }

        // get a parsed statement from the xml
        AbstractParsedStmt initialParsedStmt = null;
        try {
            initialParsedStmt = AbstractParsedStmt.parse(sql, xmlSQL, m_db);
        } catch (Throwable e) {
            LOG.error(String.format("Failed to parse SQL for %s.%s", procName, stmtName), e);
            m_recentErrorMsg = e.getMessage();
            return null;
        }
        if (initialParsedStmt == null) {
            m_recentErrorMsg = "Failed to parse SQL statement: " + sql;
            return null;
        }

        if (!m_quietPlanner) {
            // output a description of the parsed stmt
            PrintStream parsedDebugOut =
                BuildDirectoryUtils.getDebugOutputPrintStream("statement-parsed", procName + "_" + stmtName + ".txt");
            parsedDebugOut.println(initialParsedStmt.toString());
            parsedDebugOut.close();
        }

        // get ready to find the plan with minimal cost
        CompiledPlan rawplan = null;
        CompiledPlan bestPlan = null;
        double minCost = Double.MAX_VALUE;
        // textual and DOT descriptions of every candidate plan, keyed by a
        // monotonically increasing index (see 'i' below), kept for debug output
        HashMap<String, String> planOutputs = new HashMap<String, String>();
        HashMap<String, String> dotPlanOutputs = new HashMap<String, String>();
        String winnerName = "";

        // index of the currently being "costed" plan
        int i = 0;
        // stats of the most recently costed plan; the last value is also what
        // gets written to the statement-stats debug file below
        PlanStatistics stats = null;
        // when non-null, caps how many candidate plans we will cost (TPC-E hack)
        Integer tpce_limit = null;

        // iterate though all the variations on the abstract parsed stmts
        for (AbstractParsedStmt parsedStmt : ExpressionEquivalenceProcessor.getEquivalentStmts(initialParsedStmt)) {
            // ---------------------------------------------------------------
            // PAVLO TERRIBLE TERRIBLE TERRIBLE HACK!!!!
            // If this is the BrokerVolume query in TPC-E, then don't let it go past 25,000 iterations
            // (the planner otherwise gets stuck generating candidate plans forever).
            // Detection is by matching the query's FROM-clause table list in the raw SQL.
            // ---------------------------------------------------------------
            if (parsedStmt.sql.contains("TRADE_REQUEST, SECTOR, INDUSTRY, COMPANY, BROKER, SECURITY")) {
                tpce_limit = 25000;
                LOG.warn("PAVLO: Enabled TPC-E BrokerVolume limit: " + tpce_limit);
            }

            // set up the plan assembler for this particular plan
            m_assembler.setupForNewPlans(parsedStmt, singlePartition);

            // loop over all possible plans
            while (true) {
                try {
                    rawplan = m_assembler.getNextPlan();
                }
                // on exception, set the error message and bail...
                catch (Throwable e) {
                    m_recentError = e;
                    m_recentErrorMsg = e.getMessage();
                    return null;
                }

                // stop this while loop when no more plans are generated
                if (rawplan == null)
                    break;

                // run the set of microoptimizations, which may return many plans (or not)
                List<CompiledPlan> optimizedPlans = MicroOptimizationRunner.applyAll(rawplan);

                // iterate through the subset of plans
                for (CompiledPlan plan : optimizedPlans) {
                    // HACK: There is one query in TPC-E that our planner always chokes on and gets stuck
                    // in an infinite loop. So we'll print an error message and break out.
                    // (tpce_limit is decremented on every candidate plan costed.)
                    if (tpce_limit != null && tpce_limit-- <= 0) {
                        if (TPCE_WARNING == false) {
                            TPCE_WARNING = true;
                            LOG.warn("PAVLO: The TPC-E BrokerVolume BREAKOUT! The legend lives on!!!");
                        }
                        break;
                    }

                    // compute resource usage using the single stats collector
                    stats = new PlanStatistics();
                    AbstractPlanNode planGraph = plan.fragments.get(0).planGraph;

                    // compute statistics about a plan
                    boolean result = planGraph.computeEstimatesRecursively(stats, m_cluster, m_db, m_estimates, paramHints);
                    assert(result);

                    // GENERATE JSON DEBUGGING OUTPUT BEFORE WE CLEAN UP THE PlanColumns
                    // convert a tree into an execution list
                    PlanNodeList nodeList = new PlanNodeList(planGraph);

                    // get the json serialized version of the plan
                    // (per-candidate JSON serialization is currently disabled -- see FIXME below;
                    // only the COST line is written to the per-plan debug file)
                    String json = "";
                    // try {
                    // String crunchJson = nodeList.toJSONString();
                    //System.out.println(crunchJson);
                    //System.out.flush();
                    /* FIXME
                    JSONObject jobj = new JSONObject(crunchJson);
                    json = jobj.toString(4);
                    } catch (JSONException e2) {
                    // Any plan that can't be serialized to JSON to
                    // write to debugging output is also going to fail
                    // to get written to the catalog, to sysprocs, etc.
                    // Just bail.
                    m_recentErrorMsg = "Plan for sql: '" + sql + "' can't be serialized to JSON";
                    return null;
                    } */

                    // compute the cost based on the resources using the current cost model
                    double cost = costModel.getPlanCost(stats);

                    // find the minimum cost plan
                    if (cost < minCost) {
                        minCost = cost;
                        // free the PlanColumns held by the previous best plan
                        // NOTE(review): the second argument appears to be a set of
                        // guids to keep (those shared with the surviving plan) -- confirm
                        // against CompiledPlan.freePlan().
                        if (bestPlan != null) {
                            bestPlan.freePlan(m_context, plan.getColumnGuids());
                        }
                        bestPlan = plan;
                    } else {
                        plan.freePlan(m_context, bestPlan.getColumnGuids());
                    }

                    // output a description of the parsed stmt
                    String filename = String.valueOf(i++);
                    if (bestPlan == plan) winnerName = filename;
                    json = "COST: " + String.valueOf(cost) + "\n" + json;
                    planOutputs.put(filename, json);

                    // create a graph friendly version
                    dotPlanOutputs.put(filename, nodeList.toDOTString("name"));
                }
            }
            // reset the cap so the next equivalent-statement variation gets a fresh budget
            tpce_limit = null;
        }

        // make sure we got a winner
        if (bestPlan == null) {
            m_recentErrorMsg = "Unable to plan for statement. Error unknown.";
            return null;
        }

        // Validate that everything is there: every column guid referenced by the
        // winning plan must still be registered in the shared PlannerContext
        // (i.e. it was not freed by the freePlan() calls above).
        Set<Integer> bestPlan_columns = bestPlan.getColumnGuids();
        for (Integer column_guid : bestPlan_columns) {
            if (m_context.hasColumn(column_guid) == false) {
                m_recentErrorMsg = "Missing column guid " + column_guid;
                return (null);
            }
        } // FOR
        if (debug.val)
            LOG.debug(String.format("All columns are there for %s.%s: %s", procName, stmtName, bestPlan_columns));

        // reset all the plan node ids for a given plan
        bestPlan.resetPlanNodeIds();

        if (!m_quietPlanner) {
            // print all the plans to disk for debugging (winner gets a "WINNER " prefix)
            for (Entry<String, String> output : planOutputs.entrySet()) {
                String filename = output.getKey();
                if (winnerName.equals(filename)) {
                    filename = "WINNER " + filename;
                }
                PrintStream candidatePlanOut =
                    BuildDirectoryUtils.getDebugOutputPrintStream("statement-all-plans/" + procName + "_" + stmtName,
                                                                  filename + ".txt");
                candidatePlanOut.println(output.getValue());
                candidatePlanOut.close();
            }
            // and the DOT graph versions of the same plans
            for (Entry<String, String> output : dotPlanOutputs.entrySet()) {
                String filename = output.getKey();
                if (winnerName.equals(filename)) {
                    filename = "WINNER " + filename;
                }
                PrintStream candidatePlanOut =
                    BuildDirectoryUtils.getDebugOutputPrintStream("statement-all-plans/" + procName + "_" + stmtName,
                                                                  filename + ".dot");
                candidatePlanOut.println(output.getValue());
                candidatePlanOut.close();
            }

            // output the plan statistics to disk for debugging
            // (stats holds whatever plan was costed last, not necessarily the winner)
            PrintStream plansOut =
                BuildDirectoryUtils.getDebugOutputPrintStream("statement-stats", procName + "_" + stmtName + ".txt");
            plansOut.println(stats.toString());
            plansOut.close();
        }

        // PAVLO: Get the full plan json
        AbstractPlanNode root = bestPlan.fragments.get(0).planGraph;
        // String orig_debug = PlanNodeUtil.debug(root);
        assert(root != null);
        String json = null;
        try {
            // round-trip through JSONObject to validate/normalize the serialized plan
            JSONObject jobj = new JSONObject(new PlanNodeList(root).toJSONString());
            json = jobj.toString();
        } catch (JSONException e2) {
            throw new RuntimeException(String.format("Failed to serialize JSON query plan for %s.%s",
                                                     procName, stmtName), e2);
        }
        assert(json != null);

        // split up the plan everywhere we see send/receive into multiple plan fragments
        bestPlan = Fragmentizer.fragmentize(bestPlan, m_db);
        bestPlan.fullplan_json = json;
        if (debug.val)
            LOG.debug(String.format("Stored serialized JSON query plan for %s.%s", procName, stmtName));

        // PAVLO:
        // if (singlePartition == false && procName.equalsIgnoreCase("GetTableCounts") && stmtName.equalsIgnoreCase("HistoryCount")) {
        // System.err.println(sql + "\n+++++++++++++++++++++++++++++++++");
        // // System.err.println("ORIGINAL:\n" + orig_debug + StringUtil.SINGLE_LINE);
        // System.err.println("NEW:");
        // // for (int ii = 0; ii < bestPlan.fragments.size(); ii++) {
        // Fragment f = bestPlan.fragments.get(ii);
        // System.err.println(String.format("Fragment #%02d\n%s\n", ii, PlanNodeUtil.debug(f.planGraph)));
        // }
        // System.err.println(StringUtil.DOUBLE_LINE);
        // System.exit(1);
        // }

        // DTXN/EE can't handle plans that have more than 2 fragments yet.
        // if (bestPlan.fragments.size() > 2) {
        // m_recentErrorMsg = "Unable to plan for statement. Likely statement is "+
        // "joining two partitioned tables in a multi-partition statement. " +
        // "This is not supported at this time.";
        // return null;
        // }

        return bestPlan;
    }

    /** @return the shared PlannerContext that owns this planner's plan columns */
    public PlannerContext getPlannerContext() {
        return m_context;
    }

    /** @return the exception behind the most recent planning failure, or null */
    public Throwable getError() {
        return m_recentError;
    }

    /** @return the error message for the most recent planning failure, or null */
    public String getErrorMessage() {
        return m_recentErrorMsg;
    }
}