/* This file is part of VoltDB. * Copyright (C) 2008-2010 VoltDB L.L.C. * * VoltDB is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * VoltDB is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with VoltDB. If not, see <http://www.gnu.org/licenses/>. */ package org.voltdb.planner; import java.io.PrintStream; import java.util.HashMap; import java.util.List; import java.util.Map.Entry; import java.util.Set; import org.apache.log4j.Logger; import org.hsqldb.HSQLInterface; import org.hsqldb.HSQLInterface.HSQLParseException; import org.json.JSONException; import org.json.JSONObject; import org.voltdb.catalog.Cluster; import org.voltdb.catalog.Database; import org.voltdb.compiler.DatabaseEstimates; import org.voltdb.compiler.ScalarValueHints; import org.voltdb.planner.microoptimizations.MicroOptimizationRunner; import org.voltdb.plannodes.AbstractPlanNode; import org.voltdb.plannodes.PlanNodeList; import org.voltdb.utils.BuildDirectoryUtils; import edu.brown.logging.LoggerUtil.LoggerBoolean; /** * The query planner accepts catalog data, SQL statements from the catalog, then * outputs the plan with the lowest cost according to the cost model. 
 *
 */
public class QueryPlanner {
    private static final Logger LOG = Logger.getLogger(QueryPlanner.class);
    private static final LoggerBoolean debug = new LoggerBoolean();
    private static final LoggerBoolean trace = new LoggerBoolean();

    // One-shot flag so the TPC-E BrokerVolume "breakout" warning in compilePlan()
    // is only logged once per JVM. NOTE(review): static, non-volatile, and
    // unsynchronized -- presumably planning runs single-threaded; confirm.
    private static boolean TPCE_WARNING = false;

    // Generates candidate plans for each parsed statement.
    PlanAssembler m_assembler;
    // HSQLDB frontend used to compile SQL text into an XML parse tree.
    HSQLInterface m_HSQL;
    // Table/cluster size estimates fed into per-plan cost statistics.
    DatabaseEstimates m_estimates;
    // Catalog info about the physical layout of the cluster.
    Cluster m_cluster;
    // Catalog info about schema, metadata and procedures.
    Database m_db;
    // Message for the most recent planning failure (null when the last call succeeded).
    String m_recentErrorMsg;
    // Exception behind the most recent planning failure, when one was thrown.
    Throwable m_recentError;
    // Whether the plan assembler should allocate globally-unique plan node ids.
    boolean m_useGlobalIds;
    // When true, suppress all the debug-output files written during planning.
    boolean m_quietPlanner;
    // Shared context that owns the plan-column guid registry.
    final PlannerContext m_context;

    /**
     * Initialize planner with physical schema info and a reference to HSQLDB parser.
     *
     * @param catalogCluster Catalog info about the physical layout of the cluster.
     * @param catalogDb Catalog info about schema, metadata and procedures.
     * @param HSQL HSQLInterface pointer used for parsing SQL into XML.
     * @param estimates Database size estimates used when costing candidate plans.
     * @param useGlobalIds Whether the assembler should use globally-unique plan node ids.
     * @param suppressDebugOutput When true, skip writing per-statement debug files to disk.
     */
    public QueryPlanner(Cluster catalogCluster, Database catalogDb,
                        HSQLInterface HSQL, DatabaseEstimates estimates,
                        boolean useGlobalIds, boolean suppressDebugOutput) {
        assert(HSQL != null);
        assert(catalogCluster != null);
        assert(catalogDb != null);

        m_HSQL = HSQL;
        // PAVLO: We have to use the global singleton in order to get the same
        // guids across queries (rather than a per-planner "new PlannerContext()").
        m_context = PlannerContext.singleton(); // new PlannerContext();
        m_assembler = new PlanAssembler(m_context, catalogCluster, catalogDb);
        m_db = catalogDb;
        m_cluster = catalogCluster;
        m_estimates = estimates;
        m_useGlobalIds = useGlobalIds;
        m_quietPlanner = suppressDebugOutput;
    }

    /**
     * Get the best plan for the SQL statement given, assuming the given costModel.
     *
     * @param costModel The current cost model to evaluate plans with.
     * @param sql SQL stmt text to be planned.
     * @param stmtName The name of the sql statement to be planned.
     * @param procName The name of the procedure containing the sql statement to be planned.
     * @param singlePartition Is the stmt single-partition?
     * @param paramHints Optional scalar hints for parameter values used when estimating costs.
     * @return The best plan found for the SQL statement or null if none can be found
     *         (in which case {@link #getErrorMessage()} describes the failure).
     */
    public CompiledPlan compilePlan(AbstractCostModel costModel,
                                    String sql,
                                    String stmtName,
                                    String procName,
                                    boolean singlePartition,
                                    ScalarValueHints[] paramHints) {
        assert(costModel != null);
        assert(sql != null);
        assert(stmtName != null);
        assert(procName != null);

        // reset any error message
        m_recentErrorMsg = null;

        // set the usage of global ids in the plan assembler
        // NOTE(review): this is a static (process-wide) setting, not per-planner.
        PlanAssembler.setUseGlobalIds(m_useGlobalIds);

        // use HSQLDB to get XML that describes the semantics of the statement
        // this is much easier to parse than SQL and is checked against the catalog
        String xmlSQL = null;
        try {
            xmlSQL = m_HSQL.getXMLCompiledStatement(sql);
        } catch (HSQLParseException e) {
            if (debug.val)
                LOG.warn(String.format("Failed to retrieve compiled XML for %s.%s\n%s", procName, stmtName, sql));
            m_recentErrorMsg = e.getMessage();
            return null;
        }

        if (!m_quietPlanner) {
            // output the xml from hsql to disk for debugging
            PrintStream xmlDebugOut =
                BuildDirectoryUtils.getDebugOutputPrintStream("statement-hsql-xml", procName + "_" + stmtName + ".xml");
            xmlDebugOut.println(xmlSQL);
            xmlDebugOut.close();
        }

        // get a parsed statement from the xml
        AbstractParsedStmt initialParsedStmt = null;
        try {
            initialParsedStmt = AbstractParsedStmt.parse(sql, xmlSQL, m_db);
        } catch (Throwable e) {
            LOG.error(String.format("Failed to parse SQL for %s.%s", procName, stmtName), e);
            m_recentErrorMsg = e.getMessage();
            return null;
        }
        if (initialParsedStmt == null) {
            m_recentErrorMsg = "Failed to parse SQL statement: " + sql;
            return null;
        }

        if (!m_quietPlanner) {
            // output a description of the parsed stmt
            PrintStream parsedDebugOut =
                BuildDirectoryUtils.getDebugOutputPrintStream("statement-parsed", procName + "_" + stmtName + ".txt");
            parsedDebugOut.println(initialParsedStmt.toString());
            parsedDebugOut.close();
        }

        // get ready to find the plan with minimal cost
        CompiledPlan rawplan = null;
        CompiledPlan bestPlan = null;
        double minCost = Double.MAX_VALUE;
        // textual and DOT descriptions of every candidate plan, keyed by a
        // monotonically increasing index (see 'i' below), kept for debug output
        HashMap<String, String> planOutputs = new HashMap<String, String>();
        HashMap<String, String> dotPlanOutputs = new HashMap<String, String>();
        String winnerName = "";

        // index of the currently being "costed" plan
        int i = 0;
        // stats of the most recently costed plan; the last value is also what
        // gets written to the statement-stats debug file below
        PlanStatistics stats = null;
        // when non-null, caps how many candidate plans we will cost (TPC-E hack)
        Integer tpce_limit = null;

        // iterate though all the variations on the abstract parsed stmts
        for (AbstractParsedStmt parsedStmt : ExpressionEquivalenceProcessor.getEquivalentStmts(initialParsedStmt)) {
            // ---------------------------------------------------------------
            // PAVLO TERRIBLE TERRIBLE TERRIBLE HACK!!!!
            // If this is the BrokerVolume query in TPC-E, then don't let it go past 25,000 iterations
            // (the planner otherwise gets stuck generating candidate plans forever).
            // Detection is by matching the query's FROM-clause table list in the raw SQL.
            // ---------------------------------------------------------------
            if (parsedStmt.sql.contains("TRADE_REQUEST, SECTOR, INDUSTRY, COMPANY, BROKER, SECURITY")) {
                tpce_limit = 25000;
                LOG.warn("PAVLO: Enabled TPC-E BrokerVolume limit: " + tpce_limit);
            }

            // set up the plan assembler for this particular plan
            m_assembler.setupForNewPlans(parsedStmt, singlePartition);

            // loop over all possible plans
            while (true) {
                try {
                    rawplan = m_assembler.getNextPlan();
                }
                // on exception, set the error message and bail...
                catch (Throwable e) {
                    m_recentError = e;
                    m_recentErrorMsg = e.getMessage();
                    return null;
                }

                // stop this while loop when no more plans are generated
                if (rawplan == null)
                    break;

                // run the set of microoptimizations, which may return many plans (or not)
                List<CompiledPlan> optimizedPlans = MicroOptimizationRunner.applyAll(rawplan);

                // iterate through the subset of plans
                for (CompiledPlan plan : optimizedPlans) {
                    // HACK: There is one query in TPC-E that our planner always chokes on and gets stuck
                    // in an infinite loop. So we'll print an error message and break out.
                    // (tpce_limit is decremented on every candidate plan costed.)
                    if (tpce_limit != null && tpce_limit-- <= 0) {
                        if (TPCE_WARNING == false) {
                            TPCE_WARNING = true;
                            LOG.warn("PAVLO: The TPC-E BrokerVolume BREAKOUT! The legend lives on!!!");
                        }
                        break;
                    }

                    // compute resource usage using the single stats collector
                    stats = new PlanStatistics();
                    AbstractPlanNode planGraph = plan.fragments.get(0).planGraph;

                    // compute statistics about a plan
                    boolean result = planGraph.computeEstimatesRecursively(stats, m_cluster, m_db, m_estimates, paramHints);
                    assert(result);

                    // GENERATE JSON DEBUGGING OUTPUT BEFORE WE CLEAN UP THE PlanColumns
                    // convert a tree into an execution list
                    PlanNodeList nodeList = new PlanNodeList(planGraph);

                    // get the json serialized version of the plan
                    // (per-candidate JSON serialization is currently disabled -- see FIXME below;
                    // only the COST line is written to the per-plan debug file)
                    String json = "";
                    // try {
                    // String crunchJson = nodeList.toJSONString();
                    //System.out.println(crunchJson);
                    //System.out.flush();
                    /* FIXME
                    JSONObject jobj = new JSONObject(crunchJson);
                    json = jobj.toString(4);
                    } catch (JSONException e2) {
                    // Any plan that can't be serialized to JSON to
                    // write to debugging output is also going to fail
                    // to get written to the catalog, to sysprocs, etc.
                    // Just bail.
                    m_recentErrorMsg = "Plan for sql: '" + sql + "' can't be serialized to JSON";
                    return null;
                    } */

                    // compute the cost based on the resources using the current cost model
                    double cost = costModel.getPlanCost(stats);

                    // find the minimum cost plan
                    if (cost < minCost) {
                        minCost = cost;
                        // free the PlanColumns held by the previous best plan
                        // NOTE(review): the second argument appears to be a set of
                        // guids to keep (those shared with the surviving plan) -- confirm
                        // against CompiledPlan.freePlan().
                        if (bestPlan != null) {
                            bestPlan.freePlan(m_context, plan.getColumnGuids());
                        }
                        bestPlan = plan;
                    } else {
                        plan.freePlan(m_context, bestPlan.getColumnGuids());
                    }

                    // output a description of the parsed stmt
                    String filename = String.valueOf(i++);
                    if (bestPlan == plan) winnerName = filename;
                    json = "COST: " + String.valueOf(cost) + "\n" + json;
                    planOutputs.put(filename, json);

                    // create a graph friendly version
                    dotPlanOutputs.put(filename, nodeList.toDOTString("name"));
                }
            }
            // reset the cap so the next equivalent-statement variation gets a fresh budget
            tpce_limit = null;
        }

        // make sure we got a winner
        if (bestPlan == null) {
            m_recentErrorMsg = "Unable to plan for statement. Error unknown.";
            return null;
        }

        // Validate that everything is there: every column guid referenced by the
        // winning plan must still be registered in the shared PlannerContext
        // (i.e. it was not freed by the freePlan() calls above).
        Set<Integer> bestPlan_columns = bestPlan.getColumnGuids();
        for (Integer column_guid : bestPlan_columns) {
            if (m_context.hasColumn(column_guid) == false) {
                m_recentErrorMsg = "Missing column guid " + column_guid;
                return (null);
            }
        } // FOR
        if (debug.val)
            LOG.debug(String.format("All columns are there for %s.%s: %s", procName, stmtName, bestPlan_columns));

        // reset all the plan node ids for a given plan
        bestPlan.resetPlanNodeIds();

        if (!m_quietPlanner) {
            // print all the plans to disk for debugging (winner gets a "WINNER " prefix)
            for (Entry<String, String> output : planOutputs.entrySet()) {
                String filename = output.getKey();
                if (winnerName.equals(filename)) {
                    filename = "WINNER " + filename;
                }
                PrintStream candidatePlanOut =
                    BuildDirectoryUtils.getDebugOutputPrintStream("statement-all-plans/" + procName + "_" + stmtName,
                                                                  filename + ".txt");
                candidatePlanOut.println(output.getValue());
                candidatePlanOut.close();
            }
            // and the DOT graph versions of the same plans
            for (Entry<String, String> output : dotPlanOutputs.entrySet()) {
                String filename = output.getKey();
                if (winnerName.equals(filename)) {
                    filename = "WINNER " + filename;
                }
                PrintStream candidatePlanOut =
                    BuildDirectoryUtils.getDebugOutputPrintStream("statement-all-plans/" + procName + "_" + stmtName,
                                                                  filename + ".dot");
                candidatePlanOut.println(output.getValue());
                candidatePlanOut.close();
            }

            // output the plan statistics to disk for debugging
            // (stats holds whatever plan was costed last, not necessarily the winner)
            PrintStream plansOut =
                BuildDirectoryUtils.getDebugOutputPrintStream("statement-stats", procName + "_" + stmtName + ".txt");
            plansOut.println(stats.toString());
            plansOut.close();
        }

        // PAVLO: Get the full plan json
        AbstractPlanNode root = bestPlan.fragments.get(0).planGraph;
        // String orig_debug = PlanNodeUtil.debug(root);
        assert(root != null);
        String json = null;
        try {
            // round-trip through JSONObject to validate/normalize the serialized plan
            JSONObject jobj = new JSONObject(new PlanNodeList(root).toJSONString());
            json = jobj.toString();
        } catch (JSONException e2) {
            throw new RuntimeException(String.format("Failed to serialize JSON query plan for %s.%s",
                                                     procName, stmtName), e2);
        }
        assert(json != null);

        // split up the plan everywhere we see send/receive into multiple plan fragments
        bestPlan = Fragmentizer.fragmentize(bestPlan, m_db);
        bestPlan.fullplan_json = json;
        if (debug.val)
            LOG.debug(String.format("Stored serialized JSON query plan for %s.%s", procName, stmtName));

        // PAVLO:
        // if (singlePartition == false && procName.equalsIgnoreCase("GetTableCounts") && stmtName.equalsIgnoreCase("HistoryCount")) {
        // System.err.println(sql + "\n+++++++++++++++++++++++++++++++++");
        // // System.err.println("ORIGINAL:\n" + orig_debug + StringUtil.SINGLE_LINE);
        // System.err.println("NEW:");
        // // for (int ii = 0; ii < bestPlan.fragments.size(); ii++) {
        // Fragment f = bestPlan.fragments.get(ii);
        // System.err.println(String.format("Fragment #%02d\n%s\n", ii, PlanNodeUtil.debug(f.planGraph)));
        // }
        // System.err.println(StringUtil.DOUBLE_LINE);
        // System.exit(1);
        // }

        // DTXN/EE can't handle plans that have more than 2 fragments yet.
        // if (bestPlan.fragments.size() > 2) {
        // m_recentErrorMsg = "Unable to plan for statement. Likely statement is "+
        // "joining two partitioned tables in a multi-partition statement. " +
        // "This is not supported at this time.";
        // return null;
        // }

        return bestPlan;
    }

    /** @return the shared PlannerContext that owns this planner's plan columns */
    public PlannerContext getPlannerContext() {
        return m_context;
    }

    /** @return the exception behind the most recent planning failure, or null */
    public Throwable getError() {
        return m_recentError;
    }

    /** @return the error message for the most recent planning failure, or null */
    public String getErrorMessage() {
        return m_recentErrorMsg;
    }
}