package edu.brown.optimizer; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import org.apache.commons.collections15.set.ListOrderedSet; import org.apache.log4j.Logger; import org.voltdb.VoltType; import org.voltdb.catalog.Column; import org.voltdb.catalog.Table; import org.voltdb.expressions.AbstractExpression; import org.voltdb.expressions.TupleValueExpression; import org.voltdb.planner.PlanColumn; import org.voltdb.plannodes.AbstractJoinPlanNode; import org.voltdb.plannodes.AbstractPlanNode; import org.voltdb.plannodes.AbstractScanPlanNode; import org.voltdb.plannodes.AggregatePlanNode; import org.voltdb.plannodes.DistinctPlanNode; import org.voltdb.plannodes.IndexScanPlanNode; import org.voltdb.plannodes.LimitPlanNode; import org.voltdb.plannodes.NestLoopIndexPlanNode; import org.voltdb.plannodes.NestLoopPlanNode; import org.voltdb.plannodes.OrderByPlanNode; import org.voltdb.plannodes.ProjectionPlanNode; import org.voltdb.plannodes.ReceivePlanNode; import org.voltdb.plannodes.SendPlanNode; import org.voltdb.types.ExpressionType; import org.voltdb.types.PlanNodeType; import org.voltdb.utils.Pair; import edu.brown.catalog.CatalogUtil; import edu.brown.expressions.ExpressionTreeWalker; import edu.brown.expressions.ExpressionUtil; import edu.brown.logging.LoggerUtil; import edu.brown.logging.LoggerUtil.LoggerBoolean; import edu.brown.plannodes.PlanNodeTreeWalker; import edu.brown.plannodes.PlanNodeUtil; import edu.brown.utils.CollectionUtil; public abstract class PlanOptimizerUtil { private static final Logger LOG = Logger.getLogger(PlanOptimizerUtil.class); private static final LoggerBoolean debug = new LoggerBoolean(); private static final LoggerBoolean trace = new LoggerBoolean(); static { LoggerUtil.attachObserver(LOG, debug, trace); } /** * * @param state * @param orig_pc * @param 
output_cols * @return */ protected static Pair<PlanColumn, Integer> findMatchingColumn(final PlanOptimizerState state, PlanColumn orig_pc, List<Integer> output_cols) { PlanColumn new_pc = null; int new_idx = 0; for (Integer new_guid : output_cols) { new_pc = state.plannerContext.get(new_guid); assert (new_pc != null) : "Unexpected PlanColumn #" + new_guid; if (new_pc.equals(orig_pc, true, true)) { if (trace.val) LOG.trace(String.format("[%02d] Found non-expression PlanColumn match:\nORIG: %s\nNEW: %s", new_idx, orig_pc, new_pc)); break; } new_pc = null; new_idx++; } // FOR return (new_pc != null ? Pair.of(new_pc, new_idx) : null); } /** * Populates the two data structures with information on the planNodes and * Tables and their referenced columns * @param state * @param rootNode */ public static void populateTableNodeInfo(final PlanOptimizerState state, final AbstractPlanNode rootNode) { // Traverse tree and build up our data structures that maps all nodes to // the columns they affect new PlanNodeTreeWalker(true) { @Override protected void callback(AbstractPlanNode element) { try { extractColumnInfo(state, element, this.getDepth() == 0); } catch (Exception ex) { if (debug.val) LOG.fatal(PlanNodeUtil.debug(rootNode)); throw new RuntimeException("Failed to extract column information for " + element, ex); } } }.traverse(rootNode); } /** * Populate the mappings between AbstractPlanNodes and the tableNames, and * the element id(?) 
to set of columns * @param state * @param rootNode */ public static void populateJoinTableInfo(final PlanOptimizerState state, final AbstractPlanNode rootNode) { final Set<String> join_tbls = new HashSet<String>(); // Traverse from the bottom up and figure out what tables are referenced // in each AbstractJoinPlanNode for (AbstractPlanNode leaf : PlanNodeUtil.getLeafPlanNodes(rootNode)) { new PlanNodeTreeWalker(false, true) { @Override protected void callback(AbstractPlanNode element) { // --------------------------------------------------- // AbstractScanPlanNode // --------------------------------------------------- if (element instanceof AbstractScanPlanNode) { join_tbls.add(((AbstractScanPlanNode) element).getTargetTableName()); } // --------------------------------------------------- // AbstractJoinPlanNode // --------------------------------------------------- else if (element instanceof AbstractJoinPlanNode) { if (debug.val) LOG.debug("Updating the list of tables joined at " + element); // We don't NestLoopPlanNode for now assert ((element instanceof NestLoopPlanNode) == false); // Get target table of inline scan Collection<AbstractScanPlanNode> inline_nodes = element.getInlinePlanNodes(AbstractScanPlanNode.class); assert (inline_nodes.isEmpty() == false); AbstractScanPlanNode inline_scan_node = CollectionUtil.first(inline_nodes); assert (inline_scan_node != null); join_tbls.add(inline_scan_node.getTargetTableName()); // Add all of the tables that we've seen at this point // in the tree state.join_tbl_mapping.put(element, new HashSet<String>(join_tbls)); // Add to join index map which depth is the index state.join_node_index.put(this.getDepth(), (AbstractJoinPlanNode) element); Map<String, Integer> single_join_node_output = new HashMap<String, Integer>(); for (int i = 0; i < element.getOutputColumnGUIDCount(); i++) { int guid = element.getOutputColumnGUID(i); PlanColumn pc = state.plannerContext.get(guid); single_join_node_output.put(pc.getDisplayName(), i); 
} // FOR state.join_outputs.put((AbstractJoinPlanNode) element, single_join_node_output); } } }.traverse(leaf); } // FOR } /** * * @param state * @param node * @param is_root * @throws Exception */ protected static void extractColumnInfo(final PlanOptimizerState state, final AbstractPlanNode node, final boolean is_root) throws Exception { if (trace.val) LOG.trace("Extracting Column Info for " + node); // Store the original output column information per node if (state.orig_node_output.containsKey(node) == false) { if (trace.val) LOG.trace("Storing original PlanNode output information for " + node); state.orig_node_output.put(node, new ArrayList<Integer>(node.getOutputColumnGUIDs())); } // Get all of the AbstractExpression roots for this node final Collection<AbstractExpression> exps = PlanNodeUtil.getExpressionsForPlanNode(node); // If this is the root node, then include the output columns + also // include output columns if its a projection or limit node if (is_root || node instanceof ProjectionPlanNode | node instanceof LimitPlanNode) { for (Integer col_guid : node.getOutputColumnGUIDs()) { PlanColumn col = state.plannerContext.get(col_guid); assert (col != null) : "Invalid PlanColumn #" + col_guid; if (col.getExpression() != null) { exps.add(col.getExpression()); // root_column_expressions.addAll(ExpressionUtil.getExpressions(col.getExpression(), // TupleValueExpression.class)); } } // FOR } // PlanNode specific extractions // --------------------------------------------------- // AGGREGATE // --------------------------------------------------- if (node instanceof AggregatePlanNode) { AggregatePlanNode agg_node = (AggregatePlanNode) node; for (Integer col_guid : agg_node.getAggregateColumnGuids()) { PlanColumn col = state.plannerContext.get(col_guid); assert (col != null) : "Invalid PlanColumn #" + col_guid; if (col.getExpression() != null) exps.add(col.getExpression()); } // FOR for (Integer col_guid : agg_node.getGroupByColumnGuids()) { PlanColumn col = 
state.plannerContext.get(col_guid); assert (col != null) : "Invalid PlanColumn #" + col_guid; if (col.getExpression() != null) exps.add(col.getExpression()); } // FOR // --------------------------------------------------- // ORDER BY // --------------------------------------------------- } else if (node instanceof OrderByPlanNode) { OrderByPlanNode orby_node = (OrderByPlanNode) node; for (Integer col_guid : orby_node.getSortColumnGuids()) { PlanColumn col = state.plannerContext.get(col_guid); assert (col != null) : "Invalid PlanColumn #" + col_guid; if (col.getExpression() != null) exps.add(col.getExpression()); } // FOR } if (debug.val) LOG.debug("Extracted " + exps.size() + " expressions from " + node); // Now go through our expressions and extract out the columns that are // referenced StringBuilder sb = new StringBuilder(); for (AbstractExpression exp : exps) { for (Column catalog_col : ExpressionUtil.getReferencedColumns(state.catalog_db, exp)) { if (trace.val) sb.append(String.format("\n%s => %s", node, catalog_col.fullName())); state.addTableColumn(catalog_col); state.addPlanNodeColumn(node, catalog_col); } // FOR } // FOR if (trace.val && sb.length() > 0) LOG.trace("Extracted Column References:" + sb); // Populate our map from Column objects to PlanColumn GUIDs for (Integer col_guid : node.getOutputColumnGUIDs()) { PlanColumn col = state.plannerContext.get(col_guid); assert (col != null) : "Invalid PlanColumn #" + col_guid; if (col.getExpression() != null) { Collection<Column> catalog_cols = ExpressionUtil.getReferencedColumns(state.catalog_db, col.getExpression()); // If there is more than one column, then it's some sort of // compound expression // So we don't want to include in our mapping if (catalog_cols.size() == 1) { state.addColumnMapping(CollectionUtil.first(catalog_cols), col_guid); } } } // FOR } // ------------------------------------------------------------ // QUERY PLAN HELPERS // ------------------------------------------------------------ 
/** * Walk up the tree in reverse so that we get all the Column offsets right * @param state * @param rootNode * @param force * @return */ public static boolean updateAllColumns(final PlanOptimizerState state, final AbstractPlanNode rootNode, final boolean force) { for (AbstractPlanNode leafNode : PlanNodeUtil.getLeafPlanNodes(rootNode)) { new PlanNodeTreeWalker(false, true) { @Override protected void callback(AbstractPlanNode element) { if (trace.val) LOG.trace("CURRENT:\n" + PlanNodeUtil.debugNode(element)); // --------------------------------------------------- // JOIN // --------------------------------------------------- if (element instanceof AbstractJoinPlanNode) { if ((state.areChildrenDirty(element) || force) && PlanOptimizerUtil.updateJoinsColumns(state, (AbstractJoinPlanNode) element) == false) { this.stop(); return; } // --------------------------------------------------- // ORDER BY // --------------------------------------------------- } else if (element instanceof OrderByPlanNode) { if ((state.areChildrenDirty(element) || force) && PlanOptimizerUtil.updateOrderByColumns(state, (OrderByPlanNode) element) == false) { this.stop(); return; } } // --------------------------------------------------- // AGGREGATE // --------------------------------------------------- else if (element instanceof AggregatePlanNode) { if ((state.areChildrenDirty(element) || force) && PlanOptimizerUtil.updateAggregateColumns(state, (AggregatePlanNode) element) == false) { this.stop(); return; } } // --------------------------------------------------- // DISTINCT // --------------------------------------------------- else if (element instanceof DistinctPlanNode) { if ((state.areChildrenDirty(element) || force) && PlanOptimizerUtil.updateDistinctColumns(state, (DistinctPlanNode) element) == false) { this.stop(); return; } } // --------------------------------------------------- // PROJECTION // --------------------------------------------------- else if (element instanceof 
ProjectionPlanNode) { if ((state.areChildrenDirty(element) || force) && PlanOptimizerUtil.updateProjectionColumns(state, (ProjectionPlanNode) element) == false) { this.stop(); return; } } // --------------------------------------------------- // SEND + RECIEVE + LIMIT // --------------------------------------------------- else if (element instanceof SendPlanNode || element instanceof ReceivePlanNode || element instanceof LimitPlanNode) { // I think we should always call this to ensure that our // offsets are ok // This might be because we don't call whatever that // bastardized // AbstractPlanNode.updateOutputColumns() that messes // everything up for us if (element instanceof LimitPlanNode || state.areChildrenDirty(element)) { if (element.getChildPlanNodeCount() != 1) { LOG.warn("Invalid PlanNode Tree:\n" + PlanNodeUtil.debug(rootNode)); } assert (element.getChildPlanNodeCount() == 1) : String.format("%s has %d children when it should have one: %s", element, element.getChildPlanNodeCount(), element.getChildren()); AbstractPlanNode child_node = element.getChild(0); assert (child_node != null); element.setOutputColumns(child_node.getOutputColumnGUIDs()); PlanOptimizerUtil.updateOutputOffsets(state, element); if (trace.val) LOG.trace("Set Output Columns for " + element + "\n" + PlanNodeUtil.debugNode(element)); } } } }.traverse(leafNode); } return (true); } /** * Update DISTINCT columns * @param state * @param node * @return */ public static boolean updateDistinctColumns(final PlanOptimizerState state, DistinctPlanNode node) { // We really have one child here assert (node.getChildPlanNodeCount() == 1) : node; AbstractPlanNode child_node = node.getChild(0); assert (child_node != null); // Find the offset of our distinct column in our output. 
That will // tell us where to get the guid in the input table information int orig_guid = node.getDistinctColumnGuid(); PlanColumn orig_pc = state.plannerContext.get(orig_guid); assert (orig_pc != null); node.setOutputColumns(child_node.getOutputColumnGUIDs()); // PlanColumn new_pc = null; // int new_idx = 0; // for (Integer guid : node.getOutputColumnGUIDs()) { // PlanColumn pc = state.m_context.get(guid); // assert (pc != null); // if (pc.equals(orig_pc, true, true)) { // if (trace.val) // LOG.trace(String.format("[%02d] Found non-expression PlanColumn match:\nORIG: %s\nNEW: %s", // new_idx, orig_pc, pc)); // new_pc = pc; // break; // } // new_idx++; // } // FOR // assert (new_pc != null); // // // // // Now we can update output columns and set the distinct column to be // // the guid // node.setDistinctColumnGuid(new_pc.guid()); PlanColumn found = null; for (Integer new_guid : node.getOutputColumnGUIDs()) { PlanColumn new_pc = state.plannerContext.get(new_guid); assert (new_pc != null); if (new_pc.equals(orig_pc, true, true)) { found = new_pc; break; } } // FOR assert(found != null) : "Failed to find DistinctColumn " + orig_pc + " in " + node + " output columns"; node.setDistinctColumnGuid(found.guid()); state.markDirty(node); if (debug.val) LOG.debug(String.format("Updated %s with proper distinct column guid: ORIG[%d] => NEW[%d]", node, orig_guid, found.guid())); return (true); } /** * Update OrderBy columns * @param state * @param node * @return */ public static boolean updateOrderByColumns(final PlanOptimizerState state, OrderByPlanNode node) { if (debug.val) LOG.debug("Updating Sort Columns for " + node); // We really have one child here assert (node.getChildPlanNodeCount() == 1) : node; AbstractPlanNode child_node = node.getChild(0); assert (child_node != null); node.setOutputColumns(child_node.getOutputColumnGUIDs()); // updateOutputOffsets(state, node); // Look at each of the SortColumns and make sure that it references a // PlanColumn // that is in our 
child node's output PlanColumns for (int i = 0, cnt = node.getSortColumnGuids().size(); i < cnt; i++) { int orig_guid = node.getSortColumnGuids().get(i); PlanColumn orig_pc = state.plannerContext.get(orig_guid); assert (orig_pc != null); if (trace.val) LOG.trace("Looking for matching PlanColumn: " + orig_pc); // We can't use the offset of original sort PlanColumn because the // number of output // columns in our child may have changed. So we need to loop through // and find the one // that references the same value. This will probably not work if // they are trying to do // a sort on an aggregate output value... Pair<PlanColumn, Integer> p = findMatchingColumn(state, orig_pc, node.getOutputColumnGUIDs()); PlanColumn new_pc = p.getFirst(); assert (new_pc != null); node.getSortColumnGuids().set(i, new_pc.guid()); // // XXX: Can we just loop through all our PlanColumns and find the // one we want? // if (new_pc == null) { // for (PlanColumn pc : state.plannerContext.getAllPlanColumns()) { // if (pc.equals(orig_pc, true, true)) { // new_pc = pc; // } // } // FOR // } // if (new_pc == null) { // LOG.error(String.format("[%02d] Failed to find %s", i, orig_pc)); // if (trace.val) LOG.error("PlannerContext Dump:\n" + // state.plannerContext.debug()); // } // assert (new_pc != null); // if (trace.val) LOG.trace(String.format("[%02d] %s", i, // new_pc)); // node.getSortColumnGuids().set(i, new_pc.guid()); } // FOR state.markDirty(node); if (debug.val) LOG.debug(String.format("Updated %s with proper orderby column guid", node)); return (true); } /** * Update AggregatePlanNode columns * @param state * @param node * @return */ public static boolean updateAggregateColumns(final PlanOptimizerState state, AggregatePlanNode node) { // We really have one child here assert (node.getChildPlanNodeCount() == 1) : node; AbstractPlanNode child_node = node.getChild(0); assert (child_node != null); for (int i = 0, cnt = node.getAggregateColumnGuids().size(); i < cnt; i++) { Integer 
orig_guid = node.getAggregateColumnGuids().get(i); PlanColumn orig_pc = state.plannerContext.get(orig_guid); assert (orig_pc != null); PlanColumn new_pc = null; int new_idx = 0; for (Integer guid : child_node.getOutputColumnGUIDs()) { PlanColumn pc = state.plannerContext.get(guid); assert (pc != null); if (pc.equals(orig_pc, true, true)) { if (trace.val) LOG.trace(String.format("[%02d] Found non-expression PlanColumn match:\nORIG: %s\nNEW: %s", new_idx, orig_pc, pc)); new_pc = pc; break; } new_idx++; } // FOR if (new_pc == null) { LOG.error(String.format("Failed to find %d => %s\n", new_idx, new_pc)); LOG.error(PlanNodeUtil.debug(PlanNodeUtil.getRoot(node))); } assert (new_pc != null) : String.format("Failed to find %s at offset %d for %s", orig_pc, i, node); node.getAggregateColumnGuids().set(i, new_pc.guid()); } // FOR // Need to update output column guids for GROUP BYs... for (int i = 0, cnt = node.getGroupByColumnGuids().size(); i < cnt; i++) { Integer orig_guid = node.getGroupByColumnGuids().get(i); PlanColumn orig_pc = state.plannerContext.get(orig_guid); assert (orig_pc != null); Pair<PlanColumn, Integer> p = findMatchingColumn(state, orig_pc, child_node.getOutputColumnGUIDs()); if (p == null) { LOG.error(String.format("Failed to find %s's output %s from child node %s", node, orig_pc, child_node)); LOG.error(PlanNodeUtil.debug(PlanNodeUtil.getRoot(node))); } assert (p != null) : String.format("Failed to find %s's output %s from child node %s", node, orig_pc, child_node); PlanColumn new_pc = p.getFirst(); assert (new_pc != null); if (debug.val) LOG.debug(String.format("[%02d] Setting %s GroupByColumnGuid to %s", i, node, new_pc)); node.getGroupByColumnGuids().set(i, new_pc.guid()); } // FOR for (int i = 0, cnt = node.getOutputColumnGUIDs().size(); i < cnt; i++) { Integer orig_guid = node.getOutputColumnGUIDs().get(i); PlanColumn orig_pc = state.plannerContext.get(orig_guid); assert (orig_pc != null); // XXX: We might need to do something different if this 
part of our // aggregate output if (node.getAggregateOutputColumns().contains(i) == false) { Pair<PlanColumn, Integer> p = findMatchingColumn(state, orig_pc, child_node.getOutputColumnGUIDs()); PlanColumn new_pc = p.getFirst(); assert (new_pc != null); if (debug.val) LOG.debug(String.format("[%02d] Setting %s OutputColumnGUID to %s", i, node, new_pc)); node.getOutputColumnGUIDs().set(i, new_pc.guid()); } } // FOR // System.err.println(this.sql); // System.err.println("AGGREGATE_OUTPUT_COLUMNS: " + // agg_node.getAggregateOutputColumns()); // System.err.println("AGGREGATE_OUTPUT_COLUMN_GUIDS: " + // agg_node.getAggregateColumnGuids()); // System.err.println("AGGREGATE_OUTPUT_COLUMN_NAMES: " + // agg_node.getAggregateColumnNames()); // System.err.println("AGGREGATE_OUTPUT_COLUMN_TYPES: " + // agg_node.getAggregateTypes()); // System.err.println("ORIG_CHILD_OUTPUT: " + orig_child_output); // System.err.println("NEW_CHILD_OUTPUT: " + // child_node.getOutputColumnGUIDs()); // System.err.println(PlanNodeUtil.debug(PlanNodeUtil.getRoot(agg_node))); state.markDirty(node); if (debug.val) LOG.debug(String.format("Updated %s with %d proper aggregate column guids", node, node.getAggregateColumnGuids().size())); return (true); } /** * * @param state * @param node * @return */ public static boolean updateProjectionColumns(final PlanOptimizerState state, final ProjectionPlanNode node) { assert (node.getChildPlanNodeCount() == 1) : node; final AbstractPlanNode child_node = node.getChild(0); assert (child_node != null); final List<Integer> orig_child_guids = state.orig_node_output.get(child_node); for (int i = 0, cnt = node.getOutputColumnGUIDCount(); i < cnt; i++) { // Check to make sure that the offset in the tuple value expression // matches int orig_guid = node.getOutputColumnGUID(i); PlanColumn orig_pc = state.plannerContext.get(orig_guid); assert (orig_pc != null); // Fix all of the offsets in the ExpressionTree // We have to clone it so that we don't mess up anybody else 
that // may be referencing the same PlanColumn AbstractExpression new_exp = null; try { new_exp = (AbstractExpression) orig_pc.getExpression().clone(); } catch (Exception ex) { throw new RuntimeException("Unable to clone " + orig_pc, ex); } try { new ExpressionTreeWalker() { @Override protected void callback(AbstractExpression exp_element) { if (exp_element instanceof TupleValueExpression) { TupleValueExpression tv_exp = (TupleValueExpression) exp_element; int orig_idx = tv_exp.getColumnIndex(); PlanColumn orig_child_pc = null; // If this is referencing a column that we don't // have a direct link to // then we will see if we can match one based on its // name if (orig_idx >= orig_child_guids.size()) { for (Integer orig_child_guid : child_node.getOutputColumnGUIDs()) { orig_child_pc = state.plannerContext.get(orig_child_guid); if (orig_child_pc.getExpression() instanceof TupleValueExpression) { TupleValueExpression orig_child_tve = (TupleValueExpression) orig_child_pc.getExpression(); if (tv_exp.getTableName().equals(orig_child_tve.getTableName()) && tv_exp.getColumnAlias().equals(orig_child_tve.getColumnAlias())) { break; } orig_child_pc = null; } } // FOR } else { orig_child_pc = state.plannerContext.get(orig_child_guids.get(orig_idx)); } assert (orig_child_pc != null); PlanColumn new_child_pc = null; int new_idx = 0; for (Integer orig_child_guid : child_node.getOutputColumnGUIDs()) { new_child_pc = state.plannerContext.get(orig_child_guid); if (orig_child_pc.equals(new_child_pc, true, true)) { break; } new_child_pc = null; new_idx++; } // FOR if (new_child_pc == null) LOG.warn("Problems up ahead:\n" + state + "\n" + PlanNodeUtil.debug(PlanNodeUtil.getRoot(node))); assert (new_child_pc != null) : String.format("Failed to find matching output column %s in %s", orig_child_pc, node); tv_exp.setColumnIndex(new_idx); } } }.traverse(new_exp); } catch (Throwable ex) { System.err.println(PlanNodeUtil.debug(node)); throw new RuntimeException(ex); } // Always try make a 
new PlanColumn and update the // TupleValueExpresion index // This ensures that we always get the ordering correct PlanColumn new_col = state.plannerContext.getPlanColumn(new_exp, orig_pc.getDisplayName(), orig_pc.getSortOrder(), orig_pc.getStorage()); assert (new_col != null); node.getOutputColumnGUIDs().set(i, new_col.guid()); } // FOR state.markDirty(node); if (debug.val) LOG.debug(String.format("Updated %s with %d output columns offsets", node, node.getOutputColumnGUIDCount())); return (true); } /** * * @param state * @param node * @return */ public static boolean updateOutputOffsets(final PlanOptimizerState state, AbstractPlanNode node) { for (int i = 0, cnt = node.getOutputColumnGUIDCount(); i < cnt; i++) { // Check to make sure that the offset in the tuple value expression // matches int orig_guid = node.getOutputColumnGUID(i); PlanColumn orig_pc = state.plannerContext.get(orig_guid); assert (orig_pc != null); AbstractExpression orig_pc_exp = orig_pc.getExpression(); if (!(orig_pc_exp instanceof TupleValueExpression)) { TupleValueExpression new_exp = new TupleValueExpression(); new_exp.setColumnIndex(i); new_exp.setColumnAlias(orig_pc.getDisplayName()); new_exp.setValueType(VoltType.STRING); PlanColumn new_col = state.plannerContext.getPlanColumn(new_exp, orig_pc.getDisplayName(), orig_pc.getSortOrder(), orig_pc.getStorage()); assert (new_col != null); node.getOutputColumnGUIDs().set(i, new_col.guid()); } else { // Always try make a new PlanColumn and update the // TupleValueExpresion index // This ensures that we always get the ordering correct TupleValueExpression orig_exp = (TupleValueExpression) orig_pc.getExpression(); int orig_idx = orig_exp.getColumnIndex(); if (orig_idx != i) { TupleValueExpression clone_exp = null; try { clone_exp = (TupleValueExpression) orig_pc.getExpression().clone(); } catch (Exception ex) { LOG.fatal("Unable to clone " + orig_pc, ex); throw new RuntimeException(ex); } clone_exp.setColumnIndex(i); PlanColumn new_col = 
state.plannerContext.getPlanColumn(clone_exp, orig_pc.getDisplayName(), orig_pc.getSortOrder(), orig_pc.getStorage());
                assert (new_col != null);
                node.getOutputColumnGUIDs().set(i, new_col.guid());
            }
        }
    } // FOR
    state.markDirty(node);
    if (debug.val)
        LOG.debug(String.format("Updated %s with %d output columns offsets", node, node.getOutputColumnGUIDCount()));
    return (true);
}

/**
 * Rewrite the output columns and expression offsets of a join node after
 * its children have been updated.
 *
 * @param node
 * @return
 */
public static boolean updateJoinsColumns(final PlanOptimizerState state, final AbstractJoinPlanNode node) {

    // There's always going to be two input tables. One is always going to
    // come
    // from a child node, while the second may come from a child node *or*
    // directly from
    // a table being scanned. Therefore, we need to first figure out the
    // original size
    // of the first input table and then use that to adjust the offsets of
    // the new tables
    AbstractPlanNode outer_node = node.getChild(0);
    assert (outer_node != null);
    final List<Integer> outer_output_guids = outer_node.getOutputColumnGUIDs();
    if (debug.val)
        LOG.debug("Calculating OUTER offsets from child node: " + outer_node);

    // Mapping from the index in the new output list to the original
    // PlanColumn guid
    final SortedMap<Integer, Integer> new_sorted_output_guids = new TreeMap<Integer, Integer>();

    // Mapping from original index to the new index
    final Map<Integer, Integer> offset_xref = new HashMap<Integer, Integer>();

    // Build a map from original offsets to the new offsets that need to be
    // stored
    // for the TupleValueExpressions (and possible TupleAddressExpression)
    final List<Integer> outer_orig_input_guids = state.orig_node_output.get(outer_node);
    assert (outer_orig_input_guids != null);
    StringBuilder sb = new StringBuilder();
    for (int orig_idx = 0, cnt = outer_orig_input_guids.size(); orig_idx < cnt; orig_idx++) {
        int orig_col_guid = outer_orig_input_guids.get(orig_idx);
        PlanColumn orig_pc = state.plannerContext.get(orig_col_guid);

        // Figure out what the new PlanColumn GUID is for this column
        // It may be the case that we need to make a new one because the
        // underlying expression has the wrong offsets

        // Find the new index of this same PlanColumn guid in the outer
        // table's output columns
        Integer new_idx = outer_output_guids.indexOf(orig_col_guid);

        // If this column guid is still in the outer table's output columns,
        // we can reuse it directly at its new position
        if (new_idx != -1) {
            // PlanColumn new_pc = state.plannerContext.get(orig_col_guid);
            // new_output_guids.add(orig_col_guid);
            new_sorted_output_guids.put(new_idx, orig_col_guid);
            if (debug.val)
                LOG.debug(String.format("[%02d] Remapped PlanColumn to new offset %02d", orig_idx, new_idx));
        }
        // Check whether we even have this column. We'll compare everything
        // but the Expression
        else {
            Pair<PlanColumn, Integer> p = findMatchingColumn(state, orig_pc, outer_output_guids);
            // If we have this PlanColumn, then we need to clone it and set
            // the new column index
            // Make sure that we replace update outer_new_input_guids
            if (p != null) {
                // PlanColumn new_pc = p.getFirst();
                assert (p.getFirst() != null);
                new_idx = p.getSecond();
                TupleValueExpression clone_exp = null;
                try {
                    clone_exp = (TupleValueExpression) orig_pc.getExpression().clone();
                } catch (CloneNotSupportedException ex) {
                    throw new RuntimeException(ex);
                }
                clone_exp.setColumnIndex(new_idx);
                PlanColumn new_col = state.plannerContext.getPlanColumn(clone_exp, orig_pc.getDisplayName(), orig_pc.getSortOrder(), orig_pc.getStorage());
                assert (new_col != null);
                outer_output_guids.set(new_idx, new_col.guid());
                // new_output_guids.add(new_col.guid());
                new_sorted_output_guids.put(new_idx, new_col.guid());
                if (debug.val)
                    LOG.debug(String.format("OUTER OFFSET %d => %d [new_guid=%d]", orig_idx, new_idx, new_col.guid()));
            }
            // If we don't have this PlanColumn, that means that it isn't
            // being passed up from the
            // outer table and therefore don't want it anymore in our output
            else {
                new_idx = null;
            }
        }

        if (new_idx != null) {
            assert (offset_xref.containsKey(orig_idx) == false) : orig_idx + " ==> " + offset_xref;
            offset_xref.put(orig_idx, new_idx);
        }
        // Just because we couldn't find the offset doesn't mean it's a bad
        // thing
        // It might be because we projected those columns out down below in
        // the tree
        // and therefore we don't need to worry about them anymore.
        else {
            String msg = String.format("[%02d] Failed to find new offset for OUTER %s", orig_idx, orig_pc);
            sb.append(msg).append("\n");
            if (debug.val)
                LOG.warn(msg);
        }
    } // FOR
    if (trace.val) {
        LOG.trace("Original Outer Input GUIDs: " + outer_orig_input_guids);
        LOG.trace("New Outer Input GUIDs: " + outer_output_guids);
    }
    // Every surviving outer output column must have an offset mapping;
    // otherwise dump diagnostics before the assertion below fires
    if (outer_output_guids.size() != offset_xref.size()) {
        LOG.error("Outer Node: " + outer_node);

        String temp = "";
        for (int i = 0; i < outer_orig_input_guids.size(); i++) {
            PlanColumn pc = state.plannerContext.get(outer_orig_input_guids.get(i));
            temp += String.format("[%02d] %s\n", i, pc);
            temp += ExpressionUtil.debug(pc.getExpression()) + "\n--------\n";
        }
        temp += "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n";
        LOG.error("Original Outer Input GUIDs: " + outer_orig_input_guids + "\n" + temp);

        temp = "";
        for (int i = 0; i < outer_output_guids.size(); i++) {
            PlanColumn pc = state.plannerContext.get(outer_output_guids.get(i));
            temp += String.format("[%02d] %s\n", i, pc);
            temp += ExpressionUtil.debug(pc.getExpression()) + "\n--------\n";
        }
        LOG.error("New Outer Input GUIDs: " + outer_output_guids + "\n" + temp);

        LOG.error("Output Xref Offsets: " + offset_xref);
        // LOG.info("Trace Information:\n" + sb);
        LOG.error("Unexpected Query Plan\n" + PlanNodeUtil.debug(PlanNodeUtil.getRoot(node)));
    }
    assert (outer_output_guids.size() == offset_xref.size()) : "outer_new_input_guids size: " + outer_output_guids.size() + " offset_xref size: " + offset_xref.size();

    // add the sorted columns into new_columns list
    final List<Integer> new_output_guids = new ArrayList<Integer>(new_sorted_output_guids.values());

    // For the inner table, we always have to offset ourselves based on the
    // size of the new outer table
    int offset = outer_output_guids.size();

    AbstractPlanNode inner_node = null;

    // These are the set of expressions for the join clause that we need to
    // fix their offsets for
    final Collection<AbstractExpression> expressions_to_fix = PlanNodeUtil.getExpressionsForPlanNode(node);

    // --------------------------------------------
    // NEST LOOP
    // --------------------------------------------
    if (node.getChildPlanNodeCount() > 1) {
        assert (node instanceof NestLoopPlanNode);
        inner_node = node.getChild(1);
        if (debug.val)
            LOG.debug("Calculating INNER offsets from child node: " + inner_node);

        List<Integer> inner_orig_input_guids = state.orig_node_output.get(inner_node);
        assert (inner_orig_input_guids != null);
        List<Integer> inner_new_input_guids = inner_node.getOutputColumnGUIDs();

        for (int orig_idx = 0, cnt = inner_orig_input_guids.size(); orig_idx < cnt; orig_idx++) {
            int col_guid = inner_orig_input_guids.get(orig_idx);

            // Find the new index of this same PlanColumn guid
            int new_idx = inner_new_input_guids.indexOf(col_guid);
            if (new_idx != -1) {
                // Inner offsets are shifted past the end of the outer
                // table's columns on both the original and new side
                int offset_orig_idx = outer_orig_input_guids.size() + orig_idx;
                int offset_new_idx = offset + new_idx;
                if (trace.val)
                    LOG.trace(String.format("INNER NODE OFFSET %d => %d", offset_orig_idx, offset_new_idx));
                assert (offset_xref.containsKey(offset_orig_idx) == false) : orig_idx + " ==> " + offset_xref;
                offset_xref.put(offset_orig_idx, offset_new_idx);
                new_output_guids.add(col_guid);
                // sorted_new_output_guids.put(new_idx, col_guid);
            } else {
                PlanColumn pc = state.plannerContext.get(col_guid);
                LOG.warn("Failed to find new offset for INNER " + pc);
            }
        } // FOR
        if (trace.val)
            LOG.trace("Original Inner Input GUIDs: " + inner_orig_input_guids);
        if (trace.val)
            LOG.trace("New Inner Input GUIDs: " + inner_new_input_guids);

    // ---------------------------------------------------
    // NEST LOOP INDEX
    // ---------------------------------------------------
    } else {
        // Otherwise, just grab all of the columns for the target table in
        // the inline scan
        assert (node instanceof NestLoopIndexPlanNode);
        IndexScanPlanNode idx_node = node.getInlinePlanNode(PlanNodeType.INDEXSCAN);
        assert (idx_node != null);
        inner_node = idx_node;
        Table catalog_tbl = null;
        try {
            catalog_tbl = CollectionUtil.first(CatalogUtil.getReferencedTablesForPlanNode(state.catalog_db, idx_node));
        } catch (Exception ex) {
            LOG.fatal(ex);
            throw new RuntimeException(ex);
        }
        assert (catalog_tbl != null);
        if (debug.val)
            LOG.debug("Calculating INNER offsets from INLINE Scan: " + catalog_tbl);

        // Walk the table's columns in declaration order
        for (Column catalog_col : CatalogUtil.getSortedCatalogItems(catalog_tbl.getColumns(), "index")) {
            int i = catalog_col.getIndex();
            int offset_orig_idx = outer_orig_input_guids.size() + i;
            int offset_new_idx = offset + i;
            if (trace.val)
                LOG.trace(String.format("INNER INLINE OFFSET %d => %d", offset_orig_idx, offset_new_idx));
            offset_xref.put(offset_orig_idx, offset_new_idx);

            // Since we're going in order, we know what column is at this
            // position.
            // That means we can grab the catalog object and convert it to a
            // PlanColumn GUID
            // Always try make a new PlanColumn and update the
            // TupleValueExpresion index
            // This ensures that we always get the ordering correct
            // int orig_guid =
            // idx_node.getOutputColumnGUID(offset_orig_idx);
            int orig_guid = CollectionUtil.first(state.column_guid_xref.get(catalog_col));
            assert (orig_guid != -1);
            PlanColumn orig_pc = state.plannerContext.get(orig_guid);
            assert (orig_pc != null);

            // PlanColumn new_pc = null;
            // int new_idx = 0;
            // for (Integer guid : idx_node.getOutputColumnGUIDs()) {
            // PlanColumn pc = state.m_context.get(guid);
            // assert (pc != null);
            // if (pc.equals(orig_pc, true, true)) {
            // if (trace.val)
            // LOG.trace(String.format("[%02d] Found inline output PlanColumn match:\nORIG: %s\nNEW: %s",
            // new_idx, orig_pc, pc));
            // new_pc = pc;
            // break;
            // }
            // new_idx++;
            // } // FOR
            // assert (new_pc != null);
            idx_node.getOutputColumnGUIDs().set(i, orig_pc.guid());
            new_output_guids.add(orig_pc.guid());
            // sorted_new_output_guids.put(i,orig_pc.guid());
            // TupleValueExpression clone_exp =
            // (TupleValueExpression)orig_col.getExpression().clone();
            // clone_exp.setColumnIndex(offset_new_idx);
            // Storage storage = (catalog_tbl.getIsreplicated() ?
            // Storage.kReplicated : Storage.kPartitioned);
            // PlanColumn new_col = state.m_context.getPlanColumn(clone_exp,
            // orig_col.displayName(), orig_col.getSortOrder(), storage);
            // assert(new_col != null);
        } // FOR

        // We also need to fix all of the search key expressions used in the
        // inline scan
        expressions_to_fix.addAll(PlanNodeUtil.getExpressionsForPlanNode(idx_node));
        // System.out.println("expressions_to_fix: " + expressions_to_fix);
    }
    if (debug.val) {
        LOG.debug("Output Xref Offsets: " + offset_xref);
        LOG.debug("New Output Columns GUIDS: " + new_sorted_output_guids);
    }

    // Get all of the AbstractExpression roots for this node
    // Now fix the offsets for everyone
    for (AbstractExpression exp : expressions_to_fix) {
        new ExpressionTreeWalker() {
            @Override
            protected void callback(AbstractExpression exp_element) {
                if (exp_element instanceof TupleValueExpression) {
                    TupleValueExpression tv_exp = (TupleValueExpression) exp_element;
                    int orig_idx = tv_exp.getColumnIndex();

                    // If we're in a NestLoopJoin (and not a
                    // NestLoopIndexJoin), then what we need to
                    // do is take the original offset (which points to a
                    // column in the original inner input), and s
                    Integer new_idx = offset_xref.get(orig_idx);

                    if (new_idx == null) {
                        LOG.debug(PlanNodeUtil.debug(PlanNodeUtil.getRoot(node)));
                        LOG.debug(state.plannerContext.debug());
                    }
                    assert (new_idx != null) : String.format("Missing New Offset of Original Offset %02d:\n%s", orig_idx, ExpressionUtil.debug(tv_exp));
                    if (orig_idx != new_idx) {
                        if (debug.val)
                            LOG.debug(String.format("Changing offset for %s.%s [%d ==> %d]", tv_exp.getTableName(), tv_exp.getColumnName(), orig_idx, new_idx));
                        tv_exp.setColumnIndex(new_idx);
                    }
                }
            }
        }.traverse(exp);
    }

    // Then update the output columns to reflect the change
node.setOutputColumns(new_output_guids); for (int new_idx = 0, cnt = node.getOutputColumnGUIDs().size(); new_idx < cnt; new_idx++) { Integer col_guid = node.getOutputColumnGUIDs().get(new_idx); PlanColumn pc = state.plannerContext.get(col_guid); // Look at what our offset used versus what it is needs to be // If it's different, then we need to make a new PlanColumn. // Note that we will clone TupleValueExpression so that we do not // mess with // other PlanColumns // Assume that AbstractExpression is always a TupleValueExpression TupleValueExpression tv_exp = (TupleValueExpression) pc.getExpression(); assert (tv_exp != null); int orig_idx = tv_exp.getColumnIndex(); // assert(new_idx == offset_xref.get(orig_idx)) : // String.format("Offset Mismatch [orig_idx=%d] => [%d] != [%d]:\noffset_xref = %s\n%s", // orig_idx, new_idx, offset_xref.get(orig_idx), offset_xref, // PlanNodeUtil.debugNode(element)); if (orig_idx != new_idx) { TupleValueExpression clone_exp = null; try { clone_exp = (TupleValueExpression) tv_exp.clone(); } catch (Exception ex) { LOG.fatal(ex); throw new RuntimeException(ex); } assert (clone_exp != null); // compare with child's output columns to see whether orig_idx // or new_idx is correct assert (node.getChildPlanNodeCount() == 1); List<Integer> child_output = node.getChild(0).getOutputColumnGUIDs(); if (orig_idx < child_output.size() && pc.guid() == child_output.get(orig_idx)) { clone_exp.setColumnIndex(orig_idx); } else { clone_exp.setColumnIndex(new_idx); } PlanColumn new_pc = state.plannerContext.getPlanColumn(clone_exp, pc.getDisplayName(), pc.getSortOrder(), pc.getStorage()); assert (new_pc != null); node.getOutputColumnGUIDs().set(new_idx, new_pc.guid()); } if (trace.val) LOG.trace(String.format("OUTPUT[%d] => %s", new_idx, state.plannerContext.get(node.getOutputColumnGUIDs().get(new_idx)))); } // FOR // IMPORTANT: If the inner_node is inline (meaning it was a // NestLoopIndex), then we need to also update // its output columns to match our 
        // This is necessary because the nestloopindexexecutor will generate
        // its output table from the inline node and not from the actual
        // output columns of the join node.
        if (inner_node.isInline()) {
            assert (inner_node instanceof IndexScanPlanNode);
            inner_node.setOutputColumns(node.getOutputColumnGUIDs());
            if (trace.val)
                LOG.trace("Updated INNER inline " + inner_node + " output columns");
        }
        state.markDirty(node);
        return (true);
    }

    /**
     * Correct any mismatched column-reference offsets in the output columns
     * of the join nodes (NestLoop + NestLoopIndex) under the given root.
     * A join output column whose TupleValueExpression offset does not match
     * its position is replaced with a freshly registered PlanColumn.
     *
     * @param state the shared optimizer state (PlannerContext, dirty marks)
     * @param root the root of the plan tree to walk
     */
    public static void fixJoinColumnOffsets(final PlanOptimizerState state, AbstractPlanNode root) {
        new PlanNodeTreeWalker(false) {
            @Override
            protected void callback(AbstractPlanNode element) {
                if (element instanceof NestLoopPlanNode || element instanceof NestLoopIndexPlanNode) {
                    // Make sure the column reference offsets of the output
                    // columns are consecutive. If one doesn't match its
                    // position, then we'll have to make a new PlanColumn.
                    for (int i = 0, cnt = element.getOutputColumnGUIDCount(); i < cnt; i++) {
                        Integer col_guid = element.getOutputColumnGUID(i);
                        PlanColumn pc_col = state.plannerContext.get(col_guid);
                        assert (pc_col != null) : "Missing output column " + i + " for " + element;
                        AbstractExpression exp = pc_col.getExpression();
                        if (exp.getExpressionType() == ExpressionType.VALUE_TUPLE && ((TupleValueExpression) exp).getColumnIndex() != i) {
                            // NOTE: You can't just update the TupleValueExpression
                            // in place because other nodes might be referencing
                            // it. We have to clone the expression tree, update the
                            // offset, and then register a new PlanColumn.
                            TupleValueExpression clone_exp = null;
                            try {
                                clone_exp = (TupleValueExpression) exp.clone();
                            } catch (CloneNotSupportedException ex) {
                                LOG.fatal("Unexpected error", ex);
                                throw new RuntimeException(ex);
                            }
                            assert (clone_exp != null);
                            clone_exp.setColumnIndex(i);
                            PlanColumn new_col = state.plannerContext.getPlanColumn(clone_exp, pc_col.getDisplayName(), pc_col.getSortOrder(), pc_col.getStorage());
                            assert (new_col != null);
                            assert (new_col != pc_col);
                            element.getOutputColumnGUIDs().set(i, new_col.guid());
                            if (trace.val)
                                LOG.trace(String.format("Updated %s Output Column at position %d: %s", element, i, new_col));
                        }
                    } // FOR
                }
            }
        }.traverse(root);
    }

    /**
     * Extract all the PlanColumns that we are going to need in the query plan
     * tree above the given node.
     *
     * @param state the shared optimizer state (PlannerContext, column xrefs)
     * @param node the plan node whose upward-referenced columns are wanted
     * @return the set of PlanColumns referenced by nodes above {@code node}
     */
    public static Set<PlanColumn> extractReferencedColumns(final PlanOptimizerState state, final AbstractPlanNode node) {
        if (debug.val)
            LOG.debug("Extracting referenced column set for " + node);
        // Walk up the tree from the current node and figure out which of its
        // columns are referenced above it. That tells us how many columns we
        // can actually project out at this point.
        // Collect the GUIDs of every PlanColumn referenced by a node above us
        final Collection<Integer> col_guids = new ListOrderedSet<Integer>();
        new PlanNodeTreeWalker(true, true) {
            @Override
            protected void callback(AbstractPlanNode element) {
                // If this is the same node that we're examining, then we can
                // skip it. Otherwise, anything that this guy references but
                // nobody else does would incorrectly get included in the
                // projection.
                if (element == node)
                    return;
                if (trace.val)
                    LOG.trace("Examining " + element + " :: " + this.getVisitPath());
                int ctr = 0;
                // ---------------------------------------------------
                // ProjectionPlanNode
                // AbstractScanPlanNode
                // AggregatePlanNode
                // ---------------------------------------------------
                if (element instanceof ProjectionPlanNode || element instanceof AbstractScanPlanNode || element instanceof AggregatePlanNode) {
                    // This set can actually be null because we can parallelize
                    // certain operations on each node so that we don't have to
                    // send the entire data set back to the base partition
                    Collection<Column> col_set = state.getPlanNodeColumns(element);
                    // Check whether we're the top-most join, or that we don't
                    // have any referenced columns
                    if (col_set == null) {
                        ctr += element.getOutputColumnGUIDCount();
                        col_guids.addAll(element.getOutputColumnGUIDs());
                    } else {
                        for (Column col : col_set) {
                            col_guids.add(CollectionUtil.first(state.column_guid_xref.get(col)));
                            ctr++;
                        } // FOR
                    }
                }
                // ---------------------------------------------------
                // DistinctPlanNode
                // ---------------------------------------------------
                else if (element instanceof DistinctPlanNode) {
                    ctr++;
                    col_guids.add(((DistinctPlanNode) element).getDistinctColumnGuid());
                }
                // ---------------------------------------------------
                // OrderByPlanNode
                // ---------------------------------------------------
                else if (element instanceof OrderByPlanNode) {
                    ctr += ((OrderByPlanNode) element).getSortColumnGuids().size();
                    col_guids.addAll(((OrderByPlanNode) element).getSortColumnGuids());
                }
                if (debug.val && ctr > 0)
                    LOG.debug(String.format("%s -> Found %d PlanColumns referenced in %s", node, ctr, element));
            }
        }.traverse(node);
        if (debug.val)
            LOG.debug(String.format("Referenced PlanColumns for %s: %s", node, col_guids));

        // Now extract the PlanColumns that we really want. For each PlanColumn
        // GUID that was referenced up above, check whether it matches a column
        // that this PlanNode knows about in its own output columns, and keep it
        // only if we do not already hold a similar PlanColumn.
        Set<PlanColumn> ref_columns = new ListOrderedSet<PlanColumn>();
        for (Integer col_guid : col_guids) {
            PlanColumn above_pc = state.plannerContext.get(col_guid);

            // Check whether we have anything similar to it in our output
            Pair<PlanColumn, Integer> p = findMatchingColumn(state, above_pc, node.getOutputColumnGUIDs());
            if (p != null) {
                // Now look to see whether we already have a reference to a
                // similar PlanColumn in our set of referenced columns
                boolean exists = false;
                for (PlanColumn existing : ref_columns) {
                    if (above_pc.equals(existing, false, true)) {
                        exists = true;
                        break;
                    }
                } // FOR
                // We didn't find it in our existing set, so add it
                if (exists == false) {
                    ref_columns.add(above_pc);
                    if (trace.val)
                        LOG.trace(String.format("Added PlanColumn #%d to list of referenced columns. [%s]", col_guid, above_pc.getDisplayName()));
                } else if (trace.val) {
                    LOG.trace("Skipped PlanColumn #" + col_guid + " because it already exists.");
                }
            }
        } // FOR
        return (ref_columns);
    }
}