/* This file is part of VoltDB.
 * Copyright (C) 2008-2017 VoltDB Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with VoltDB.  If not, see <http://www.gnu.org/licenses/>.
 */

package org.voltdb.planner.microoptimizations;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Set;

import org.voltdb.expressions.AbstractExpression;
import org.voltdb.plannodes.IndexSortablePlanNode;
import org.voltdb.plannodes.AbstractPlanNode;
import org.voltdb.plannodes.AbstractScanPlanNode;
import org.voltdb.plannodes.AggregatePlanNode;
import org.voltdb.plannodes.HashAggregatePlanNode;
import org.voltdb.plannodes.MergeReceivePlanNode;
import org.voltdb.plannodes.OrderByPlanNode;
import org.voltdb.plannodes.ReceivePlanNode;
import org.voltdb.plannodes.SendPlanNode;
import org.voltdb.plannodes.WindowFunctionPlanNode;
import org.voltdb.types.PlanNodeType;

/**
 * Micro-optimization that replaces a coordinator-side ORDER BY sitting above
 * a RECEIVE node with a MERGERECEIVE node carrying the ORDER BY (and, where
 * present, LIMIT and aggregate nodes) inline.
 */
public class InlineOrderByIntoMergeReceive extends MicroOptimization {

    /**
     * Walks the plan breadth-first from {@code planNode} looking for the
     * first ORDER BY or WINDOWFUNCTION node that appears before any RECEIVE
     * node, and tries to rewrite the plan at that node.
     *
     * @param planNode root of the plan to examine; must not be null
     * @return the root of the resulting plan: a new node only when the
     *         rewritten node was itself the root, otherwise {@code planNode}
     */
    @Override
    protected AbstractPlanNode recursivelyApply(AbstractPlanNode planNode) {
        assert(planNode != null);

        // The optimization used to interfere with some UPSERT ... FROM
        // queries: once applied to a subquery, the subquery is hard to
        // correct for a multi-partition DML context, because that is no
        // longer just a matter of dropping the Send/Receive pair without
        // side effects. So DML statements are left untouched.
        if (m_parsedStmt.topmostParentStatementIsDML()) {
            return planNode;
        }

        Queue<AbstractPlanNode> pending = new LinkedList<>();
        pending.add(planNode);
        while (!pending.isEmpty()) {
            AbstractPlanNode current = pending.remove();
            PlanNodeType currentType = current.getPlanNodeType();

            if (PlanNodeType.RECEIVE == currentType) {
                // Only the coordinator ORDER BY / WINDOWFUNCTION node is of
                // interest; reaching a RECEIVE first means there is none.
                return planNode;
            }

            if (PlanNodeType.ORDERBY == currentType) {
                assert(current instanceof OrderByPlanNode);
                AbstractPlanNode rewritten = applyOptimization((OrderByPlanNode) current);
                if (rewritten != current) {
                    // The plan was rewritten, and only one coordinator
                    // ORDER BY is possible, so stop here. The replacement
                    // nodes are already linked into the tree unless the
                    // ORDER BY was the top node; only in that case does the
                    // caller need the new root.
                    return (current == planNode) ? rewritten : planNode;
                }
            } else if (PlanNodeType.WINDOWFUNCTION == currentType) {
                assert(current instanceof WindowFunctionPlanNode);
                AbstractPlanNode rewritten = applyOptimization((WindowFunctionPlanNode) current);
                // The search always ends at a window function node, whether
                // or not the rewrite was applied.
                return (rewritten != current) ? rewritten : planNode;
            }

            final int childCount = current.getChildCount();
            for (int idx = 0; idx < childCount; ++idx) {
                pending.add(current.getChild(idx));
            }
        }
        // Nothing to rewrite was found.
        return planNode;
    }

    /**
     * Converts the RECEIVE node below a window function into a MERGERECEIVE
     * node when the RECEIVE node's nearest parent chain is
     * WINDOWFUNCTION -> ORDER BY -> RECEIVE. There are no inline limits or
     * aggregates to deal with here, so this is simpler than the ORDER BY case.
     *
     * @param plan the window function node to optimize; expected to have
     *             exactly one child
     * @return {@code plan} itself, modified in place when the optimization
     *         applies and untouched otherwise
     */
    private AbstractPlanNode applyOptimization(WindowFunctionPlanNode plan) {
        assert(plan.getChildCount() == 1);
        assert(plan.getChild(0) != null);
        AbstractPlanNode windowInput = plan.getChild(0);
        assert(windowInput != null);

        // Single-partition plans whose index already provides the window
        // function ordering do not create an ORDER BY node at all.
        if (!(windowInput instanceof OrderByPlanNode)) {
            return plan;
        }
        OrderByPlanNode orderByNode = (OrderByPlanNode) windowInput;

        // The ORDER BY node must sit directly on top of a RECEIVE node.
        AbstractPlanNode orderByInput = orderByNode.getChild(0);
        if (!(orderByInput instanceof ReceivePlanNode)) {
            return plan;
        }
        ReceivePlanNode receiveNode = (ReceivePlanNode) orderByInput;
        assert(receiveNode.getChildCount() == 1);

        // A RECEIVE node is expected to be fed by a SEND node.
        AbstractPlanNode receiveInput = receiveNode.getChild(0);
        assert(receiveInput instanceof SendPlanNode);
        SendPlanNode sendNode = (SendPlanNode) receiveInput;

        // The root of the subplan below the SEND node records which window
        // function, if any, its index serves. Right now the only window
        // function has number 0 and that number is not stored in the
        // WINDOWFUNCTION plan node; with more than one window function the
        // number would have to be recorded there and compared here.
        AbstractPlanNode fragmentRoot = sendNode.getChild(0);
        if (!(fragmentRoot instanceof IndexSortablePlanNode)) {
            return plan;
        }
        IndexSortablePlanNode sortableRoot = (IndexSortablePlanNode) fragmentRoot;
        if (sortableRoot.indexUse().getWindowFunctionUsesIndex() != 0) {
            return plan;
        }

        // Replace the RECEIVE and ORDER BY nodes with a MERGERECEIVE node.
        // The ORDER BY node is kept inline in the MERGERECEIVE node because
        // it is needed to compute the merge.
        plan.clearChildren();
        receiveNode.removeFromGraph();
        MergeReceivePlanNode mergeReceive = new MergeReceivePlanNode();
        mergeReceive.addInlinePlanNode(orderByNode);
        mergeReceive.addAndLinkChild(sendNode);
        plan.addAndLinkChild(mergeReceive);
        return plan;
    }

    /**
     * For MP queries, the coordinator's ORDER BY node can be replaced with a
     * specialized receive node that merges the individual partition results
     * into the final result set, provided each partition's result set is
     * already sorted in the order the ORDER BY asks for.
     *
     * @param orderbyNode the ORDER BY node to optimize
     * @return {@code orderbyNode} when the optimization does not apply;
     *         otherwise the former parent of the ORDER BY node, or the new
     *         MERGERECEIVE node when the ORDER BY node had no parent
     */
    AbstractPlanNode applyOptimization(OrderByPlanNode orderbyNode) {
        // Look for RECEIVE nodes below the ORDER BY. MERGERECEIVE nodes are
        // not of interest: they could only come from subqueries.
        List<AbstractPlanNode> receives = orderbyNode.findAllNodesOfType(PlanNodeType.RECEIVE);
        if (receives.isEmpty()) {
            return orderbyNode;
        }
        assert(receives.size() == 1);
        ReceivePlanNode receive = (ReceivePlanNode) receives.get(0);

        // Climb from the RECEIVE node towards the ORDER BY node to confirm
        // both belong to the same coordinator fragment (meeting a scan node
        // first means the RECEIVE belongs to a distributed subquery) and to
        // collect the nodes in between. Those nodes become inline nodes of
        // the MERGERECEIVE node. Only one LIMIT and one aggregate are
        // supported; a HASHAGGREGATE must be convertible to AGGREGATE or
        // PARTIALAGGREGATE. The LIMIT may already be inline in the ORDER BY.
        AbstractPlanNode limitNode = orderbyNode.getInlinePlanNode(PlanNodeType.LIMIT);
        AbstractPlanNode aggregateNode = null;
        AbstractPlanNode candidate = receive.getParent(0);
        while (candidate != orderbyNode) {
            if (candidate instanceof AbstractScanPlanNode) {
                // The RECEIVE node is part of a subquery.
                return orderbyNode;
            }

            PlanNodeType candidateType = candidate.getPlanNodeType();
            if (candidateType == PlanNodeType.LIMIT) {
                if (limitNode != null) {
                    // A LIMIT has already been collected.
                    return orderbyNode;
                }
                limitNode = candidate;
            } else if (candidateType == PlanNodeType.AGGREGATE
                    || candidateType == PlanNodeType.PARTIALAGGREGATE
                    || candidateType == PlanNodeType.HASHAGGREGATE) {
                if (aggregateNode != null) {
                    // An aggregate has already been collected.
                    return orderbyNode;
                }
                if (candidateType == PlanNodeType.HASHAGGREGATE) {
                    AbstractPlanNode converted = convertToSerialAggregation(candidate, orderbyNode);
                    if (PlanNodeType.HASHAGGREGATE == converted.getPlanNodeType()) {
                        // The hash aggregate could not be converted.
                        return orderbyNode;
                    }
                    aggregateNode = converted;
                } else {
                    aggregateNode = candidate;
                }
            } else {
                // A node type this optimization does not know how to inline.
                return orderbyNode;
            }

            // Step up to the next node.
            assert(candidate.getParentCount() == 1);
            candidate = candidate.getParent(0);
        }

        assert(receive.getChildCount() == 1);
        AbstractPlanNode partitionRoot = receive.getChild(0);
        boolean partitionsAreSorted = partitionRoot.isOutputOrdered(
                orderbyNode.getSortExpressions(), orderbyNode.getSortDirections());
        if (!partitionsAreSorted) {
            return orderbyNode;
        }

        // The optimization applies from here on. Connect the ORDER BY's
        // parent (if there is one) straight to the new MERGERECEIVE node;
        // everything that used to be in between ends up inline.
        assert(orderbyNode.getParentCount() <= 1);
        AbstractPlanNode orderByParent = null;
        if (orderbyNode.getParentCount() == 1) {
            orderByParent = orderbyNode.getParent(0);
        }

        MergeReceivePlanNode mergeReceive = new MergeReceivePlanNode();
        assert(receive.getChildCount() == 1);
        mergeReceive.addAndLinkChild(receive.getChild(0));
        receive.removeFromGraph();

        final AbstractPlanNode newRoot;
        if (orderByParent == null) {
            newRoot = mergeReceive;
        } else {
            orderByParent.clearChildren();
            orderByParent.addAndLinkChild(mergeReceive);
            newRoot = orderByParent;
        }

        // The ORDER BY goes inline into the MERGERECEIVE node.
        mergeReceive.addInlinePlanNode(orderbyNode);

        // The LIMIT, if any, is taken off the ORDER BY and goes inline into
        // the aggregate when there is one, otherwise into the MERGERECEIVE.
        if (limitNode != null) {
            AbstractPlanNode limitHost = (aggregateNode != null) ? aggregateNode : mergeReceive;
            orderbyNode.removeInlinePlanNode(PlanNodeType.LIMIT);
            limitHost.addInlinePlanNode(limitNode);
        }
        if (aggregateNode != null) {
            mergeReceive.addInlinePlanNode(aggregateNode);
        }

        return newRoot;
    }

    /**
     * Tries to convert a hash aggregate into a serial or partial aggregate:
     * <ul>
     * <li>every ORDER BY expression matches a distinct GROUP BY expression
     *     and no GROUP BY expression is left over - serial aggregate;</li>
     * <li>every ORDER BY expression matches a distinct GROUP BY expression
     *     but some GROUP BY expressions are left over - partial aggregate;</li>
     * <li>anything else - the hash aggregate is kept.</li>
     * </ul>
     *
     * @param aggregateNode the aggregate node; expected to be a
     *                      {@link HashAggregatePlanNode}
     * @param orderbyNode   the ORDER BY node supplying the sort expressions
     * @return the converted aggregate node when a conversion is possible,
     *         otherwise {@code aggregateNode} unchanged
     */
    AbstractPlanNode convertToSerialAggregation(AbstractPlanNode aggregateNode, OrderByPlanNode orderbyNode) {
        assert(aggregateNode instanceof HashAggregatePlanNode);
        HashAggregatePlanNode hashAggr = (HashAggregatePlanNode) aggregateNode;
        List<AbstractExpression> groupbys = new ArrayList<>(hashAggr.getGroupByExpressions());
        List<AbstractExpression> orderbys = new ArrayList<>(orderbyNode.getSortExpressions());

        // Pair each ORDER BY expression with the first equal GROUP BY
        // expression that has not been claimed by an earlier ORDER BY.
        Set<Integer> coveredGroupByColumns = new HashSet<>();
        boolean everyOrderByMatched = true;
        for (AbstractExpression orderby : orderbys) {
            boolean matched = false;
            for (int idx = 0; idx < groupbys.size(); ++idx) {
                if (!coveredGroupByColumns.contains(idx) && orderby.equals(groupbys.get(idx))) {
                    coveredGroupByColumns.add(idx);
                    matched = true;
                    break;
                }
            }
            if (!matched) {
                everyOrderByMatched = false;
            }
        }

        if (!everyOrderByMatched) {
            // Some ORDER BY expression is not a GROUP BY expression.
            return aggregateNode;
        }
        if (groupbys.size() == coveredGroupByColumns.size()) {
            // Every GROUP BY expression is also an ORDER BY expression.
            return AggregatePlanNode.convertToSerialAggregatePlanNode(hashAggr);
        }
        if (!coveredGroupByColumns.isEmpty()) {
            // Only some of the GROUP BY expressions are ordered.
            List<Integer> coveredGroupByColumnList = new ArrayList<>(coveredGroupByColumns);
            return AggregatePlanNode.convertToPartialAggregatePlanNode(hashAggr, coveredGroupByColumnList);
        }
        return aggregateNode;
    }
}