/* This file is part of VoltDB.
* Copyright (C) 2008-2017 VoltDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
*/
package org.voltdb.planner.microoptimizations;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Set;
import org.voltdb.expressions.AbstractExpression;
import org.voltdb.plannodes.IndexSortablePlanNode;
import org.voltdb.plannodes.AbstractPlanNode;
import org.voltdb.plannodes.AbstractScanPlanNode;
import org.voltdb.plannodes.AggregatePlanNode;
import org.voltdb.plannodes.HashAggregatePlanNode;
import org.voltdb.plannodes.MergeReceivePlanNode;
import org.voltdb.plannodes.OrderByPlanNode;
import org.voltdb.plannodes.ReceivePlanNode;
import org.voltdb.plannodes.SendPlanNode;
import org.voltdb.plannodes.WindowFunctionPlanNode;
import org.voltdb.types.PlanNodeType;
/**
 * Micro-optimization that pushes a coordinator-side ORDER BY down into a
 * MERGERECEIVE plan node.  When every partition already delivers its rows
 * in the desired sort order, the coordinator does not need to re-sort the
 * combined result set: the RECEIVE / ORDER BY pair (plus any in-between
 * LIMIT and serializable aggregation) can be replaced by a single
 * MergeReceivePlanNode that merges the pre-ordered partition streams.
 * The ORDER BY (and LIMIT / aggregate) nodes are kept inline inside the
 * MERGERECEIVE node, since they are needed to drive the merge.
 */
public class InlineOrderByIntoMergeReceive extends MicroOptimization {

    /**
     * Walk the plan tree breadth-first looking for the first coordinator
     * ORDERBY or WINDOWFUNCTION node and attempt the MERGERECEIVE
     * conversion there.  The search stops at the first RECEIVE node,
     * because anything below it belongs to the partition fragment.
     *
     * @param planNode root of the plan fragment being optimized; non-null
     * @return the original root, or the replacement root when the top
     *         node itself was converted
     */
    @Override
    protected AbstractPlanNode recursivelyApply(AbstractPlanNode planNode)
    {
        assert(planNode != null);

        // This optimization was interfering with some UPSERT ... FROM
        // queries because once the optimization is applied to a subquery,
        // it becomes difficult to correct the subquery to work in a
        // multi-partition DML statement context. That's no longer
        // simply a matter of removing the Send/Receive pair without
        // side effects.
        if (m_parsedStmt.topmostParentStatementIsDML()) {
            return planNode; // Do not apply the optimization.
        }

        // Breadth-first traversal, seeded with the root.
        Queue<AbstractPlanNode> children = new LinkedList<>();
        children.add(planNode);
        while(!children.isEmpty()) {
            AbstractPlanNode plan = children.remove();
            PlanNodeType nodeType = plan.getPlanNodeType();
            if (PlanNodeType.RECEIVE == nodeType) {
                // Reached a RECEIVE without seeing an ORDER BY or
                // WINDOWFUNCTION above it, so there is nothing to convert.
                // continue. We are after the coordinator ORDER BY or WINDOWFUNCTION node.
                return planNode;
            }
            if (PlanNodeType.ORDERBY == nodeType) {
                assert(plan instanceof OrderByPlanNode);
                AbstractPlanNode newPlan = applyOptimization((OrderByPlanNode)plan);
                // (*) If we have changed plan to newPlan, then the
                // new nodes are inside the tree unless plan is the top.
                // So, return the original argument, planNode, unless
                // we actually changed the top plan node. Then return
                // the new plan node.
                if (newPlan != plan) {
                    // Only one coordinator ORDER BY node is possible
                    if (plan == planNode) {
                        return newPlan;
                    } else {
                        return planNode; // Do not apply the optimization.
                    }
                }
            } else if (PlanNodeType.WINDOWFUNCTION == nodeType) {
                assert(plan instanceof WindowFunctionPlanNode);
                AbstractPlanNode newPlan = applyOptimization((WindowFunctionPlanNode)plan);
                // See above for why this is the way it is.
                // NOTE(review): applyOptimization(WindowFunctionPlanNode)
                // returns its argument on every visible path, so the
                // (newPlan != plan) branch looks unreachable as written --
                // confirm before relying on it.
                if (newPlan != plan) {
                    return newPlan;
                } else {
                    return planNode;
                }
            }
            // Neither a terminating nor a convertible node: keep searching
            // in this node's children.
            for (int i = 0; i < plan.getChildCount(); i++) {
                children.add(plan.getChild(i));
            }
        }
        return planNode; // Do not apply the optimization.
    }

    /**
     * Convert ReceivePlanNodes into MergeReceivePlanNodes when the
     * RECEIVE node's nearest parent is a window function. We won't
     * have any inline limits or aggregates here, so this is somewhat
     * simpler than the order by case.
     *
     * The expected shape is WINDOWFUNCTION -> ORDERBY -> RECEIVE -> SEND,
     * and the conversion only fires when the subplan under the SEND node
     * reports that window function 0 can use its index for ordering.
     *
     * @param plan the WINDOWFUNCTION node whose subtree may be converted
     * @return the same node, with its subtree rewritten in place when the
     *         optimization applies
     */
    private AbstractPlanNode applyOptimization(WindowFunctionPlanNode plan) {
        assert(plan.getChildCount() == 1);
        assert(plan.getChild(0) != null);
        AbstractPlanNode child = plan.getChild(0);
        assert(child != null);
        // SP Plans which have an index which can provide
        // the window function ordering don't create
        // an order by node.
        if ( ! ( child instanceof OrderByPlanNode ) ) {
            return plan;
        }
        OrderByPlanNode onode = (OrderByPlanNode)child;
        child = onode.getChild(0);
        // The order by node needs a RECEIVE node child
        // for this optimization to work.
        if ( ! ( child instanceof ReceivePlanNode)) {
            return plan;
        }
        ReceivePlanNode receiveNode = (ReceivePlanNode)child;
        assert(receiveNode.getChildCount() == 1);
        child = receiveNode.getChild(0);
        // The Receive node needs a send node child.
        assert( child instanceof SendPlanNode );
        SendPlanNode sendNode = (SendPlanNode)child;
        child = sendNode.getChild(0);
        // If this window function does not use the
        // index then this optimization is not possible.
        // We've recorded a number of the window function
        // in the root of the subplan, which will be
        // the first child of the send node.
        //
        // Right now the only window function has number
        // 0, and we don't record that in the
        // WINDOWFUNCTION plan node. If there were
        // more than one window function we would need
        // to record a number in the plan node and
        // then check that child.getWindowFunctionUsesIndex()
        // returns the number in the plan node.
        if ( ! ( child instanceof IndexSortablePlanNode)) {
            return plan;
        }
        IndexSortablePlanNode indexed = (IndexSortablePlanNode)child;
        if (indexed.indexUse().getWindowFunctionUsesIndex() != 0) {
            return plan;
        }
        // Remove the Receive node and the Order by node
        // and replace them with a MergeReceive node. Leave
        // the order by node inline in the MergeReceive node,
        // since we need it to calculate the merge.
        plan.clearChildren();
        receiveNode.removeFromGraph();
        MergeReceivePlanNode mrnode = new MergeReceivePlanNode();
        mrnode.addInlinePlanNode(onode);
        mrnode.addAndLinkChild(sendNode);
        plan.addAndLinkChild(mrnode);
        return plan;
    }

    /**
     * For MP queries, the coordinator's OrderBy node can be replaced with
     * a specialized Receive node that merges individual partitions results
     * into a final result set if the partitions result set is sorted
     * in the order matching the ORDER BY order
     *
     * @param orderbyNode - ORDER BY node to optimize
     * @return optimized plan root (the ORDER BY's former parent, the new
     *         MERGERECEIVE node, or the unchanged ORDER BY node when the
     *         optimization does not apply)
     */
    AbstractPlanNode applyOptimization(OrderByPlanNode orderbyNode) {
        // Find all child RECEIVE nodes. We are not interested in the MERGERECEIVE nodes there
        // because they could only come from subqueries.
        List<AbstractPlanNode> receives = orderbyNode.findAllNodesOfType(PlanNodeType.RECEIVE);
        if (receives.isEmpty()) {
            return orderbyNode;
        }
        assert(receives.size() == 1);
        ReceivePlanNode receive = (ReceivePlanNode)receives.get(0);

        // Make sure that this receive node belongs to the same coordinator fragment that
        // the ORDER BY node does. Alternatively, it could belong to a distributed subquery.
        // Walk up the tree starting at the receive node until we hit either a scan node
        // (distributed subquery) or the original order by node (distributed order by)
        // Collect all nodes that are currently in between ORDER BY and RECEIVE nodes
        // If the optimization is possible, they will be converted to inline nodes of
        // the MERGE RECEIVE node. The expected node types are:
        // LIMIT, AGGREGATE/PARTIALAGGREGATE/HASHAGGREGATE
        // The HASHAGGREGATE must be convertible to AGGREGATE or PARTIALAGGREGATE for optimization
        // to be applicable.
        // LIMIT can be already inline with ORDER BY node
        AbstractPlanNode limitNode = orderbyNode.getInlinePlanNode(PlanNodeType.LIMIT);
        AbstractPlanNode aggregateNode = null;
        AbstractPlanNode inlineCandidate = receive.getParent(0);
        while (orderbyNode != inlineCandidate) {
            if (inlineCandidate instanceof AbstractScanPlanNode) {
                // it's a subquery
                return orderbyNode;
            }
            PlanNodeType nodeType = inlineCandidate.getPlanNodeType();
            if (nodeType == PlanNodeType.LIMIT && limitNode == null) {
                limitNode = inlineCandidate;
            } else if ((nodeType == PlanNodeType.AGGREGATE || nodeType == PlanNodeType.PARTIALAGGREGATE) &&
                    aggregateNode == null) {
                aggregateNode = inlineCandidate;
            } else if (nodeType == PlanNodeType.HASHAGGREGATE && aggregateNode == null) {
                // A hash aggregate is only acceptable if it can be rewritten
                // as a serial or partial aggregate over the sorted stream.
                aggregateNode = convertToSerialAggregation(inlineCandidate, orderbyNode);
                if (PlanNodeType.HASHAGGREGATE == aggregateNode.getPlanNodeType()) {
                    return orderbyNode;
                }
            } else {
                // Don't know how to handle this node or there is already a node of this type
                return orderbyNode;
            }
            // move up one node
            assert(inlineCandidate.getParentCount() == 1);
            inlineCandidate = inlineCandidate.getParent(0);
        }

        // The merge is only valid if each partition's output already
        // arrives in the ORDER BY order.
        assert(receive.getChildCount() == 1);
        AbstractPlanNode partitionRoot = receive.getChild(0);
        if (!partitionRoot.isOutputOrdered(orderbyNode.getSortExpressions(), orderbyNode.getSortDirections())) {
            // Partition results are not ordered
            return orderbyNode;
        }

        // At this point we confirmed that the optimization is applicable.
        // Short circuit the current ORDER BY parent (if such exists) and
        // the new MERGERECEIVE node. All in-between nodes will be inlined
        assert (orderbyNode.getParentCount() <= 1);
        AbstractPlanNode rootNode = (orderbyNode.getParentCount() == 1) ? orderbyNode.getParent(0) : null;
        MergeReceivePlanNode mergeReceive = new MergeReceivePlanNode();
        assert(receive.getChildCount() == 1);
        mergeReceive.addAndLinkChild(receive.getChild(0));
        receive.removeFromGraph();
        if (rootNode == null) {
            rootNode = mergeReceive;
        } else {
            // NOTE(review): clearChildren() drops ALL of rootNode's
            // children before relinking only the MERGERECEIVE node; this
            // assumes orderbyNode's subtree was rootNode's only child --
            // confirm against the possible plan shapes.
            rootNode.clearChildren();
            rootNode.addAndLinkChild(mergeReceive);
        }

        // Add inline ORDER BY node and remove inline LIMIT node if any
        mergeReceive.addInlinePlanNode(orderbyNode);
        if (limitNode != null) {
            orderbyNode.removeInlinePlanNode(PlanNodeType.LIMIT);
        }

        // Add inline aggregate
        if (aggregateNode != null) {
            if (limitNode != null) {
                // Inline LIMIT with aggregate
                aggregateNode.addInlinePlanNode(limitNode);
            }
            mergeReceive.addInlinePlanNode(aggregateNode);
        }
        // Add LIMIT if it is exist and wasn't inline with aggregate node
        if (limitNode != null && aggregateNode == null) {
            mergeReceive.addInlinePlanNode(limitNode);
        }

        // return the new root
        return rootNode;
    }

    /**
     * The Hash aggregate can be converted to a Serial or Partial aggregate if
     * - all GROUP BY and ORDER BY expressions bind to each other - Serial Aggregate
     * - a subset of the GROUP BY expressions covers all of the ORDER BY - Partial
     * - anything else - remains a Hash Aggregate
     * @param aggregateNode a HASHAGGREGATE node (asserted)
     * @param orderbyNode the ORDER BY node supplying the sort expressions
     * @return new aggregate node if the conversion is possible or the original hash aggregate otherwise
     */
    AbstractPlanNode convertToSerialAggregation(AbstractPlanNode aggregateNode, OrderByPlanNode orderbyNode) {
        assert(aggregateNode instanceof HashAggregatePlanNode);
        HashAggregatePlanNode hashAggr = (HashAggregatePlanNode) aggregateNode;
        // Work on copies so the original node's expression lists are untouched.
        List<AbstractExpression> groupbys = new ArrayList<>(hashAggr.getGroupByExpressions());
        List<AbstractExpression> orderbys = new ArrayList<>(orderbyNode.getSortExpressions());
        Set<Integer> coveredGroupByColumns = new HashSet<>();

        // Match each ORDER BY expression against a not-yet-matched GROUP BY
        // expression; matched ORDER BY entries are removed via the iterator.
        Iterator<AbstractExpression> orderbyIt = orderbys.iterator();
        while (orderbyIt.hasNext()) {
            AbstractExpression orderby = orderbyIt.next();
            int idx = 0;
            for (AbstractExpression groupby : groupbys) {
                if (!coveredGroupByColumns.contains(idx)) {
                    if (orderby.equals(groupby)) {
                        orderbyIt.remove();
                        coveredGroupByColumns.add(idx);
                        break;
                    }
                }
                ++idx;
            }
        }
        if (orderbys.isEmpty() && groupbys.size() == coveredGroupByColumns.size()) {
            // All GROUP BY expressions are also ORDER BY - Serial aggregation
            return AggregatePlanNode.convertToSerialAggregatePlanNode(hashAggr);
        }
        if (orderbys.isEmpty() && !coveredGroupByColumns.isEmpty() ) {
            // Partial aggregation
            List<Integer> coveredGroupByColumnList = new ArrayList<>();
            coveredGroupByColumnList.addAll(coveredGroupByColumns);
            return AggregatePlanNode.convertToPartialAggregatePlanNode(hashAggr, coveredGroupByColumnList);
        }
        // Neither condition held: keep the hash aggregate unchanged.
        return aggregateNode;
    }
}