package com.taobao.tddl.optimizer.costbased.pusher;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import com.taobao.tddl.optimizer.core.ASTNodeFactory;
import com.taobao.tddl.optimizer.core.ast.ASTNode;
import com.taobao.tddl.optimizer.core.ast.QueryTreeNode;
import com.taobao.tddl.optimizer.core.ast.query.JoinNode;
import com.taobao.tddl.optimizer.core.ast.query.MergeNode;
import com.taobao.tddl.optimizer.core.ast.query.QueryNode;
import com.taobao.tddl.optimizer.core.ast.query.TableNode;
import com.taobao.tddl.optimizer.core.expression.IBooleanFilter;
import com.taobao.tddl.optimizer.core.expression.IColumn;
import com.taobao.tddl.optimizer.core.expression.IFunction;
import com.taobao.tddl.optimizer.core.expression.IOrderBy;
import com.taobao.tddl.optimizer.core.expression.ISelectable;
import com.taobao.tddl.optimizer.core.plan.query.IJoin.JoinStrategy;
import com.taobao.tddl.optimizer.exceptions.OptimizerException;
/**
* 将merge/join中的order by条件下推,包括隐式的order by条件,比如将groupBy转化为orderBy
*
* <pre>
* a. 如果orderBy中包含function,也不做下推
* b. 如果orderBy中的的column字段来自于子节点的函数查询,也不做下推
* c. 如果join是SortMergeJoin,会下推join列
*
* 比如: tabl1.join(table2).on("table1.id=table2.id").orderBy("id")
* 转化为:table.orderBy(id).join(table2).on("table1.id=table2.id")
*
* 下推例子:
* 1.
* 父节点:order by c1 ,c2 ,c3
* 子节点: order by c1, c2
* 优化:下推c3
*
* 2.
* 父节点:order by c2 ,c3 (顺序不一致,强制下推)
* 子节点: order by c2, c3,无limit
* 优化:不下推
*
* 3.
* 父节点:order by c2 ,c3 (顺序不一致,因为子节点存在limit,不可下推order by)
* 子节点: order by c2, c3,存在limit
* 优化:不下推
*
* 4.
* 父节点:order by c1, c2 ,c3
* 子节点: 无
* 优化:下推c1,c2,c3
*
* 5.
* 父节点:order by count(*) (函数不下推)
* 子节点: 无
* 优化:不下推
*
* @author jianghang 2013-12-10 下午5:33:16
* @since 5.0.0
*/
public class OrderByPusher {
/**
* 详细优化见类描述 {@linkplain OrderByPusher}
*/
public static QueryTreeNode optimize(QueryTreeNode qtn) {
qtn = optimizeDistinct(qtn);
qtn = pushOrderBy(qtn);
return qtn;
}
/**
* 处理Merge节点的distinct处理,需要底下节点先做排序
*
* <pre>
* 排序优先级:
* groupBy > orderby > distinct
* </pre>
*/
private static QueryTreeNode optimizeDistinct(QueryTreeNode qtn) {
for (ASTNode child : ((QueryTreeNode) qtn).getChildren()) {
if (child instanceof QueryTreeNode) {
optimizeDistinct((QueryTreeNode) child);
}
}
if (qtn instanceof MergeNode) {
MergeNode merge = (MergeNode) qtn;
if (!(merge.getChild() instanceof QueryTreeNode)) {
return merge;
}
if (containsDistinctColumns(merge)) {
for (ASTNode con : merge.getChildren()) {
QueryTreeNode child = (QueryTreeNode) con;
// 去掉function函数,比如count(distinct id)
List<IFunction> toRemove = new ArrayList();
for (ISelectable s : child.getColumnsSelected()) {
if (s instanceof IFunction) {
toRemove.add((IFunction) s);
}
}
child.getColumnsSelected().removeAll(toRemove);
child.build();
// 重新构建order by / group by字段
if (!child.getGroupBys().isEmpty()) {
List<IOrderBy> implicitOrderBys = child.getImplicitOrderBys();
// 如果order by包含了所有的group by顺序
if (containAllOrderBys(implicitOrderBys, child.getGroupBys())) {
child.setOrderBys(implicitOrderBys);
} else {
// order by不是一个group by的子集,优先使用group by
child.setOrderBys(child.getGroupBys());
}
}
// 将查询所有字段进行order by,保证每个child返回的数据顺序都是一致的
List<IOrderBy> distinctOrderbys = new LinkedList<IOrderBy>();
for (ISelectable s : child.getColumnsSelected()) {
IOrderBy order = ASTNodeFactory.getInstance().createOrderBy();
order.setColumn(s).setDirection(true);
distinctOrderbys.add(order);
}
// 尝试调整下distinct的order by顺序,调整不了的话,按照distinct columns顺序
List<IOrderBy> orderbys = getPushOrderBysCombileOrderbyColumns(distinctOrderbys,
child.getOrderBys());
if (orderbys.isEmpty()) {
child.setOrderBys(distinctOrderbys);
} else {
child.setOrderBys(orderbys);
}
// 清空child的group by,由merge节点进行处理
child.setGroupBys(new ArrayList(0));
}
// merge上的函数设置为distinct标记
for (ISelectable s : merge.getColumnsSelected()) {
if (s instanceof IFunction && isDistinct(s)) {
((IFunction) s).setNeedDistinctArg(true);
}
}
return merge;
}
} else if (qtn instanceof JoinNode || qtn instanceof QueryNode) {
if (qtn.getGroupBys() != null && !qtn.getGroupBys().isEmpty()) {
// 如果存在group by + distinct,暂时无法做优化
return qtn;
}
if (containsDistinctColumns(qtn)) {
// 将查询所有字段进行order by,保证每个child返回的数据顺序都是一致的
List<IOrderBy> distinctOrderbys = new LinkedList<IOrderBy>();
for (ISelectable s : qtn.getColumnsSelected()) {
IOrderBy order = ASTNodeFactory.getInstance().createOrderBy();
order.setColumn(s).setDirection(true);
distinctOrderbys.add(order);
}
List<IOrderBy> orderbys = getPushOrderBysCombileOrderbyColumns(distinctOrderbys, qtn.getOrderBys());
if (!orderbys.isEmpty()) {
// 尝试合并order by和distinct成功,则设置当前order by
qtn.setOrderBys(orderbys);
}
}
}
return qtn;
}
private static QueryTreeNode pushOrderBy(QueryTreeNode qtn) {
if (qtn instanceof MergeNode) {
MergeNode merge = (MergeNode) qtn;
if (!(merge.getChild() instanceof QueryTreeNode)) {
return qtn;
}
if (merge.isUnion()) {
List<IOrderBy> standardOrder = ((QueryTreeNode) merge.getChild()).getImplicitOrderBys();
for (ASTNode child : merge.getChildren()) {
((QueryTreeNode) child).setOrderBys(new ArrayList(0));
// 优先以主键为准
if (child instanceof TableNode) {
if (((TableNode) child).getIndexUsed().isPrimaryKeyIndex()) {
standardOrder = ((TableNode) child).getImplicitOrderBys();
}
}
}
for (ASTNode child : merge.getChildren()) {
((QueryTreeNode) child).setOrderBys(standardOrder);
((QueryTreeNode) child).build();
}
} else {
for (ASTNode child : merge.getChildren()) {
if (!(child instanceof QueryTreeNode)) {
continue;
}
// 比如merge节点同时存在order by/group by
// 1. 优先下推父节点的group by到子节点
// 2. 然后去掉子节点的group by
QueryTreeNode qn = (QueryTreeNode) child;
if (qn.getOrderBys() != null && !qn.getOrderBys().isEmpty() && qn.getGroupBys() != null
&& !qn.getGroupBys().isEmpty()) {
// 正常的shard生成的MergeNode
List<IOrderBy> standardOrder = qn.getImplicitOrderBys();
// order by不是一个group by的子集,优先使用group by
if (!containAllOrderBys(standardOrder, qn.getGroupBys())) {
standardOrder = qn.getGroupBys();
}
// 需要考虑,如果子节点的列中存在聚合函数,则不能去除子节点的group by,否则语义不正确
// order by中可能有desc的倒排语法
// 目前的做法是设置orderby/groupby使用相同的列
qn.setOrderBys(standardOrder);
qn.setGroupBys(standardOrder);
qn.having("");
qn.build();
}
}
}
} else if (qtn instanceof JoinNode) {
// index nested loop中的order by,可以推到左节点
JoinNode join = (JoinNode) qtn;
if (join.getJoinStrategy() == JoinStrategy.INDEX_NEST_LOOP
|| join.getJoinStrategy() == JoinStrategy.NEST_LOOP_JOIN) {
List<IOrderBy> orders = getPushOrderBys(join, join.getImplicitOrderBys(), join.getLeftNode(), true);
pushJoinOrder(orders, join.getLeftNode(), join.isUedForIndexJoinPK());
} else if (join.getJoinStrategy() == JoinStrategy.SORT_MERGE_JOIN) {
// 如果是sort merge join中的order by,需要推到左/右节点
List<IOrderBy> implicitOrders = join.getImplicitOrderBys();
Map<ISelectable, IOrderBy> orderColumnsMap = new HashMap<ISelectable, IOrderBy>();
for (IOrderBy order : implicitOrders) {
orderColumnsMap.put(order.getColumn(), order);
}
List<IOrderBy> leftJoinColumnOrderbys = new ArrayList<IOrderBy>();
List<IOrderBy> rightJoinColumnOrderbys = new ArrayList<IOrderBy>();
for (IBooleanFilter joinFilter : join.getJoinFilter()) {
ISelectable column = (ISelectable) joinFilter.getColumn();
ISelectable value = (ISelectable) joinFilter.getValue();
if (!(column instanceof IColumn && value instanceof IColumn)) {
throw new OptimizerException("join列出现函数列,下推函数orderby过于复杂,此时sort merge join无法支持");
}
// 复制下隐藏orderby的asc/desc
boolean asc = true;
IOrderBy o = orderColumnsMap.get(column);
if (o != null) {
asc = o.getDirection();
}
o = orderColumnsMap.get(value);
if (o != null) {
asc = o.getDirection();
}
leftJoinColumnOrderbys.add(ASTNodeFactory.getInstance()
.createOrderBy()
.setColumn(column)
.setDirection(asc));
rightJoinColumnOrderbys.add(ASTNodeFactory.getInstance()
.createOrderBy()
.setColumn(value)
.setDirection(asc));
}
// 调整下join orderBys的顺序,尽可能和原始的order by顺序一致,这样可以有利于下推
adjustJoinColumnByImplicitOrders(leftJoinColumnOrderbys, rightJoinColumnOrderbys, implicitOrders);
// 先推join列,
// 如果join列推失败了,比如join列是函数列,这问题就蛋疼了,需要提前做判断
List<IOrderBy> leftOrders = getPushOrderBys(join, leftJoinColumnOrderbys, join.getLeftNode(), true);
pushJoinOrder(leftOrders, join.getLeftNode(), join.isUedForIndexJoinPK());
List<IOrderBy> rightOrders = getPushOrderBys(join, rightJoinColumnOrderbys, join.getRightNode(), true);
pushJoinOrder(rightOrders, join.getRightNode(), join.isUedForIndexJoinPK());
if (!implicitOrders.isEmpty()) {
// group by + order by的隐藏列,如果和join列前缀相同,则下推,否则忽略
// 已经推了一次join column,这里不能再强推了
leftOrders = getPushOrderBys(join, implicitOrders, join.getLeftNode(), false);
rightOrders = getPushOrderBys(join, implicitOrders, join.getRightNode(), false);
if (!leftOrders.isEmpty() || !rightOrders.isEmpty()) {
pushJoinOrder(leftOrders, join.getLeftNode(), join.isUedForIndexJoinPK());
pushJoinOrder(rightOrders, join.getRightNode(), join.isUedForIndexJoinPK());
} else {
// 尝试一下只推group by的排序,减少一层排序
if (join.getGroupBys() != null && !join.getGroupBys().isEmpty()) {
List<IOrderBy> leftImplicitOrders = getPushOrderBysCombileOrderbyColumns(join.getGroupBys(),
leftJoinColumnOrderbys);
List<IOrderBy> rightImplicitOrders = getPushOrderBysCombileOrderbyColumns(join.getGroupBys(),
rightJoinColumnOrderbys);
// 重置下group by的顺序
if (!leftImplicitOrders.isEmpty()) {
join.setGroupBys(leftImplicitOrders);
} else if (!rightImplicitOrders.isEmpty()) {
join.setGroupBys(rightImplicitOrders);
}
leftOrders = getPushOrderBys(join, leftImplicitOrders, join.getLeftNode(), false);
rightOrders = getPushOrderBys(join, rightImplicitOrders, join.getRightNode(), false);
if (!leftOrders.isEmpty() || !rightOrders.isEmpty()) {
pushJoinOrder(leftOrders, join.getLeftNode(), join.isUedForIndexJoinPK());
pushJoinOrder(rightOrders, join.getRightNode(), join.isUedForIndexJoinPK());
}
}
}
}
}
join.build();
} else if (qtn instanceof QueryNode) {
// 可以将order推到子查询
QueryNode query = (QueryNode) qtn;
List<IOrderBy> orders = getPushOrderBys(query, query.getImplicitOrderBys(), query.getChild(), true);
if (orders != null && !orders.isEmpty()) {
for (IOrderBy order : orders) {
query.getChild().orderBy(order.getColumn(), order.getDirection());
}
}
query.build();
}
for (ASTNode child : ((QueryTreeNode) qtn).getChildren()) {
if (child instanceof QueryTreeNode) {
pushOrderBy((QueryTreeNode) child);
}
}
return qtn;
}
/**
* 判断是否存在distinct函数
*/
private static boolean containsDistinctColumns(QueryTreeNode qc) {
for (ISelectable c : qc.getColumnsSelected()) {
if (isDistinct(c)) {
return true;
}
}
return false;
}
private static boolean isDistinct(ISelectable s) {
if (s.isDistinct()) {
return true;
}
if (s instanceof IFunction) {
for (Object arg : ((IFunction) s).getArgs()) {
if (arg instanceof ISelectable) {
if (isDistinct((ISelectable) arg)) {
return true;
}
}
}
}
return false;
}
private static boolean containAllOrderBys(List<IOrderBy> orderBys, List<IOrderBy> groupBys) {
for (IOrderBy group : groupBys) {
boolean found = false;
for (IOrderBy order : orderBys) {
if (order.getColumn().equals(group.getColumn())) {
found = true;
break;
}
}
if (!found) {
return false;
}
}
return true;
}
/**
* 调整join列,按照order by的顺序,有利于下推
*/
private static void adjustJoinColumnByImplicitOrders(List<IOrderBy> leftJoinOrders, List<IOrderBy> rightJoinOrders,
List<IOrderBy> implicitOrders) {
// 调整下join orderBys的顺序,尽可能和原始的order by顺序一致,这样可以有利于下推
for (int i = 0; i < implicitOrders.size(); i++) {
if (i >= leftJoinOrders.size()) {
return;
}
IOrderBy order = implicitOrders.get(i);
int leftIndex = findOrderByByColumn(leftJoinOrders, order.getColumn());
int rightIndex = findOrderByByColumn(rightJoinOrders, order.getColumn());
int index = -1;
if (leftIndex >= 0 && leftIndex != i) { // 判断位置是否相同
index = leftIndex;
} else if (rightIndex >= 0 && rightIndex != i) {
index = rightIndex;
}
if (index >= 0) {
// 交换位置一下
IOrderBy tmp = leftJoinOrders.get(i);
leftJoinOrders.set(i, leftJoinOrders.get(index));
leftJoinOrders.set(index, tmp);
tmp = rightJoinOrders.get(i);
rightJoinOrders.set(i, rightJoinOrders.get(index));
rightJoinOrders.set(index, tmp);
} else {
return;// join列中没有order by的字段,直接退出
}
}
}
/**
* 尝试对比父节点中的orderby和子节点的orderby顺序,如果前缀一致,则找出末尾的order by字段进行返回
*
* <pre>
* 比如
* 1.
* 父节点:order by c1 ,c2 ,c3
* 子节点: order by c1, c2
*
* 返回为c3
*
* 2.
* 父节点:order by c2 ,c3
* 子节点: order by c1, c2,不存在limit
*
* 返回为c2,c3
*
* 3.
* 父节点:order by c2 ,c3
* 子节点: order by c1, c2,存在limit
*
* 返回为空
*
* 4.
* 父节点:order by c1, c2 ,c3
* 子节点: 无
*
* 返回为c1,c2,c3
*
* 5.
* 父节点:order by count(*) (函数不下推)
* 子节点: 无
*
* 返回空
* </pre>
*/
private static List<IOrderBy> getPushOrderBys(QueryTreeNode qtn, List<IOrderBy> implicitOrderBys,
QueryTreeNode child, boolean forcePush) {
List<IOrderBy> newOrderBys = new LinkedList<IOrderBy>();
List<IOrderBy> targetOrderBys = child.getOrderBys();
if (implicitOrderBys == null || implicitOrderBys.size() == 0) {
return new LinkedList<IOrderBy>();
}
for (int i = 0; i < implicitOrderBys.size(); i++) {
IOrderBy order = implicitOrderBys.get(i);
ISelectable column = qtn.findColumn(order.getColumn());// 找到select或者是meta中的字段
if (!(column != null && column instanceof IColumn)) {
// 可能orderby的字段为当前select的函数列
return new LinkedList<IOrderBy>();
}
// 在子节点中找一次,转化为子节点中的字段信息,比如表名,这样才可以和orderby字段做比较
column = child.findColumn(column);
if (!(column != null && column instanceof IColumn)) {
// 可能orderby的字段为当前select的函数列
return new LinkedList<IOrderBy>();
}
forcePush &= (child.getLimitFrom() == null && child.getLimitTo() == null);
if (!forcePush) {
// 如果非强制下推,判断一下orderby顺序
if (targetOrderBys != null && targetOrderBys.size() > i) {
IOrderBy targetOrder = targetOrderBys.get(i);
if (!(column.equals(targetOrder.getColumn()) && order.getDirection()
.equals(targetOrder.getDirection()))) {
// 如果不相同
return new LinkedList<IOrderBy>();
}
} else {
// 如果出现父类的长度>子类
IOrderBy newOrder = order.copy().setColumn(column.copy().setAlias(null));
newOrderBys.add(newOrder);
}
} else {
// 直接复制
IOrderBy newOrder = order.copy().setColumn(column.copy().setAlias(null));
newOrderBys.add(newOrder);
}
}
if (!newOrderBys.isEmpty() && forcePush) {
// 干掉子节点原本的order by
child.setOrderBys(new LinkedList<IOrderBy>());
}
return newOrderBys;
}
/**
* <pre>
* 1. 尝试组合group by和join列的排序字段,以join列顺序为准,重排groupBys顺序
* 2. 尝试组合order by列和distinct列的排序字段,以order列顺序为准,重排distinct顺序
* </pre>
*/
private static List<IOrderBy> getPushOrderBysCombileOrderbyColumns(List<IOrderBy> matchOrders,
List<IOrderBy> standardOrders) {
List<IOrderBy> newOrderbys = new ArrayList<IOrderBy>();
for (IOrderBy standardOrder : standardOrders) {
if (findOrderByByColumn(matchOrders, standardOrder.getColumn()) >= 0) {
newOrderbys.add(standardOrder);
} else {
return new ArrayList<IOrderBy>(); // 返回一般的顺序没用
}
}
for (IOrderBy matchOrder : matchOrders) {
// 找到join column中没有的进行添加
if (findOrderByByColumn(newOrderbys, matchOrder.getColumn()) < 0) {
newOrderbys.add(matchOrder);
}
}
return newOrderbys;
}
/**
* 尝试查找一个同名的排序字段,返回下标,-1代表没找到
*/
protected static int findOrderByByColumn(List<IOrderBy> orderbys, ISelectable column) {
for (int i = 0; i < orderbys.size(); i++) {
IOrderBy order = orderbys.get(i);
if (order.getColumn().equals(column)) {
return i;
}
}
return -1;
}
private static void pushJoinOrder(List<IOrderBy> orders, QueryTreeNode qn, boolean isUedForIndexJoinPK) {
if (orders != null && !orders.isEmpty()) {
for (IOrderBy order : orders) {
if (qn.hasColumn(order.getColumn())) {
qn.orderBy(order.getColumn(), order.getDirection());
} else if (isUedForIndexJoinPK) {
// 尝试忽略下表名查找一下
ISelectable newC = order.getColumn().copy().setTableName(null);
if (qn.hasColumn(newC)) {
qn.orderBy(order.getColumn(), order.getDirection());
}
}
}
qn.build();
}
}
}