package com.taobao.tddl.optimizer.costbased;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang.StringUtils;
import com.alibaba.cobar.parser.ast.stmt.SQLStatement;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.taobao.tddl.common.TddlConstants;
import com.taobao.tddl.common.exception.NotSupportException;
import com.taobao.tddl.common.exception.TddlException;
import com.taobao.tddl.common.exception.TddlRuntimeException;
import com.taobao.tddl.common.jdbc.ParameterContext;
import com.taobao.tddl.common.jdbc.SqlTypeParser;
import com.taobao.tddl.common.model.ExtraCmd;
import com.taobao.tddl.common.model.SqlType;
import com.taobao.tddl.common.model.lifecycle.AbstractLifecycle;
import com.taobao.tddl.common.utils.logger.Logger;
import com.taobao.tddl.common.utils.logger.LoggerFactory;
import com.taobao.tddl.monitor.Monitor;
import com.taobao.tddl.optimizer.Optimizer;
import com.taobao.tddl.optimizer.OptimizerContext;
import com.taobao.tddl.optimizer.core.ASTNodeFactory;
import com.taobao.tddl.optimizer.core.ast.ASTNode;
import com.taobao.tddl.optimizer.core.ast.DMLNode;
import com.taobao.tddl.optimizer.core.ast.QueryTreeNode;
import com.taobao.tddl.optimizer.core.ast.dml.DeleteNode;
import com.taobao.tddl.optimizer.core.ast.dml.InsertNode;
import com.taobao.tddl.optimizer.core.ast.dml.PutNode;
import com.taobao.tddl.optimizer.core.ast.dml.UpdateNode;
import com.taobao.tddl.optimizer.core.ast.query.JoinNode;
import com.taobao.tddl.optimizer.core.ast.query.MergeNode;
import com.taobao.tddl.optimizer.core.ast.query.QueryNode;
import com.taobao.tddl.optimizer.core.ast.query.TableNode;
import com.taobao.tddl.optimizer.core.plan.IDataNodeExecutor;
import com.taobao.tddl.optimizer.core.plan.query.IMerge;
import com.taobao.tddl.optimizer.costbased.after.ChooseTreadOptimizer;
import com.taobao.tddl.optimizer.costbased.after.FillRequestIDAndSubRequestID;
import com.taobao.tddl.optimizer.costbased.after.FuckAvgOptimizer;
import com.taobao.tddl.optimizer.costbased.after.LimitOptimizer;
import com.taobao.tddl.optimizer.costbased.after.MergeConcurrentOptimizer;
import com.taobao.tddl.optimizer.costbased.after.MergeJoinMergeOptimizer;
import com.taobao.tddl.optimizer.costbased.after.QueryPlanOptimizer;
import com.taobao.tddl.optimizer.costbased.after.StreamingOptimizer;
import com.taobao.tddl.optimizer.costbased.chooser.DataNodeChooser;
import com.taobao.tddl.optimizer.costbased.chooser.JoinChooser;
import com.taobao.tddl.optimizer.costbased.pusher.FilterPusher;
import com.taobao.tddl.optimizer.costbased.pusher.OrderByPusher;
import com.taobao.tddl.optimizer.exceptions.QueryException;
import com.taobao.tddl.optimizer.exceptions.SqlParserException;
import com.taobao.tddl.optimizer.parse.SqlAnalysisResult;
import com.taobao.tddl.optimizer.parse.SqlParseManager;
import com.taobao.tddl.optimizer.parse.cobar.CobarSqlAnalysisResult;
import com.taobao.tddl.optimizer.parse.cobar.CobarSqlParseManager;
import com.taobao.tddl.optimizer.parse.cobar.visitor.MysqlOutputVisitor;
import com.taobao.tddl.optimizer.parse.hint.DirectlyRouteCondition;
import com.taobao.tddl.optimizer.parse.hint.RouteCondition;
import com.taobao.tddl.optimizer.parse.hint.RuleRouteCondition;
import com.taobao.tddl.optimizer.parse.hint.SimpleHintParser;
import com.taobao.tddl.rule.model.TargetDB;
/**
* <pre>
* 此优化器是根据开销进行优化的,主要优化流程在public IQueryCommon optimize(QueryTreeNode qn)中
* 分为两部分进行
* a. 第一部分,对关系查询树的优化,包含以下几个步骤:
* s1.将SELECT提前,放到叶子节点进行 SELECT列提前进行可以减少数据量
* 由于一些列是作为连接列的,他们不在最后的SELECT中
* (比如SELECT table1.id from table1 join table2 on table1.name=table2.name table1.name和table2.name作为连接列)
* 在对table1与table2的查询中应该保存,同时在执行执行结束后,需要将table1.name与table2.name去除.
* 所以在执行这一步的时候,需要保存中间需要的临时列. 在生成执行计划后,需要将这些列从最后的节点中删除。
* 效果是:
* 原SQL:table1.join(table2).addJoinColumns("id","id").select("table1.id table2.id")
* 转换为:table1.select("table1.id").join(tabl2.select("table2.id")).addJoinColumns("id","id")
*
* s2.将Join中连接列上的约束条件复制到另一边
* 比如SELECT * from table1 join table2 on table1.id=table2.id where table1.id = 1
* 因为Join是在table1.id与table2.id上的,所以table2.id上同样存在约束table2.id=1,此步就是需要发现这些条件,并将它复制。
* 效果是:
* 原SQL: table1.query("id=1").join(table2).addJoinColumns("id","id")
* 转换为:table1.query("id=1").join(table2.query("id=2")).addJoinColumns("id","id")
*
* s3.将约束条件提前,约束条件提前进行可以减少结果集的行数,并且可以合并QueryNode
* 效果是:
* 原SQL: table1.join(table2).addJoinColumns("id","id").query("table1.name=1")
* 转换为: table1.query("table1.name=1").join(table2).addJoinColumns("id","id")
*
* s4.找到并遍历每种个子查询,调整其Join顺序,并为其选择Join策略
*
* s5.所有子查询优化之后,再调整这个查询树的Join顺序
* 对Join顺序调整的依据是通过计算开销,开销主要包括两种:
* 1. 磁盘IO与网络传输 详细计算方式请参见CostEstimater实现类的相关注释
* 2. 对Join顺序的遍历使用的是最左树 在此步中,还会对同一列的约束条件进行合并等操作
* 选取策略见JoinChooser的注释
*
* s6.将s1中生成的临时列删除
*
* s7.将查询树转换为原始的执行计划树
*
* 第二部分,对执行计划树的优化,包含以下几个步骤:
* s8.为执行计划的每个节点选择执行的GroupNode
* 这一步是根据TDDL的规则进行分库 在Join,Merge的执行节点选择上,遵循的原则是尽量减少网络传输
*
* s9.调整分库后的Join节点
* 由于分库后,一个Query节点可能会变成一个Merge节点,需要对包含这样子节点的Join节点进行调整,详细见splitJoinAfterChooseDataNode的注释
* </pre>
*
* @since 5.0.0
*/
public class CostBasedOptimizer extends AbstractLifecycle implements Optimizer {
private static final String _DIRECT = "_DIRECT_";
private static final Logger logger = LoggerFactory.getLogger(CostBasedOptimizer.class);
private int cacheSize = 1000;
private long expireTime = TddlConstants.DEFAULT_OPTIMIZER_EXPIRE_TIME;
private SqlParseManager sqlParseManager;
private Cache<String, OptimizeResult> optimizedResults;
private final List<QueryPlanOptimizer> afterOptimizers = new ArrayList<QueryPlanOptimizer>();
@Override
protected void doInit() throws TddlException {
// after处理
afterOptimizers.add(new FuckAvgOptimizer());
afterOptimizers.add(new ChooseTreadOptimizer());
afterOptimizers.add(new FillRequestIDAndSubRequestID());
afterOptimizers.add(new LimitOptimizer());
afterOptimizers.add(new MergeJoinMergeOptimizer());
afterOptimizers.add(new MergeConcurrentOptimizer());
afterOptimizers.add(new StreamingOptimizer());
if (this.sqlParseManager == null) {
CobarSqlParseManager sqlParseManager = new CobarSqlParseManager();
sqlParseManager.setCacheSize(cacheSize);
sqlParseManager.setExpireTime(expireTime);
this.sqlParseManager = sqlParseManager;
}
if (!sqlParseManager.isInited()) {
sqlParseManager.init(); // 启动
}
optimizedResults = CacheBuilder.newBuilder()
.maximumSize(1000)
.expireAfterWrite(expireTime, TimeUnit.MILLISECONDS)
.build();
}
@Override
protected void doDestory() throws TddlException {
optimizedResults.invalidateAll();
sqlParseManager.destory();
}
private class OptimizeResult {
public ASTNode optimized = null;
public QueryException ex = null;
}
@Override
public IDataNodeExecutor optimizeAndAssignment(ASTNode node, Map<Integer, ParameterContext> parameterSettings,
Map<String, Object> extraCmd) throws QueryException {
return optimizeAndAssignment(node, parameterSettings, extraCmd, null, false);
}
@Override
public IDataNodeExecutor optimizeAndAssignment(String sql, Map<Integer, ParameterContext> parameterSettings,
Map<String, Object> extraCmd, boolean cached) throws QueryException,
SqlParserException {
// 处理sql hint
RouteCondition routeCondition = SimpleHintParser.convertHint2RouteCondition(sql, parameterSettings);
if (routeCondition != null && !routeCondition.getExtraCmds().isEmpty()) {
// 合并sql中的extra cmd参数
if (extraCmd == null) {
extraCmd = new HashMap<String, Object>();
}
extraCmd.putAll(routeCondition.getExtraCmds());
}
if (routeCondition != null
&& (routeCondition instanceof DirectlyRouteCondition || routeCondition instanceof RuleRouteCondition)) {
sql = SimpleHintParser.removeHint(sql, parameterSettings);
return optimizerHint(sql, cached, routeCondition, parameterSettings, extraCmd);
} else {
return optimizeAndAssignment(null, parameterSettings, extraCmd, sql, cached);
}
}
private IDataNodeExecutor optimizerHint(String sql, boolean cached, RouteCondition routeCondition,
Map<Integer, ParameterContext> parameterSettings,
Map<String, Object> extraCmd) {
long time = System.currentTimeMillis();
IDataNodeExecutor qc;
String groupHint = SimpleHintParser.extractTDDLGroupHintString(sql);
// 基于hint直接构造执行计划
if (routeCondition instanceof DirectlyRouteCondition) {
DirectlyRouteCondition drc = (DirectlyRouteCondition) routeCondition;
if (!drc.getTables().isEmpty()) {
SqlAnalysisResult result = sqlParseManager.parse(sql, cached);
Map<String, String> sqls = buildDirectSqls(result,
drc.getVirtualTableName(),
drc.getTables(),
groupHint);
qc = buildDirectPlan(result.getSqlType(), drc.getDbId(), sqls);
} else {
// 直接下推sql时,不做任何sql解析
Map<String, String> sqls = new HashMap<String, String>();
sqls.put(_DIRECT, sql);
qc = buildDirectPlan(SqlTypeParser.getSqlType(sql), drc.getDbId(), sqls);
}
} else if (routeCondition instanceof RuleRouteCondition) {
RuleRouteCondition rrc = (RuleRouteCondition) routeCondition;
SqlAnalysisResult result = sqlParseManager.parse(sql, cached);
boolean isWrite = (result.getSqlType() != SqlType.SELECT && result.getSqlType() != SqlType.SELECT_FOR_UPDATE);
List<TargetDB> targetDBs = OptimizerContext.getContext()
.getRule()
.shard(rrc.getVirtualTableName(), rrc.getCompMapChoicer(), isWrite);
// 考虑表名可能有重复
Set<String> tables = new HashSet<String>();
for (TargetDB target : targetDBs) {
tables.addAll(target.getTableNames());
}
Map<String, String> sqls = buildDirectSqls(result, rrc.getVirtualTableName(), tables, groupHint);
qc = buildRulePlain(result.getSqlType(), targetDBs, sqls);
} else {
throw new NotSupportException("RouteCondition : " + routeCondition.toString());
}
// 进行一些自定义的额外处理
for (QueryPlanOptimizer after : afterOptimizers) {
qc = after.optimize(qc, parameterSettings, extraCmd);
}
if (logger.isDebugEnabled()) {
logger.warn(qc.toString());
}
time = Monitor.monitorAndRenewTime(Monitor.KEY1,
Monitor.AndOrExecutorOptimize,
Monitor.Key3Success,
System.currentTimeMillis() - time);
return qc;
}
private IDataNodeExecutor optimizeAndAssignment(final ASTNode node,
final Map<Integer, ParameterContext> parameterSettings,
final Map<String, Object> extraCmd, final String sql,
final boolean cached) throws QueryException {
long time = System.currentTimeMillis();
ASTNode optimized = null;
if (cached && sql != null && !sql.isEmpty()) {
OptimizeResult or;
try {
or = optimizedResults.get(sql, new Callable<OptimizeResult>() {
@Override
public OptimizeResult call() throws Exception {
OptimizeResult or = new OptimizeResult();
try {
SqlAnalysisResult result = sqlParseManager.parse(sql, true);
or.optimized = optimize(result.getAstNode(parameterSettings), parameterSettings, extraCmd);
} catch (Exception e) {
if (e instanceof QueryException) {
or.ex = (QueryException) e;
} else {
or.ex = new QueryException(e);
}
}
return or;
}
});
} catch (ExecutionException e1) {
throw new QueryException("Optimizer future task interrupted,the sql is:" + sql, e1);
}
if (or.ex != null) {
throw or.ex;
}
optimized = or.optimized.deepCopy();
optimized.build();
} else {
if (node == null) {
SqlAnalysisResult result = sqlParseManager.parse(sql, cached);
optimized = this.optimize(result.getAstNode(parameterSettings), parameterSettings, extraCmd);
} else {
optimized = this.optimize(node, parameterSettings, extraCmd);
}
}
if (parameterSettings != null) {
optimized.assignment(parameterSettings);
// 绑定变量后,再做一次
if (optimized instanceof DMLNode) {
((DMLNode) optimized).setNode((TableNode) FilterPreProcessor.optimize(((DMLNode) optimized).getNode(),
false));
} else {
optimized = FilterPreProcessor.optimize(((QueryTreeNode) optimized), false);
}
}
// 分库,选择执行节点
try {
optimized = DataNodeChooser.shard(optimized, parameterSettings, extraCmd);
} catch (Exception e) {
if (e instanceof QueryException) {
throw (QueryException) e;
} else {
throw new QueryException(e);
}
}
optimized = this.createMergeForJoin(optimized, extraCmd);
if (optimized instanceof QueryTreeNode) {
OrderByPusher.optimize((QueryTreeNode) optimized);
}
IDataNodeExecutor qc = optimized.toDataNodeExecutor();
// 进行一些自定义的额外处理
for (QueryPlanOptimizer after : afterOptimizers) {
qc = after.optimize(qc, parameterSettings, extraCmd);
}
if (logger.isDebugEnabled()) {
logger.warn(qc.toString());
}
time = Monitor.monitorAndRenewTime(Monitor.KEY1,
Monitor.AndOrExecutorOptimize,
Monitor.Key3Success,
System.currentTimeMillis() - time);
return qc;
}
public ASTNode optimize(ASTNode node, Map<Integer, ParameterContext> parameterSettings, Map<String, Object> extraCmd)
throws QueryException {
// 先调用一次build,完成select字段信息的推导
node.build();
ASTNode optimized = null;
if (node instanceof QueryTreeNode) {
optimized = this.optimizeQuery((QueryTreeNode) node, extraCmd);
}
if (node instanceof InsertNode) {
optimized = this.optimizeInsert((InsertNode) node, extraCmd);
}
else if (node instanceof DeleteNode) {
optimized = this.optimizeDelete((DeleteNode) node, extraCmd);
}
else if (node instanceof UpdateNode) {
optimized = this.optimizeUpdate((UpdateNode) node, extraCmd);
}
else if (node instanceof PutNode) {
optimized = this.optimizePut((PutNode) node, extraCmd);
}
return optimized;
}
private QueryTreeNode optimizeQuery(QueryTreeNode qn, Map<String, Object> extraCmd) throws QueryException {
// / 预先处理子查询
qn = SubQueryPreProcessor.optimize(qn);
qn = JoinPreProcessor.optimize(qn);
// 预处理filter,比如过滤永假式/永真式
qn = FilterPreProcessor.optimize(qn, true);
// 将约束条件推向叶节点
qn = FilterPusher.optimize(qn);
// 找到每一个子查询,并进行优化
qn = JoinChooser.optimize(qn, extraCmd);
// 完成之前build
qn.build();
return qn;
}
private ASTNode optimizeUpdate(UpdateNode update, Map<String, Object> extraCmd) throws QueryException {
update.build();
if (extraCmd == null) {
extraCmd = new HashMap();
}
// update暂不允许使用索引
extraCmd.put(ExtraCmd.CHOOSE_INDEX, "FALSE");
QueryTreeNode queryCommon = this.optimizeQuery(update.getNode(), extraCmd);
queryCommon.build();
update.setNode((TableNode) queryCommon);
return update;
}
private ASTNode optimizeInsert(InsertNode insert, Map<String, Object> extraCmd) throws QueryException {
insert.setNode((TableNode) insert.getNode().convertToJoinIfNeed());
return insert;
}
private ASTNode optimizeDelete(DeleteNode delete, Map<String, Object> extraCmd) throws QueryException {
QueryTreeNode queryCommon = this.optimizeQuery(delete.getNode(), extraCmd);
delete.setNode((TableNode) queryCommon);
return delete;
}
private ASTNode optimizePut(PutNode put, Map<String, Object> extraCmd) throws QueryException {
return put;
}
// ============= helper method =============
/**
* 通过visitor替换表名生成sql
*/
private Map<String, /* table name */String/* sql */> buildDirectSqls(SqlAnalysisResult sqlAnalysisResult,
String vtab, Collection<String> tables,
String groupHint) {
if (groupHint == null) {
groupHint = "";
}
Map<String, String> sqls = new HashMap<String, String>();
// 指定分库分表,直接下推sql
// 目前先考虑只有一张表的表名需要替换
SQLStatement statement = ((CobarSqlAnalysisResult) sqlAnalysisResult).getStatement();
boolean singleNode = (tables.size() > 1);
if (StringUtils.isNotEmpty(vtab) && !tables.isEmpty()) {
String[] vtabs = StringUtils.split(vtab, ',');
Map<String, String> logicTable2RealTable = new HashMap<String, String>();
for (String realTable : tables) {
String[] rtabs = StringUtils.split(realTable, ',');
if (rtabs.length != vtabs.length) {
throw new TddlRuntimeException("hint中逻辑表和真实表数量不匹配");
}
int i = 0;
for (String v : vtabs) {
logicTable2RealTable.put(v, rtabs[i++]);
}
MysqlOutputVisitor sqlVisitor = new MysqlOutputVisitor(new StringBuilder(groupHint),
singleNode,
logicTable2RealTable);
statement.accept(sqlVisitor);
sqls.put(realTable, sqlVisitor.getSql());
}
} else {
// 没有执行表设置,直接下推sql,不需要做表名替换
sqls.put(_DIRECT, sqlAnalysisResult.getSql());
}
return sqls;
}
/**
* 根据规则生成对应的执行计划
*/
private IDataNodeExecutor buildRulePlain(SqlType sqlType, List<TargetDB> targetDBs, Map<String, String> sqls) {
List<IDataNodeExecutor> subs = new ArrayList<IDataNodeExecutor>();
for (TargetDB target : targetDBs) {
for (String table : target.getTableNames()) {
subs.add(buildOneDirectPlan(sqlType, target.getDbIndex(), sqls.get(table)));
}
}
if (subs.size() > 1) {
IMerge merge = ASTNodeFactory.getInstance().createMerge();
for (IDataNodeExecutor sub : subs) {
merge.addSubNode(sub);
}
merge.executeOn(subs.get(0).getDataNode());// 选择第一个
return merge;
} else {
return subs.get(0);
}
}
/**
* 根据指定的库和表生成执行计划
*/
private IDataNodeExecutor buildDirectPlan(SqlType sqlType, String dbId, Map<String, String> sqls) {
if (sqls.size() > 1) {
IMerge merge = ASTNodeFactory.getInstance().createMerge();
for (String sql : sqls.values()) {
merge.addSubNode(buildOneDirectPlan(sqlType, dbId, sql));
}
merge.executeOn(dbId);
return merge;
} else {
return buildOneDirectPlan(sqlType, dbId, sqls.values().iterator().next());
}
}
private IDataNodeExecutor buildOneDirectPlan(SqlType sqlType, String dbId, String sql) {
IDataNodeExecutor executor = null;
switch (sqlType) {
case SELECT:
executor = ASTNodeFactory.getInstance().createQuery();
break;
case UPDATE:
executor = ASTNodeFactory.getInstance().createUpdate();
break;
case DELETE:
executor = ASTNodeFactory.getInstance().createDelete();
break;
case INSERT:
executor = ASTNodeFactory.getInstance().createInsert();
break;
case REPLACE:
executor = ASTNodeFactory.getInstance().createReplace();
break;
default:
break;
}
if (executor != null) {
executor.setSql(sql);
executor.executeOn(dbId);
}
return executor;
}
private ASTNode createMergeForJoin(ASTNode dne, Map<String, Object> extraCmd) {
if (dne instanceof MergeNode) {
for (ASTNode sub : ((MergeNode) dne).getChildren()) {
this.createMergeForJoin(sub, extraCmd);
}
}
if (dne instanceof JoinNode) {
this.createMergeForJoin(((JoinNode) dne).getLeftNode(), extraCmd);
this.createMergeForJoin(((JoinNode) dne).getRightNode(), extraCmd);
// 特殊处理子查询
if (((JoinNode) dne).getRightNode() instanceof QueryNode) {
QueryNode right = (QueryNode) ((JoinNode) dne).getRightNode();
if (right.getDataNode() != null) {
// right和join节点跨机,则需要右边生成Merge来做mget
if (!right.getDataNode().equals(dne.getDataNode())) {
MergeNode merge = new MergeNode();
merge.merge(right);
merge.setSharded(false);
merge.executeOn(right.getDataNode());
merge.build();
((JoinNode) dne).setRightNode(merge);
}
}
}
}
if (dne instanceof QueryNode) {
if (((QueryNode) dne).getChild() != null) {
this.createMergeForJoin(((QueryNode) dne).getChild(), extraCmd);
}
}
return dne;
}
public void setCacheSize(int cacheSize) {
this.cacheSize = cacheSize;
}
public void setExpireTime(long expireTime) {
this.expireTime = expireTime;
}
public void setSqlParseManager(SqlParseManager sqlParseManager) {
this.sqlParseManager = sqlParseManager;
}
}