package com.taobao.tddl.optimizer.costbased.esitimater;
import java.util.List;
import com.taobao.tddl.optimizer.OptimizerContext;
import com.taobao.tddl.optimizer.config.table.IndexMeta;
import com.taobao.tddl.optimizer.core.ast.QueryTreeNode;
import com.taobao.tddl.optimizer.core.ast.query.QueryNode;
import com.taobao.tddl.optimizer.core.ast.query.TableNode;
import com.taobao.tddl.optimizer.core.expression.IFilter;
import com.taobao.tddl.optimizer.core.expression.IFilter.OPERATION;
import com.taobao.tddl.optimizer.costbased.esitimater.stat.KVIndexStat;
import com.taobao.tddl.optimizer.costbased.esitimater.stat.TableStat;
import com.taobao.tddl.optimizer.exceptions.StatisticsUnavailableException;
import com.taobao.tddl.optimizer.utils.FilterUtils;
/**
* @author Dreamond
*/
public class QueryNodeCostEstimater implements QueryTreeCostEstimater {
public Cost estimate(QueryTreeNode q) throws StatisticsUnavailableException {
QueryTreeNode query = (QueryTreeNode) q;
Cost cost = new Cost();
long rowCount = 0;
long initRowCount = 0;
long scanRowCount = 0;
boolean isOnfly = false;
// 索引
IndexMeta index = null;
// 索引的选择度
KVIndexStat indexStat = null;
// step1.估算行数
if (query instanceof QueryNode) {
// 查询对象是另一个查询,说明数据是on fly的,根据子查询提供的行数来确定初始行数
Cost childCost = CostEsitimaterFactory.estimate(((QueryNode) query).getChild());
initRowCount = childCost.getRowCount();
isOnfly = true;
} else if (query instanceof TableNode) {
// 查询对象是一个物理表,则根据表的统计信息来获取初始行数
isOnfly = false;
index = ((TableNode) query).getIndexUsed();
TableStat stat = OptimizerContext.getContext()
.getStatManager()
.getTable(((TableNode) query).getTableMeta().getTableName());
indexStat = OptimizerContext.getContext().getStatManager().getKVIndex(index.getName());
if (stat != null) {
initRowCount = stat.getTableRows();
} else {
initRowCount = 1000;
}
}
List<IFilter> keyFilters = FilterUtils.toDNFNode(query.getKeyFilter());
List<IFilter> valueFilters = FilterUtils.toDNFNode(query.getResultFilter());
// 主键是唯一的,如果在主键上进行了=操作,最后结果肯定不超过1
// 对于唯一的列也是同理,但是现在还不支持
// TODO:暂时没有考虑倒排索引
if (this.isAllEqualOrIS(keyFilters) && index != null && index.isPrimaryKeyIndex()) {
rowCount = 1;
scanRowCount = 1;
} else if (query.getLimitFrom() != null
&& (query.getLimitFrom() instanceof Long || query.getLimitFrom() instanceof Long)
&& (Long) query.getLimitFrom() != 0 && query.getLimitTo() != null
&& (query.getLimitTo() instanceof Long || query.getLimitTo() instanceof Long)
&& (Long) query.getLimitTo() != 0) {
// 对于包含limit的查询,使用limit提供的结果
rowCount = (Long) query.getLimitTo() - (Long) query.getLimitFrom();
scanRowCount = CostEsitimaterFactory.estimateRowCount(initRowCount, keyFilters, index, indexStat);
} else if (query.getLimitFrom() != null || query.getLimitTo() != null) {
rowCount = CostEsitimaterFactory.estimateRowCount(initRowCount, keyFilters, index, indexStat) / 2;
scanRowCount = rowCount;
rowCount = CostEsitimaterFactory.estimateRowCount(rowCount, valueFilters, index, indexStat);
} else {
// 对于其他情况,则根据约束条件进行推算
rowCount = CostEsitimaterFactory.estimateRowCount(initRowCount, keyFilters, index, indexStat);
scanRowCount = rowCount;
rowCount = CostEsitimaterFactory.estimateRowCount(rowCount, valueFilters, index, indexStat);
}
long networkCost = 0;
// step2.估计网络开销
if (isOnfly) {
if (query.getDataNode() == null
|| (query.getDataNode().equals(((QueryNode) query).getChild().getDataNode()))) {
// 如果当前的查询和子查询在一台机器上执行,则网络开销为0
networkCost = 0;
} else {
// 如果当前的查询和子查询不在一台机器上,则需要将子查询的数据传输到当前查询的机器上
// 所以网络开销就为子查询结果的行数
// (目前只用行数作为开销的依据,没有考虑字段的大小等复杂因素)
networkCost = initRowCount;
}
} else {
// 如果是对物理表进行查询,则不需要经过网络传输,网络开销为0
networkCost = 0;
}
cost.setRowCount(rowCount);
cost.setNetworkCost(networkCost);
cost.setScanCount(scanRowCount);
return cost;
}
private boolean isAllEqualOrIS(List<IFilter> filters) {
if (filters == null || filters.isEmpty()) {
return false;
}
for (IFilter filter : filters) {
if (filter.getOperation() == OPERATION.IS && filter.getOperation() != OPERATION.EQ) {
return false;
}
}
return true;
}
}