package com.taobao.tddl.optimizer.utils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.StringUtils;
import com.google.common.collect.Lists;
import com.taobao.tddl.optimizer.core.ASTNodeFactory;
import com.taobao.tddl.optimizer.core.expression.IBindVal;
import com.taobao.tddl.optimizer.core.expression.IBooleanFilter;
import com.taobao.tddl.optimizer.core.expression.IColumn;
import com.taobao.tddl.optimizer.core.expression.IFilter;
import com.taobao.tddl.optimizer.core.expression.IFilter.OPERATION;
import com.taobao.tddl.optimizer.core.expression.IFunction;
import com.taobao.tddl.optimizer.core.expression.ILogicalFilter;
import com.taobao.tddl.optimizer.core.expression.ISelectable;
import com.taobao.tddl.optimizer.exceptions.EmptyResultFilterException;
import com.taobao.tddl.optimizer.parse.cobar.visitor.MySqlExprVisitor;
import com.taobao.tddl.optimizer.utils.range.AndRangeProcessor;
import com.taobao.tddl.optimizer.utils.range.OrRangeProcessor;
/**
* 用来做一些布尔表达式的转换,比如我们会将(A and B) OR C => (A and C) or (A and B),析取范式便于做计算<br/>
* 注意,目前处理中不是一个严格的析取范式处理,比如 A and B and C不会进行转化
*
* <pre>
* DNF析取范式:
* a. http://zh.wikipedia.org/zh-cn/析取范式
* b. http://baike.baidu.com/view/143339.htm
*
* 简单析取式: 仅由有限个文字构成的析取式,比如:p,q,p∨q
* 析取范式:由有限个简单合取式构成的析取式,比如 (p∧q)vr
* </pre>
*
* @author Dreamond
* @author jianghang 2013-11-13 下午1:18:53
*/
public class FilterUtils {
// ----------------------- DNF filter处理-------------------------
/**
* 将一个Bool树转换成析取形式 A(B+C)转换为AB+AC
*
* @param node
* @return
*/
public static IFilter toDNFAndFlat(IFilter node) {
if (node == null) {
return null;
}
node = toDNF(node);
node = flatDNFFilter(node);
return node;
}
/**
* 将一个Bool树转换成析取形式 A(B+C)转换为AB+AC,不做拉平处理
*/
public static IFilter toDNF(IFilter node) {
if (node == null) {
return null;
}
while (!isDNF(node)) {
if (node.getOperation().equals(OPERATION.OR)) {
node = passOrNode((ILogicalFilter) node);
} else if (node.getOperation().equals(OPERATION.AND)) {
node = expandAndNode((ILogicalFilter) node);
}
}
return node;
}
private static IFilter passOrNode(ILogicalFilter node) {
for (int i = 0; i < node.getSubFilter().size(); i++) {
node.getSubFilter().set(i, toDNF(node.getSubFilter().get(i)));
}
return node;
}
private static IFilter expandAndNode(ILogicalFilter node) {
if (node.getSubFilter().size() > 2) {
throw new IllegalArgumentException("此处不支持And包含超过两个子节点\n" + node);
}
if (node.getSubFilter().size() == 1) {
return node;
}
if ((!isLogicalNode(node.getSubFilter().get(0))) && (!isLogicalNode(node.getSubFilter().get(1)))) {
return node;
}
node.setLeft(toDNF(node.getLeft()));
node.setRight(toDNF(node.getRight()));
// (A+B)C = AC+BC
boolean isRightOr = node.getRight().getOperation().equals(OPERATION.OR);
boolean isLeftOr = node.getLeft().getOperation().equals(OPERATION.OR);
if (isLeftOr || isRightOr) {
IFilter orNode = node.getLeft();
IFilter otherNode = node.getRight();
if (isRightOr) {
orNode = node.getRight();
otherNode = node.getLeft();
}
ILogicalFilter leftAnd = ASTNodeFactory.getInstance().createLogicalFilter().setOperation(OPERATION.AND);
ILogicalFilter rightAnd = ASTNodeFactory.getInstance().createLogicalFilter().setOperation(OPERATION.AND);
// 构造 AC
leftAnd.setLeft(((ILogicalFilter) orNode).getLeft());
leftAnd.setRight(otherNode);
// 构造 BC
rightAnd.setLeft(((ILogicalFilter) orNode).getRight());
rightAnd.setRight(otherNode);
// 构造AC + BC
ILogicalFilter or = ASTNodeFactory.getInstance().createLogicalFilter().setOperation(OPERATION.OR);
or.addSubFilter(leftAnd).addSubFilter(rightAnd);
return passOrNode(or);
}
return node;
}
/**
* 拉平一个filter树,将多层的嵌套拉平为一层<br/>
* 比如: A and B and (C and D) => A and B and C and D
*/
private static IFilter flatDNFFilter(IFilter node) {
if (!isDNF(node)) {
throw new IllegalArgumentException("filter is not dnf!\n" + node);
}
List<List<IFilter>> dnfNodes = toDNFNodesArray(node);
if (dnfNodes.size() == 1 && dnfNodes.get(0).size() == 1) {
return dnfNodes.get(0).get(0);
}
ILogicalFilter or = ASTNodeFactory.getInstance().createLogicalFilter().setOperation(OPERATION.OR);
for (List<IFilter> dnfNode : dnfNodes) {
if (dnfNode.size() != 1) {
ILogicalFilter and = ASTNodeFactory.getInstance().createLogicalFilter().setOperation(OPERATION.AND);
for (IFilter boolNode : dnfNode) {
and.addSubFilter(boolNode);
}
if (dnfNodes.size() == 1) { // 如果只有一个合取,直接返回合取结果
return and;
}
or.addSubFilter(and);
} else {
or.addSubFilter(dnfNode.get(0));
}
}
return or;
}
/**
* 将一个IFilter全部展开为一个多维数组,会做拉平处理,需要预先调用toDNF/toDNFAndFlat进行预处理转化为DNF范式
*
* <pre>
* 比如:(A and B) or (A and C)
* 返回结果为:
* List-
* List
* -(A , B)
* List
* -(A , C)
* </pre>
*/
public static List<List<IFilter>> toDNFNodesArray(IFilter node) {
if (node == null || !isDNF(node)) {
return Lists.newLinkedList(); // 返回空的数组节点
}
List<List<IFilter>> res = new LinkedList();
if (node.getOperation().equals(OPERATION.OR)) {
for (int i = 0; i < ((ILogicalFilter) node).getSubFilter().size(); i++) {
res.addAll(toDNFNodesArray(((ILogicalFilter) node).getSubFilter().get(i)));
}
} else if (node.getOperation().equals(OPERATION.AND)) {
res.add(toDNFNode(node));
} else {
List<IFilter> DNFNode = new ArrayList<IFilter>(1);
DNFNode.add(node);
res.add(DNFNode);
}
if (res == null || res.isEmpty() || res.get(0) == null || res.get(0).isEmpty() || res.get(0).get(0) == null) {
return new LinkedList<List<IFilter>>();
} else {
return res;
}
}
/**
* 将一个IFilter全部展开为一个平级的数组,不考虑逻辑and/or的组织关系<br/>
* 需要预先调用toDNF/toDNFAndFlat进行预处理转化为DNF范式
*/
public static List<IFilter> toDNFNode(IFilter node) {
List<IFilter> DNFNode = Lists.newLinkedList();
if (node == null) {
return DNFNode;
}
if (!isLogicalNode(node)) {
DNFNode.add(node);
return DNFNode;
}
for (int i = 0; i < ((ILogicalFilter) node).getSubFilter().size(); i++) {
if (!isLogicalNode(((ILogicalFilter) node).getSubFilter().get(i))) {
DNFNode.add(((ILogicalFilter) node).getSubFilter().get(i));
} else {
// 递归处理
DNFNode.addAll(toDNFNode(((ILogicalFilter) node).getSubFilter().get(i)));
}
}
return DNFNode;
}
/**
* 根据column进行filter归类
*/
public static Map<Object, List<IFilter>> toColumnFiltersMap(List<IFilter> DNFNode) {
Map<Object, List<IFilter>> columns = new HashMap(DNFNode.size());
for (IFilter boolNode : DNFNode) {
if (!columns.containsKey(((IBooleanFilter) boolNode).getColumn())) {
columns.put(((IBooleanFilter) boolNode).getColumn(), new LinkedList());
}
columns.get(((IBooleanFilter) boolNode).getColumn()).add(boolNode);
}
return columns;
}
/**
* 非严格DNF检查,允许出现 Filter(A and B)
*/
public static boolean isDNF(IFilter node) {
if (!isLogicalNode(node)) {
return true;
}
if (node.getOperation().equals(OPERATION.AND)) {
boolean isAllBooleanFilter = true;
for (IFilter sub : ((ILogicalFilter) node).getSubFilter()) {
if (isLogicalNode(sub)) {
isAllBooleanFilter = false;
break;
}
}
if (isAllBooleanFilter) {
return true;
}
for (IFilter sub : ((ILogicalFilter) node).getSubFilter()) {
if (sub.getOperation().equals(OPERATION.OR)) { // 子表达式中存在析取
return false;
}
}
}
for (IFilter sub : ((ILogicalFilter) node).getSubFilter()) {
if (!isDNF(sub)) {
return false;
}
}
return true;
}
/**
* 是否为一简单合取式
*/
public static boolean isCNFNode(IFilter node) {
if (node == null) {
return false;
}
if (node.getOperation().equals(OPERATION.AND)) {
for (IFilter f : ((ILogicalFilter) node).getSubFilter())
if (!isCNFNode(f)) {
return false;
}
} else if (node.getOperation().equals(OPERATION.OR)) {
return false;
}
return true;
}
/**
* 判断是否为and/or的组合节点
*/
private static boolean isLogicalNode(IFilter node) {
if (node instanceof ILogicalFilter) {
return true;
}
return false;
}
// -------------------- 智能merge 处理 -----------------
/**
* 将filter中的and/or条件中进行Range合并处理 <br/>
*
* <pre>
* 比如:
* a. A =1 And A =2 ,永远false条件,返回EmptyResultFilterException异常
* b. (1 < A < 5) or (2 < A < 6),合并为 (1 < A < 6)
* c. A <= 1 or A = 1,永远true条件
* </pre>
*/
public static IFilter merge(IFilter filter) throws EmptyResultFilterException {
if (filter == null || filter instanceof IBooleanFilter) {
return filter;
}
// 先转为DNF结构
filter = toDNFAndFlat(filter);
List<List<IFilter>> DNFNodes = toDNFNodesArray(filter);
if (!needToMerge(DNFNodes)) {
return filter;
}
DNFNodes = mergeOrDNFNodes(mergeAndDNFNodesArray(DNFNodes));
if (DNFNodes == null || DNFNodes.isEmpty() || DNFNodes.get(0) == null || DNFNodes.get(0).isEmpty()
|| DNFNodes.get(0).get(0) == null) {
// 返回常量true
IBooleanFilter f = ASTNodeFactory.getInstance().createBooleanFilter();
f.setOperation(OPERATION.CONSTANT);
f.setColumn("1");
f.setColumnName(ObjectUtils.toString("1"));
return f;
} else {
return DNFToOrLogicTree(DNFNodes);
}
}
/**
* 如果filter中包含函数,或者是绑定变量,则不进行merge
*/
private static boolean needToMerge(List<List<IFilter>> dNFNodes) {
for (List<IFilter> DNFNode : dNFNodes) {
for (IFilter filter : DNFNode) {
if (((IBooleanFilter) filter).getValue() instanceof IBindVal
|| ((IBooleanFilter) filter).getValue() instanceof IFunction) {
return false;
}
}
}
return true;
}
/**
* 合并析取式中的And重复条件
*/
private static List<List<IFilter>> mergeAndDNFNodesArray(List<List<IFilter>> DNFNodesBeforeMerge)
throws EmptyResultFilterException {
List<List<IFilter>> nodesAfterMerge = new LinkedList();
for (List<IFilter> DNFNode : DNFNodesBeforeMerge) {
// 每个合取中按照column进行归类
Map<Comparable, List<IFilter>> columnRestrictions = new HashMap();
for (IFilter boolNode : DNFNode) {
Comparable c = (Comparable) ((IBooleanFilter) boolNode).getColumn();
if (!columnRestrictions.containsKey(c)) {
columnRestrictions.put(c, new LinkedList());
}
columnRestrictions.get(c).add(boolNode);
}
// 针对单个字段的条件进行合并,比如 A > 1 and A < 5 and A > 3合并为 3 < A < 5
List<IFilter> columnsFilter = new LinkedList();
for (Comparable c : columnRestrictions.keySet()) {
AndRangeProcessor ri = new AndRangeProcessor(c);
for (IFilter node : columnRestrictions.get(c)) {
if (!ri.process(node)) {
throw new EmptyResultFilterException("空结果");
}
}
List<IFilter> boolNodesOfCurrentColumn = ri.toFilterList();
columnsFilter.addAll(boolNodesOfCurrentColumn);
}
nodesAfterMerge.add(columnsFilter);
}
return nodesAfterMerge;
}
/**
* 合并析取式中的Or重复条件
*/
private static List<List<IFilter>> mergeOrDNFNodes(List<List<IFilter>> DNFNodes) throws EmptyResultFilterException {
Map<Object, List<IFilter>> columnRestrictions = new HashMap<Object, List<IFilter>>();
List<List<IFilter>> toRemove = new LinkedList<List<IFilter>>();
for (List<IFilter> DNFNode : DNFNodes) {
if (DNFNode.size() == 1) { // 只处理单个or条件的表达式,比如 A = 1 or A < 2
Object c = (((IBooleanFilter) DNFNode.get(0)).getColumn());
if (!columnRestrictions.containsKey(c)) {
columnRestrictions.put(c, new LinkedList());
}
columnRestrictions.get(c).add(DNFNode.get(0));
toRemove.add(DNFNode);
}
}
DNFNodes.removeAll(toRemove); // 先干掉,后面会计算后会重新添加
for (Object c : columnRestrictions.keySet()) {
OrRangeProcessor ri = new OrRangeProcessor(c);
for (IFilter boolNode : columnRestrictions.get(c)) {
ri.process(boolNode);
}
if (ri.isFullSet()) {
return new LinkedList<List<IFilter>>();
} else {
DNFNodes.addAll(ri.toFilterList());
}
}
return DNFNodes;
}
/**
* 将析取范式的数组重新构造为一个LogicFilter,使用and/or条件
*
* @param DNFNodes
* @return
*/
public static IFilter DNFToOrLogicTree(List<List<IFilter>> DNFNodes) {
if (DNFNodes.isEmpty()) {
return null;
}
IFilter treeNode = DNFToAndLogicTree(DNFNodes.get(0));
for (int i = 1; i < DNFNodes.size(); i++) {
treeNode = or(treeNode, DNFToAndLogicTree(DNFNodes.get(i)));
}
return treeNode;
}
/**
* 将一系列的boolean filter ,拼装成一个andFilter { boolFilter , boolFilter...}
* 的filter..
*
* @param DNFNode
* @return
*/
public static IFilter DNFToAndLogicTree(List<IFilter> DNFNode) {
if (DNFNode == null || DNFNode.isEmpty()) {
return null;
}
IFilter treeNode = DNFNode.get(0);
for (int i = 1; i < DNFNode.size(); i++) {
treeNode = and(treeNode, DNFNode.get(i));
}
return treeNode;
}
// -------------------- filter helper method-----------------
/**
* 创建and条件
*/
public static IFilter and(IFilter root, IFilter o) {
if (o == null) {
return root;
}
if (root == null) {
root = o;
} else {
if (root.getOperation().equals(OPERATION.AND)) {
((ILogicalFilter) root).addSubFilter(o);
} else {
ILogicalFilter and = ASTNodeFactory.getInstance().createLogicalFilter().setOperation(OPERATION.AND);
and.addSubFilter(root);
and.addSubFilter(o);
root = and;
}
}
return root;
}
/**
* 创建or条件
*/
public static IFilter or(IFilter root, IFilter o) {
if (o == null) {
return root;
}
if (root == null) {
root = o;
} else {
if (root.getOperation().equals(OPERATION.OR)) {
((ILogicalFilter) root).addSubFilter(o);
} else {
ILogicalFilter and = ASTNodeFactory.getInstance().createLogicalFilter().setOperation(OPERATION.OR);
and.addSubFilter(root);
and.addSubFilter(o);
root = and;
}
}
return root;
}
/**
* 创建equal filter
*/
public static IBooleanFilter equal(Comparable columnName, Comparable value) {
IBooleanFilter f = ASTNodeFactory.getInstance().createBooleanFilter();
f.setOperation(OPERATION.EQ);
f.setColumn(columnName);
f.setValue(value);
return f;
}
/**
* 判断是否为常量的filter
*/
public static boolean isConstFilter(IBooleanFilter f) {
if (f.getColumn() instanceof IColumn || f.getColumn() instanceof IFunction) {
return false;
}
if (f.getValue() instanceof IColumn || f.getValue() instanceof IFunction) {
return false;
}
return true;
}
/**
* 基于字符串表达式构建IFilter
*/
public static IFilter createFilter(String where) {
if (StringUtils.isEmpty(where)) {
return null;
}
MySqlExprVisitor visitor = MySqlExprVisitor.parser(where);
Comparable value = visitor.getColumnOrValue();
if (value instanceof IFilter) {
return (IFilter) value;
} else if (value instanceof ISelectable) {
throw new IllegalArgumentException("不合法的filter表达式:" + where);
} else {
return visitor.buildConstanctFilter(value);
}
}
/**
* 判断是否为常量的表达式对象
*/
public static boolean isConstValue(Object v) {
if (v instanceof IColumn || v instanceof IFunction) {
return false;
}
return true;
}
}