package org.aksw.sparqlify.database;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.function.BinaryOperator;
import org.aksw.commons.util.reflect.MultiMethod;
import org.aksw.jena_sparql_api.normal_form.Clause;
import org.aksw.jena_sparql_api.normal_form.NestedNormalForm;
import org.aksw.jena_sparql_api.restriction.RestrictionImpl;
import org.aksw.jena_sparql_api.restriction.RestrictionManagerImpl;
import org.aksw.jena_sparql_api.views.OpViewInstanceJoin;
import org.aksw.sparqlify.sparqlview.OpSparqlViewPattern;
import org.apache.commons.collections15.Predicate;
import org.apache.jena.sparql.algebra.Op;
import org.apache.jena.sparql.algebra.op.OpConditional;
import org.apache.jena.sparql.algebra.op.OpDisjunction;
import org.apache.jena.sparql.algebra.op.OpDistinct;
import org.apache.jena.sparql.algebra.op.OpExtend;
import org.apache.jena.sparql.algebra.op.OpGroup;
import org.apache.jena.sparql.algebra.op.OpJoin;
import org.apache.jena.sparql.algebra.op.OpLeftJoin;
import org.apache.jena.sparql.algebra.op.OpNull;
import org.apache.jena.sparql.algebra.op.OpOrder;
import org.apache.jena.sparql.algebra.op.OpProject;
import org.apache.jena.sparql.algebra.op.OpSequence;
import org.apache.jena.sparql.algebra.op.OpSlice;
import org.apache.jena.sparql.algebra.op.OpTopN;
import org.apache.jena.sparql.core.Var;
import org.apache.jena.sparql.expr.E_Bound;
import org.apache.jena.sparql.expr.Expr;
import org.apache.jena.sparql.expr.NodeValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Sets;
/**
* A predicate that returns true if the given object is an instance of a certain class
* (or of one of its subclasses). Null values never match.
* Uses Class.isAssignableFrom on the runtime class of the value.
*
* @author Claus Stadler <cstadler@informatik.uni-leipzig.de>
*
* @param <T>
*/
class PredicateInstanceOf<T>
    implements Predicate<T>
{
    /** Type that the runtime class of an evaluated value must be assignable to. */
    private Class<?> superClass;

    /**
     * @param superClass the class or interface that matching values must be instances of
     */
    public PredicateInstanceOf(Class<?> superClass) {
        this.superClass = superClass;
    }

    /**
     * Tests whether {@code value} is a non-null instance of the configured type.
     *
     * @param value the object to test; may be {@code null}
     * @return {@code false} for {@code null}; otherwise whether the value's runtime
     *         class is assignable to the configured type
     */
    @Override
    public boolean evaluate(T value) {
        // Null is rejected up front so that getClass() is never called on it.
        if (value == null) {
            return false;
        }
        Class<?> runtimeType = value.getClass();
        return superClass.isAssignableFrom(runtimeType);
    }
}
/**
*
* @author raven
*
* Uses RestrictionManager for the filter expressions (indexed set of dnfs)
*
*
* TODO: Inconsistent filters disappear
*/
public class FilterPlacementOptimizer2 {
/** Shared instance; remains {@code null} until {@link #get()} is called for the first time. */
private static FilterPlacementOptimizer2 instance;

/**
 * Returns the shared optimizer instance, creating it on first use.
 * The lazy initialization is not synchronized.
 *
 * @return the shared {@link FilterPlacementOptimizer2}
 */
public static FilterPlacementOptimizer2 get() {
    if (instance != null) {
        return instance;
    }
    instance = new FilterPlacementOptimizer2();
    return instance;
}
// SLF4J logger of this class; used to warn about operators whose handling is known to be incomplete.
private static final Logger logger = LoggerFactory.getLogger(FilterPlacementOptimizer2.class);
// public static Factory2<Op> joinFactory = new Factory2<Op>() {
// @Override
// public Op create(Op a, Op b) {
// Op result = OpJoin.create(a, b);
// return result;
// }
//
// };
/**
 * Optimizes {@code op} with no initial restrictions, using the shared instance.
 * Equivalent to {@code optimizeStatic(op, null)}.
 *
 * @param op the algebra expression to optimize
 * @return the rewritten expression
 */
public static Op optimizeStatic(Op op) {
    return optimizeStatic(op, null);
}
/**
 * Optimizes {@code op} under the given restrictions by delegating to the
 * shared instance obtained from {@link #get()}.
 *
 * @param op  the algebra expression to optimize
 * @param cnf restrictions to push into {@code op}; may be {@code null}
 * @return the rewritten expression
 */
public static Op optimizeStatic(Op op, RestrictionManagerImpl cnf) {
    return get().optimize(op, cnf);
}
/**
 * Optimizes {@code op} with no initial restrictions.
 * Equivalent to {@code optimize(op, null)}.
 *
 * @param op the algebra expression to optimize
 * @return the rewritten expression
 */
public Op optimize(Op op) {
    return optimize(op, null);
}
/**
 * Entry point of the rewrite: forwards {@code op} and the restrictions to the
 * {@code _optimize} method of this object via {@link MultiMethod#invoke}.
 * Which overload is chosen is decided by MultiMethod (presumably by the
 * runtime types of the arguments - confirm against its documentation).
 *
 * @param op  the algebra expression to optimize
 * @param cnf restrictions to push into {@code op}; {@code null} is replaced by
 *            a fresh, empty {@link RestrictionManagerImpl}
 * @return the rewritten expression
 */
public Op optimize(Op op, RestrictionManagerImpl cnf) {
    RestrictionManagerImpl effective = (cnf != null) ? cnf : new RestrictionManagerImpl();
    return MultiMethod.invoke(this, "_optimize", op, effective);
}
/**
 * Builds a new restriction manager from the clauses that {@code cnf} reports
 * for the variables mentioned in {@code op}.
 *
 * @param cnf the restrictions to select from
 * @param op  the expression whose mentioned variables drive the selection
 * @return a new manager wrapping the result of
 *         {@code cnf.getClausesForVars(varsMentioned(op))}
 */
public RestrictionManagerImpl filterByVars(RestrictionManagerImpl cnf, Op op) {
    Set<Clause> selected = cnf.getClausesForVars(GetVarsMentioned.getVarsMentioned(op));
    NestedNormalForm normalForm = new NestedNormalForm(selected);
    return new RestrictionManagerImpl(normalForm);
}
/**
 * Pushes the restrictions through an ordering: the sub-expression is
 * optimized with {@code cnf} and re-wrapped with the original sort conditions.
 *
 * @param op  the order operator
 * @param cnf restrictions to push down
 * @return a new {@link OpOrder} over the optimized sub-expression
 */
public Op _optimize(OpOrder op, RestrictionManagerImpl cnf) {
    Op optimizedSub = optimize(op.getSubOp(), cnf);
    return new OpOrder(optimizedSub, op.getConditions());
}
/**
 * Optimizes a top-N operator without moving outer restrictions beneath it.
 *
 * A top-N keeps only the first {@code limit} rows of the ordered input, so a
 * filter applied to its result is not equivalent to the same filter applied
 * to its input: filtering first changes which rows make it into the top N.
 * The sub-expression is therefore optimized on its own (starting from empty
 * restrictions) and {@code cnf} is kept as a filter above the rebuilt top-N.
 *
 * @param op  the top-N operator
 * @param cnf restrictions that apply to the result of {@code op}
 * @return the rebuilt top-N, wrapped in a filter when {@code cnf} requires one
 */
public Op _optimize(OpTopN op, RestrictionManagerImpl cnf) {
    // Restrictions stated inside the sub-expression are still optimized there.
    Op optimizedSub = optimize(op.getSubOp(), new RestrictionManagerImpl());
    Op topN = new OpTopN(optimizedSub, op.getLimit(), op.getConditions());
    return surroundWithFilterIfNeccessary(topN, cnf);
}
/**
 * Optimizes an inner join using the same strategy as for left joins
 * (see {@code handleLeftJoin}); the result is rebuilt with {@link OpJoin#create}.
 *
 * @param op  the join operator
 * @param cnf restrictions to distribute over the join
 * @return the rewritten join, possibly wrapped in a filter
 */
public Op _optimize(OpJoin op, RestrictionManagerImpl cnf) {
    BinaryOperator<Op> joinFactory = (newLeft, newRight) -> OpJoin.create(newLeft, newRight);
    return handleLeftJoin(op.getLeft(), op.getRight(), cnf, joinFactory);
}
/**
 * Alternative join handling that is not picked up by the {@code _optimize}
 * dispatch because of its different name.
 *
 * Each side receives the clauses selected for it by {@code filterByVars}.
 * Clauses of {@code cnf} that were handed to neither side are applied as a
 * filter on top of the rebuilt join.
 *
 * @param op  the join operator
 * @param cnf restrictions to distribute over the join
 * @return the rebuilt join, wrapped in a filter if clauses remain
 */
public Op _optimizeBreaking(OpJoin op, RestrictionManagerImpl cnf) {
    Op left = op.getLeft();
    Op right = op.getRight();

    RestrictionManagerImpl forLeft = filterByVars(cnf, left);
    RestrictionManagerImpl forRight = filterByVars(cnf, right);

    Op joined = OpJoin.create(optimize(left, forLeft), optimize(right, forRight));

    // Both sets below are lazy views; they are only evaluated by isEmpty() and the constructor.
    Set<Clause> pushed = Sets.union(forLeft.getCnf(), forRight.getCnf());
    Set<Clause> leftover = Sets.difference(cnf.getCnf(), pushed);
    if (leftover.isEmpty()) {
        return joined;
    }
    return OpFilterIndexed.filter(new RestrictionManagerImpl(new NestedNormalForm(leftover)), joined);
}
// public static Op _optimize(OpJoin op, RestrictionManagerImpl cnf) {
// RestrictionManagerImpl leftCnf = filterByVars(cnf, op.getLeft());
// RestrictionManagerImpl rightCnf = filterByVars(cnf, op.getRight());
//
// Set<Clause> union = Sets.union(leftCnf.getCnf(), rightCnf.getCnf());
// Set<Clause> remaining = Sets.difference(cnf.getCnf(), union);
//
// Op result = OpJoin.create(optimize(op.getLeft(), leftCnf), optimize(op.getRight(), rightCnf));
//
// if(!remaining.isEmpty()) {
// //result = OpFilter.filter(cnfToExprList(remaining), result);
// result = OpFilterIndexed.filter(new RestrictionManagerImpl(new NestedNormalForm(remaining)), result);
// }
//
// return result;
// }
/**
 * Optimizes every member of a sequence with the clauses that
 * {@code filterByVars} selects for it, then rebuilds the sequence and, if
 * clauses remain, wraps it in a filter.
 *
 * NOTE(review): {@code intersection} starts out empty and is only ever
 * intersected with other sets, so it is always empty when the loop ends.
 * As a consequence {@code remaining} equals {@code cnf.getCnf()} and every
 * clause is re-applied on top of the sequence in addition to being pushed
 * into the members. This looks unintended (presumably the first member's
 * clauses were meant to seed the intersection) - confirm before changing,
 * since the current behavior is redundant but conservative.
 *
 * @param op  the sequence operator
 * @param cnf restrictions to distribute over the members
 * @return the rebuilt sequence, wrapped in a filter if {@code cnf} has clauses
 */
public Op _optimize(OpSequence op, RestrictionManagerImpl cnf) {
List<Op> members = op.getElements();
List<Op> newMembers = new ArrayList<Op>(members.size());
// Meant to collect the clauses that were pushed into every member (see NOTE above).
Set<Clause> intersection = new HashSet<Clause>();
for(Op member : members) {
// Each member is optimized with only the clauses selected for its variables.
RestrictionManagerImpl restrictions = filterByVars(cnf, member);
Op newMember = optimize(member, restrictions);
newMembers.add(newMember);
// Copy the lazy Guava view into a concrete set before the next iteration.
Set<Clause> tmp = Sets.intersection(restrictions.getCnf(), intersection);
intersection = new HashSet<Clause>(tmp);
}
// Clauses not covered by the intersection stay on top of the sequence.
Set<Clause> remaining = Sets.difference(cnf.getCnf(), intersection);
Op result = OpSequence.create().copy(newMembers);
if(!remaining.isEmpty()) {
//result = OpFilter.filter(cnfToExprList(remaining), result);
result = OpFilterIndexed.filter(new RestrictionManagerImpl(new NestedNormalForm(remaining)), result);
}
return result;
}
// TODO This method looks wrong
// For each element of the union push all appropriate clauses
/**
 * Rebuilds a disjunction from its optimized members, dropping some members.
 *
 * A member is dropped as soon as one clause of {@code cnf} mentions every
 * variable that the member mentions; all other members are optimized with
 * the full {@code cnf} and kept in their original order.
 *
 * NOTE(review): the drop condition tests {@code clauseVars ⊇ elementVars}.
 * The original flag was named "elementHasRequiredVars", which suggests the
 * intended test may have been whether the member lacks variables required by
 * a clause. Behavior is kept as-is here - confirm the intent before changing.
 *
 * @param op  the disjunction (union) operator
 * @param cnf restrictions to push into each kept member
 * @return a new {@link OpDisjunction} holding the optimized, kept members
 */
public Op _optimize(OpDisjunction op, RestrictionManagerImpl cnf)
{
    List<Op> kept = new ArrayList<Op>();

    members:
    for (Op element : op.getElements()) {
        Set<Var> elementVars = GetVarsMentioned.getVarsMentioned(element);
        for (Clause clause : cnf.getCnf()) {
            if (clause.getVarsMentioned().containsAll(elementVars)) {
                // A clause covering all of the member's variables removes the member.
                continue members;
            }
        }
        kept.add(optimize(element, cnf));
    }

    OpDisjunction rebuilt = OpDisjunction.create();
    rebuilt.getElements().addAll(kept);
    return rebuilt;
}
/**
 * Pushes the restrictions through a DISTINCT: the sub-expression is optimized
 * with {@code cnf} and wrapped in a new {@link OpDistinct}.
 *
 * @param op  the distinct operator
 * @param cnf restrictions to push down
 * @return a new {@link OpDistinct} over the optimized sub-expression
 */
public Op _optimize(OpDistinct op, RestrictionManagerImpl cnf) {
    Op optimizedSub = optimize(op.getSubOp(), cnf);
    return new OpDistinct(optimizedSub);
}
/**
 * Pushes the restrictions through a projection: the sub-expression is
 * optimized with {@code cnf} and projected to the original variables again.
 *
 * @param op  the projection operator
 * @param cnf restrictions to push down
 * @return a new {@link OpProject} over the optimized sub-expression
 */
public Op _optimize(OpProject op, RestrictionManagerImpl cnf) {
    return new OpProject(optimize(op.getSubOp(), cnf), op.getVars());
}
/**
 * Pushes all restrictions beneath an extend operator and logs a warning,
 * because this handling is known to be provisional.
 *
 * @param op  the extend operator
 * @param cnf restrictions to push down
 * @return a copy of {@code op} over the optimized sub-expression
 */
public Op _optimize(OpExtend op, RestrictionManagerImpl cnf) {
    logger.warn("OpExtend probably not optimally implemented");
    Op optimizedSub = optimize(op.getSubOp(), cnf);
    return op.copy(optimizedSub);
}
/**
 * Pushes the restrictions through a grouping: the sub-expression is optimized
 * with {@code cnf} and re-grouped with the original group variables and
 * aggregators.
 *
 * @param op  the group operator
 * @param cnf restrictions to push down
 * @return a new {@link OpGroup} over the optimized sub-expression
 */
public Op _optimize(OpGroup op, RestrictionManagerImpl cnf) {
    Op optimizedSub = optimize(op.getSubOp(), cnf);
    return new OpGroup(optimizedSub, op.getGroupVars(), op.getAggregators());
}
//public static Op _optimize(OpEx)
/*
public static Op _optimize(OpFilter op, RestrictionManager cnf) {
RestrictionManager child = new RestrictionManager(cnf);
for(Expr expr : op.getExprs()) {
NestedNormalForm newCnf = CnfUtils.toCnf(expr);
child.stateCnf(newCnf);
}
return optimize(op.getSubOp(), child);
}
*/
/**
 * Experimental filter handling; not reached through the {@code _optimize}
 * dispatch because of its different name.
 *
 * The filter's own restrictions are added to a copy of {@code cnf}, the
 * combined set is split by {@link #splitFilter} against {@code op}, the
 * pushable part is pushed into the sub-expression and the non-pushable part,
 * if it has clauses, is re-applied as a filter. No unsatisfiability
 * short-circuit is performed.
 *
 * @param op  the indexed filter operator
 * @param cnf restrictions coming from above
 * @return the optimized sub-expression, wrapped in a filter if needed
 */
public Op _optimizeNewButNotSureIfWeNeedSplitsHere(OpFilterIndexed op, RestrictionManagerImpl cnf) {
    RestrictionManagerImpl combined = new RestrictionManagerImpl(cnf);
    combined.stateRestriction(op.getRestrictions());

    FilterSplit split = splitFilter(op, combined);
    Op optimizedSub = optimize(op.getSubOp(), split.getPushable());

    RestrictionManagerImpl retained = split.getNonPushable();
    if (retained.getCnf().isEmpty()) {
        return optimizedSub;
    }
    return OpFilterIndexed.filter(retained, optimizedSub);
}
/**
 * Absorbs a filter: its restrictions are added to a copy of {@code cnf} and
 * the combined set is pushed into the sub-expression. The filter operator
 * itself is not rebuilt here, and no unsatisfiability short-circuit is
 * performed.
 *
 * @param op  the indexed filter operator
 * @param cnf restrictions coming from above; not modified (a copy is extended)
 * @return the optimized sub-expression
 */
public Op _optimize(OpFilterIndexed op, RestrictionManagerImpl cnf) {
    RestrictionManagerImpl combined = new RestrictionManagerImpl(cnf);
    combined.stateRestriction(op.getRestrictions());
    return optimize(op.getSubOp(), combined);
}
/**
 * Leaves the null operator untouched; the restrictions are ignored.
 *
 * @param op  the null operator
 * @param cnf unused
 * @return {@code op} itself
 */
public Op _optimize(OpNull op, RestrictionManagerImpl cnf) {
    return op;
}
/**
 * Optimizes a slice (LIMIT/OFFSET) without moving outer restrictions beneath it.
 *
 * A slice selects rows by position, so a filter applied to the result of a
 * slice is not equivalent to the same filter applied to its input: filtering
 * first changes which rows fall inside the slice window. The sub-expression
 * is therefore optimized on its own (starting from empty restrictions) and
 * {@code cnf} is kept as a filter above the rebuilt slice.
 *
 * @param op  the slice operator
 * @param cnf restrictions that apply to the result of {@code op}
 * @return the rebuilt slice, wrapped in a filter when {@code cnf} requires one
 */
public Op _optimize(OpSlice op, RestrictionManagerImpl cnf)
{
    // Restrictions stated inside the sub-expression are still optimized there.
    Op optimizedSub = optimize(op.getSubOp(), new RestrictionManagerImpl());
    Op slice = op.copy(optimizedSub);
    return surroundWithFilterIfNeccessary(slice, cnf);
}
/**
 * Tests whether {@code predicate} holds for {@code expr} or, recursively, for
 * any argument of {@code expr} when it is a function expression.
 *
 * @param expr      the expression tree to search
 * @param predicate the condition to look for
 * @return {@code true} as soon as one matching (sub-)expression is found
 */
public static boolean evalPredicate(Expr expr, Predicate<Expr> predicate) {
    if (predicate.evaluate(expr)) {
        return true;
    }
    if (!expr.isFunction()) {
        // Non-function expressions have no arguments to descend into.
        return false;
    }
    for (Expr argument : expr.getFunction().getArgs()) {
        if (evalPredicate(argument, predicate)) {
            return true;
        }
    }
    return false;
}
/**
 * Tests whether any expression of {@code clause} is, or contains as a nested
 * function argument, an {@link E_Bound} expression.
 *
 * @param clause the clause to inspect
 * @return {@code true} if an {@code E_Bound} occurs anywhere in the clause
 */
public static boolean doesClauseContainBoundExpr(Clause clause) {
    Predicate<Expr> isBoundExpr = new PredicateInstanceOf<Expr>(E_Bound.class);
    boolean found = false;
    for (Expr member : clause.getExprs()) {
        if (evalPredicate(member, isBoundExpr)) {
            found = true;
            break;
        }
    }
    return found;
}
/**
 * Optimizes a left join via {@code handleLeftJoin}; the result is rebuilt
 * with {@link OpLeftJoin#create} using the original join expressions.
 *
 * @param op  the left join operator
 * @param cnf restrictions to distribute over the join
 * @return the rewritten left join, possibly wrapped in a filter
 */
public Op _optimize(final OpLeftJoin op, RestrictionManagerImpl cnf) {
    BinaryOperator<Op> leftJoinFactory =
            (newLeft, newRight) -> OpLeftJoin.create(newLeft, newRight, op.getExprs());
    return handleLeftJoin(op.getLeft(), op.getRight(), cnf, leftJoinFactory);
}
/**
 * Optimizes a conditional via {@code handleLeftJoin}; the result is rebuilt
 * as a new {@link OpConditional}.
 *
 * @param op  the conditional operator
 * @param cnf restrictions to distribute over the two sides
 * @return the rewritten conditional, possibly wrapped in a filter
 */
public Op _optimize(OpConditional op, RestrictionManagerImpl cnf) {
    BinaryOperator<Op> conditionalFactory = (newLeft, newRight) -> new OpConditional(newLeft, newRight);
    return handleLeftJoin(op.getLeft(), op.getRight(), cnf, conditionalFactory);
}
/**
 * Partitions the clauses of {@code cnf} into those that can be pushed into
 * {@code op} and those that cannot.
 *
 * A clause is pushable when every variable it mentions is also mentioned in
 * {@code op}; all other clauses are non-pushable. The per-variable
 * restrictions of {@code cnf} are stated on both resulting managers.
 *
 * @param op  the expression the clauses would be pushed into
 * @param cnf the restrictions to split
 * @return a {@link FilterSplit} of (pushable, non-pushable) restriction managers
 */
public static FilterSplit splitFilter(Op op, RestrictionManagerImpl cnf) {
    Set<Var> availableVars = GetVarsMentioned.getVarsMentioned(op);

    Set<Clause> pushableClauses = new HashSet<Clause>();
    Set<Clause> retainedClauses = new HashSet<Clause>();
    for (Clause clause : cnf.getCnf()) {
        // A clause mentioning a variable unknown to op cannot be evaluated inside op.
        boolean fitsIntoOp = availableVars.containsAll(clause.getVarsMentioned());
        Set<Clause> target = fitsIntoOp ? pushableClauses : retainedClauses;
        target.add(clause);
    }

    RestrictionManagerImpl pushable = new RestrictionManagerImpl(new NestedNormalForm(pushableClauses));
    RestrictionManagerImpl retained = new RestrictionManagerImpl(new NestedNormalForm(retainedClauses));

    // Both halves inherit every per-variable restriction of the input.
    for (Entry<Var, RestrictionImpl> restriction : cnf.getRestrictions().entrySet()) {
        pushable.stateRestriction(restriction.getKey(), restriction.getValue());
        retained.stateRestriction(restriction.getKey(), restriction.getValue());
    }

    return new FilterSplit(pushable, retained);
}
/*
* TODO: We could still push constraints down on the RHS - why aren't we doing this?
* Even the old version only considered restrictions on the left hand side
*
*/
/**
 * Shared rewrite for binary operators with left-join-like structure.
 *
 * Clauses whose variables are all mentioned on the left side are pushed into
 * the left side. Of those, the ones whose variables are also all mentioned
 * on the right side are additionally pushed into the right side. Clauses
 * that could not be pushed into the left side are applied as a filter on top
 * of the rebuilt operator.
 *
 * @param left    the left operand
 * @param right   the right operand
 * @param cnf     restrictions to distribute
 * @param factory rebuilds the binary operator from the optimized operands
 * @return the rebuilt operator, wrapped in a filter if needed
 */
public Op handleLeftJoin(Op left, Op right, RestrictionManagerImpl cnf, BinaryOperator<Op> factory) {
    // Only clauses that are fully covered by the left side's variables go left.
    FilterSplit leftSplit = splitFilter(left, cnf);
    RestrictionManagerImpl forLeft = leftSplit.getPushable();
    RestrictionManagerImpl retained = leftSplit.getNonPushable();
    Op optimizedLeft = optimize(left, forLeft);

    // The right side only receives left-pushable clauses that it can evaluate itself.
    RestrictionManagerImpl forRight = splitFilter(right, forLeft).getPushable();
    Op optimizedRight = optimize(right, forRight);

    Op rebuilt = factory.apply(optimizedLeft, optimizedRight);
    return surroundWithFilterIfNeccessary(rebuilt, retained);
}
/**
 * Previous version of {@code handleLeftJoin}.
 *
 * Clauses that share no variable with the right side are pushed into both
 * operands; every clause that mentions at least one right-side variable is
 * kept as a filter on top of the rebuilt operator. Per-variable restrictions
 * of {@code cnf} are not carried over.
 *
 * @param left    the left operand
 * @param right   the right operand
 * @param cnf     restrictions to distribute
 * @param factory rebuilds the binary operator from the optimized operands
 * @return the rebuilt operator, wrapped in a filter if needed
 */
public Op handleLeftJoinOld(Op left, Op right, RestrictionManagerImpl cnf, BinaryOperator<Op> factory) {
    Set<Var> rightVars = GetVarsMentioned.getVarsMentioned(right);

    Set<Clause> pushableClauses = new HashSet<Clause>();
    Set<Clause> retainedClauses = new HashSet<Clause>();
    for (Clause clause : cnf.getCnf()) {
        // Clauses touching any right-side variable stay above the join.
        if (Collections.disjoint(clause.getVarsMentioned(), rightVars)) {
            pushableClauses.add(clause);
        } else {
            retainedClauses.add(clause);
        }
    }

    RestrictionManagerImpl pushable = new RestrictionManagerImpl(new NestedNormalForm(pushableClauses));
    RestrictionManagerImpl retained = new RestrictionManagerImpl(new NestedNormalForm(retainedClauses));

    // The same pushable set is handed to both operands.
    Op optimizedLeft = optimize(left, pushable);
    Op optimizedRight = optimize(right, pushable);

    Op rebuilt = factory.apply(optimizedLeft, optimizedRight);
    return surroundWithFilterIfNeccessary(rebuilt, retained);
}
/*
public static ExprList cnfToExprList(Set<Set<Expr>> cnf)
{
ExprList result = new ExprList();
for(Set<Expr> clause : cnf) {
Expr expr = ExprUtils.orifyBalanced(clause);
result.add(expr);
}
return result;
}*/
/**
 * Wraps {@code op} in an indexed filter when {@code cnf} calls for one.
 *
 * <ul>
 * <li>If {@code cnf} is unsatisfiable, the filter consists of the single
 *     clause {@code FALSE}.</li>
 * <li>If {@code cnf} has no clauses, {@code op} is returned unchanged.</li>
 * <li>Otherwise {@code op} is wrapped in a filter carrying {@code cnf}.</li>
 * </ul>
 *
 * @param op  the expression to guard
 * @param cnf the restrictions to apply on top of {@code op}
 * @return {@code op} itself or a new {@link OpFilterIndexed} around it
 */
public Op surroundWithFilterIfNeccessary(Op op, RestrictionManagerImpl cnf)
{
    if (cnf.isUnsatisfiable()) {
        // Replace the contradictory restrictions by one clause that is always false.
        HashSet<Expr> falseOnly = new HashSet<Expr>(Collections.singleton(NodeValue.FALSE));
        HashSet<Clause> falseClause = new HashSet<Clause>(Collections.singleton(new Clause(falseOnly)));
        return new OpFilterIndexed(op, new RestrictionManagerImpl(new NestedNormalForm(falseClause)));
    }
    if (cnf.getCnf().isEmpty()) {
        return op;
    }
    return new OpFilterIndexed(op, cnf);
}
// @Deprecated
// public static Op _optimize(OpRdfViewPattern op, RestrictionManagerImpl cnf) {
// return surroundWithFilterIfNeccessary(op, cnf);
// }
/**
 * Leaf case: nothing is pushed further down; the restrictions are applied
 * directly on top of the view instance join where required.
 *
 * @param op  the view instance join
 * @param cnf restrictions that reached this leaf
 * @return {@code op}, wrapped in a filter if {@code cnf} requires one
 */
public Op _optimize(OpViewInstanceJoin op, RestrictionManagerImpl cnf) {
    Op guarded = surroundWithFilterIfNeccessary(op, cnf);
    return guarded;
}
/**
 * Leaf case: nothing is pushed further down; the restrictions are applied
 * directly on top of the SPARQL view pattern where required.
 *
 * @param op  the SPARQL view pattern
 * @param cnf restrictions that reached this leaf
 * @return {@code op}, wrapped in a filter if {@code cnf} requires one
 */
public Op _optimize(OpSparqlViewPattern op, RestrictionManagerImpl cnf) {
    Op guarded = surroundWithFilterIfNeccessary(op, cnf);
    return guarded;
}
/*
public static Op _optimize(OpUnion op, Set<Set<Expr>> cnf) {
Set<Var> rightVars = GetVarsMentioned.getVarsMentioned(op.getRight());
Set<Set<Expr>> leftClauses = new HashSet<Set<Expr>>();
Set<Set<Expr>> nonPushable = new HashSet<Set<Expr>>();
for(Set<Expr> clause : cnf) {
Set<Var> clauseVars = ClauseUtils.getVarsMentioned(clause);
if(Sets.intersection(clauseVars, rightVars).isEmpty()) {
leftClauses.add(clause);
} else {
nonPushable.add(clause);
}
}
return new OpUnion(optimize(op.getLeft(), cnf), optimize(op.getRight(), cnf));
}*/
}