package org.aksw.jena_sparql_api.concept_cache.core;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
import org.aksw.commons.collections.multimaps.BiHashMultimap;
import org.aksw.commons.collections.multimaps.IBiSetMultimap;
import org.aksw.jena_sparql_api.algebra.transform.TransformReplaceConstants;
import org.aksw.jena_sparql_api.concept_cache.collection.FeatureMap;
import org.aksw.jena_sparql_api.concept_cache.collection.FeatureMapImpl;
import org.aksw.jena_sparql_api.concept_cache.dirty.SparqlViewMatcherQfpc;
import org.aksw.jena_sparql_api.concept_cache.domain.ConjunctiveQuery;
import org.aksw.jena_sparql_api.concept_cache.domain.ExprHolder;
import org.aksw.jena_sparql_api.concept_cache.domain.PatternSummary;
import org.aksw.jena_sparql_api.concept_cache.domain.ProjectedQuadFilterPattern;
import org.aksw.jena_sparql_api.concept_cache.domain.QuadFilterPattern;
import org.aksw.jena_sparql_api.concept_cache.domain.QuadFilterPatternCanonical;
import org.aksw.jena_sparql_api.concept_cache.domain.VarOccurrence;
import org.aksw.jena_sparql_api.concept_cache.op.OpExtConjunctiveQuery;
import org.aksw.jena_sparql_api.concept_cache.op.OpUtils;
import org.aksw.jena_sparql_api.concept_cache.trash.OpVisitorViewCacheApplier;
import org.aksw.jena_sparql_api.core.QueryExecutionExecWrapper;
import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
import org.aksw.jena_sparql_api.model.QueryExecutionFactoryModel;
import org.aksw.jena_sparql_api.utils.ClauseUtils;
import org.aksw.jena_sparql_api.utils.CnfUtils;
import org.aksw.jena_sparql_api.utils.DnfUtils;
import org.aksw.jena_sparql_api.utils.ExprUtils;
import org.aksw.jena_sparql_api.utils.Generator;
import org.aksw.jena_sparql_api.utils.NodeTransformRenameMap;
import org.aksw.jena_sparql_api.utils.QuadUtils;
import org.aksw.jena_sparql_api.utils.VarGeneratorImpl2;
import org.aksw.jena_sparql_api.utils.Vars;
import org.apache.jena.ext.com.google.common.collect.Sets;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.ResultSet;
import org.apache.jena.sparql.algebra.Algebra;
import org.apache.jena.sparql.algebra.Op;
import org.apache.jena.sparql.algebra.OpAsQuery;
import org.apache.jena.sparql.algebra.OpVars;
import org.apache.jena.sparql.algebra.Table;
import org.apache.jena.sparql.algebra.op.OpDistinct;
import org.apache.jena.sparql.algebra.op.OpFilter;
import org.apache.jena.sparql.algebra.op.OpGraph;
import org.apache.jena.sparql.algebra.op.OpJoin;
import org.apache.jena.sparql.algebra.op.OpNull;
import org.apache.jena.sparql.algebra.op.OpProject;
import org.apache.jena.sparql.algebra.op.OpQuadPattern;
import org.apache.jena.sparql.algebra.op.OpService;
import org.apache.jena.sparql.algebra.op.OpTable;
import org.apache.jena.sparql.algebra.op.OpUnion;
import org.apache.jena.sparql.algebra.optimize.Rewrite;
import org.apache.jena.sparql.core.Quad;
import org.apache.jena.sparql.core.QuadPattern;
import org.apache.jena.sparql.core.Var;
import org.apache.jena.sparql.engine.binding.Binding;
import org.apache.jena.sparql.expr.E_Equals;
import org.apache.jena.sparql.expr.E_OneOf;
import org.apache.jena.sparql.expr.Expr;
import org.apache.jena.sparql.expr.ExprList;
import org.apache.jena.sparql.expr.ExprVar;
import org.apache.jena.sparql.expr.ExprVars;
import org.apache.jena.sparql.expr.NodeValue;
import org.apache.jena.sparql.syntax.Element;
import org.apache.jena.sparql.syntax.ElementService;
import org.apache.jena.sparql.syntax.ElementSubQuery;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.Assert;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.SetMultimap;
/*
class QueryRewrite {
protected Query masterQuery;
protected Map<Node, Query>
}
*/
public class SparqlCacheUtils {
private static final Logger logger = LoggerFactory.getLogger(SparqlCacheUtils.class);
/**
 * Wrap an op with a caching operation, realized as a SERVICE op that points
 * to the given storage reference node.
 *
 * @param subOp The op whose result set should be cached
 * @param storageRef The node identifying the cache entry to create and where the result set will be stored
 *
 * @return An {@link OpService} wrapping subOp under storageRef
 */
public static Op createCachingOp(Op subOp, Node storageRef) {
    // Not silent: a failure of the cache service should surface as an error
    boolean silent = false;
    //Query subQuery = OpAsQuery.asQuery(subOp);
    //Element subElement = new ElementSubQuery(subQuery);
    //ElementService elt = new ElementService(storageRef, subElement, silent);
    OpService result = new OpService(storageRef, subOp, silent);
    return result;
}
/**
 * Utility method to quickly create a canonical quad filter pattern from a query.
 *
 * @param query the query to convert
 * @return the canonical quad filter pattern of the query's residual op
 */
public static QuadFilterPatternCanonical fromQuery(Query query) {
    ProjectedOp projectedOp = SparqlQueryContainmentUtils.toProjectedOp(query);
    Op residualOp = projectedOp.getResidualOp();
    return SparqlCacheUtils.extractQuadFilterPatternCanonical(residualOp);
}
/**
 * Remove filter clauses that merely equate a graph variable with the default
 * graph marker node (defaultGraphIRI / defaultGraphNodeGenerated), provided
 * the variable is used exclusively in the graph component of the quads.
 *
 * @param qfpc the canonical quad filter pattern to clean up
 * @return a new pattern with the default-graph equality clauses removed
 */
public static QuadFilterPatternCanonical removeDefaultGraphFilter(QuadFilterPatternCanonical qfpc) {
    Set<Quad> quads = qfpc.getQuads();
    Set<Set<Expr>> cnf = qfpc.getFilterCnf();

    // Variables the CNF constrains to a constant via an equality clause
    Map<Var, Node> varToNode = CnfUtils.getConstants(cnf);

    // Candidates: variables equated to one of the default-graph marker nodes
    Map<Var, Node> candMap = varToNode.entrySet().stream().filter(
            e -> (Quad.defaultGraphIRI.equals(e.getValue())
            || Quad.defaultGraphNodeGenerated.equals(e.getValue())))
            .collect(Collectors.toMap(Entry::getKey, Entry::getValue));

    // NOTE: keySet() is a live view — removals below also shrink candMap,
    // which is what the clause filter further down relies on
    Set<Var> candVars = candMap.keySet();

    // Remove all vars that occurr in positions other than the graph
    for(Quad quad : quads) {
        Node[] nodes = QuadUtils.quadToArray(quad);
        // i starts at 1: index 0 is the graph component, which is deliberately skipped
        for(int i = 1; i < 4; ++i) {
            Node node = nodes[i];
            candVars.remove(node);
        }
    }

    // Retain every clause except the equality clauses of the surviving candidates
    Set<Set<Expr>> newCnf = cnf.stream().filter(clause -> {
        Entry<Var, Node> e = CnfUtils.extractEquality(clause);
        boolean r = !candMap.entrySet().contains(e);
        return r;
    }).collect(Collectors.toSet());

    QuadFilterPatternCanonical result = new QuadFilterPatternCanonical(quads, ExprHolder.fromCnf(newCnf));
    return result;
}
/**
 * Canonicalize the given projected quad filter pattern and optimize its
 * filters with respect to the projected variables.
 *
 * @param pqfp the projected quad filter pattern to optimize
 * @return a new projected quad filter pattern with the optimized filters
 */
public static ProjectedQuadFilterPattern optimizeFilters(ProjectedQuadFilterPattern pqfp) {
    Set<Var> projectVars = pqfp.getProjectVars();
    QuadFilterPatternCanonical canonical = summarize(pqfp.getQuadFilterPattern()).getCanonicalPattern();
    QuadFilterPatternCanonical optimized = optimizeFilters(canonical.getQuads(), canonical.getFilterCnf(), projectVars);
    return new ProjectedQuadFilterPattern(projectVars, optimized.toQfp(), false);
}
/**
 * Inline constant equalities (var = const) into the quads and drop the
 * corresponding filter clauses, but only for variables that are NOT projected —
 * projected variables must keep their bindings visible in the filter.
 *
 * @param quads the quads of the pattern
 * @param cnf the filter expression in conjunctive normal form
 * @param projection the projected variables, whose equalities are retained
 * @return a new canonical pattern with constants inlined and clauses pruned
 */
public static QuadFilterPatternCanonical optimizeFilters(Collection<Quad> quads, Set<Set<Expr>> cnf, Set<Var> projection) {
    Map<Var, Node> varToNode = CnfUtils.getConstants(cnf);

    // A view on the set of variables subject to the optimization;
    // removals propagate to varToNode (live keySet view)
    Set<Var> optVars = varToNode.keySet();

    // Remove all projected variables from the optimization set so their
    // equalities are retained.
    // Bug fix: the original called optVars.remove(projection), passing the
    // whole set as a single element — a no-op — so projected variables were
    // inlined and their equality clauses dropped.
    optVars.removeAll(projection);

    Set<Quad> newQuads = new HashSet<Quad>();
    for (Quad quad : quads) {
        Node[] nodes = QuadUtils.quadToArray(quad);
        for (int i = 0; i < 4; ++i) {
            Node node = nodes[i];
            Node subst = varToNode.get(node);
            // Update in place, because the array is a copy anyway
            nodes[i] = subst == null ? node : subst;
        }
        Quad newQuad = QuadUtils.arrayToQuad(nodes);
        newQuads.add(newQuad);
    }

    // Remove the clauses from which the mapping was obtained
    Set<Set<Expr>> newCnf = new HashSet<>();
    for (Set<Expr> clause : cnf) {
        Entry<Var, Node> equality = CnfUtils.extractEquality(clause);
        boolean retainClause = equality == null || !optVars.contains(equality.getKey());
        if (retainClause) {
            newCnf.add(clause);
        }
    }

    QuadFilterPatternCanonical result = new QuadFilterPatternCanonical(newQuads, ExprHolder.fromCnf(newCnf));
    return result;
}
// TODO Not used, can probably be removed
// public static ResultSet executeCached(QueryExecutionFactory qef, Query query, ProjectedQuadFilterPattern pqfp, SparqlViewCache sparqlViewCache, long indexResultSetSizeThreshold) {
// if(pqfp == null) {
// throw new RuntimeException("Query is not indexable: " + query);
// }
//
// Set<Var> indexVars = new HashSet<>(query.getProjectVars());
//
// QueryExecution qe = new QueryExecutionViewCacheFragment(query, pqfp, qef, sparqlViewCache, indexVars, indexResultSetSizeThreshold);
// ResultSet result = qe.execSelect();
// return result;
// }
// Monotonically increasing counter used to mint unique cache service-node URIs
// in prepareQueryExecution. NOTE(review): plain long increment is not
// thread-safe — confirm whether concurrent preparation is possible.
public static long preparationId = 0;
/**
 * Prepares the execution of a query in regard to a query cache.
 *
 * Replaces parts of the algebra with cache hits, and
 * replaces other parts with ops that perform the caching.
 *
 * There are two types of query execution under caching:
 * (a) Rewrite the query by making only use of SPARQL 1.1, most notably the VALUES
 *     keyword, such that the remote sparql service can execute it
 * (b) Rewrite the query such that a local executor has to do the execution. This one
 *     can then request remote result sets.
 *
 * Essentially this means, that if the query made use of local cache operators, then
 * the remaining quad patterns would also have to be rewritten as to make a remote query.
 *
 * @param qef factory for executing (parts of) the query remotely
 * @param serviceMap registry of cache indexers, keyed by service node; this method
 *        temporarily registers an indexer under a freshly minted node
 * @param rawQuery the query to prepare
 * @param conceptMap the view matcher used for cache lookups
 * @param indexResultSetSizeThreshold maximum result set size eligible for indexing
 * @return a query execution whose exec wraps registration/deregistration of the indexer
 */
public static QueryExecution prepareQueryExecution(
        QueryExecutionFactory qef,
        Map<Node, ? super ViewCacheIndexer> serviceMap,
        Query rawQuery,
        SparqlViewMatcherQfpc conceptMap,
        long indexResultSetSizeThreshold)
{
    // Mint a unique service node under which the indexer is registered for
    // the lifetime of this execution
    Node serviceNode = NodeFactory.createURI("cache://" + qef.getId() + "-" + (preparationId++));

    // Parameterized logging avoids building the message when debug is off
    logger.debug("Rewriting query: {}", rawQuery);
    Query query = rewriteQuery(serviceNode, rawQuery, conceptMap, indexResultSetSizeThreshold);
    logger.debug("Rewritten query: {}", query);

    ViewCacheIndexer vci = new ViewCacheIndexerImpl(qef, conceptMap, indexResultSetSizeThreshold);

    // TODO Get this right:
    boolean performLocalExecution = true;

    // Guard: query.toString() on large queries is expensive; only build and
    // truncate the string when debug logging is actually enabled
    if (logger.isDebugEnabled()) {
        String queryStr = query.toString();
        logger.debug("Preparing query: {}", queryStr.substring(0, Math.min(2000, queryStr.length())));
    }

    // If the query is pattern free, we can execute it against an empty model
    // instead of performing a remote request
    QueryExecution result;
    if (performLocalExecution) {
        QueryExecutionFactory ss = new QueryExecutionFactoryModel();
        result = ss.createQueryExecution(query);
    } else {
        result = qef.createQueryExecution(query);
    }

    // Register the indexer under the service node right before execution and
    // deregister it afterwards; the assertions guard against node-id collisions
    // and double removal
    result = new QueryExecutionExecWrapper(result,
        () -> { Assert.isTrue(!serviceMap.containsKey(serviceNode)); serviceMap.put(serviceNode, vci); },
        () -> { Assert.isTrue(serviceMap.containsKey(serviceNode)); serviceMap.remove(serviceNode); }
    );

    return result;
}
/**
 * Create a service node with a union where the first member is to be interpreted as the
 * pattern that should be used for caching, and the second argument is the pattern to be
 * executed.
 *
 * @param patternOp the op encoding the pattern used for indexing
 * @param serviceNode the node identifying the caching service
 * @param executionOp the op that is actually executed
 * @return a SERVICE op over the union of both members
 */
public static OpService wrapWithServiceOld(Op patternOp, Node serviceNode, Op executionOp) {
    boolean silent = false;
    OpUnion union = new OpUnion(patternOp, executionOp);
    // Keep an equivalent syntax-level element alongside the algebra op
    Element subElement = new ElementSubQuery(OpAsQuery.asQuery(union));
    ElementService serviceElement = new ElementService(serviceNode, subElement, silent);
    return new OpService(serviceNode, union, serviceElement, silent);
}
/**
 * Rewrites a query to make use of the cache.
 *
 * Cacheable sub-patterns (projected quad filter patterns) are detected in the
 * query's algebra. Parts with cache hits are replaced by joins over the cached
 * tables; cacheable parts without hits are wrapped in SERVICE ops pointing at
 * serviceNode so that their results get indexed upon execution.
 *
 * @param serviceNode node under which the cache indexer is registered
 * @param rawQuery the query to rewrite
 * @param sparqlViewCache the view matcher used for cache lookups
 * @param indexResultSetSizeThreshold maximum result set size eligible for indexing
 * @return the rewritten query
 */
public static Query rewriteQuery(
    //QueryExecutionFactory qef,
    Node serviceNode,
    Query rawQuery,
    SparqlViewMatcherQfpc sparqlViewCache,
    long indexResultSetSizeThreshold)
{
    Op rawOp = Algebra.compile(rawQuery);
    rawOp = Algebra.toQuadForm(rawOp);

    // TODO We could create a mapping from (op) -> (op with replaced constants)
    // rawOp = ReplaceConstants.replace(rawOp);

    // Generator for variables not yet used anywhere in rawOp
    Generator<Var> generator = OpUtils.freshVars(rawOp);

    // Determine which parts of the query are cacheable
    // (i.e. those parts that correspond to projected quad filter patterns)
    Map<Op, ProjectedQuadFilterPattern> tmpCacheableOps = OpVisitorViewCacheApplier.detectPrimitiveCachableOps(rawOp);

    // If the op is a projection, associate the pqfp with the sub op in order to retain the projection
    // TODO This is necessary, if we later expand the graph pattern; yet, I am not sure this is the best way to retain the projection
    Map<Op, ProjectedQuadFilterPattern> cacheableOps = tmpCacheableOps.entrySet().stream()
        .collect(Collectors.toMap(e -> {
            Op op = e.getKey();
            Op r = op instanceof OpProject ? ((OpProject)op).getSubOp() : op;
            return r;
        }, Entry::getValue));

    // Canonicalize every cacheable pattern once, so that lookup and index ops agree
    Map<QuadFilterPattern, QuadFilterPatternCanonical> qfpToCanonical = cacheableOps.entrySet().stream()
        .collect(Collectors.toMap(e -> e.getValue().getQuadFilterPattern(), e -> {
            ProjectedQuadFilterPattern pqfp = e.getValue();
            QuadFilterPattern qfp = pqfp.getQuadFilterPattern();
            QuadFilterPatternCanonical r = canonicalize2(qfp, generator);
            return r;
        }));

    // Determine for which of the cachable parts we have cache hits
    Map<Op, CacheResult> opToCacheHit = cacheableOps.entrySet().stream()
        .map(e -> {
            ProjectedQuadFilterPattern pqfp = e.getValue();
            QuadFilterPattern qfp = pqfp.getQuadFilterPattern();
            //qfp = summarize(qfp).getCanonicalPattern();
            //qfp = canonicalize(qfp, generator);
            Op op = e.getKey();
            QuadFilterPatternCanonical qfpc = qfpToCanonical.get(qfp);
            // NOTE(review): the cache lookup is currently disabled, so this
            // pipeline never produces hits — confirm whether
            // sparqlViewCache.lookup(qfpc) should be re-enabled
            CacheResult cacheResult = null; // TODO Maybe fix this line: sparqlViewCache.lookup(qfpc);
            Entry<Op, CacheResult> r = cacheResult == null ? null : new SimpleEntry<>(op, cacheResult);
            return r;
        })
        .filter(e -> e != null)
        .collect(Collectors.toMap(Entry::getKey, Entry::getValue));

    logger.debug("Cache hits:");
    opToCacheHit.entrySet().forEach(e -> logger.debug("" + e));

    // Determine the cacheable parts which do not yet have cache hits
    Set<Op> nonCachedCacheableOps = Sets.difference(cacheableOps.keySet(), opToCacheHit.keySet());

    // TODO There may be ops for which there exist partial covers via cache hits.
    // These ops are again subject to caching.

    // Execute the cacheable parts, and cache them, if possible.
    // Note: We might find out that some result sets are too large to cache them.

    // This is the map which contains the rewrites:
    // . Ops that are in the cache are replaced by cache-access ops
    // . Ops that are not in the cache but cacheable are mapped by caching ops
    Map<Op, Op> opToCachingOp = new HashMap<>();
    for(Entry<Op, CacheResult> entry : opToCacheHit.entrySet()) {
        Op op = entry.getKey();

        // TODO Inject projection

        CacheResult cacheResult = entry.getValue();
        Op executionOp = cacheResult.getReplacementPattern().toOp();
        // An OpNull replacement pattern means the cached tables fully cover the pattern
        boolean isFullCover = executionOp instanceof OpNull;

        // Join the cached tables into the execution op
        Collection<Table> tables = cacheResult.getTables();
        for(Table table : tables) {
            OpTable opTable = OpTable.create(table);

            // If the replacement pattern is empty, OpNull is returned which we need to eliminate
            executionOp = executionOp instanceof OpNull ? opTable : OpJoin.create(opTable, executionOp);
        }

        // TODO IMPORTANT Try to optimize filter placement
        // TODO The new op may be cachable again
        Op newOp = isFullCover
            ? executionOp
            : wrapWithServiceOld(op, serviceNode, executionOp);

        opToCachingOp.put(op, newOp);
    }

    // Notes: indexOp is the op that encodes the canonical projected quad filter pattern used for indexing
    // executionOp is the op used to actually execute the pattern and may make use of caching parts
    for(Op op : nonCachedCacheableOps) {
        ProjectedQuadFilterPattern pqfp = cacheableOps.get(op);
        QuadFilterPattern qfp = pqfp.getQuadFilterPattern();
        QuadFilterPatternCanonical indexQfpc = qfpToCanonical.get(qfp);

        // TODO Remove clause for default graph constraint

        if(pqfp == null) { // TODO Turn into an assertion
            throw new RuntimeException("Should not happen");
        }

        // The index op carries the projection so the cache stores only projected columns
        List<Var> projectVars = new ArrayList<Var>(pqfp.getProjectVars());
        Op indexOp = indexQfpc.toOp();
        indexOp = new OpProject(indexOp, projectVars);

        Op executionOp = op;

        // TODO: Maybe we should wrap the executionOp with the projection again
        Op newOp = wrapWithServiceOld(indexOp, serviceNode, executionOp);
        opToCachingOp.put(op, newOp);
    }

    // Perform the substitution
    Op rootOp = OpUtils.substitute(rawOp, false, x -> opToCachingOp.get(x));

    // Round-trip through query syntax to normalize the resulting algebra
    Query tmp = OpAsQuery.asQuery(rootOp);
    rootOp = Algebra.compile(tmp);

    Query result = OpAsQuery.asQuery(rootOp);
    return result;
}
/**
 * Rename all variables to ?g ?s ?p ?o based on the given quad and the cnf.
 * This is used for looking up triples having a certain expression over its components
 *
 * ([?g ?s ?s ?o], (fn(?s, ?o))
 *
 * If a quad variable occurs in more than one component, the first mapping wins
 * and an extra equality clause between the component variables is emitted.
 *
 * @param quad the (variable-only) quad whose components drive the renaming;
 *        a default-graph node in the graph position is tolerated and skipped
 * @param nf the normal form (set of clauses) to rename
 * @return the renamed clauses, including the extra equality clauses
 * @throws RuntimeException if a non-graph component is not a variable
 */
public static Set<Set<Expr>> normalize(Quad quad, Set<Set<Expr>> nf) {
    // Canonical component variables, in graph/subject/predicate/object order
    List<Var> componentVars = Vars.gspo;

    Map<Var, Var> renameMap = new HashMap<Var, Var>();
    Set<Set<Expr>> extra = new HashSet<Set<Expr>>();
    for(int i = 0; i < 4; ++i) {
        Node tmp = QuadUtils.getNode(quad, i);
        // The graph component may legitimately be a default-graph constant; skip it
        if(i == 0 && (Quad.defaultGraphNodeGenerated.equals(tmp) || Quad.defaultGraphIRI.equals(tmp))) {
            continue;
        }
        if(!tmp.isVariable()) {
            throw new RuntimeException("Expected variable normalized quad, got: " + quad);
        }
        Var quadVar = (Var)tmp;
        Var componentVar = componentVars.get(i);

        Var priorComponentVar = renameMap.get(quadVar);
        // We need to rename
        if(priorComponentVar != null) {
            // Repeated variable: keep the earlier mapping and equate the two
            // canonical component variables instead
            extra.add(Collections.<Expr>singleton(new E_Equals(new ExprVar(priorComponentVar), new ExprVar(componentVar))));
        } else {
            renameMap.put(quadVar, componentVar);
        }
    }

    NodeTransformRenameMap transform = new NodeTransformRenameMap(renameMap);
    Set<Set<Expr>> result = ClauseUtils.applyNodeTransformSet(nf, transform);
    result.addAll(extra);
    //System.out.println(result);
    return result;
}
/**
 * Select the clauses of the given CNF whose variables are all covered by the
 * variables mentioned in the quad.
 *
 * @param quad the quad providing the covering variable set
 * @param cnf the clauses to filter
 * @return the subset of clauses applicable to the quad
 */
public static Set<Set<Expr>> add(Quad quad, Set<Set<Expr>> cnf) {
    Set<Var> quadVars = QuadUtils.getVarsMentioned(quad);
    Set<Set<Expr>> applicable = new HashSet<>();
    for (Set<Expr> clause : cnf) {
        if (quadVars.containsAll(ClauseUtils.getVarsMentioned(clause))) {
            applicable.add(clause);
        }
    }
    return applicable;
}
/**
 * Detach the projection from the given op, normalize the remaining op with the
 * supplied rewriter, and re-attach the projection info.
 *
 * @param op the op to process
 * @param opNormalizer the rewrite applied to the projection-free residual op
 * @return the projection paired with the normalized residual op
 */
public static ProjectedOp cutProjectionAndNormalize(Op op, Rewrite opNormalizer) {
    // Before normalization, cut away the projection on the original op first
    ProjectedOp cut = SparqlCacheUtils.cutProjection(op);
    return new ProjectedOp(cut.getProjection(), opNormalizer.rewrite(cut.getResidualOp()));
}
/**
 * Cut away the projection (TODO: and maybe extend) of an op (if any), and return
 * the projection as a standalone object together with the remaining op.
 *
 * @param op the op whose distinct/projection wrappers should be detached
 * @return the projection info together with the residual op
 */
public static ProjectedOp cutProjection(Op op) {
    Op residualOp = op;

    int distinctLevel = 0;
    if (residualOp instanceof OpDistinct) {
        distinctLevel = 2;
        residualOp = ((OpDistinct) residualOp).getSubOp();
    }

    Set<Var> projectVars = null;
    if (residualOp instanceof OpProject) {
        OpProject opProject = (OpProject) residualOp;
        projectVars = new LinkedHashSet<>(opProject.getVars());
        residualOp = opProject.getSubOp();
    }

    // Without an explicit projection, fall back to the visible named variables
    // (and, as in the original behavior, a distinct level of 0)
    VarInfo varInfo = projectVars != null
            ? new VarInfo(projectVars, distinctLevel)
            : new VarInfo(OpUtils.visibleNamedVars(residualOp), 0);
    return new ProjectedOp(varInfo, residualOp);
}
/**
 * Compile the query to quad-form algebra, replace constants with variables,
 * and extract the projected quad filter pattern (if any).
 *
 * @param query the query to analyze
 * @return the projected quad filter pattern, or null if the query does not match
 */
public static ProjectedQuadFilterPattern transform(Query query) {
    Op op = TransformReplaceConstants.transform(Algebra.toQuadForm(Algebra.compile(query)));
    return transform(op);
}
/**
 * Compile the query to quad-form algebra, replace constants with variables,
 * and return the canonical quad filter pattern (if any).
 *
 * @param query the query to analyze
 * @return the canonical quad filter pattern, or null if the query does not match
 */
public static QuadFilterPatternCanonical transform2(Query query) {
    Op op = TransformReplaceConstants.transform(Algebra.toQuadForm(Algebra.compile(query)));
    ProjectedQuadFilterPattern pqfp = transform(op);
    return pqfp == null
            ? null
            : canonicalize2(pqfp.getQuadFilterPattern(), VarGeneratorImpl2.create("v"));
}
/**
 * Compile the element to quad-form algebra and extract the projected quad
 * filter pattern (if any). Note: unlike the Query overload, constants are
 * NOT replaced here.
 *
 * @param element the element to analyze
 * @return the projected quad filter pattern, or null if the element does not match
 */
public static ProjectedQuadFilterPattern transform(Element element) {
    Op op = Algebra.toQuadForm(Algebra.compile(element));
    return transform(op);
}
/**
 * Extract the quad filter pattern from the op and canonicalize it with a
 * fresh variable generator.
 *
 * @param op the op to analyze
 * @return the canonical pattern, or null if the op is not a quad filter pattern
 */
public static QuadFilterPatternCanonical extractQuadFilterPatternCanonical(Op op) {
    QuadFilterPattern qfp = SparqlCacheUtils.extractQuadFilterPattern(op);
    if (qfp == null) {
        return null;
    }
    Generator<Var> generator = VarGeneratorImpl2.create();
    return SparqlCacheUtils.canonicalize2(qfp, generator);
}
/**
 * Try to view the op as a conjunctive query: an optional distinct over an
 * optional projection over a quad filter pattern.
 *
 * @param op the op to analyze
 * @param generator variable generator used for canonicalization
 * @return the conjunctive query, or null if the op does not have the expected shape
 */
public static ConjunctiveQuery tryExtractConjunctiveQuery(Op op, Generator<Var> generator) {
    // Peel off an optional distinct wrapper
    OpDistinct opDistinct = null;
    if (op instanceof OpDistinct) {
        opDistinct = (OpDistinct) op;
        op = opDistinct.getSubOp();
    }

    // Peel off an optional projection wrapper
    OpProject opProject = null;
    if (op instanceof OpProject) {
        opProject = (OpProject) op;
        op = opProject.getSubOp();
    }

    QuadFilterPattern qfp = extractQuadFilterPattern(op);
    if (qfp == null) {
        return null;
    }

    // Without an explicit projection, all visible variables count as projected
    Set<Var> projectVars = opProject != null
            ? new LinkedHashSet<>(opProject.getVars())
            : OpVars.visibleVars(op);
    VarInfo varInfo = new VarInfo(projectVars, opDistinct != null ? 2 : 0);

    // TODO canonicalize the pattern
    QuadFilterPatternCanonical qfpc = canonicalize2(qfp, generator);
    return new ConjunctiveQuery(varInfo, qfpc);
}
/**
 * Try to interpret the op as filter(exprs, [graph(?g, ...)] quadPattern(...))
 * and return it as a {@link QuadFilterPattern}. A missing filter is treated
 * as filter(true, ...). A wrapping graph op is accepted only if its graph node
 * is a variable not mentioned in the filter.
 *
 * @param op the (quad-form) op to analyze
 * @return the extracted pattern, or null if the op does not have the expected shape
 */
public static QuadFilterPattern extractQuadFilterPattern(Op op) {
    QuadFilterPattern result = null;

    OpFilter opFilter;
    // TODO allow nested filters
    if (op instanceof OpFilter) {
        opFilter = (OpFilter) op;
    } else {
        // Normalize: treat a filter-less op as being filtered by TRUE
        opFilter = (OpFilter) OpFilter.filter(NodeValue.TRUE, op);
    }
    op = opFilter.getSubOp();

    if (op instanceof OpGraph) {
        OpGraph opGraph = (OpGraph) op;
        Node graphNode = opGraph.getNode();
        // The graphNode must be a variable which is not constrained by the filter
        Set<Var> filterVars = ExprVars.getVarsMentioned(opFilter.getExprs());
        if (graphNode.isVariable() && !filterVars.contains(graphNode)) {
            op = opGraph.getSubOp();
        } else {
            op = null;
        }
    }

    if (op instanceof OpQuadPattern) {
        // Bug fix: cast the current (possibly graph-unwrapped) op. The original
        // cast opFilter.getSubOp(), which may still be the enclosing OpGraph and
        // then threw a ClassCastException.
        OpQuadPattern opQuadPattern = (OpQuadPattern) op;
        QuadPattern quadPattern = opQuadPattern.getPattern();
        List<Quad> quads = quadPattern.getList();

        ExprList exprs = opFilter.getExprs();
        Expr expr = ExprUtils.andifyBalanced(exprs);
        result = new QuadFilterPattern(quads, expr);
    }

    return result;
}
/**
 * Note assumes that this has been applied so far:
 * op = Algebra.toQuadForm(op);
 * op = ReplaceConstants.replace(op);
 *
 * Peels an optional distinct and an optional projection off the op and, if the
 * remainder is a quad filter pattern, returns it together with the projection.
 *
 * @param op the op to analyze
 * @return the projected quad filter pattern, or null if the op does not match
 */
public static ProjectedQuadFilterPattern transform(Op op) {
    boolean isDistinct = op instanceof OpDistinct;
    if (isDistinct) {
        op = ((OpDistinct) op).getSubOp();
    }

    Set<Var> projectVars = null;
    if (op instanceof OpProject) {
        OpProject opProject = (OpProject) op;
        projectVars = new HashSet<>(opProject.getVars());
        op = opProject.getSubOp();
    }

    QuadFilterPattern qfp = extractQuadFilterPattern(op);
    if (qfp == null) {
        return null;
    }

    // Without an explicit projection, every mentioned variable is projected
    if (projectVars == null) {
        projectVars = new HashSet<>(OpVars.mentionedVars(op));
    }
    return new ProjectedQuadFilterPattern(projectVars, qfp, isDistinct);
}
/**
 * Map each (distinct) quad of the pattern to the CNF clauses whose variables
 * are fully covered by that quad's variables. A null filter expression is
 * treated as TRUE.
 *
 * @param qfp the quad filter pattern to analyze
 * @return map from each quad to its applicable clauses (possibly empty sets)
 */
public static Map<Quad, Set<Set<Expr>>> quadToCnf(QuadFilterPattern qfp) {
    Expr expr = qfp.getExpr();
    Set<Set<Expr>> filterCnf = CnfUtils.toSetCnf(expr == null ? NodeValue.TRUE : expr);

    Map<Quad, Set<Set<Expr>>> result = new HashMap<>();
    // De-duplicate the quads before iterating
    for (Quad quad : new HashSet<>(qfp.getQuads())) {
        Set<Var> quadVars = QuadUtils.getVarsMentioned(quad);
        Set<Set<Expr>> applicable = new HashSet<>();
        for (Set<Expr> clause : filterCnf) {
            if (quadVars.containsAll(ClauseUtils.getVarsMentioned(clause))) {
                applicable.add(clause);
            }
        }
        result.put(quad, applicable);
    }
    return result;
}
// public static QuadFilterPattern canonicalize(QuadFilterPattern qfp, Generator<Var> generator) {
// QuadFilterPatternCanonical tmp = replaceConstants(qfp.getQuads(), generator);
// Set<Set<Expr>> cnf = CnfUtils.toSetCnf(qfp.getExpr());
// cnf.addAll(tmp.getFilterCnf());
// QuadFilterPatternCanonical canonical = new QuadFilterPatternCanonical(tmp.getQuads(), cnf);
//
//
// //QuadFilterPatternCanonical qfpc = summarize(qfp).getCanonicalPattern();
//
//// QuadFilterPatternCanonical tmp = canonicalize(qfpc, generator);
// QuadFilterPattern result = canonical.toQfp();
//
// return result;
// }
/**
 * Try to extract a conjunctive query from the op and wrap it as an
 * {@link OpExtConjunctiveQuery}.
 *
 * @param op the op to analyze
 * @param generator variable generator used for canonicalization
 * @return the wrapped conjunctive query, or null if extraction fails
 */
public static OpExtConjunctiveQuery tryCreateCqfp(Op op, Generator<Var> generator) {
    ConjunctiveQuery cq = tryExtractConjunctiveQuery(op, generator);
    return cq != null ? new OpExtConjunctiveQuery(cq) : null;
}
// Assumes that ReplaceConstants has been called
/**
 * Canonicalize the pattern by converting its filter to DNF. The generator is
 * retained for interface compatibility; since constants are assumed to have
 * been replaced already, no fresh variables are needed here.
 *
 * @param qfp the quad filter pattern to canonicalize
 * @param generator unused here (see note above)
 * @return the canonical pattern with a DNF expression holder
 */
public static QuadFilterPatternCanonical canonicalize2(QuadFilterPattern qfp, Generator<Var> generator) {
    Set<Set<Expr>> dnf = DnfUtils.toSetDnf(qfp.getExpr());
    Set<Quad> quads = new LinkedHashSet<>(qfp.getQuads());
    return new QuadFilterPatternCanonical(quads, ExprHolder.fromDnf(dnf));
}
/**
 * Older canonicalization variant: replaces constants with fresh variables,
 * strips the default-graph filter, and merges the original filter (as CNF)
 * with the constant-equality clauses.
 *
 * @param qfp the quad filter pattern to canonicalize
 * @param generator source of fresh variables for constant replacement
 * @return the canonical pattern with a CNF expression holder
 */
public static QuadFilterPatternCanonical canonicalize2old(QuadFilterPattern qfp, Generator<Var> generator) {
    QuadFilterPatternCanonical replaced = removeDefaultGraphFilter(replaceConstants(qfp.getQuads(), generator));
    Set<Set<Expr>> cnf = CnfUtils.toSetCnf(qfp.getExpr());
    cnf.addAll(replaced.getFilterCnf());
    return new QuadFilterPatternCanonical(replaced.getQuads(), ExprHolder.fromCnf(cnf));
}
/**
 * Replace constants in the pattern's quads with fresh variables and merge the
 * resulting equality clauses into the existing CNF.
 *
 * @param qfpc the canonical pattern to process
 * @param generator source of fresh variables for constant replacement
 * @return a new canonical pattern with constant-free quads and the merged CNF
 */
public static QuadFilterPatternCanonical canonicalize(QuadFilterPatternCanonical qfpc, Generator<Var> generator) {
    QuadFilterPatternCanonical replaced = replaceConstants(qfpc.getQuads(), generator);
    Set<Set<Expr>> mergedCnf = new HashSet<>(qfpc.getFilterCnf());
    mergedCnf.addAll(replaced.getFilterCnf());
    return new QuadFilterPatternCanonical(replaced.getQuads(), ExprHolder.fromCnf(mergedCnf));
}
/**
 * Replace every constant in the quads with a fresh variable and record each
 * replacement as a singleton equality clause (var = constant). The same
 * constant is mapped to the same variable across all quads.
 *
 * @param quads the quads to process
 * @param generator source of fresh variables
 * @return a canonical pattern of constant-free quads plus the equality CNF
 */
public static QuadFilterPatternCanonical replaceConstants(Iterable<Quad> quads, Generator<Var> generator) {
    Set<Set<Expr>> cnf = new HashSet<>();
    Map<Node, Var> constantToVar = new HashMap<>();
    Set<Quad> newQuads = new LinkedHashSet<>();

    for (Quad quad : quads) {
        Node[] components = QuadUtils.quadToArray(quad);
        for (int i = 0; i < 4; ++i) {
            Node node = components[i];
            // Variables are retained as-is
            if (node.isVariable()) {
                continue;
            }
            Var var = constantToVar.get(node);
            if (var == null) {
                var = generator.next();
                constantToVar.put(node, var);
                // Record the constraint var = constant as a singleton clause
                Expr eq = new E_Equals(new ExprVar(var), NodeValue.makeNode(node));
                cnf.add(Collections.singleton(eq));
            }
            components[i] = var;
        }
        newQuads.add(QuadUtils.arrayToQuad(components));
    }

    return new QuadFilterPatternCanonical(newQuads, ExprHolder.fromCnf(cnf));
}
/**
 * Build a pattern summary for the given quad filter pattern: its canonical
 * form (quads + DNF filter), the per-quad filter association, and the
 * variable occurrence index.
 *
 * @param originalPattern the pattern to summarize
 * @return the pattern summary
 */
public static PatternSummary summarize(QuadFilterPattern originalPattern) {
    Set<Quad> quads = new LinkedHashSet<>(originalPattern.getQuads());
    Set<Set<Expr>> filterDnf = DnfUtils.toSetDnf(originalPattern.getExpr(), true);

    IBiSetMultimap<Quad, Set<Set<Expr>>> quadToDnf = createMapQuadsToFilters(quads, filterDnf);
    // Pruning of single-quad variables is disabled here
    IBiSetMultimap<Var, VarOccurrence> varOccurrences = createMapVarOccurrences(quadToDnf, false);

    QuadFilterPatternCanonical canonicalPattern =
            new QuadFilterPatternCanonical(quads, ExprHolder.fromDnf(filterDnf));

    return new PatternSummary(originalPattern, canonicalPattern, quadToDnf, varOccurrences);
}
/**
 * Build a bidirectional multimap from each variable to its occurrences
 * (the quad's filter summary plus the component index where it appears).
 *
 * Assumes every quad component is a variable — constants must have been
 * replaced beforehand, otherwise the cast to Var below fails.
 *
 * @param quadToCnf map from each quad to its associated filter summaries
 * @param pruneVarOccs if true, variables whose occurrences all share a single
 *        quad summary are removed from the result
 * @return the variable occurrence index
 */
private static IBiSetMultimap<Var, VarOccurrence> createMapVarOccurrences(
        IBiSetMultimap<Quad, Set<Set<Expr>>> quadToCnf, boolean pruneVarOccs) {
    Set<Quad> quads = quadToCnf.keySet();

    // Iterate the quads again, and for each variable map it to where it to the component where it occurs in
    IBiSetMultimap<Var, VarOccurrence> varOccurrences = new BiHashMultimap<Var, VarOccurrence>();
    for(Quad quad : quads) {
        // Each quad is expected to map to at least one summary; take the first
        Set<Set<Expr>> quadCnf = quadToCnf.get(quad).iterator().next();

        for(int j = 0; j < 4; ++j) {
            Var var = (Var)QuadUtils.getNode(quad, j);
            VarOccurrence varOccurence = new VarOccurrence(quadCnf, j);
            varOccurrences.put(var, varOccurence);
        }
    }

    // Remove all variables that only occur in the same quad
    if(pruneVarOccs) {
        Iterator<Entry<Var, Collection<VarOccurrence>>> it = varOccurrences.asMap().entrySet().iterator();
        while(it.hasNext()) {
            Entry<Var, Collection<VarOccurrence>> entry = it.next();

            // Collect the distinct quad summaries this variable occurs in
            Set<Set<Set<Expr>>> varQuadCnfs = new HashSet<Set<Set<Expr>>>();
            for(VarOccurrence varOccurrence : entry.getValue()) {
                varQuadCnfs.add(varOccurrence.getQuadCnf());

                // Bail out early
                if(varQuadCnfs.size() > 1) {
                    break;
                }
            }

            if(varQuadCnfs.size() == 1) {
                it.remove();
            }
        }
    }

    return varOccurrences;
}
/**
 * Note: the result map contains all quads - quads without constraints map to an empty set
 *
 * @param qfpc the canonical pattern whose quads and DNF filter are associated
 * @return bidirectional multimap from quads to their applicable filter parts
 */
public static IBiSetMultimap<Quad, Set<Set<Expr>>> createMapQuadsToFilters(QuadFilterPatternCanonical qfpc) {
    return createMapQuadsToFilters(qfpc.getQuads(), qfpc.getFilterDnf());
}
// public static IBiSetMultimap<Quad, Set<Set<Expr>>> createMapQuadsToFilters(
// Set<Quad> quads, Set<Set<Expr>> filterCnf) {
// // This is part of the result
// //List<Set<Set<Expr>>> quadCnfList = new ArrayList<Set<Set<Expr>>>(quads.size());
// IBiSetMultimap<Quad, Set<Set<Expr>>> quadToCnf = new BiHashMultimap<Quad, Set<Set<Expr>>>();
//
//
//
// for(Quad quad : quads) {
// Set<Var> quadVars = QuadUtils.getVarsMentioned(quad);
//
// Set<Set<Expr>> cnf = new HashSet<Set<Expr>>(); //new HashSet<Clause>();
//
// for(Set<Expr> clause : filterCnf) {
// Set<Var> clauseVars = ClauseUtils.getVarsMentioned(clause);
//
// boolean containsAll = quadVars.containsAll(clauseVars);
// if(containsAll) {
// cnf.add(clause);
// }
// }
//
//
// Set<Set<Expr>> quadCnf = normalize(quad, cnf);
// //quadCnfList.add(quadCnf);
// quadToCnf.put(quad, quadCnf);
// }
// return quadToCnf;
// }
/**
 * For each quad, restricts the overall filter (in DNF) to the expressions
 * whose variables are fully covered by that quad, then normalizes the
 * restricted DNF against the quad.
 *
 * Every quad becomes a key of the result; quads without any applicable
 * constraint map to the normalization of an empty clause set.
 *
 * @param quads     the quads of the pattern
 * @param filterDnf the filter condition in disjunctive normal form
 * @return bidirectional multimap from quad to its normalized constraints
 */
public static IBiSetMultimap<Quad, Set<Set<Expr>>> createMapQuadsToFilters(
        Set<Quad> quads, Set<Set<Expr>> filterDnf) {
    IBiSetMultimap<Quad, Set<Set<Expr>>> result = new BiHashMultimap<Quad, Set<Set<Expr>>>();

    for (Quad quad : quads) {
        Set<Var> quadVars = QuadUtils.getVarsMentioned(quad);

        // Keep, per clause, only the expressions whose variables all occur
        // in this quad.
        Set<Set<Expr>> restrictedDnf = new HashSet<>();
        for (Set<Expr> clause : filterDnf) {
            Set<Expr> restrictedClause = new HashSet<>();
            for (Expr expr : clause) {
                if (quadVars.containsAll(ExprVars.getVarsMentioned(expr))) {
                    restrictedClause.add(expr);
                }
            }
            restrictedDnf.add(restrictedClause);
        }

        result.put(quad, SparqlCacheUtils.normalize(quad, restrictedDnf));
    }

    return result;
}
/**
 * Builds a membership constraint from a single-column result set: the
 * expression {@code ?inVar IN (v1, ..., vn)}, where the values are the
 * bindings of the result set's only variable and {@code inVar} is the
 * variable that result variable is mapped to via {@code varMap}.
 *
 * The result set is consumed (read to the end).
 *
 * @param rs     result set with exactly one result variable
 * @param varMap maps the result variable to the variable used in the
 *               returned expression
 * @return the IN-expression
 * @throws UnsupportedOperationException if the result set does not have
 *         exactly one result variable (multi-column IN is not implemented)
 * @throws NullPointerException if varMap has no entry for the result variable
 */
public static Expr createExpr(ResultSet rs, Map<Var, Var> varMap) {
    List<String> resultVars = rs.getResultVars();
    if (resultVars.size() != 1) {
        // Was a bare RuntimeException("Not supported yet"); narrowed to a
        // RuntimeException subclass with context (backward compatible).
        throw new UnsupportedOperationException(
                "Expected exactly one result variable, got: " + resultVars);
    }

    Var var = Var.alloc(resultVars.get(0));
    Set<Node> nodes = getResultSetCol(rs, var);
    ExprList exprs = nodesToExprs(nodes);

    Var inVar = varMap.get(var);
    if (inVar == null) {
        // Fail fast instead of silently building an expression over a null
        // variable, which would yield a broken Expr downstream.
        throw new NullPointerException(
                "No mapping for result variable " + var + " in " + varMap);
    }

    return new E_OneOf(new ExprVar(inVar), exprs);
}
/**
 * Drains the given result set and collects all non-null bindings of the
 * requested variable.
 *
 * @param rs result set to consume (read to the end)
 * @param v  variable whose bound nodes are collected
 * @return the distinct nodes bound to v
 */
public static Set<Node> getResultSetCol(ResultSet rs, Var v) {
    Set<Node> nodes = new HashSet<Node>();
    while (rs.hasNext()) {
        Node node = rs.nextBinding().get(v);
        if (node != null) {
            nodes.add(node);
        }
    }
    return nodes;
}
/**
 * Wraps each node as a constant expression ({@link NodeValue}).
 *
 * @param nodes nodes to convert
 * @return expression list containing one NodeValue per input node
 */
public static ExprList nodesToExprs(Iterable<Node> nodes) {
    ExprList result = new ExprList();
    nodes.forEach(node -> result.add(NodeValue.makeNode(node)));
    return result;
}
/**
 * Computes, for every pair of quads, a "join summary" quad whose four
 * components are boolean constants: TRUE where the two quads agree on
 * that component, FALSE where they differ. Both quads of a pair are
 * registered in the result under their shared summary.
 *
 * TODO this has complexity O(n^2).
 * We can surely do better than that because joins are sparse and we
 * don't have to consider quads that do not join.
 *
 * @param sub the quads to compare pairwise
 * @return multimap from join summary to the quads exhibiting it
 */
public static SetMultimap<Quad, Quad> quadJoinSummary(List<Quad> sub) {
    SetMultimap<Quad, Quad> result = HashMultimap.create();

    int n = sub.size();
    for (int i = 0; i < n; ++i) {
        Quad first = sub.get(i);
        for (int j = i + 1; j < n; ++j) {
            Quad second = sub.get(j);

            Node[] components = new Node[4];
            for (int k = 0; k < 4; ++k) {
                boolean matches = QuadUtils.getNode(first, k).equals(QuadUtils.getNode(second, k));
                components[k] = (matches ? NodeValue.TRUE : NodeValue.FALSE).asNode();
            }

            Quad summary = QuadUtils.create(components);
            result.put(summary, first);
            result.put(summary, second);
        }
    }

    return result;
}
/**
 * Stub for a backtracking search that maps candidate variables to query
 * variables. The loop body is empty, so calling this method currently has
 * no effect beyond looking up the variable at {@code index}.
 *
 * NOTE(review): appears to be work in progress (the "Meh" suffix suggests
 * so) — confirm whether this can be completed or removed.
 */
public static void backtrackMeh(PatternSummary query, PatternSummary cand, Map<Var, Set<Var>> candToQuery, List<Var> varOrder, int index) {
Var var = varOrder.get(index);
Set<Var> queryVars = candToQuery.get(var);
// Try a mapping, and backtrack if we hit a dead end
for(Var queryVar : queryVars) {
// TODO unimplemented: the candidate-to-query mapping attempt goes here
}
}
/**
 * Stub: intended to return the variables that cannot be optimized away
 * because they are referenced in the following portions of the query:
 * projection, ORDER BY and GROUP BY.
 *
 * Currently unimplemented and always returns {@code null}.
 *
 * NOTE(review): callers must not rely on this until it is implemented.
 */
public static Set<Var> getRefVars(Query query) {
//query.getProjectVars();
return null;
}
/**
 * Indexes a filter in DNF by clause signatures: each clause is keyed by
 * the set of its expressions' signatures (via
 * {@code ExprUtils.signaturize}) and mapped to a multimap from expression
 * signature back to the concrete expression(s) carrying that signature.
 *
 * @param dnf filter condition in disjunctive normal form
 * @return feature map from clause signature to signature-to-expression multimap
 */
public static FeatureMap<Expr, Multimap<Expr, Expr>> indexDnf(Set<Set<Expr>> dnf) {
    FeatureMap<Expr, Multimap<Expr, Expr>> result = new FeatureMapImpl<>();

    for (Set<Expr> clause : dnf) {
        Set<Expr> clauseSignature = new HashSet<>();
        Multimap<Expr, Expr> signatureToExpr = HashMultimap.create();

        for (Expr expr : clause) {
            Expr signature = org.aksw.jena_sparql_api.utils.ExprUtils.signaturize(expr);
            clauseSignature.add(signature);
            signatureToExpr.put(signature, expr);
        }

        result.put(clauseSignature, signatureToExpr);
    }

    return result;
}
/**
* So we need to know which variables of a quad pattern are either projected or required for evaluation (e.g. filters, joins) of the overall query.
*
*
*
*
* For each quad filter pattern of the given algebra expression determine which variables are projected and
* whether distinct applies.
* So actually we want to push distinct down - but this also does not make sense, because distinct only applies
* after a projection...
*
*
* The tricky part is that variables are actually scoped:
* Select ?x {
* { Select Distinct ?s As ?x{ // Within this subtree, unique(?x) applies
* ?s a foaf:Person
* } }
* Union {
* ?x a foaf:Agent
* }
* }
*
*
*
*
* @param opIndex
*/
// public static VarUsage analyzeQuadFilterPatterns(OpIndex opIndex) {
// Tree<Op> tree = opIndex.getTree();
// List<Op> leafs = TreeUtils.getLeafs(tree);
// for(Op leaf : leafs) {
// VarUsage ps = OpUtils.analyzeVarUsage(tree, leaf);
// System.out.println(ps);
// }
// return null;
// }
}
//// Variables that are projected in the current iteration
////Set<Var> projectedVars = new HashSet<>(availableVars);
//
//// Variables that are referenced
//Set<Var> referencedVars = new HashSet<>();
//
//// Any variable that is aggregated on must not be non-unique (otherwise it would distort the result)
//// Note that an overall query neither projects nor references a nonUnique variable
//Set<Var> nonUnique = new HashSet<>();
//
//// Maps variables to which other vars they depend on
//// E.g. Select (?x + 1 As ?y) { ... } will create the entry ?y -> { ?x } - i.e. ?y depends on ?x
//// Transitive dependencies are resolved immediately
//Multimap<Var, Var> varDeps = HashMultimap.create();
//availableVars.forEach(v -> varDeps.put(v, v));
//
//Op placeholder = new OpBGP();
//Op parent;
//while((parent = tree.getParent(current)) != null) {
// // Class<?> opClass = current.getClass();
// // System.out.println("Processing: " + parent);
// // Compute referenced vars for joins (non-disjunctive multi argument expressions)
// // boolean isDisjunction = parent instanceof OpUnion || parent instanceof OpDisjunction;
// // boolean isJoin = parent instanceof OpJoin || parent instanceof OpSequence;
// if(parent instanceof OpJoin || parent instanceof OpLeftJoin || parent instanceof OpSequence) {
// List<Op> children = tree.getChildren(parent);
// List<Op> tmp = new ArrayList<>(children);
// Set<Var> visibleVars = new HashSet<>();
// for(int i = 0; i < tmp.size(); ++i) {
// Op child = tmp.get(i);
// if(child != current) {
// OpVars.visibleVars(child, visibleVars);
// }
// }
//
// if(parent instanceof OpLeftJoin) {
// OpLeftJoin olj = (OpLeftJoin)parent;
// ExprList exprs = olj.getExprs();
// if(exprs != null) {
// Set<Var> vms = ExprVars.getVarsMentioned(exprs);
// visibleVars.addAll(vms);
// }
// }
//
// Set<Var> originalVars = getAll(varDeps, visibleVars);
// //Set<Var> overlapVars = Sets.intersection(projectedVars, originalVars);
// referencedVars.addAll(originalVars);
//
// } else if(parent instanceof OpProject) {
// OpProject o = (OpProject)parent;
// Set<Var> vars = new HashSet<>(o.getVars());
// Set<Var> removals = new HashSet<>(Sets.difference(varDeps.keySet(), vars));
// varDeps.removeAll(removals);
// } else if(parent instanceof OpExtend) { // TODO same for OpAssign
// OpExtend o = (OpExtend)parent;
// VarExprList vel = o.getVarExprList();
//
// Multimap<Var, Var> updates = HashMultimap.create();
// vel.forEach((v, ex) -> {
// Set<Var> vars = ExprVars.getVarsMentioned(ex);
// vars.forEach(w -> {
// Collection<Var> deps = varDeps.get(w);
// updates.putAll(w, deps);
// });
//
// updates.asMap().forEach((k, w) -> {
// varDeps.replaceValues(k, w);
// });
// });
//
//// } else if(parent instanceof OpAssign) {
//// OpAssign o = (OpAssign)parent;
//// projectedVars.remove(o.getVarExprList().getVars());
// } else if(parent instanceof OpGroup) {
// // TODO: This is similar to a projection
//
// OpGroup o = (OpGroup)parent;
// // Original variables used in the aggregators are declared as non-unique and referenced
// List<ExprAggregator> exprAggs = o.getAggregators();
// exprAggs.forEach(ea -> {
// Var v = ea.getVar();
// ExprList el = ea.getAggregator().getExprList();
// Set<Var> vars = ExprVars.getVarsMentioned(el);
// Set<Var> origVars = getAll(varDeps, vars);
// //referencedVars.addAll(origVars);
// varDeps.putAll(v, origVars);
// nonUnique.addAll(origVars);
// });
//
// // Original variables in the group by expressions are declared as referenced
// VarExprList vel = o.getGroupVars();
// vel.forEach((v, ex) -> {
// Set<Var> vars = ExprVars.getVarsMentioned(ex);
// Set<Var> origVars = MultiMaps.transitiveGetAll(varDeps.asMap(), vars);
// referencedVars.addAll(origVars);
// varDeps.putAll(v, origVars);
// });
//
// } else {
//// referencedVars.addAll(availableVars);
////// isDistinct = false;
////
////
//// System.out.println("Unknown Op type: " + opClass);
//// projectedVars.clear();
// }
//
// current = parent;
//}
//
////referencedVars.addAll(varDeps.values());