package org.aksw.jena_sparql_api.concept_cache.core;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.aksw.commons.collections.trees.Tree;
import org.aksw.commons.collections.trees.TreeUtils;
import org.aksw.jena_sparql_api.concept_cache.op.OpUtils;
import org.aksw.jena_sparql_api.core.QueryExecutionBaseSelect;
import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
import org.aksw.jena_sparql_api.core.ResultSetCloseable;
import org.aksw.jena_sparql_api.util.collection.RangedSupplier;
import org.aksw.jena_sparql_api.util.collection.RangedSupplierLazyLoadingListCache;
import org.aksw.jena_sparql_api.utils.BindingUtils;
import org.aksw.jena_sparql_api.utils.QueryUtils;
import org.aksw.jena_sparql_api.utils.ResultSetUtils;
import org.aksw.jena_sparql_api.utils.VarUtils;
import org.aksw.jena_sparql_api.views.index.SparqlViewMatcherOpImpl;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.query.ARQ;
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.ResultSet;
import org.apache.jena.sparql.algebra.Algebra;
import org.apache.jena.sparql.algebra.Op;
import org.apache.jena.sparql.algebra.OpAsQuery;
import org.apache.jena.sparql.algebra.op.OpNull;
import org.apache.jena.sparql.algebra.op.OpService;
import org.apache.jena.sparql.core.Var;
import org.apache.jena.sparql.engine.binding.Binding;
import org.apache.jena.sparql.util.Context;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Stopwatch;
import com.google.common.cache.Cache;
import com.google.common.collect.Range;
public class QueryExecutionViewMatcherMaster
extends QueryExecutionBaseSelect
{
private static final Logger logger = LoggerFactory.getLogger(QueryExecutionViewMatcherMaster.class);
protected OpRewriteViewMatcherStateful opRewriter;
protected ExecutorService executorService;
// The jena context - used for setting up cache entries
// TODO Not sure if this was better part of the rewriter - or even a rewriteContext object
protected Context context;
// TODO Maybe add a decider which determines whether the result set of a query should be cached
protected long indexResultSetSizeThreshold;
// Statistic attributes
protected Double preparationTimeInSec;
// null: not set, 0: miss, 1: partial, 2: complete
protected Integer cacheHitLevel;
public QueryExecutionViewMatcherMaster(
Query query,
QueryExecutionFactory subFactory,
OpRewriteViewMatcherStateful opRewriter,
ExecutorService executorService
) {
super(query, subFactory);
this.opRewriter = opRewriter;
this.context = ARQ.getContext();
this.executorService = executorService;
}
public static ResultSetCloseable createResultSet(List<String> varNames, RangedSupplier<Long, Binding> rangedSupplier, Range<Long> range, Map<Var, Var> varMap) {
Stream<Binding> stream = rangedSupplier.apply(range);
if(varMap != null) {
stream = stream.map(b -> BindingUtils.rename(b, varMap));
}
ResultSet rs = ResultSetUtils.create(varNames, stream.iterator());
ResultSetCloseable result = new ResultSetCloseable(rs);
return result;
}
/**
* Substitute cache references either with the cached data or -
* if that is not completely available - substitute with the original expression.
*
* @param op
* @param tree
* @return
*/
public static boolean isView(Op op) {
boolean result = op instanceof OpService && isView(((OpService)op).getService());
return result;
}
public static boolean isView(Node node) {
boolean result = isView(node.getURI());
return result;
}
public static boolean isView(String uri) {
boolean result = uri.startsWith("view://");
return result;
}
@Override
protected ResultSetCloseable executeCoreSelect(Query rawQuery) {
Stopwatch sw = Stopwatch.createStarted();
boolean cacheWholeQuery = true; //!rootService.getURI().startsWith("view://");
List<Var> projectVars = rawQuery.getProjectVars();
//List<String> projectVarNames = VarUtils.getVarNames(projectVars);
// Store a present slice, but remove it from the query
// NOTE This could also be done on the OP level; but
// it seems to be much more convenient to be done on the query level
Range<Long> range = QueryUtils.toRange(query);
Query q = query.cloneQuery();
q.setLimit(Query.NOLIMIT);
q.setOffset(Query.NOLIMIT);
Op queryOp = Algebra.toQuadForm(Algebra.compile(q));
// queryOp = SparqlViewMatcherOpImpl.normalizeOp(queryOp);
// TODO opRewriter.lookup and opRewriter.put() both perform normalization
// We could this duplicate processing by normalizing here
// and passing the projected op to both functions
ProjectedOp pop = SparqlCacheUtils.cutProjectionAndNormalize(queryOp, SparqlViewMatcherOpImpl::normalizeOp);
//Op coreQueryOp = pop.getResidualOp();
// The thing here is, that in general we need to
// - Initialize the execution context / jena-wise global data
// - Perform the rewrite (may affect execution context state)
// - Clean up the execution context / jena-wise global data
RewriteResult2 rr = opRewriter.rewrite(pop);
cacheHitLevel = rr.getRewriteLevel();
Op rewrittenOp = rr.getOp();
Map<Node, StorageEntry> storageMap = rr.getIdToStorageEntry();
// All subtrees that are to be executed on the original data source must be wrapped with
// a standard sparql service clause
Tree<Op> tree = OpUtils.createTree(rewrittenOp);
// Find all referenced views in the expression
Set<Node> cacheRefs = TreeUtils.inOrderSearch(tree.getRoot(), tree::getChildren)
.filter(QueryExecutionViewMatcherMaster::isView)
.map(op -> ((OpService)op).getService())
.collect(Collectors.toSet());
Cache<Node, StorageEntry> cache = opRewriter.getCache();
for(Node cacheRef : cacheRefs) {
StorageEntry e = cache.getIfPresent(cacheRef);
//RangedSupplier<Long, Binding> s = e.storage;
storageMap.put(cacheRef, e);
}
// Reverse the levels, so that we start with the leafs
Predicate<Op> predicate = x -> !(x instanceof OpService);
Set<Op> taggedNodes = TreeUtils.propagateBottomUpLabel(tree, predicate);
logger.debug("Tagged: " + taggedNodes);
// If we tagged the root node, then everything can be executed on the original service
int idX = 0;
// Remap all tagged nodes to be executed on the original service
Map<Op, Op> taggedToService = new IdentityHashMap<>();
// Track whether we created a new service for the root node
Node newRootServiceNode = null;
for(Op tag : taggedNodes) {
boolean isRoot = tag == tree.getRoot();
// Do not cache pattern free queries
// TODO It would be better to decide caching based on the actual query execution time
// I.e. having an auto-caching layer would be nice
boolean isPatternFree = OpUtils.isPatternFree(tag);
if(isRoot && isPatternFree) {
cacheWholeQuery = false;
}
Node serviceNode;
if(isRoot && cacheWholeQuery) {
serviceNode = NodeFactory.createURI("view://ex.org/view" + pop.hashCode());
newRootServiceNode = serviceNode;
} else {
serviceNode = NodeFactory.createURI("view://service/" + idX++);
}
// We do not need to wrap parts of the query execution with a service
// if that part is pattern free (i.e. does not depend on external data)
// TODO Make sure that this works with EXISTS
if(!isPatternFree) {
Op serviceOp = new OpService(serviceNode, OpNull.create(), false);
//TransformDisjunctionToUnion
//tag = Transformer.transform(TransformDisjunctionToUnion.fn, tag);
Op execOp = SparqlViewMatcherOpImpl.denormalizeOp(tag);
Query qq = OpAsQuery.asQuery(execOp);
logger.info("Root query:\n" + qq);
RangedSupplier<Long, Binding> s3 = new RangedSupplierQuery(parentFactory, qq);
VarInfo varInfo = new VarInfo(new HashSet<>(qq.getProjectVars()), 0);
StorageEntry se = new StorageEntry(s3, varInfo); // The var info is not used
storageMap.put(serviceNode, se);
taggedToService.put(tag, serviceOp);
}
}
rewrittenOp = OpUtils.substitute(rewrittenOp, false, taggedToService::get);
logger.debug("Raw query being rewritten for execution:\n" + rawQuery);
logger.debug("Rewritten op being passed to execution:\n" + rewrittenOp);
Context ctx = context.copy();
ctx.put(OpExecutorViewCache.STORAGE_MAP, storageMap);
Set<Var> visibleVars = new HashSet<>(projectVars);//OpVars.visibleVars(rewrittenOp);
VarInfo varInfo = new VarInfo(visibleVars, 0);
RangedSupplier<Long, Binding> s2 = new RangedSupplierOp(rewrittenOp, ctx);
if(cacheWholeQuery && newRootServiceNode != null) {
// Caching the whole query requires the following actions:
// (1) Allocate a new id for the query
// (2) Create a storage entry for the rewritten entry
// (3) Make the new id of the query together with its original (i.e. non-rewritten) op known to the rewriter
//Node id = NodeFactory.createURI("view://ex.org/view" + queryOp.hashCode());
s2 = new RangedSupplierLazyLoadingListCache<Binding>(executorService, s2, Range.closedOpen(0l, 10000l));
//s2 = new RangedSupplierLazyLoadingListCache<Binding>(executorService, s2, range);
StorageEntry se2 = new StorageEntry(s2, varInfo);
// Update the storage entry with the cache wrapper
//storageMap.put(newRootServiceNode, se2);
// TODO The registration at the cache and the rewriter should be atomic
// At least we need to deal with the chance that the rewriter maps an op to an id for
// which the storageEntry has not yet been registered at the cache
opRewriter.put(newRootServiceNode, pop);
cache.put(newRootServiceNode, se2);
}
List<String> visibleVarNames = VarUtils.getVarNames(visibleVars);
ResultSetCloseable result = createResultSet(visibleVarNames, s2, range, null);
preparationTimeInSec = sw.stop().elapsed(TimeUnit.NANOSECONDS) / 1000000000.0;
System.out.println("Time to prepare the result set: " + (preparationTimeInSec * 1000) + " ms");
return result;
}
@Override
protected QueryExecution executeCoreSelectX(Query query) {
// TODO Fix bad design - this method is not needed
return null;
}
public Integer getCacheHitLevel() {
return cacheHitLevel;
}
}
//if(false) {
// Iterators.size(storage.apply(range));
// @SuppressWarnings("unchecked")
// RangedSupplierLazyLoadingListCache<Binding> test = storage.unwrap(RangedSupplierLazyLoadingListCache.class, true);
// System.out.println("Is range cached: " + test.isCached(range));
//
// ResultSet xxx = ResultSetUtils.create2(visibleVars, storage.apply(range));
// Table table = TableUtils.createTable(xxx);
// OpTable repl = OpTable.create(table);
// rewrittenOp = repl;
//}
//StorageEntry se = new StorageEntry(storage, varInfo);
//storageMap.put(serviceNode, se);
//
// Note: We use Jena to execute the op.
// The op itself may use SERVICE<> as the root node, which will cause jena to pass execution to the appropriate handler
// TODO Pass the op to an op executor
//QueryEngineMainQuad
// TODO Decide whether to cache the overall query
// Do NOT cache if:
// - there is already a cache entry that only differs in the var map
// - (if the new query is just linear post processing of an existing cache entry)
// This means, that the query will be available for cache lookups
//Node rootService = rewrittenOp instanceof OpService
// ? ((OpService)rewrittenOp).getService()
// : null;
//boolean cacheWholeQuery = true; //!rootService.getURI().startsWith("view://");
//context.put(OpExecutorViewCache.STORAGE_MAP, storageMap);
//if(cacheWholeQuery) {
//RangedSupplier<Long, Binding> s2;
//s2 = new RangedSupplierOp(rewrittenOp, ctx);
// For each parents of which all children are in the set, remove the children from the set
// and add the parent to the set instead
//Node serviceNode = NodeFactory.createURI("view://test.org");
//
//rewrittenOp = new OpService(serviceNode, OpNull.create(), false);
//
//RangedSupplier<Long, Binding> backend = new RangedSupplierQuery(parentFactory, rawQuery);
////RangedSupplierLazyLoadingListCache<Binding>
//RangedSupplier<Long, Binding> storage = new RangedSupplierLazyLoadingListCache<>(executorService, backend, Range.atMost(10000l), null);
//
//storage = RangedSupplierSubRange.create(storage, range);
// Adujst limit
//rewrittenOp = QueryUtils.applyRange(rewrittenOp, range);
//rewrittenOp = RewriteUtils.transformUntilNoChange(rewrittenOp, op -> Transformer.transform(TransformPushSlice.fn, op));
//DatasetGraph dg = DatasetGraphFactory.create();
//Context context = ARQ.getContext().copy();
//context.put(OpExecutorViewCache.STORAGE_MAP, storageMap);
//QueryEngineFactory qef = QueryEngineRegistry.get().find(rewrittenOp, dg, context);
//Plan plan = qef.create(rewrittenOp, dg, BindingRoot.create(), context);
//QueryIterator queryIter = plan.iterator();
//
//
////QueryIterator queryIter = x.eval(rewrittenOp, dg, BindingRoot.create(), context);
//ResultSet tmpRs = ResultSetFactory.create(queryIter, projectVarNames);
//
//// TODO Not sure if we should really return a result set, or a QueryIter instead
//ResultSetCloseable result = new ResultSetCloseable(tmpRs, () -> queryIter.close());
//ResultSetUtils.create(varNames, bindingIt)
//QueryEngineMain
//QC.execute(rewrittenOp, BindingRoot.create(), ARQ.getContext());
//org.apache.jena.query.QueryExecutionFactory.create(queryStr, syntax, model, initialBinding)
//
//
//
//LookupResult<Node> lr = viewMatcher.lookupSingle(opCache);
//RangedSupplier<Long, Binding> rangedSupplier;
//Map<Var, Var> varMap;
//if(lr == null) {
// Node id = viewMatcher.add(opCache);
// // Obtain the supplier from a factory (the factory may e.g. manage the sharing of a thread pool)
//
// rangedSupplier = new RangedSupplierQuery(parentFactory, query);
// rangedSupplier = new RangedSupplierLazyLoadingListCache<>(executorService, rangedSupplier, Range.atMost(10000l), null);
//
// //rangedSupplier = new RangedSupplierQuery(parentFactory, q);
// opToRangedSupplier.put(id, rangedSupplier);
// varMap = null;
//}
//else {
//
// varMap = Iterables.getFirst(lr.getOpVarMap().getVarMaps(), null);
//
// assert varMap != null : "VarMap was not expected to be null at this point";
//
// Node entryId = lr.getEntry().id;
// rangedSupplier = opToRangedSupplier.get(entryId);
//}
//
//ResultSetCloseable result = createResultSet(rangedSupplier, range, varMap);
//return result;
//
// public static StorageEntry createStorageEntry(Op op, VarInfo varInfo, Context context) {
// //Set<Var> visibleVars = OpVars.visibleVars(op);
// VarInfo varInfo = new VarInfo(visibleVars, Collections.emptySet());
//
// RangedSupplier<Long, Binding> storage = new RangedSupplierOp(op, context);
//
// StorageEntry result = new StorageEntry(storage, varInfo);
// return result;
//
//// @SuppressWarnings("unchecked")
//// RangedSupplierLazyLoadingListCache<Binding> test = storage.unwrap(RangedSupplierLazyLoadingListCache.class, true);
//// System.out.println("Is range cached: " + test.isCached(range));
//
//// ResultSet xxx = ResultSetUtils.create2(visibleVars, storage.apply(range));
//// Table table = TableUtils.createTable(xxx);
//// OpTable repl = OpTable.create(table);
//// rewrittenOp = repl;
//
//
// }
//