package org.aksw.jena_sparql_api.views.index;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.aksw.commons.collections.trees.Tree;
import org.aksw.jena_sparql_api.algebra.transform.TransformDisjunctionToUnion;
import org.aksw.jena_sparql_api.algebra.transform.TransformEffectiveOp;
import org.aksw.jena_sparql_api.algebra.transform.TransformJoinToSequence;
import org.aksw.jena_sparql_api.algebra.transform.TransformPushFiltersIntoBGP;
import org.aksw.jena_sparql_api.algebra.transform.TransformReplaceConstants;
import org.aksw.jena_sparql_api.algebra.transform.TransformUnionToDisjunction;
import org.aksw.jena_sparql_api.concept_cache.collection.FeatureMap;
import org.aksw.jena_sparql_api.concept_cache.collection.FeatureMapImpl;
import org.aksw.jena_sparql_api.concept_cache.core.SparqlCacheUtils;
import org.aksw.jena_sparql_api.concept_cache.op.OpUtils;
import org.aksw.jena_sparql_api.unsorted.OpVisitorFeatureExtractor;
import org.aksw.jena_sparql_api.utils.Generator;
import org.aksw.jena_sparql_api.utils.VarGeneratorImpl2;
import org.aksw.jena_sparql_api.view_matcher.OpVarMap;
import org.aksw.jena_sparql_api.view_matcher.SparqlViewMatcherUtils;
import org.apache.jena.ext.com.google.common.collect.Iterables;
import org.apache.jena.graph.Node;
import org.apache.jena.query.Query;
import org.apache.jena.sparql.algebra.Algebra;
import org.apache.jena.sparql.algebra.Op;
import org.apache.jena.sparql.algebra.Transformer;
import org.apache.jena.sparql.algebra.op.OpBGP;
import org.apache.jena.sparql.algebra.op.OpService;
import org.apache.jena.sparql.algebra.optimize.Rewrite;
import org.apache.jena.sparql.core.Var;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Multimap;
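/**
 * Matches SPARQL algebra expressions: registered ops (views) are indexed by their feature sets,
 * and a lookup returns, for each matching entry key, an OpVarMap capturing the node and variable
 * correspondences between the cached view and the query.
 *
 * A minimal usage sketch (viewQuery and userQuery stand for Query objects obtained elsewhere):
 * <pre>{@code
 * SparqlViewMatcherOp<Integer> matcher = SparqlViewMatcherOpImpl.create();
 *
 * Op viewOp = SparqlViewMatcherOpImpl.queryToNormalizedOp(viewQuery);
 * matcher.put(1, viewOp);
 *
 * Op queryOp = SparqlViewMatcherOpImpl.queryToNormalizedOp(userQuery);
 * Map<Integer, OpVarMap> matches = matcher.lookup(queryOp);
 * }</pre>
 *
 * @param <P> the type of the keys (pattern ids) under which ops are registered
 */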
public class SparqlViewMatcherOpImpl<P>
implements SparqlViewMatcherOp<P>
{
private static final Logger logger = LoggerFactory
.getLogger(SparqlViewMatcherOpImpl.class);
//protected Function<Op, Op> opNormalizer;
protected Rewrite opNormalizer;
protected Function<Op, Set<Set<String>>> itemFeatureExtractor;
protected Function<Op, OpIndex> itemIndexer;
protected FeatureMap<String, P> featuresToIndexes;
protected Map<P, OpIndex> idToQueryIndex;
//protected Map<K, ProjectedOp> keyToValue;
//int nextPatternId = 0;
//P nextPatternId;
Supplier<P> nextPatternIdSupplier;
public SparqlViewMatcherOpImpl(
Rewrite opNormalizer,
Function<Op, Set<Set<String>>> itemFeatureExtractor,
Function<Op, OpIndex> itemIndexer,
Supplier<P> nextPatternIdSupplier) {
super();
this.opNormalizer = opNormalizer;
this.itemFeatureExtractor = itemFeatureExtractor;
this.itemIndexer = itemIndexer;
this.featuresToIndexes = new FeatureMapImpl<>(); //featuresToIndexes;
this.nextPatternIdSupplier = nextPatternIdSupplier;
idToQueryIndex = new HashMap<>();
//this.qfpcIndex = new SparqlViewCacheImpl();
}
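/**
 * Registers the given op under a freshly allocated key obtained from the pattern id supplier
 * and returns that key.
 */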
public P allocate(Op item) {
P result = nextPatternIdSupplier.get();
put(result, item);
return result;
}
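/**
 * Indexes the given op under the provided key: the op is turned into an OpIndex, each of its
 * feature sets is associated with the key, and the index itself is stored for later tree
 * matching during {@link #lookup(Op)}.
 */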
@Override
public void put(P key, Op item) {
// Check whether the submitted op is an extended conjunctive query,
// i.e. it consists only of distinct, projection, filter and quad pattern (in that order),
// each of which is optional
// Op normalizedItem = opNormalizer.rewrite(item);
//
// Node id = NodeFactory.createURI("id://" + StringUtils.md5Hash("" + normalizedItem));
OpIndex index = itemIndexer.apply(item);
Set<Set<String>> featureSets = index.getFeatureSets(); //itemFeatureExtractor.apply(item);
//MyEntry<K> entry = new MyEntry<>(key, featureSets, index);
for(Set<String> featureSet : featureSets) {
featuresToIndexes.put(featureSet, key); // new SimpleEntry<>(item, data)
}
idToQueryIndex.put(key, index);
}
/**
* Lookup a single candidate
*/
// public LookupResult<K> lookupSingle(Op item) {
// Collection<LookupResult<K>> tmp = lookup(item);
// LookupResult<K> result = Iterables.getFirst(tmp, null);
// return result;
// }
/**
* Find matches among the extended conjunctive queries
*
*
* @param item
* @return
*/
//@Override
// public Collection<LookupResult> lookupSimpleMatches(Op item) {
//
// return null;
//
// }
// public Stream<LookupResult<K>> filterByProjection(Stream<LookupResult<K>> lrs) {
// SparqlViewMatcherProjectionUtils.validateProjection(viewVarInfo, userVarInfo, vm)
//
//
// //Collection<LookupResult<K>>
// }
/**
* The result is a list of candidate matches ordered by specificity.
* Each item comprises the attributes:
* - The entry-key
* - The Op-Map
* - The Var-Map
* - The projected op
* - (Optionally internal properties: OpIndex and FeatureSet)
*
* There are several options for how to structure the result:
* Nested:
* From the op-mapping, navigate to the var-mapping and from there to the entry-data
* Map<Op, Op> -> Map<Var, Var> -> (Key, ProjectedOp)
*
* Flat:
* (key, opmap, varmap, projected op, [op index, feature set])
*
* Indirect:
* The result is:
* OpVarMap and patternId
*
*
* The advantage of the flat structure is obviously its simplicity.
* The disadvantage of the flat structure is that it may be suboptimal for efficient processing.
*
*
*
*
* @param pop
* @return
*/
// public Collection<KeyedOpVarMap<P>> lookup(ProjectedOp pop) {
//
// Op patternOp = pop.getResidualOp();
// Collection<KeyedOpVarMap<P>> candidates = lookup(patternOp);
//
// // For each pattern id, check the projection
// for(LookupResult<P> cand : candidates) {
// int patternId = cand.getEntry().id;
//
// Map<K, ProjectedOp> keyToPop = patternIdToKeyToPop.getOrDefault(patternId, Collections.emptyMap());
//
//
// // TODO What if there are multiple opVarMaps?
//
// // Note: For a given variable mapping, there can be multiple projections, such as{ (?s), (?s ?p), (?p) }
// // So we first determine all compatible ones, and then sort them by specificity.
// // We can then select one of the most specific ones.
//
// // The lookup result is then: OpVarMap, with for every var map the list of projections
// // ISSUE Using var maps as keys in a map feels wrong - but maybe in that case it is justified - as the set of candidate projections
// // really depend on the variable mapping
//
// OpVarMap opVarMap = cand.getOpVarMap();
//
// // For each var map get the set of compatible projections
// Iterable<Map<Var,Var>> filteredVarMaps = () -> StreamUtils.stream(opVarMap.getVarMaps())
// .filter(varMap -> SparqlViewMatcherProjectionUtils.validateProjection(e.getValue().getProjection(), pop.getProjection(), varMap))
// .iterator();
//
// opVarMap = new OpVarMap(opVarMap.getOpMap(), filteredVarMaps);
//
//
//
// // TODO Maybe make this lazy: Iterable<> foo = () -> keyToPop.stream()....interator()
//
// List<Map<Var, Var>> varMaps = keyToPop.entrySet().stream()
// .filter(e -> SparqlViewMatcherProjectionUtils.validateProjection(e.getValue().getProjection(), pop.getProjection(), varMap))
// .collect(Collectors.toList());
//
// OpVarMap r = new OpVarMap(opVarMap.getOpMap(), varMaps);
// return r;
// })
// // Remove all candidates for which no projection was compatible
// .filter(arg0)
//
//
// }
//
// LookupResult<K> lr;
// lr.getEntry();
//
//
//
// return null;
// }
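/**
 * Looks up all registered entries whose feature sets are subsumed by those of the given op,
 * orders the candidates by the node count of their indexed trees (largest first), and for each
 * candidate computes the node and variable mappings between the cached tree and the query tree.
 *
 * @return a map from entry key to the corresponding OpVarMap
 */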
@Override
public Map<P, OpVarMap> lookup(Op item) {
//Op normalizedItem = opNormalizer.rewrite(item);
Set<P> tmpCands = new HashSet<>();
//
// itemFeatureExtractor.apply(item).forEach(featureSet -> {
// //featuresToIndexes.getIfSubsetOf(featureSet).stream()
// featuresToIndexes.get(featureSet).stream()
// //.map(e -> e.getValue())
// .forEach(x -> tmpCands.add(x));
// });
// TODO If there is a projection on the item, the lookup fails because
// the projection is cut away in the index.
itemFeatureExtractor.apply(item).forEach(featureSet -> {
//featuresToIndexes.getIfSubsetOf(featureSet).stream()
featuresToIndexes.getIfSubsetOf(featureSet).stream()
//.map(e -> e.getValue())
.forEach(x -> tmpCands.add(x.getValue()));
});
// Order candidates by their node count - largest node counts first
List<P> cands = new ArrayList<>(tmpCands);
Collections.sort(cands, (a, b) -> Long.compare(
idToQueryIndex.get(b).getTree().nodeCount(),
idToQueryIndex.get(a).getTree().nodeCount()));
if(logger.isDebugEnabled()) { logger.debug("Phase 1: " + cands.size() + "/" + featuresToIndexes.size() + " passed"); }
OpIndex queryIndex = itemIndexer.apply(item);
//List<KeyedOpVarMap<P>> result = new ArrayList<>();
Map<P, OpVarMap> result = new LinkedHashMap<>();
for(P cacheEntry : cands) {
//OpIndex cacheIndex = cacheEntry.queryIndex;
P id = cacheEntry;
OpIndex cacheIndex = idToQueryIndex.get(cacheEntry);
Multimap<Op, Op> candOpMapping = SparqlViewMatcherSystemImpl.getCandidateLeafMapping(cacheIndex, queryIndex);
Tree<Op> cacheTree = cacheIndex.getTree();
Tree<Op> queryTree = queryIndex.getTree();
// TODO: Require a complete match of the tree - i.e. cache and query trees must have same number of nodes / same depth / some other criteria that can be checked quickly
// In fact, we could use these features as an additional index
Stream<OpVarMap> opVarMaps = SparqlViewMatcherUtils.generateTreeVarMapping(candOpMapping, cacheTree, queryTree);
opVarMaps.forEach(opVarMap -> {
Op cacheRoot = cacheTree.getRoot();
Op queryRoot = opVarMap.getOpMap().get(cacheRoot);
if(logger.isDebugEnabled()) { logger.debug("query root: " + queryRoot); }
//K id = cacheEntry.id;
// We need to update the queryIndex (remove sub-trees that matched)
Tree<Op> r = applyMapping(id, cacheTree, queryTree, opVarMap);
if(logger.isDebugEnabled()) { logger.debug("Result: " + r); }
if(logger.isDebugEnabled()) { logger.debug("Varmap: " + Iterables.toString(opVarMap.getVarMaps())); }
//KeyedOpVarMap<P> lr = new KeyedOpVarMap<P>(cacheEntry, opVarMap);
//result.add(lr);
result.put(cacheEntry, opVarMap);
//return lr;
});
}
return result;
}
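/**
 * Replaces the sub-tree of the query tree that corresponds to the root of the cache tree
 * (according to the node mapping of the given OpVarMap) with an OpService placeholder carrying
 * the entry id, and returns the tree of the resulting op.
 */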
public static <V> Tree<Op> applyMapping(V id, Tree<Op> cacheTree, Tree<Op> queryTree, OpVarMap opVarMap) {
Map<Op, Op> nodeMapping = opVarMap.getOpMap();
Op sourceRoot = cacheTree.getRoot();
Op targetNode = nodeMapping.get(sourceRoot);
if(targetNode == null) {
throw new RuntimeException("Could not match root node of a source tree to a node in the target tree - Should not happen.");
}
// NOTE: The entry id is cast to a Node here; this only works when the key type actually is a Node.
// With the Integer keys produced by create(), this cast would fail at runtime.
OpService placeholderOp = new OpService((Node)id, new OpBGP(), true);
// Substitute the matched target node with the placeholder; returning null signals
// that all other nodes are to be kept as-is
Op repl = OpUtils.substitute(queryTree.getRoot(), false,
op -> op == targetNode ? placeholderOp : null);
Tree<Op> result = OpUtils.createTree(repl);
return result;
}
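/**
 * Compiles the query to algebra, converts it to quad form and applies {@link #normalizeOp(Op)}.
 */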
public static Op queryToNormalizedOp(Query query) {
Op result = Algebra.compile(query);
result = Algebra.toQuadForm(result);
result = normalizeOp(result);
return result;
}
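/**
 * Reverses the normalization: replaces quad-filter-projection constructs by their effective ops,
 * pushes filters into BGPs and converts disjunctions back to unions.
 */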
public static Op denormalizeOp(Op op) {
// Replace QFPCs
op = Transformer.transform(new TransformEffectiveOp(), op);
//op = Transformer.transform(/new Transfo, op)
op = TransformPushFiltersIntoBGP.transform(op);
op = Transformer.transform(TransformDisjunctionToUnion.fn, op);
return op;
//op = Transformer.transform(TransformDisju, op);
//op = Transformer.transform(TransformJoinToSequence.fn, op);
}
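/**
 * Normalizes an op for indexing: unions become disjunctions, joins become sequences, constants
 * are replaced by variables, and eligible sub-expressions are rewritten into conjunctive
 * quad-filter-projection form where possible.
 */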
public static Op normalizeOp(Op op) {
op = Transformer.transform(TransformUnionToDisjunction.fn, op);
op = Transformer.transform(TransformJoinToSequence.fn, op);
//op = Transformer.transform(new TransformReplaceConstants(), op);
op = TransformReplaceConstants.transform(op);
Generator<Var> generatorCache = VarGeneratorImpl2.create();
//op = OpUtils.substitute(op, false, (o) -> SparqlCacheUtils.tryCreateCqfp(o, generatorCache));
op = OpUtils.substitute(op, false, (o) -> SparqlCacheUtils.tryCreateCqfp(o, generatorCache));
return op;
}
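/**
 * Extracts a single feature set from the op, consisting of the simple class names of the op
 * types occurring in it.
 */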
public static Set<Set<String>> extractFeatures(Op oop) {
return Collections.singleton(OpVisitorFeatureExtractor.getFeatures(oop, (op) -> op.getClass().getSimpleName()));
}
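/**
 * Creates a matcher that uses {@link #normalizeOp(Op)} for normalization,
 * {@link #extractFeatures(Op)} for feature extraction, an OpIndexerImpl for indexing,
 * and a simple counter for allocating Integer pattern ids.
 */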
public static SparqlViewMatcherOp<Integer> create() {
// Function<Op, Set<Set<String>>> itemFeatureExtractor = (oop) ->
// Collections.singleton(OpVisitorFeatureExtractor.getFeatures(oop, (op) -> op.getClass().getSimpleName()));
Iterator<Integer> nextPatternIdIt =
IntStream.generate(new AtomicInteger()::getAndIncrement).iterator();
Supplier<Integer> supplier = () -> nextPatternIdIt.next();
SparqlViewMatcherOp<Integer> result = new SparqlViewMatcherOpImpl<>(
SparqlViewMatcherOpImpl::normalizeOp,
SparqlViewMatcherOpImpl::extractFeatures,
new OpIndexerImpl(),
supplier);
return result;
}
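/**
 * Removes the entry registered under the given key from both the feature index and the
 * key-to-index map.
 */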
@Override
public void removeKey(Object key) {
featuresToIndexes.removeValue(key);
idToQueryIndex.remove(key);
// MyEntry<K> e = idToQueryIndex.get(key);
// if(e != null) {
// Set<Set<String>> featureSets = e.featureSets;
// featuresToIndexes.removeAll(featureSets);
// idToQueryIndex.remove(key);
// }
}
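/**
 * Returns the op registered under the given key, or null if no such entry exists.
 */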
@Override
public Op getOp(P key) {
OpIndex opIndex = idToQueryIndex.get(key);
Op result = opIndex == null ? null : opIndex.getOp();
return result;
}
}