package org.aksw.jena_sparql_api.views.index;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.IntStream;
import java.util.stream.Stream;

import org.aksw.commons.collections.trees.Tree;
import org.aksw.jena_sparql_api.algebra.transform.TransformDisjunctionToUnion;
import org.aksw.jena_sparql_api.algebra.transform.TransformEffectiveOp;
import org.aksw.jena_sparql_api.algebra.transform.TransformJoinToSequence;
import org.aksw.jena_sparql_api.algebra.transform.TransformPushFiltersIntoBGP;
import org.aksw.jena_sparql_api.algebra.transform.TransformReplaceConstants;
import org.aksw.jena_sparql_api.algebra.transform.TransformUnionToDisjunction;
import org.aksw.jena_sparql_api.concept_cache.collection.FeatureMap;
import org.aksw.jena_sparql_api.concept_cache.collection.FeatureMapImpl;
import org.aksw.jena_sparql_api.concept_cache.core.SparqlCacheUtils;
import org.aksw.jena_sparql_api.concept_cache.op.OpUtils;
import org.aksw.jena_sparql_api.unsorted.OpVisitorFeatureExtractor;
import org.aksw.jena_sparql_api.utils.Generator;
import org.aksw.jena_sparql_api.utils.VarGeneratorImpl2;
import org.aksw.jena_sparql_api.view_matcher.OpVarMap;
import org.aksw.jena_sparql_api.view_matcher.SparqlViewMatcherUtils;
import org.apache.jena.ext.com.google.common.collect.Iterables;
import org.apache.jena.graph.Node;
import org.apache.jena.query.Query;
import org.apache.jena.sparql.algebra.Algebra;
import org.apache.jena.sparql.algebra.Op;
import org.apache.jena.sparql.algebra.Transformer;
import org.apache.jena.sparql.algebra.op.OpBGP;
import org.apache.jena.sparql.algebra.op.OpService;
import org.apache.jena.sparql.algebra.optimize.Rewrite;
import org.apache.jena.sparql.core.Var;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Multimap;

public class SparqlViewMatcherOpImpl<P> implements SparqlViewMatcherOp<P> {
    private static final Logger logger = LoggerFactory.getLogger(SparqlViewMatcherOpImpl.class);

    protected Rewrite opNormalizer;
    protected Function<Op, Set<Set<String>>> itemFeatureExtractor;
    protected Function<Op, OpIndex> itemIndexer;

    protected FeatureMap<String, P> featuresToIndexes;
    protected Map<P, OpIndex> idToQueryIndex;

    protected Supplier<P> nextPatternIdSupplier;

    public SparqlViewMatcherOpImpl(
            Rewrite opNormalizer,
            Function<Op, Set<Set<String>>> itemFeatureExtractor,
            Function<Op, OpIndex> itemIndexer,
            Supplier<P> nextPatternIdSupplier) {
        super();
        this.opNormalizer = opNormalizer;
        this.itemFeatureExtractor = itemFeatureExtractor;
        this.itemIndexer = itemIndexer;
        this.featuresToIndexes = new FeatureMapImpl<>();
        this.nextPatternIdSupplier = nextPatternIdSupplier;
        this.idToQueryIndex = new HashMap<>();
    }

    public P allocate(Op item) {
        P result = nextPatternIdSupplier.get();
        put(result, item);
        return result;
    }
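    /*
     * Illustrative sketch (not part of the original code): registering a view
     * and letting the matcher assign its pattern id. The query string is
     * hypothetical; QueryFactory is Jena's standard parser.
     *
     *   SparqlViewMatcherOpImpl<Integer> matcher = ...;
     *   Op viewOp = queryToNormalizedOp(QueryFactory.create(
     *       "SELECT ?s WHERE { ?s a <http://example.org/Person> }"));
     *   Integer viewId = matcher.allocate(viewOp);
     */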
    @Override
    public void put(P key, Op item) {
        // Check whether the submitted op is an extended conjunctive query, i.e.
        // comprised only of distinct, projection, filter and quad pattern, in
        // that order, where each of these components is optional.
        OpIndex index = itemIndexer.apply(item);
        Set<Set<String>> featureSets = index.getFeatureSets();

        for (Set<String> featureSet : featureSets) {
            featuresToIndexes.put(featureSet, key);
        }

        idToQueryIndex.put(key, index);
    }
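    /*
     * A hedged example of the "extended conjunctive query" shape that put()
     * expects (hypothetical query; algebra in SSE-like notation). The exact
     * output of normalizeOp may differ, e.g. quad form and generated variable
     * names; this only illustrates the distinct/project/filter/pattern layering.
     *
     *   SELECT DISTINCT ?s WHERE { ?s <http://example.org/p> <http://example.org/v> }
     *
     *   (distinct
     *     (project (?s)
     *       (filter (= ?c_0 <http://example.org/v>)
     *         (bgp (triple ?s <http://example.org/p> ?c_0)))))
     *
     * where ?c_0 stands for a variable introduced by TransformReplaceConstants.
     */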
    /**
     * Find matches among the indexed extended conjunctive queries.
     *
     * The result is a list of candidate matches ordered by specificity.
     * Each item comprises the attributes:
     * - the entry key
     * - the op map
     * - the var map
     * - the projected op
     * - (optionally, internal properties: OpIndex and feature set)
     *
     * There are several options for how to structure the result:
     *
     * Nested:
     *   From the op mapping, navigate to the var mapping and from there to the
     *   entry data: Map<Op, Op> -> Map<Var, Var> -> (key, ProjectedOp)
     *
     * Flat:
     *   (key, op map, var map, projected op, [op index, feature set])
     *
     * Indirect:
     *   The result is an OpVarMap together with the pattern id.
     *
     * The advantage of the flat structure is obviously its simplicity; the
     * disadvantage is that it may be suboptimal for efficient processing.
     *
     * TODO Projection handling (from a prior ProjectedOp-based draft): for a
     * given variable mapping there can be multiple projections, such as
     * { (?s), (?s ?p), (?p) }. The idea was to first filter the var maps for
     * projection compatibility (e.g. via
     * SparqlViewMatcherProjectionUtils.validateProjection), then sort the
     * compatible ones by specificity and select one of the most specific;
     * candidates without any compatible projection would be discarded.
     *
     * @param item
     * @return
     */
    @Override
    public Map<P, OpVarMap> lookup(Op item) {
        Set<P> tmpCands = new HashSet<>();

        // TODO If there is a projection on the item, the lookup fails, because
        // the projection was cut away when building the index.
        itemFeatureExtractor.apply(item).forEach(featureSet -> {
            featuresToIndexes.getIfSubsetOf(featureSet).stream()
                    .forEach(x -> tmpCands.add(x.getValue()));
        });

        // Order candidates by their node count, largest node count first
        List<P> cands = new ArrayList<>(tmpCands);
        Collections.sort(cands, (a, b) -> Long.compare(
                idToQueryIndex.get(b).getTree().nodeCount(),
                idToQueryIndex.get(a).getTree().nodeCount()));

        if (logger.isDebugEnabled()) {
            logger.debug("Phase 1: " + cands.size() + "/" + featuresToIndexes.size() + " passed");
        }

        OpIndex queryIndex = itemIndexer.apply(item);

        Map<P, OpVarMap> result = new LinkedHashMap<>();
        for (P cacheEntry : cands) {
            P id = cacheEntry;
            OpIndex cacheIndex = idToQueryIndex.get(cacheEntry);

            Multimap<Op, Op> candOpMapping =
                    SparqlViewMatcherSystemImpl.getCandidateLeafMapping(cacheIndex, queryIndex);

            Tree<Op> cacheTree = cacheIndex.getTree();
            Tree<Op> queryTree = queryIndex.getTree();

            // TODO Require a complete match of the tree, i.e. the cache and
            // query trees must have the same number of nodes / the same depth /
            // some other criterion that can be checked quickly.
            // In fact, we could use these features as an additional index.
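            // Hedged sketch of the quick pre-check suggested above, using the
            // node count as a cheap necessary condition (illustration only,
            // intentionally left disabled):
            //
            //   if (cacheTree.nodeCount() != queryTree.nodeCount()) {
            //       continue;
            //   }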
            Stream<OpVarMap> opVarMaps =
                    SparqlViewMatcherUtils.generateTreeVarMapping(candOpMapping, cacheTree, queryTree);

            opVarMaps.forEach(opVarMap -> {
                Op cacheRoot = cacheTree.getRoot();
                Op queryRoot = opVarMap.getOpMap().get(cacheRoot);
                if (logger.isDebugEnabled()) {
                    logger.debug("query root: " + queryRoot);
                }

                // We need to update the queryIndex (remove sub-trees that matched)
                Tree<Op> r = applyMapping(id, cacheTree, queryTree, opVarMap);
                if (logger.isDebugEnabled()) {
                    logger.debug("Result: " + r);
                    logger.debug("Varmap: " + Iterables.toString(opVarMap.getVarMaps()));
                }

                result.put(cacheEntry, opVarMap);
            });
        }

        return result;
    }

    public static <V> Tree<Op> applyMapping(V id, Tree<Op> cacheTree, Tree<Op> queryTree, OpVarMap opVarMap) {
        Map<Op, Op> nodeMapping = opVarMap.getOpMap();

        Op sourceRoot = cacheTree.getRoot();
        Op targetNode = nodeMapping.get(sourceRoot);
        if (targetNode == null) {
            throw new RuntimeException("Could not match the root node of a source tree to a node in the target tree - should not happen.");
        }

        // Substitute the matched sub-tree of the query with a placeholder
        // service op; note that this assumes the id can be cast to a Node.
        OpService placeholderOp = new OpService((Node) id, new OpBGP(), true);
        Op repl = OpUtils.substitute(queryTree.getRoot(), false,
                op -> op == targetNode ? placeholderOp : null);

        Tree<Op> result = OpUtils.createTree(repl);
        return result;
    }

    public static Op queryToNormalizedOp(Query query) {
        Op result = Algebra.compile(query);
        result = Algebra.toQuadForm(result);
        result = normalizeOp(result);
        return result;
    }

    public static Op denormalizeOp(Op op) {
        // Replace QFPCs
        op = Transformer.transform(new TransformEffectiveOp(), op);
        op = TransformPushFiltersIntoBGP.transform(op);
        op = Transformer.transform(TransformDisjunctionToUnion.fn, op);
        return op;
    }

    public static Op normalizeOp(Op op) {
        op = Transformer.transform(TransformUnionToDisjunction.fn, op);
        op = Transformer.transform(TransformJoinToSequence.fn, op);
        op = TransformReplaceConstants.transform(op);

        Generator<Var> generatorCache = VarGeneratorImpl2.create();
        op = OpUtils.substitute(op, false, o -> SparqlCacheUtils.tryCreateCqfp(o, generatorCache));

        return op;
    }

    public static Set<Set<String>> extractFeatures(Op oop) {
        return Collections.singleton(OpVisitorFeatureExtractor.getFeatures(oop,
                op -> op.getClass().getSimpleName()));
    }

    public static SparqlViewMatcherOp<Integer> create() {
        Iterator<Integer> nextPatternIdIt =
                IntStream.generate(new AtomicInteger()::getAndIncrement).iterator();
        Supplier<Integer> supplier = () -> nextPatternIdIt.next();

        SparqlViewMatcherOp<Integer> result = new SparqlViewMatcherOpImpl<>(
                SparqlViewMatcherOpImpl::normalizeOp,
                SparqlViewMatcherOpImpl::extractFeatures,
                new OpIndexerImpl(),
                supplier);
        return result;
    }
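    /*
     * Usage sketch (hypothetical queries; QueryFactory is Jena's standard
     * parser):
     *
     *   SparqlViewMatcherOp<Integer> matcher = SparqlViewMatcherOpImpl.create();
     *   matcher.put(0, queryToNormalizedOp(QueryFactory.create(
     *       "SELECT ?s ?o WHERE { ?s <http://example.org/p> ?o }")));
     *
     *   Map<Integer, OpVarMap> matches = matcher.lookup(queryToNormalizedOp(
     *       QueryFactory.create("SELECT * WHERE { ?x <http://example.org/p> ?y }")));
     *   // each entry maps a matching view's id to its op/var correspondences
     */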
    @Override
    public void removeKey(Object key) {
        featuresToIndexes.removeValue(key);
        idToQueryIndex.remove(key);
    }

    @Override
    public Op getOp(P key) {
        OpIndex opIndex = idToQueryIndex.get(key);
        Op result = opIndex == null ? null : opIndex.getOp();
        return result;
    }
}