/******************************************************************************* * Copyright (c) 2007 Cambridge Semantics Incorporated. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Cambridge Semantics Incorporated *******************************************************************************/ package org.openanzo.glitter.query; import java.util.HashMap; import java.util.Iterator; import java.util.ListIterator; import java.util.Set; import java.util.Map.Entry; import org.openanzo.analysis.RequestAnalysis; import org.openanzo.exceptions.ExceptionConstants; import org.openanzo.exceptions.LogUtils; import org.openanzo.glitter.exception.GlitterException; import org.openanzo.glitter.exception.GlitterRuntimeException; import org.openanzo.glitter.syntax.abstrakt.BGP; import org.openanzo.glitter.syntax.abstrakt.Graph; import org.openanzo.glitter.syntax.abstrakt.GraphPattern; import org.openanzo.glitter.syntax.abstrakt.Group; import org.openanzo.glitter.syntax.abstrakt.Optional; import org.openanzo.glitter.syntax.abstrakt.Subquery; import org.openanzo.glitter.syntax.abstrakt.TreeNode; import org.openanzo.glitter.syntax.abstrakt.TriplePatternNode; import org.openanzo.glitter.syntax.abstrakt.Union; import org.openanzo.rdf.TriplePatternComponent; import org.openanzo.rdf.URI; import org.openanzo.rdf.Value; import org.openanzo.rdf.Variable; /** * Solve the node in memory, calling back to the provided NodeSolver to handle sub nodes. * * @author Joe Betz <jpbetz@cambridgesemantics.com> * */ public class SerialInMemoryNodeSolver implements NodeSolver { private final QueryController controller; private QueryExecutionPlan plan; //private boolean composedSolutions; private boolean canBindGraphVariables; private NodeSolver subNodeSolver; /** * In memory node solver constructor * * @param subNodeSolver * node solver for which to solve nodes * @param controller * the query controller for which this is solving * @param plan * the queyr execution plan to execute * @param canBindGraphVariables * true if the graph variable can be bound */ public SerialInMemoryNodeSolver(NodeSolver subNodeSolver, QueryController controller, QueryExecutionPlan plan, boolean canBindGraphVariables) { this.subNodeSolver = subNodeSolver; this.controller = controller; this.plan = plan; this.canBindGraphVariables = canBindGraphVariables; // this.composedSolutions = false; } // @SuppressWarnings("null") public SolutionSet solveNode(TreeNode n, SolutionSet answerConstraints, org.openanzo.rdf.URI namedGraphContext, Variable namedGraphVariable) throws GlitterException { SolutionSet newAnswers = null; long start = 0; boolean isEnabled = RequestAnalysis.getAnalysisLogger().isDebugEnabled(); if (isEnabled) { RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_beginSolvingNodeInMemory] {}", n.getClass().getSimpleName()); start = System.currentTimeMillis(); } if (n instanceof TriplePatternNode) { // we can't generate bindings ourselves, so we return an empty // solution set -- i.e., no bindings newAnswers = SolutionUtils.noSolutions(); if (isEnabled) { RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_returningNoSolutionsForTriplePatternNode]"); } } else if (n instanceof BGP) { BGP bgp = (BGP) n; newAnswers = solveBGP(bgp, answerConstraints, namedGraphContext, namedGraphVariable); } else if (n instanceof Group) { Group group = (Group) n; newAnswers = solveGroup(group, answerConstraints, namedGraphContext, namedGraphVariable); } else if (n instanceof Optional) { Optional optional = (Optional) n; newAnswers = solveOptional(optional, answerConstraints, namedGraphContext, namedGraphVariable); } else if (n instanceof Union) { Union union = (Union) n; newAnswers = solveUnion(union, answerConstraints, namedGraphContext, namedGraphVariable); } else if (n instanceof Graph) { Graph graphNode = (Graph) n; newAnswers = solveGraph(graphNode, answerConstraints, namedGraphContext, namedGraphVariable); } else if (n instanceof Subquery) { Subquery subquery = (Subquery) n; newAnswers = solveSubquery(subquery, answerConstraints, namedGraphContext, namedGraphVariable); } else { // not reachable throw new GlitterRuntimeException(ExceptionConstants.GLITTER.UNREACHABLE_CODE); } //this.composedSolutions = true; if (isEnabled) { StringBuilder sb = new StringBuilder(); n.prettyPrint(sb, true); RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_solvedNodeInMemory] [{}] {}:{}", new Object[] { sb.toString(), newAnswers.size(), System.currentTimeMillis() - start }); RequestAnalysis.getAnalysisLogger().debug(LogUtils.TIMING_MARKER, "glitter_SerialInMemoryNodeSolver_solvedNodeInMemory,{},{}", new Object[] { System.currentTimeMillis() - start, newAnswers.size() }); } return newAnswers; } private SolutionSet solveGraph(Graph graphNode, SolutionSet answerConstraints, org.openanzo.rdf.URI namedGraphContext, Variable namedGraphVariable) throws GlitterException { SolutionSet newAnswers; boolean isEnabled = RequestAnalysis.getAnalysisLogger().isDebugEnabled(); TriplePatternComponent tpc = graphNode.getGraphContext(); if (tpc instanceof org.openanzo.rdf.URI) { newAnswers = subNodeSolver.solveNode(graphNode.getGraphPattern(), answerConstraints, (org.openanzo.rdf.URI) tpc, namedGraphVariable); } else if (tpc instanceof Variable) { Variable graphVar = (Variable) tpc; //// // if we have a solution in which the GRAPH variable is bound to // something other than a URI, then the GRAPH clause fails to match // on that solution, and so we should remove the solution from our // list of answers for (ListIterator<PatternSolution> it = answerConstraints.listIterator(); it.hasNext();) { Value binding = it.next().getBinding(graphVar); if (binding != null && !(binding instanceof org.openanzo.rdf.URI)) { if (isEnabled) RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_removingSolutionBindingGraphVariableToNonURI] {} / {}", graphVar, binding); it.remove(); } } //// // if the backend can bind graph variables then we just recurse with this // variable as our graph context. if (canBindGraphVariables) { newAnswers = subNodeSolver.solveNode(graphNode.getGraphPattern(), answerConstraints, null, graphVar); } else { // otherwise, we try to match the GraphPattern in the context of all of the named graphs // in our DataSet // we're going to ask for solutions to the pattern given // this particular namedGraph as the graph context. first though, // we need to do a bit of massaging on our existing answers -- // all answers that we receive from this matching are going to // be augmented with a tpc->namedGraph binding . Unfortunately, // our solution generator won't know that, so we need to help them // out a bit. This implies two things: // 1) Any existing answer which has tpc bound to anything other // than namedGraph will not be compatible with any solution // returned in this effort, and so should be omitted from // the list of required answers passed along // 2) Any new answer which binds tpc to something other than // namedGraph will immediately be invalid. i.e., all existing // answers should act as if they already have the tpc->namedGraph // binding present. HashMap<URI, SolutionSet> answersPerGraph = new HashMap<URI, SolutionSet>(); Set<URI> allNamedGraphs = this.controller.getQueryDataset().getNamedGraphURIs(); for (URI namedGraph : allNamedGraphs) { SolutionSet apg = new SolutionList(); answersPerGraph.put(namedGraph, apg); } for (PatternSolution existingAnswer : answerConstraints) { Value existingBinding = existingAnswer.getBinding(graphVar); if (existingBinding == null) { // add graphVar -> graph for each graph for (URI namedGraph : allNamedGraphs) { PatternSolutionImpl clone = new PatternSolutionImpl(existingAnswer); clone.setBinding(graphVar, namedGraph); answersPerGraph.get(namedGraph).add(clone); } } else { // see if the current binding is one of our named graphs; if so, we'll include // this binding for that named graph only URI namedGraph = ((org.openanzo.rdf.URI) existingBinding); if (existingBinding instanceof org.openanzo.rdf.URI && allNamedGraphs.contains(namedGraph)) { answersPerGraph.get(namedGraph).add(existingAnswer); } } } // finally, gather new answers from each of the named graphs newAnswers = new SolutionList(); for (Entry<URI, SolutionSet> graph : answersPerGraph.entrySet()) { SolutionSet graphRequiredBindings = graph.getValue(); // if we have overall required bindings, but our bindings filtered // for this particular graph are empty, then there is no way for this // graph to add any bindings to our overall solution, so we skip it if (graphRequiredBindings != null && graphRequiredBindings.size() == 0) continue; org.openanzo.rdf.URI graphIri = graph.getKey(); // otherwise, see what we come up with for the graph pattern associated // with this graph SolutionSet graphAnswers = subNodeSolver.solveNode(graphNode.getGraphPattern(), graphRequiredBindings, graphIri, graphVar); SolutionSet graphVariableBindingSet = SolutionUtils.singletonSolution(graphVar, graphIri); // Add all the answers we've received for this graph from our dataset, // but first ensure that the ?g -> graph IRI binding is in each answer that // we're including if (isEnabled) { StringBuilder sb = new StringBuilder(); graphNode.getGraphPattern().prettyPrint(sb, true); RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_joiningGraphVariableToGraphSolutions] Join({} / {}, {})", new Object[] { graphVar, graphIri, sb.toString() }); } newAnswers.addAll(SPARQLAlgebra.join(graphAnswers, graphVariableBindingSet)); } } } else { throw new GlitterRuntimeException(ExceptionConstants.GLITTER.GRAPH_NOT_VAR); } return newAnswers; } private SolutionSet solveUnion(Union union, SolutionSet answerConstraints, org.openanzo.rdf.URI namedGraphContext, Variable namedGraphVariable) throws GlitterException { SolutionSet newAnswers; boolean isEnabled = RequestAnalysis.getAnalysisLogger().isDebugEnabled(); long start = 0; // build a new collection and add in all the answers we get // (each individual answer conjoins with answers, but not with // each other) newAnswers = new SolutionList(); for (TreeNode it : this.plan.orderNodes(union.getChildren())) { if (controller.isCancelled()) { throw new GlitterException(ExceptionConstants.GLITTER.QUERY_CANCELLED); } GraphPattern gp = (GraphPattern) it; SolutionSet currentNodeAnswers = subNodeSolver.solveNode(gp, answerConstraints, namedGraphContext, namedGraphVariable); if (isEnabled) { start = System.currentTimeMillis(); } newAnswers.addAll(currentNodeAnswers); if (isEnabled) { StringBuilder sb = new StringBuilder(); gp.prettyPrint(sb, true); RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_unioningAnswers] Union(..., {}) [{} + {}] {}", new Object[] { sb.toString(), newAnswers.size(), currentNodeAnswers.size(), System.currentTimeMillis() - start }); } } return newAnswers; } private SolutionSet solveOptional(Optional opt, SolutionSet answerConstraints, org.openanzo.rdf.URI namedGraphContext, Variable namedGraphVariable) throws GlitterException { boolean isEnabled = RequestAnalysis.getAnalysisLogger().isDebugEnabled(); SolutionSet newAnswers; GraphPattern must = opt.getMustMatchPattern(); GraphPattern may = opt.getMayMatchPattern(); SolutionSet newConstraints; if (must != null) { newAnswers = subNodeSolver.solveNode(must, answerConstraints, namedGraphContext, namedGraphVariable); if (isEnabled) RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_joiningOptionalLHSWithExistingAnswers]"); newConstraints = SPARQLAlgebra.join(answerConstraints, newAnswers); } else { if (isEnabled) RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_emptyLHSOfOptionalIsIdentitySolutionSet]"); newAnswers = SolutionUtils.unconstrainedSolutions(); newConstraints = newAnswers; } if (must == null || newConstraints.size() != 0) { // So, at this point newAnswers = an extended subset of answerConstraints // We want to match mayMatch now in this new context -- which will // extend some of these newAnswers -- but we don't want to eliminate // answers that are not extended - so we just need a left // join instead of the conjoin operator. SolutionSet rhs = subNodeSolver.solveNode(may, newConstraints, namedGraphContext, namedGraphVariable); if (isEnabled) { StringBuilder sb1 = new StringBuilder(), sb2 = new StringBuilder(); if (must != null) must.prettyPrint(sb1, true); else sb1.append("BGP()"); may.prettyPrint(sb2, true); RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_performingOptionalLeftJoin] [{}] [{}]", sb1.toString(), sb2.toString()); } newAnswers = SPARQLAlgebra.leftJoin(newAnswers, rhs, opt.getFilters()); } return newAnswers; } private SolutionSet solveGroup(Group group, SolutionSet answerConstraints, org.openanzo.rdf.URI namedGraphContext, Variable namedGraphVariable) throws GlitterException { boolean isEnabled = RequestAnalysis.getAnalysisLogger().isDebugEnabled(); SolutionSet newAnswers; // first we need to see if this is a BGP backed by code (a FunctionalPredicate) // because those get handled specially newAnswers = conjoinAnswers(group, answerConstraints, namedGraphContext, namedGraphVariable); if (group.getAssignments().size() > 0) { if (isEnabled) { RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_executingAssignments] {}", group.getAssignments()); } newAnswers = SPARQLAlgebra.processAssignments(newAnswers, group.getAssignments()); } // apply filters at the end (TODO @@ this can be optimized as we go and // maybe should be -- note that it can only be optimized when a solution // has all the variables that occur in a FILTER bound already -- even then // I'm not positive that it can be eliminated since it may yet conjoin with // a new solution to form a 3rd solution that is kept due to another FILTER) if (newAnswers.size() > 0 && group.getFilters() != null && group.getFilters().size() > 0) { long start = 0; if (isEnabled) { start = System.currentTimeMillis(); } newAnswers = SPARQLAlgebra.filterSolutions(newAnswers, group.getFilters()); if (isEnabled) { RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_applyingFilters] [{}] {}:{}", new Object[] { group.getFilters(), newAnswers.size(), System.currentTimeMillis() - start }); } } return newAnswers; } private SolutionSet solveBGP(BGP bgp, SolutionSet answerConstraints, org.openanzo.rdf.URI namedGraphContext, Variable namedGraphVariable) throws GlitterException { SolutionSet newAnswers; // first we need to see if this is a BGP backed by code (a FunctionalPredicate) // because those get handled specially FunctionalPredicate fp = null; if ((fp = bgp.getFunctionalPredicate()) != null) { return solveFP(answerConstraints, namedGraphContext, namedGraphVariable, fp); } newAnswers = conjoinAnswers(bgp, answerConstraints, namedGraphContext, namedGraphVariable); return newAnswers; } private SolutionSet solveFP(SolutionSet answerConstraints, org.openanzo.rdf.URI namedGraphContext, Variable namedGraphVariable, FunctionalPredicate fp) throws GlitterException { boolean isEnabled = RequestAnalysis.getAnalysisLogger().isDebugEnabled(); SolutionSet newAnswers; // ask the FP to supply us with bindings - we give them any // answer constraints we know about, as well as a named graph context // and/or named graph variable if (isEnabled) RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_solvingFunctionalPredicate] {}", fp); if (namedGraphVariable == null || fp.canBindGraphVariables()) { newAnswers = fp.generateSolutions(namedGraphContext, namedGraphVariable, answerConstraints); } else if (fp.usesDataFromGraphs()) { // there is a graph variable which our FP can't handle, and the FP does use data // from the active graph; so we need to loop through our named graphs and in // turn ask the FP for solutions; each solution needs to be joined with a // solution binding the variable to the current named graph; all these solutions // get unioned together newAnswers = new SolutionList(); for (URI graph : this.controller.getQueryDataset().getNamedGraphURIs()) { if (controller.isCancelled()) { throw new GlitterException(ExceptionConstants.GLITTER.QUERY_CANCELLED); } SolutionSet ss = fp.generateSolutions(graph, null, answerConstraints); SolutionSet graphBinding = SolutionUtils.singletonSolution(namedGraphVariable, graph); if (isEnabled) { RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_joiningGraphVariableToFunctionalPredicateSolutions] Join({} / {}, {})", new Object[] { namedGraphVariable, graph, fp }); } newAnswers.addAll(SPARQLAlgebra.join(ss, graphBinding)); } } else { // in this case, we only get one set of answers from the FP since it doesn't // use graph data, but since we need to bind the graph variable, it gets bound // to every possible named graph SolutionList graphBindings = new SolutionList(); for (URI g : this.controller.getQueryDataset().getNamedGraphURIs()) graphBindings.add(new PatternSolutionImpl(namedGraphVariable, g)); SolutionSet ss = fp.generateSolutions(null, null, answerConstraints); if (isEnabled) RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_joiningGraphVariableToFunctionalPredicateSolutions] Join({}, {})", new Object[] { namedGraphVariable, fp }); newAnswers = SPARQLAlgebra.join(graphBindings, ss); } return newAnswers; } private SolutionSet solveSubquery(Subquery subqueryNode, SolutionSet answerConstraints, org.openanzo.rdf.URI namedGraphContext, Variable namedGraphVariable) throws GlitterException { // @@ //boolean isEnabled = RequestAnalysis.getAnalysisLogger().isDebugEnabled(); SolutionSet solutions = this.subNodeSolver.solveNode(subqueryNode.getSubqueryController().getQueryPattern(), answerConstraints, namedGraphContext, namedGraphVariable); SolutionSet processedSolutions = subqueryNode.getSubqueryController().getEngine().postProcessSolutions(solutions, false, subqueryNode.getSubqueryController()).getSelectResults(); if (RequestAnalysis.getAnalysisLogger().isDebugEnabled()) RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_returningSubqueryResults] {}:{}", new Object[] { solutions.size(), processedSolutions.size() }); return processedSolutions; } private SolutionSet conjoinAnswers(TreeNode n, SolutionSet answerConstraints, org.openanzo.rdf.URI namedGraphContext, Variable namedGraphVariable) throws GlitterException { boolean isEnabled = RequestAnalysis.getAnalysisLogger().isDebugEnabled(); SolutionSet newAnswers; // both a group and a BGP require that its answers // be conjoined together // // note: compositional semantics would derive all the answers for these // components individually and then conjoin them, rather than conjoining // as we go. UPDATE: We can't conjoin as we go because that might change // the results of any FILTERs at the end. We can only conjoin with each other. // note: for each child after the first, both existing constraints and // new constraints apply to the rest of the children. We can't just // conjoin them because FILTERs need to apply only to the new answers. So we // maintain answerConstraints, which is (prematurely) conjoined, while also keeping // newAnswers on its own for FILTERing. SolutionSet newConstraints = answerConstraints; newAnswers = SolutionUtils.unconstrainedSolutions(); for (Iterator<TreeNode> it = this.plan.orderNodes(n.getChildren()).iterator(); it.hasNext();) { if (controller.isCancelled()) { throw new GlitterException(ExceptionConstants.GLITTER.QUERY_CANCELLED); } TreeNode child = it.next(); SolutionSet solutions = subNodeSolver.solveNode(child, newConstraints, namedGraphContext, namedGraphVariable); if (isEnabled) { StringBuilder sb = new StringBuilder(); child.prettyPrint(sb, true); RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_joiningSolutionsFromNode] [{}] {}", sb.toString(), solutions.size()); } if (solutions.size() == 0) { return SolutionUtils.noSolutions(); } newAnswers = SPARQLAlgebra.join(newAnswers, solutions); // we only need the new constraints if we'll be looping again if (it.hasNext()) { if (isEnabled) { RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_SerialInMemoryNodeSolver_joiningNewConstraints]"); } newConstraints = SPARQLAlgebra.join(newAnswers, newConstraints); } } return newAnswers; } }