/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Sep 14, 2011
 */

package com.bigdata.rdf.sparql.ast.optimizers;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.Var;
import com.bigdata.rdf.internal.constraints.SparqlTypeErrorBOp;
import com.bigdata.rdf.sparql.ast.ASTBase;
import com.bigdata.rdf.sparql.ast.ArbitraryLengthPathNode;
import com.bigdata.rdf.sparql.ast.AssignmentNode;
import com.bigdata.rdf.sparql.ast.FilterNode;
import com.bigdata.rdf.sparql.ast.FunctionNode;
import com.bigdata.rdf.sparql.ast.GlobalAnnotations;
import com.bigdata.rdf.sparql.ast.GraphPatternGroup;
import com.bigdata.rdf.sparql.ast.GroupMemberValueExpressionNodeBase;
import com.bigdata.rdf.sparql.ast.GroupNodeBase;
import com.bigdata.rdf.sparql.ast.IBindingProducerNode;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.IGroupNode;
import com.bigdata.rdf.sparql.ast.IQueryNode;
import com.bigdata.rdf.sparql.ast.ISolutionSetStats;
import com.bigdata.rdf.sparql.ast.IValueExpressionNode;
import com.bigdata.rdf.sparql.ast.IValueExpressionNodeContainer;
import com.bigdata.rdf.sparql.ast.JoinGroupNode;
import com.bigdata.rdf.sparql.ast.NamedSubqueryInclude;
import com.bigdata.rdf.sparql.ast.NamedSubqueryRoot;
import com.bigdata.rdf.sparql.ast.ProjectionNode;
import com.bigdata.rdf.sparql.ast.QueryBase;
import com.bigdata.rdf.sparql.ast.QueryNodeWithBindingSet;
import com.bigdata.rdf.sparql.ast.QueryRoot;
import com.bigdata.rdf.sparql.ast.QueryType;
import com.bigdata.rdf.sparql.ast.StaticAnalysis;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpUtility;
import com.bigdata.rdf.sparql.ast.eval.IEvaluationContext;
import com.bigdata.rdf.sparql.ast.explainhints.BottomUpSemanticsExplainHint;
import com.bigdata.rdf.sparql.ast.explainhints.IExplainHint;
import com.bigdata.rdf.sparql.ast.explainhints.UnsatisfiableMinusExplainHint;

import cutthecrap.utils.striterators.Filter;
import cutthecrap.utils.striterators.IStriterator;
import cutthecrap.utils.striterators.Striterator;

/**
 * Rewrites aspects of queries where bottom-up evaluation would produce
 * different results. This includes joins which are not "well designed" as
 * defined in section 4.2 of "Semantics and Complexity of SPARQL", 2006, Jorge
 * P&eacute;rez et al and also FILTERs on variables whose bindings are not in
 * scope.
 * <p>
 * Note: The test suite for this class is a set of DAWG tests which focus on
 * bottom up evaluation semantics, including:
 * <p>
 * Nested Optionals - 1 (Query is not well designed because there are no shared
 * variables in the intermediate join group and there is an embedded OPTIONAL
 * join group. Since ?v is not present in the intermediate join group the (:x3
 * :q ?w . OPTIONAL { :x2 :p ?v }) solutions must be computed first and then
 * joined against the (:x1 :p ?v) solutions.)
 *
 * <pre>
 * SELECT *
 * {
 *     :x1 :p ?v .
 *     OPTIONAL
 *     {
 *       :x3 :q ?w .
 *       OPTIONAL { :x2 :p ?v }
 *     }
 * }
 * </pre>
 *
 * Filter-scope - 1 (Query is not well designed because there are no shared
 * variables in the intermediate join group and there is an embedded OPTIONAL
 * join group. Also, ?v is used in the FILTER but is not visible in that scope.)
 *
 * <pre>
 * SELECT *
 * {
 *     :x :p ?v .
 *     { :x :q ?w
 *       OPTIONAL {  :x :p ?v2 FILTER(?v = 1) }
 *     }
 * }
 * </pre>
 *
 * Join-scope - 1 (Query is not well designed because there are no shared
 * variables in the intermediate group and there is an embedded OPTIONAL join
 * group.)
 *
 * <pre>
 * SELECT *
 * {
 *   ?X  :name "paul"
 *   {?Y :name "george" . OPTIONAL { ?X :email ?Z } }
 * }
 * </pre>
 *
 * Filter-nested - 2 (Filter on variable ?v which is not in scope)
 *
 * <pre>
 * SELECT ?v
 * { :x :p ?v . { FILTER(?v = 1) } }
 * </pre>
 *
 * bind07 - BIND (?o not in scope for bind)
 *
 * <pre>
 * SELECT ?s ?p ?o ?z
 * {
 *   ?s ?p ?o .
 *   { BIND(?o+1 AS ?z) } UNION { BIND(?o+2 AS ?z) }
 * }
 * </pre>
 *
 * Nested groups which do not share variables with their parent can be lifted
 * out into a named subquery. This has the same effect as bottom up evaluation
 * since we will run the named subquery first and then perform the join against
 * the parent group. However, in this case an exogenous binding which causes a
 * shared variable to exist would mean that the query could run normally since
 * the value in the outer group and the inner group would now be correlated
 * through the exogenous binding. E.g., <code>?X</code> in the last example
 * above.
 *
 * @see https://sourceforge.net/apps/trac/bigdata/ticket/232
 * @see http://www.dcc.uchile.cl/~cgutierr/papers/sparql.pdf
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id: ASTBottomUpOptimizer.java 5189 2011-09-14 17:56:53Z thompsonbry
 *          $
 *
 *          TODO I have been assuming that the presence of any shared variable
 *          is enough to enforce correlation between the solution sets and cause
 *          the results of bottom up evaluation to be the same as our standard
 *          evaluation model. If this is not true then we could just lift
 *          everything into a named subquery, order the named subqueries by
 *          their dependencies and just let it run.
 */
public class ASTBottomUpOptimizer implements IASTOptimizer {

    /**
     * Used for the prefix of the generated named set name.
     */
    static final String NAMED_SET_PREFIX = "%-bottom-up-";

    /**
     * Creates the optimizer. The optimizer keeps no state between calls to
     * {@link #optimize(AST2BOpContext, QueryNodeWithBindingSet)}.
     */
    public ASTBottomUpOptimizer() {
    }

    /**
     * Applies the three bottom-up rewrites, in order, to the query and to each
     * of its named subqueries:
     * <ol>
     * <li>badly designed left joins are lifted into named subqueries;</li>
     * <li>variables in FILTERs and BINDs which are not in scope are renamed to
     * anonymous variables;</li>
     * <li>MINUS groups which share no variables with their context are
     * removed.</li>
     * </ol>
     * The AST is modified in place. When the query node is not a
     * {@link QueryRoot} nothing is rewritten.
     *
     * @param context
     *            The evaluation context.
     * @param input
     *            The query node and the exogenous binding sets.
     *
     * @return A new {@link QueryNodeWithBindingSet} wrapping the same query
     *         node and the same binding sets as the input.
     */
    @Override
    public QueryNodeWithBindingSet optimize(
            final AST2BOpContext context, final QueryNodeWithBindingSet input) {

        final IQueryNode queryNode = input.getQueryNode();
        final IBindingSet[] bindingSets = input.getBindingSets();

        if (!(queryNode instanceof QueryRoot))
            return new QueryNodeWithBindingSet(queryNode, bindingSets);

        final QueryRoot queryRoot = (QueryRoot) queryNode;

        /*
         * Rewrite badly designed left joins by lifting them into a named
         * subquery.
         */
        {

            /*
             * Collect optional groups.
             *
             * Note: We can not transform graph patterns inside of SERVICE calls
             * so this explicitly visits the interesting parts of the tree.
             */

            final StaticAnalysis sa = new StaticAnalysis(queryRoot, context);

            // List of the inner optional groups for badly designed left joins.
            final List<JoinGroupNode> innerOptionalGroups = new LinkedList<JoinGroupNode>();

            {

                if (queryRoot.getNamedSubqueries() != null) {

                    for (NamedSubqueryRoot namedSubquery : queryRoot
                            .getNamedSubqueries()) {

                        @SuppressWarnings("unchecked")
                        final GraphPatternGroup<IGroupMemberNode> group = (GraphPatternGroup<IGroupMemberNode>) namedSubquery
                                .getWhereClause();

                        checkForBadlyDesignedLeftJoin(context, sa, group,
                                innerOptionalGroups);

                    }

                }

                @SuppressWarnings("unchecked")
                final GraphPatternGroup<IGroupMemberNode> group = (GraphPatternGroup<IGroupMemberNode>) queryRoot
                        .getWhereClause();

                checkForBadlyDesignedLeftJoin(context, sa, group,
                        innerOptionalGroups);

            }

            /*
             * Convert badly designed left joins into named subqueries. This
             * gives the join group effective "bottom-up" evaluation semantics
             * since we will run the named subqueries before we run anything
             * else.
             */

            for (JoinGroupNode group : innerOptionalGroups) {

                liftBadlyDesignedLeftJoin(context, sa, queryRoot, group);

            }

        }

        /*
         * Hide variables which would not be in scope for bottom up evaluation.
         */
        {

            // A fresh analysis, since the AST may have been rewritten above.
            final StaticAnalysis sa = new StaticAnalysis(queryRoot, context);

            // Handle named subqueries.
            if (queryRoot.getNamedSubqueries() != null) {

                for (NamedSubqueryRoot namedSubquery : queryRoot
                        .getNamedSubqueries()) {

                    handleFiltersWithVariablesNotInScope(context, sa,
                            namedSubquery, bindingSets);

                }

            }

            handleFiltersWithVariablesNotInScope(context, sa, queryRoot,
                    bindingSets);

        }

        /*
         * Handle MINUS when it appears without shared variables.
         */
        {

            final StaticAnalysis sa = new StaticAnalysis(queryRoot, context);

            // Handle named subqueries.
            if (queryRoot.getNamedSubqueries() != null) {

                for (NamedSubqueryRoot namedSubquery : queryRoot
                        .getNamedSubqueries()) {

                    // WHERE clause for the named subquery.
                    handleMinusWithoutSharedVariables(context, sa,
                            namedSubquery.getWhereClause());

                }

            }

            handleMinusWithoutSharedVariables(context, sa,
                    queryRoot.getWhereClause());

        }

        return new QueryNodeWithBindingSet(queryNode, bindingSets);

    }

    /**
     * We are looking for queries of the form:
     *
     * <pre>
     * P = ((?X, name, paul) OPT ((?Y, name, george) OPT (?X, email, ?Z)))
     * </pre>
     *
     * i.e. variables used by the right side of a left join that are not bound
     * in the parent group but are bound in groups above the parent group.
     * <p>
     * Every {@link JoinGroupNode} reachable from the <i>whereClause</i> is
     * visited; each OPTIONAL one is handed to
     * {@link #checkForBadlyDesignedLeftJoin2} for the in-depth check.
     *
     * @param context
     *            The evaluation context.
     * @param sa
     *            The static analysis helper for the query.
     * @param whereClause
     *            The graph pattern to be searched.
     * @param badlyDesignedLeftJoins
     *            The list to which the identified inner optional groups are
     *            appended.
     */
    private void checkForBadlyDesignedLeftJoin(
            final IEvaluationContext context,
            final StaticAnalysis sa,
            final GraphPatternGroup<IGroupMemberNode> whereClause,
            final List<JoinGroupNode> badlyDesignedLeftJoins) {

        // Check all join groups.
        final Iterator<JoinGroupNode> itr = BOpUtility.visitAll(
                (BOp) whereClause, JoinGroupNode.class);

        while (itr.hasNext()) {

            final JoinGroupNode group = itr.next();

            if (!group.isOptional()) {
                // Ignore non-optional join groups.
                continue;
            }

            /*
             * This is a candidate for an inner join group of a badly designed
             * optional join pattern, so check it in depth.
             */
            checkForBadlyDesignedLeftJoin2(context, sa, group,
                    badlyDesignedLeftJoins);

        }

    }

    /**
     * Identify problem variables. These are variables which appear in joins
     * (or filters) within an optional <i>group</i>, which do not appear in
     * joins in the group's parent, but which do appear in joins in some parent
     * of that parent.
     * <p>
     * Under bottom up evaluation semantics, the variable become bound from the
     * inner most nested group first. This means that the optional group can
     * join with its parent, producing bindings for a variable not shared
     * transitively by its parent with its parent's parents. For example, the
     * <code>?X</code> in the inner optional will have already been joined with
     * the <code>?Y</code> and is only then joined with the access path for
     * <code>?X :name "paul"</code>. If there was an optional join for
     * <code>?X</code>, then <code>?X</code> will already be bound for that
     * solution in that access path. Under these circumstances, bottom up
     * evaluation can produce different results than left-to-right evaluation.
     * <p>
     * In the data set for this query, while there are solutions for
     * <code>?X name "paul"</code>, there is no solution for
     * <code>?X name "paul"</code> for which <code>?X :email ?Z</code> is also
     * true. Hence, this query has no solutions in the data.
     *
     * <pre>
     * SELECT *
     * {
     *   ?X  :name "paul"
     *   {?Y :name "george" . OPTIONAL { ?X :email ?Z } }
     * }
     * </pre>
     *
     * (This query is <code>var-scope-join-1</code> from the DAWG compliance
     * test suite.)
     * <ol>
     * <li>Add all vars used in the group (statement patterns and filters)</li>
     * <li>Remove all vars bound by the parent group (statement patterns)</li>
     * <li>Retain all vars from the grandparent groups (statement patterns)</li>
     * </ol>
     * If anything is left over, the group is appended to
     * <i>badlyDesignedLeftJoins</i>.
     *
     * @param context
     *            The evaluation context (consulted for the exogenous
     *            variables which are bound in all solutions).
     * @param sa
     *            The static analysis helper for the query.
     * @param group
     *            A group to inspect. The caller guarantees that it is an
     *            optional group (this is asserted), so it is a candidate for
     *            the inner group of a badly designed left join pattern.
     * @param badlyDesignedLeftJoins
     *            A list of all badly designed left joins which have been
     *            identified.
     *
     *            FIXME This ignores the exogenous variables. unit test for this
     *            case and fix. [A variable would have to be bound in all
     *            exogenous solutions in order to allow us to avoid the rewrite
     *            for bottom up semantics. E.g., it would have to be a member of
     *            {@link ISolutionSetStats#getAlwaysBound()} but not a member of
     *            {@link ISolutionSetStats#getConstants()} since it does not
     *            have to be bound to the same value in each solution].
     *            <p>
     *            I have made a partial fix. However, an exogenous variable IS
     *            NOT visible within a subquery unless it is projected into that
     *            subquery. Thus, it is incorrect to simply consult
     *            {@link ISolutionSetStats#getAlwaysBound()}
     *
     * @see https://sourceforge.net/apps/trac/bigdata/ticket/412
     *      (StaticAnalysis#getDefinitelyBound() ignores exogenous variables.)
     */
    private void checkForBadlyDesignedLeftJoin2(
            final IEvaluationContext context,
            final StaticAnalysis sa,
            final GraphPatternGroup<IGroupMemberNode> group,
            final List<JoinGroupNode> badlyDesignedLeftJoins) {

        assert group.isOptional();

        /*
         * Check to see whether this is the inner optional of a badly designed
         * left-join pattern.
         */

        final IGroupNode<? extends IGroupMemberNode> p =
                group.getParentJoinGroup();

        if (p == null) {
            // No parent.
            return;
        }

        final IGroupNode<? extends IGroupMemberNode> pp = p
                .getParentJoinGroup();

        if (pp == null) {
            // No parent's parent.
            return;
        }

        /*
         * This is all definitely bound variables above the candidate optional
         * group in the hierarchy.
         *
         * Note: [topDownVars] needs to be reset on each entry with a new Set to
         * avoid side-effects when we recursively explore sibling groups for
         * this pattern. This method was rewritten without recursion to avoid
         * that problem. It is now driven out of an iterator visiting the
         * candidate optional join groups.
         */
        final Set<IVariable<?>> topDownVars = sa.getDefinitelyIncomingBindings(
                p, new LinkedHashSet<IVariable<?>>());

        /*
         * Obtain the set of variables used in JOINs -OR- FILTERs within this
         * optional group.
         *
         * Note: We must consider the variables used in filters as well when
         * examining a candidate inner optional group for a badly designed left
         * join. This is necessary in order to capture uncorrelated variables
         * having the same name in the FILTER and in the parent's parent.
         */
        final Set<IVariable<?>> innerGroupVars = sa
                .getDefinitelyProducedBindingsAndFilterVariables(group,
                        new LinkedHashSet<IVariable<?>>());

        /*
         * Obtain the set of variables used in joins within the parent join
         * group.
         */
        final Set<IVariable<?>> parentVars = sa.getDefinitelyProducedBindings(
                (IBindingProducerNode) p, new LinkedHashSet<IVariable<?>>(),
                false/* recursive */);

        /*
         * The inner optional is part of a badly designed left join if it uses
         * variables which are not present in the parent but which are present
         * in the group hierarchy above that parent.
         */

        /*
         * Remove any variables which are bound in all of the exogenous
         * solutions. These are visible everywhere (except within a subquery if
         * they are not projected into that subquery).
         *
         * FIXME This is not a 100% correct fix. The problem is that it ignores
         * the variable scoping rules for a subquery. Variables are only visible
         * within a subquery if they are projected into that subquery, even if
         * the binding is exogenous. The correct fix is to lift this into
         * StaticAnalyis#getDefinitelyProducedBindings(), and which point the
         * line below can be removed as it will have been correctly handled by
         * the method on StaticAnalysis.
         *
         * @see https://sourceforge.net/apps/trac/bigdata/ticket/412
         */
        innerGroupVars.removeAll(context.getSolutionSetStats().getAlwaysBound());

        // remove all variables declared by the parent.
        innerGroupVars.removeAll(parentVars);

        // retain all variables declared by the parent's parent.
        innerGroupVars.retainAll(topDownVars);

        if (!innerGroupVars.isEmpty()) {

            badlyDesignedLeftJoins.add((JoinGroupNode) group);

        }

    }

    /**
     * If the {@link JoinGroupNode} qualifies as a badly designed left join then
     * lift it into a {@link NamedSubqueryRoot} and replace it with a
     * {@link NamedSubqueryInclude}.
     * <p>
     * The named subquery projects every variable which may be produced by the
     * parent of the <i>group</i> (recursively) and uses a deep copy of that
     * parent as its WHERE clause. The query hints of the parent are copied
     * onto both the named subquery and the include.
     *
     * @param context
     *            The context, used to generate the name of the named set.
     * @param sa
     *            The static analysis helper for the query.
     * @param queryRoot
     *            The query to which the new named subquery is added.
     * @param group
     *            The OPTIONAL join group. This group and its parent
     *            {@link JoinGroupNode} will be lifted out and replaced by a
     *            {@link NamedSubqueryInclude}.
     *
     * @throws AssertionError
     *             if the <i>group</i> has no parent join group.
     */
    private void liftBadlyDesignedLeftJoin(final AST2BOpContext context,
            final StaticAnalysis sa, final QueryRoot queryRoot,
            final JoinGroupNode group) {

        // The parent join group.
        final JoinGroupNode p = group.getParentJoinGroup();

        if (p == null)
            throw new AssertionError();

        // The parent's parent join group.
        final JoinGroupNode pp = p.getParentJoinGroup();

        if (pp == null)
            /**
             * BLZG-1760: in case we have multiple OPTIONALs nested inside one
             * group, we may enter this method multiple times and, as replacement
             * is taking place in the first one, there's nothing that needs to be
             * done in following iterations, so just return.
             */
            return;

        final String namedSet = context.createVar(NAMED_SET_PREFIX);

        final NamedSubqueryRoot nsr = new NamedSubqueryRoot(QueryType.SELECT,
                namedSet);

        // Copy across query hints for the join group.
        nsr.setQueryHints(p.getQueryHints());

        {

            {

                final ProjectionNode projection = new ProjectionNode();

                nsr.setProjection(projection);

                final Set<IVariable<?>> vars = new LinkedHashSet<IVariable<?>>();

                sa.getMaybeProducedBindings(p, vars, true/* recursive */);

                for (IVariable<?> var : vars) {

                    projection.addProjectionVar(new VarNode(var.getName()));

                }

            }

            // See #1087
            nsr.setWhereClause(BOpUtility.deepCopy(p));

            queryRoot.getNamedSubqueriesNotNull().add(nsr);

        }

        final NamedSubqueryInclude nsi = new NamedSubqueryInclude(namedSet);

        // Copy across query hints for the join group.
        nsi.setQueryHints(p.getQueryHints());

        if (p.isOptional()) {

            /*
             * TODO This is a hack because the INCLUDE operation (a solution
             * set hash join) does not currently support OPTIONAL. As a
             * workaround the INCLUDE is stuffed into an OPTIONAL group.
             */

            final JoinGroupNode tmp = new JoinGroupNode();

            tmp.setOptional(true);

            tmp.addChild(nsi);

            pp.replaceWith(p, tmp);

        } else if (p.isMinus()) {

            /*
             * TODO This is a hack because the INCLUDE operation does not
             * currently support MINUS. As a workaround the INCLUDE is stuffed
             * into an MINUS group.
             */

            final JoinGroupNode tmp = new JoinGroupNode();

            tmp.setMinus(true);

            tmp.addChild(nsi);

            pp.replaceWith(p, tmp);

        } else {

            /**
             * Replace with named subquery INCLUDE.
             *
             * Note: we can not do that starting from pp, since pp is the
             * parent join group node (which may differ from p.getParent().
             *
             * See ticket #1087 for an example query where this is the case
             */
            final IGroupNode<?> ppNode = p.getParent();

            if (ppNode instanceof GroupNodeBase) {

                final GroupNodeBase<?> gnb = (GroupNodeBase<?>) ppNode;

                if (ppNode instanceof JoinGroupNode) {

                    gnb.replaceWith(p, nsi);

                } else {

                    // if the parent is not a JoinGroupNode, we wrap the include
                    // into a join group node; this is necessary because, for
                    // instance, INCLUDE is not supported inside all constructs
                    // (e.g. fails in case we INCLUDE into a UNION operator)
                    final JoinGroupNode nsiWrapper = new JoinGroupNode();

                    nsiWrapper.addChild(nsi);

                    gnb.replaceWith(p, nsiWrapper);

                }

            }

        }

    }

    /**
     * Examine each {@link JoinGroupNode} in the query and each FILTER (or
     * BIND) in each {@link JoinGroupNode}. If the filter depends on a variable
     * which is not in scope then we must rewrite the AST in order to preserve
     * bottom up evaluation semantics.
     * <p>
     * Such filters and variables are identified. The variables within the
     * filters are then rewritten in a consistent manner across the filters
     * within the group, renaming the provably unbound variables in the filters
     * to anonymous variables. This provides effective bottom up evaluation
     * scope for the variables.
     * <p>
     * Note: This will see ALL join groups, including those in a SERVICE or
     * (NOT) EXISTS annotation. Therefore, we use findParent() to identify when
     * the FILTER is in a (NOT) EXISTS graph pattern since the graph pattern
     * appears as an annotation and is not back linked from the FILTER in which
     * it appears.
     *
     * @param context
     *            The context, used to generate anonymous variables and to
     *            re-generate value expressions. When <code>null</code> no
     *            variables are treated as globally scoped.
     * @param sa
     *            The static analysis helper for the query.
     * @param queryBase
     *            The query or named subquery whose WHERE clause is examined.
     * @param bindingSets
     *            The exogenous binding sets (not consulted by this method).
     *
     * @see https://sourceforge.net/apps/trac/bigdata/ticket/414 (SPARQL 1.1
     *      EXISTS, NOT EXISTS, and MINUS)
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private void handleFiltersWithVariablesNotInScope(
            final AST2BOpContext context,
            final StaticAnalysis sa,
            final QueryBase queryBase,
            final IBindingSet[] bindingSets) {

        final Set<IVariable<?>> globallyScopedVars =
                context == null ?
                (Set) Collections.emptySet() :
                context.getGloballyScopedVariables();

        // Map for renamed variables.
        final Map<IVariable<?>/* old */, IVariable<?>/* new */> map = new LinkedHashMap<IVariable<?>, IVariable<?>>();

        /*
         * Visit all join groups, which is where the filters are found.
         */
        final Iterator<JoinGroupNode> itr = BOpUtility.visitAll(
                queryBase.getWhereClause(), JoinGroupNode.class);

        while (itr.hasNext()) {

            final JoinGroupNode group = itr.next();

            // Looked up once; both of the checks below test the same parent.
            final Object groupParent = sa.findParent(group);

            if (groupParent instanceof FilterNode) {
                /*
                 * Skip EXISTS and NOT EXISTS graph patterns when they are
                 * visited directly. These are handled when we visit the join
                 * group containing the FILTER in which they appear.
                 *
                 * Note: The only time that findParent() will report a
                 * FilterNode is when either EXISTS or NOT EXISTS is used and
                 * the group is the graph pattern for those functions.
                 *
                 * TODO This could still fail on nested groups within the (NOT)
                 * EXISTS graph pattern since findParent() would report a
                 * JoinGroupNode parent rather than the eventual FilterNode
                 * parent.
                 *
                 * @see https://sourceforge.net/apps/trac/bigdata/ticket/414
                 * (SPARQL 1.1 EXISTS, NOT EXISTS, and MINUS)
                 */
                continue;
            }

            if (groupParent instanceof ArbitraryLengthPathNode) {
                /*
                 * Skip the filters in an ALP subgroup. Although evaluated
                 * as a subgroup, an ALP node is logically not really a subgroup.
                 */
                continue;
            }

            /*
             * All variables potentially bound by joins in this group or a
             * subgroup.
             */
            final Set<IVariable<?>> maybeBound = sa
                    .getMaybeProducedBindings(group,
                            new LinkedHashSet<IVariable<?>>(), true/* recursive */);

            /*
             * Add globally scoped variables, we're not allowed to rewrite
             * filters for them, as they are globally visible. Note that we do
             * not want to add any exogeneous variables from the outer VALUES
             * clause: by semantics, they are joined in *last*, so they're
             * not visible in any scope.
             */
            maybeBound.addAll(globallyScopedVars);

            if (group.isOptional()) {

                /*
                 * "A FILTER inside an OPTIONAL can reference a variable
                 * bound in the required part of the OPTIONAL."
                 *
                 * Note: This is ONLY true when the [group] is OPTIONAL.
                 * Otherwise the variables in the parent are not visible.
                 *
                 * Two fairly difficult test cases articulating the scope rules
                 * are:
                 *
                 * http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/filter-nested-2.rq
                 *
                 * and
                 *
                 * http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-005-not-simplified
                 * (see
                 * http://www.w3.org/TR/2013/REC-sparql11-query-20130321/#convertGraphPattern)
                 */

                // The "required" part of the optional is the parent group.
                final JoinGroupNode p = group.getParentJoinGroup();

                if (p != null) {

                    // bindings "maybe" produced in the parent (non-recursive)
                    final Set<IVariable<?>> incomingBound = sa
                            .getMaybeProducedBindings(p,
                                    new LinkedHashSet<IVariable<?>>(), false/* recursive */);

                    // add to those visible in FILTERs for this group.
                    maybeBound.addAll(incomingBound);

                }

            }

            // For everything in this group.
            for (IGroupMemberNode child : group) {

                // Only consider the FILTERs and BINDs.
                if (!(child instanceof FilterNode || child instanceof AssignmentNode))
                    continue;

                final GroupMemberValueExpressionNodeBase filter = (GroupMemberValueExpressionNodeBase) child;

                // The node in which a top-level variable would be replaced.
                final IValueExpressionNode nodeParent =
                        (filter instanceof IValueExpressionNode) ?
                        (IValueExpressionNode) filter : null;

                if (rewriteUnboundVariablesInFilter(context, maybeBound, map,
                        nodeParent, filter.getValueExpressionNode())) {

                    /*
                     * Re-generate the IVE for this filter.
                     */

                    // Recursively clear the old value expression.

                    // gather subexpression (avoiding CCME)
                    final List<FunctionNode> subexpr = new ArrayList<FunctionNode>();
                    final Iterator<FunctionNode> veitr = BOpUtility.visitAll(filter, FunctionNode.class);
                    while (veitr.hasNext()) {
                        subexpr.add(veitr.next());
                    }

                    // clear
                    for (FunctionNode ive : subexpr) {
                        ive.setValueExpression(null);
                    }

                    final GlobalAnnotations globals = new GlobalAnnotations(
                            context.getLexiconNamespace(),
                            context.getTimestamp()
                            );

                    /**
                     * Re-generate the value expression. Note that this must be
                     * done recursively in the general case, e.g in the case
                     * of nested FILTER [NOT] EXISTS nodes. See for instance
                     * ticket BLZG-1281 for an example query.
                     */
                    // first set up an iterator detecting all
                    // IValueExpressionNodeContainers
                    final IStriterator it = new Striterator(
                            BOpUtility.preOrderIteratorWithAnnotations(filter))
                            .addFilter(new Filter() {

                                private static final long serialVersionUID = 1L;

                                @Override
                                public boolean isValid(Object obj) {
                                    return obj instanceof IValueExpressionNodeContainer;
                                }
                            });

                    while (it.hasNext()) {
                        AST2BOpUtility.toVE(context.getBOpContext(), globals,
                                ((IValueExpressionNodeContainer) it.next())
                                        .getValueExpressionNode());
                    }

                }

            }

        }

    }

    /**
     * If a FILTER depends on a variable which is not in scope for that filter
     * then that variable will always be unbound in that scope. However, we can
     * not fail the entire filter since it could use <code>BOUND(var)</code>.
     * This takes the approach of rewriting the FILTER to use an anonymous
     * variable for any variable which is provably not bound.
     * <p>
     * Note: The alternative approach is to replace the unbound variable with a
     * type error. However, BOUND(?x) would have to be "replaced" by setting its
     * {@link IValueExpressionNode} to [false]. Also, COALESCE(....) could use
     * an unbound variable and no type error should be thrown. We either have to
     * remove the expression the unbound variable shows up in from the
     * COALESCE() or change it to an anonymous variable. If you want to pursue
     * this approach see {@link SparqlTypeErrorBOp#INSTANCE}.
     *
     * @param context
     *            The context is used to generate anonymous variables.
     * @param maybeBound
     *            The set of variables which are in scope in the group.
     * @param map
     *            A map used to provide consistent variable renaming in the
     *            filters of the group.
     * @param parent
     *            The parent {@link IValueExpressionNode}. May be
     *            <code>null</code>, in which case there is nothing in which
     *            the variable can be replaced and nothing to which an explain
     *            hint can be attached.
     * @param node
     *            An {@link IValueExpressionNode}. If this is a {@link VarNode}
     *            and the variable is not in scope, then the {@link VarNode} is
     *            replaced in the parent by an anonymous variable.
     *
     * @return <code>true</code> if the expression was modified and its
     *         {@link IValueExpressionNode} needs to be rebuilt.
     *
     * @see AST2BOpUtility#toVE
     */
    private boolean rewriteUnboundVariablesInFilter(final AST2BOpContext context,
            final Set<IVariable<?>> maybeBound,
            final Map<IVariable<?>/* old */, IVariable<?>/* new */> map,
            final IValueExpressionNode parent, final IValueExpressionNode node) {

        boolean modified = false;

        // recursion.
        {

            final int arity = ((BOp) node).arity();

            for (int i = 0; i < arity; i++) {

                final BOp tmp = ((BOp) node).get(i);

                if (!(tmp instanceof IValueExpressionNode))
                    continue;

                final IValueExpressionNode child = (IValueExpressionNode) tmp;

                modified |= rewriteUnboundVariablesInFilter(context,
                        maybeBound, map, node, child);

            }

        }

        if (!(node instanceof VarNode)) {
            // Not a variable.
            return modified;
        }

        final VarNode varNode = (VarNode) node;

        final IVariable<?> ovar = varNode.getValueExpression();

        if (maybeBound.contains(ovar)) {

            // A variable which might be bound during evaluation.
            return modified;

        }

        /*
         * A variable which is provably not bound.
         *
         * Note: In order to mimic the variable scope for bottom-up evaluation
         * we need to "hide" this variable.
         */

        IVariable<?> nvar = map.get(ovar);

        if (nvar == null) {

            /*
             * An anonymous variable which will never be bound by the query. The
             * map is used to replace an unbound variable with the
             * corresponding anonymous variable in the same manner throughout
             * the group.
             */
            map.put(ovar,
                    nvar = Var.var(context.createVar("-unbound-var-"
                            + ovar.getName() + "-")));

            /*
             * This indicates a potential problem with the query, so we
             * set an explain hint for it.
             */
            final IExplainHint explainHint =
                    new BottomUpSemanticsExplainHint(ovar, nvar, (BOp) node);

            if (parent != null) {
                ((ASTBase) parent).addExplainHint(explainHint);
            } // nowhere to append

        }

        if (parent != null)
            ((ASTBase) parent).replaceAllWith(ovar, nvar);

        return true;

    }

    /**
     * Handle MINUS when it appears without shared variables. We just get rid of
     * the MINUS group since it can not interact with the parent group without
     * shared variables (without shared variables, nothing joins and if nothing
     * joins then nothing is removed from the parent).
     * <p>
     * Child groups are processed recursively before the child itself is
     * examined. An explain hint is attached to the <i>group</i> for every
     * MINUS child which is removed.
     *
     * @param context
     *            The evaluation context.
     * @param sa
     *            The static analysis helper for the query.
     * @param group
     *            The group whose children are examined.
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private void handleMinusWithoutSharedVariables(
            final IEvaluationContext context,
            final StaticAnalysis sa,
            final GraphPatternGroup<?> group) {

        int arity = group.arity();

        for (int i = 0; i < arity; i++) {

            final IGroupMemberNode child = (IGroupMemberNode) group.get(i);

            if (!(child instanceof GraphPatternGroup)) {

                continue;

            }

            final GraphPatternGroup<?> childGroup = (GraphPatternGroup<?>) child;

            /*
             * Recursion.
             */
            handleMinusWithoutSharedVariables(context, sa, childGroup);

            /*
             * Examine this child.
             */
            if (childGroup.isMinus()) {

                /**
                 * The static condition under which we can drop the MINUS is
                 * that the left and right variables do not overlap, satisfying
                 * the condition that the intersection of the left and right
                 * variables is empty; for a justification, see
                 * http://www.w3.org/TR/sparql11-query/#sparqlAlgebra
                 */
                final Set<IVariable<?>> incomingBound = sa
                        .getMaybeIncomingBindings(childGroup,
                                new LinkedHashSet<IVariable<?>>());

                final Set<IVariable<?>> maybeProduced = sa
                        .getMaybeProducedBindings(childGroup,
                                new LinkedHashSet<IVariable<?>>(), true/* recursive */);

                final Set<IVariable<?>> intersection = new LinkedHashSet<IVariable<?>>(
                        incomingBound);

                intersection.retainAll(maybeProduced);

                if (intersection.isEmpty()) {

                    // this indicates an ill-designed query, as it is most
                    // likely not what the author envisioned, therefore we
                    // attach an explain hint
                    final IExplainHint explainHint =
                            new UnsatisfiableMinusExplainHint(childGroup);
                    group.addExplainHint(explainHint);

                    // Remove the MINUS operator. It can not have any effect.
                    ((IGroupNode) group).removeChild(childGroup);

                    /*
                     * BLZG-1627: the group has one member less now, so we
                     * decrease the arity to avoid running out of bounds.
                     *
                     * The children after the removed one have each moved down
                     * by one position, so the index is stepped back as well.
                     * Otherwise the loop increment would skip the sibling
                     * which now occupies slot [i], and that sibling would be
                     * neither recursed into nor checked.
                     */
                    arity--;
                    i--;

                }

            }

        }

    }

}