/** Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on May 14, 2015 */ package com.bigdata.rdf.sparql.ast.optimizers; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import org.openrdf.model.URI; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; import com.bigdata.bop.Constant; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; import com.bigdata.bop.bindingSet.ListBindingSet; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.sparql.ast.AssignmentNode; import com.bigdata.rdf.sparql.ast.BindingsClause; import com.bigdata.rdf.sparql.ast.ConstantNode; import com.bigdata.rdf.sparql.ast.FilterNode; import com.bigdata.rdf.sparql.ast.FunctionNode; import com.bigdata.rdf.sparql.ast.FunctionRegistry; import com.bigdata.rdf.sparql.ast.GroupMemberValueExpressionNodeBase; import com.bigdata.rdf.sparql.ast.GroupNodeBase; import 
com.bigdata.rdf.sparql.ast.IBindingProducerNode; import com.bigdata.rdf.sparql.ast.IGroupMemberNode; import com.bigdata.rdf.sparql.ast.IQueryNode; import com.bigdata.rdf.sparql.ast.ISolutionSetStats; import com.bigdata.rdf.sparql.ast.IValueExpressionNode; import com.bigdata.rdf.sparql.ast.JoinGroupNode; import com.bigdata.rdf.sparql.ast.PropertyPathNode; import com.bigdata.rdf.sparql.ast.QueryBase; import com.bigdata.rdf.sparql.ast.QueryNodeWithBindingSet; import com.bigdata.rdf.sparql.ast.QueryRoot; import com.bigdata.rdf.sparql.ast.SolutionSetStatserator; import com.bigdata.rdf.sparql.ast.StatementPatternNode; import com.bigdata.rdf.sparql.ast.StaticAnalysis; import com.bigdata.rdf.sparql.ast.SubqueryRoot; import com.bigdata.rdf.sparql.ast.TermNode; import com.bigdata.rdf.sparql.ast.UnionNode; import com.bigdata.rdf.sparql.ast.VarNode; import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; import com.bigdata.rdf.sparql.ast.eval.IEvaluationContext; /** * <p> * Optimizer that aims at the optimization of SPARQL 1.1 pattern detecting * static (i.e., non runtime dependent) binding for variables, moving * them to the top-level of the query where possible, and attaching them with * occurrences of the variable. * </p> * * <p> * The optimizer may apply to the following construct: * * (A) BIND + VALUES clauses with constant values; * (B) FILTER IN clauses with URIs; * (C) FILTERs with sameTerm(?x,<http://someUri>) and ?x=<http://someUri> clauses * </p> * * <p> * It proceeds as follows: * </p> * * <p> * 1. Identify the constructs mentioned above in the query and record identified * static bindings. We distinguish between produced static bindings, namely * constructs that introduce bindings (case A above) and enforced static * bindings (cases B and C) above. 
Produced bindings are recorded in class
 * {@link StaticBindingInfo}, and for them we also remove the construct
 * producing the bindings, as they will later be added as static bindings
 * to the query top level (see step 3a below). See step 3b below for the
 * treatment of enforced bindings.
 * </p>
 *
 * <p>
 * 2. Along the way, we record usages of all variables, see class
 * {@link VariableUsageInfo}. These usages (i) may be used for inlining
 * in a later step, whenever we detect static bindings, and (ii) are used
 * to decide whether static bindings can be considered global. There are
 * some pitfalls here, in particular with FILTER expressions, see the code
 * for in-depth comments.
 * </p>
 *
 * <p>
 * 3. Having extracted the static bindings and the associated variable usages,
 * we proceed as follows:
 *
 * 3a. Produced bindings are moved to the top-level. For the main query
 *     this means inserting them into the set of exogenous bindings,
 *     for subqueries we construct a VALUES clause.
 *
 * 3b. Both the produced and enforced bindings are "inlined" to identified
 *     usages of the variable. This essentially means replacing occurrences
 *     of the variable by a hybrid of Variable+Constant. This info
 *     can then later be used in further optimization steps and at runtime.
 * </p>
 *
 * <p>
 * 4. As a side effect, the optimizer re-initializes the {@link StaticAnalysis}
 * class with the statically known bindings for the top-level query, which
 * again may be valuable input for further optimization.
* </p> * * <p> * Note: This optimizer generalizes the {@link ASTSimpleBindingsOptimizer}, the * {@link ASTValuesOptimizer}, and the {@link ASTBindingAssigner}, which * have been disabled and marked deprecated * </p> * * <p> * The following extensions are not considered crucial for now, but might be * subject to future work on this optimizer: (i) we may want to decompose * FILTERs prior to running this optimizer, which may be useful to identify; * (ii) in some cases it is also valid to propagate static bindings to * optional patterns; implementing this might increase the benefit of the * optimizer; (iii) we could implement some special handling for the variables * reported by {@link IEvaluationContext#getGloballyScopedVariables()}: * currently, they're just treated as "normal" variables and hence only inlined * at top-level, but it would be possible to inline them into subqueries and * in any nested scope as well, giving us a (in general) a better evaluation * plan. * </p> * * @author <a href="mailto:ms@metaphacts.com">Michael Schmidt</a> * * @version $Id$ */ public class ASTStaticBindingsOptimizer implements IASTOptimizer { @SuppressWarnings({ "rawtypes", "unused", "unchecked" }) @Override public QueryNodeWithBindingSet optimize( final AST2BOpContext context, final QueryNodeWithBindingSet input) { final IQueryNode queryNode = input.getQueryNode(); final IBindingSet[] bindingSets = input.getBindingSets(); /** * We collect statically enforced bindings in this binding set, which * will later be post processed and injected (by joining all bindings). */ final StaticBindingInfo staticBindingInfo = new StaticBindingInfo(bindingSets); /** * In case the binding sets variable is null, nothing needs to be done * (query will return empty result anyways). Also, this optimizer is * only applicable to top-level queries. 
*/ if (bindingSets == null || !(queryNode instanceof QueryRoot)) { return new QueryNodeWithBindingSet(queryNode, bindingSets); } final QueryRoot queryRoot = (QueryRoot) queryNode; // initialize variables used throughout the optimizer final StaticAnalysis sa = new StaticAnalysis(queryRoot, context); final VariableUsageInfo varUsageInfo = new VariableUsageInfo(); final Set<InlineTasks> inlineTasks = new HashSet<InlineTasks>(); /** * Setup inlining tasks for existing bindings in the binding set */ // extract information about used vars from a top-level perspective final VariableUsageInfo childVarUsageInfo = new VariableUsageInfo(); childVarUsageInfo.extractVarSPUsageInfoChildrenOrSelf( queryRoot.getWhereClause()); final ISolutionSetStats stats = SolutionSetStatserator.get(bindingSets); final Map<IVariable<?>, IConstant<?>> staticVars = SolutionSetStatserator.get(bindingSets).getConstants(); for (IVariable var : staticVars.keySet()) { if (childVarUsageInfo.varUsed(var)) { final IConstant value = staticVars.get(var); inlineTasks.add( new InlineTasks(var, value, childVarUsageInfo.getVarUsages(var))); } } /** * Apply the optimization (eliminating static binding producing constructs * from the query and collecting the staticBindings set) */ final IBindingSet[] bindingSetsOut = optimize( sa, queryRoot, staticBindingInfo, varUsageInfo, inlineTasks, queryRoot.getBindingsClause()); /** * Propagate information about changed binding sets to the context. */ context.setSolutionSetStats(SolutionSetStatserator.get(bindingSetsOut)); return new QueryNodeWithBindingSet(queryRoot, bindingSetsOut); } /** * Applies the optimization to the given query root. 
*/ @SuppressWarnings({ "unchecked", "rawtypes" }) private IBindingSet[] optimize( final StaticAnalysis sa, final QueryBase queryRoot, final StaticBindingInfo staticBindingInfo, final VariableUsageInfo ancestorVarUsageInfo, final Set<InlineTasks> inlineTasks, final BindingsClause bindingsClause) { /** * First, collect static bindings from the outer VALUES clause. In case * of the root query, those will be added to the exogeneous binding set * later on; in case of subqueries, we'll merge them with other produced * static bindings into a new VALUES clause in the end. */ if (bindingsClause!=null) { List<IBindingSet> bs = bindingsClause.getBindingSets(); IBindingSet[] bsList = bs.toArray(new IBindingSet[bs.size()]); // register produced bindings staticBindingInfo.addProduced(bs); // remove the bindings clause queryRoot.setBindingsClause(null); // extract information about used vars from a top-level perspective final VariableUsageInfo childVarUsageInfo = new VariableUsageInfo(); childVarUsageInfo.extractVarSPUsageInfoChildrenOrSelf(queryRoot.getWhereClause()); // set up inlining task final Map<IVariable<?>, IConstant<?>> staticVars = SolutionSetStatserator.get(bsList).getConstants(); for (IVariable<?> var : staticVars.keySet()) { if (childVarUsageInfo.varUsed(var)) { final IConstant value = staticVars.get(var); inlineTasks.add(new InlineTasks( var, value, childVarUsageInfo.getVarUsages(var))); } } } /** * The optimization phase performs rewriting & extraction of static * binding infos and inline tasks. */ optimize( sa, queryRoot.getWhereClause(), staticBindingInfo, ancestorVarUsageInfo, inlineTasks); /** * Having gathered the inline tasks, let's apply them now. */ for (InlineTasks inlineTask : inlineTasks) { inlineTask.apply(); } /** * Return the binding set, leaving the caller the decision on what to * do with it (i.e., adding it to the exogeneous bindings or constructing * a VALUES clause). The binding set is constructed by joining all * produced bindings. 
Note that this is the case for the outer query; * bindings for inner queries (where we add them to their VALUES clause * are treated in the optimize method for the subquery root). */ final IBindingSet[] bindingSetsOut = staticBindingInfo.joinProduced(); return bindingSetsOut; } /** * Applies the optimization to a given subquery root (wrapper around * the main entry point). */ private IBindingSet[] optimize( final StaticAnalysis sa, final SubqueryRoot subqueryRoot, final StaticBindingInfo staticBindingInfo, final VariableUsageInfo ancestorVarUsageInfo, final Set<InlineTasks> inlineTasks) { final IBindingSet[] staticBindings = optimize( sa, subqueryRoot, staticBindingInfo, ancestorVarUsageInfo, inlineTasks, subqueryRoot.getBindingsClause()); // record static bindings in subquery VALUES clause, if any final LinkedHashSet<IVariable<?>> bcVars = new LinkedHashSet<IVariable<?>>(); bcVars.addAll(SolutionSetStatserator.get(staticBindings).getUsedVars()); final List<IBindingSet> bcBindings = Arrays.asList(staticBindings); if (!bcVars.isEmpty()) { final BindingsClause bc = new BindingsClause(bcVars,bcBindings); subqueryRoot.setBindingsClause(bc); } return staticBindings; } /** * Applies the optimization to any {@link GroupNodeBase}. * * @param group a join group * @param staticBindings the list of detected static bindings * @return */ @SuppressWarnings({ "rawtypes", "unchecked", "unused" }) private void optimize( final StaticAnalysis sa, final GroupNodeBase<?> group, final StaticBindingInfo staticBindingInfo, final VariableUsageInfo ancestorVarUsageInfo, final Set<InlineTasks> inlineTasks) { if (group == null) { return; } // only process non-optional patterns if (group instanceof JoinGroupNode && (((JoinGroupNode) group).isOptional() || ((JoinGroupNode) group).isMinus())) { return; } // can't do anything here in the general case if (group instanceof UnionNode) { // Note: we may look up BIND/VALUES clauses that co-appear in both // parts of the UNION here. 
This is considered an edge case // though, so we omit the implementation for now. return; } /** * Collect information about which variables are used where, for later * inlining. Further, this information is required to make sure that * we do not make static bindings exogeneous for some variable ?x for * which a filter exists in the same scope or in the parent scope, * which would lead to the case that this variable is effectively * considered bound in the filter scope where it shouldn't (such cases * will be captured by rewritings in the ASTBottomUpOptimizer later on). **/ final VariableUsageInfo selfVarUsageInfo = new VariableUsageInfo(); selfVarUsageInfo.extractVarUsageInfoSelf(group); final VariableUsageInfo ancOrSelfVarUsageInfo = VariableUsageInfo.merge(ancestorVarUsageInfo, selfVarUsageInfo); /** * Compute usage info for the node's children (to be reused later). */ final VariableUsageInfo childVarUsageInfo = new VariableUsageInfo(); childVarUsageInfo.extractVarSPUsageInfoChildren(group); /** * Collect the children that introduce static bindings for later removal. */ final List<IGroupMemberNode> toRemove = new ArrayList<IGroupMemberNode>(); // ticket 933b: the prerequisite for static binding of a variable is that // there is no preceding OPTIONAL or MINUS possibly binding the variable // within the join group, since the OPTIONAL or MINUS would be logically // evaluated first final Set<IVariable<?>> optOrMinusVars = new HashSet<IVariable<?>>(); for (IGroupMemberNode child : group) { if (child instanceof AssignmentNode) { final AssignmentNode an = (AssignmentNode)child; final IValueExpression<?> ve = an.getValueExpression(); /** * We can optimize cases where (i) the value expression is a * constant that is (ii) represented through a ConstantNode. 
Note * that value expressions not represented through ConstantNodes * (such as, e.g., CONCAT("a", "b") in principle are amenable to * the optimization as well, but their constructed values have not * yet been joined against the dictionary and later joins might * fail. We could add this dictionary resolving step for the * constructed static bindings (i.e., the exogeneous bindings), * but this is something we'd need to do before applying the * inlining. For now, this is out of scope. Maybe a simpler to * implement strategy would be to resolve these nodes in a prior * phase and look them up, then leaving this code here unchanged. **/ if (ve instanceof IConstant && an.args().get(1) instanceof ConstantNode) { final IVariable<?> boundVar = an.getVar(); // pull out the expression to the top of the query root, if // possible (the check is necessary to avoid scoping problems // caused by bottom-up semantics, i.e. unsafe filter expressions) if (!ancOrSelfVarUsageInfo.varUsedInFilterOrAssignment(boundVar) && !optOrMinusVars.contains(boundVar)) { final IBindingSet bs = new ListBindingSet(); bs.set(boundVar, (IConstant)an.getValueExpression()); staticBindingInfo.addProduced(bs); toRemove.add(child); } // next, we inline the task; note that inlining is possible, // no matter whether we pull the bindings to the top or not final VariableUsageInfo usageInfo = VariableUsageInfo.merge( ancOrSelfVarUsageInfo, childVarUsageInfo); // add inline tasks for variable if (usageInfo.varUsed(boundVar)) { inlineTasks.add( new InlineTasks( boundVar, (IConstant) an.getValueExpression(), usageInfo.getVarUsages(boundVar))); } } // case 2: optimize in-line bindings clauses } else if (child instanceof BindingsClause) { final BindingsClause bc = (BindingsClause)child; final List<IBindingSet> bss = bc.getBindingSets(); final Set<IVariable<?>> bssVars = sa.getVarsInBindingSet(bss); boolean someVarUsedInFilterOrPrevOptOrMinus = false; for (IVariable<?> bssVar : bssVars) { 
someVarUsedInFilterOrPrevOptOrMinus |= ancOrSelfVarUsageInfo.varUsedInFilterOrAssignment(bssVar) || optOrMinusVars.contains(bssVar); } // in case none of the vars is used in a filter below, we can // safely pull it out if (!someVarUsedInFilterOrPrevOptOrMinus) { staticBindingInfo.addProduced(bc.getBindingSets()); toRemove.add(child); final VariableUsageInfo usageInfo = VariableUsageInfo.merge( ancOrSelfVarUsageInfo, childVarUsageInfo); } /* * In the following, we set up inline tasks for vars with unique * value in the specified binding set. */ final VariableUsageInfo usageInfo = VariableUsageInfo.merge(ancOrSelfVarUsageInfo, childVarUsageInfo); final IBindingSet[] bs = bss.toArray(new IBindingSet[bss.size()]); final Map<IVariable<?>, IConstant<?>> constantVars = SolutionSetStatserator.get(bs).getConstants(); for (IVariable<?> var : constantVars.keySet()) { if (usageInfo.varUsed(var)) { final IConstant<?> constantVal = constantVars.get(var); inlineTasks.add(new InlineTasks( var, (IConstant)constantVal,usageInfo.getVarUsages(var))); } } // case 3: optimize filter nodes inducing static bindings; not that // (unless the two cases before) FILTER nodes are not producing // any bindings, so they are not removed (but we only create // inline tasks for them, where possible). 
} else if (child instanceof FilterNode) { FilterNode filter = (FilterNode)child; final IValueExpressionNode vexpr = filter.getValueExpressionNode(); if(!(vexpr instanceof FunctionNode)) return; final FunctionNode functionNode = (FunctionNode) vexpr; final URI functionURI = functionNode.getFunctionURI(); // case 3.1: FILTER ?x=<http://uri> or sameTerm(?x,<http://uri>) if (functionURI.equals(FunctionRegistry.SAME_TERM) || functionURI.equals(FunctionRegistry.EQ)) { final IValueExpressionNode left = (IValueExpressionNode) functionNode.get(0); final IValueExpressionNode right = (IValueExpressionNode) functionNode.get(1); final IBindingSet bs = new ListBindingSet(); if (left instanceof VarNode && right instanceof ConstantNode) { final IV constant = ((ConstantNode) right).getValueExpression().get(); // we cannot do the inline for EQ when then constant is a literal if (functionURI.equals(FunctionRegistry.EQ) && constant.isLiteral()) return; bs.set( (IVariable)left.getValueExpression(), (IConstant)right.getValueExpression()); } else if (left instanceof ConstantNode && right instanceof VarNode) { final IV constant = ((ConstantNode) left).getValueExpression().get(); // we cannot do the inline for EQ when then constant is a literal if (functionURI.equals(FunctionRegistry.EQ) && constant.isLiteral()) return; bs.set( (IVariable)right.getValueExpression(), (IConstant)left.getValueExpression()); } /* * In case the filter describes a single static mapping, we also * schedule an inline task. 
*/ if (!bs.isEmpty() && bs.size()==1) { final Entry<IVariable,IConstant> entry = bs.iterator().next(); final IVariable<IV> var = entry.getKey(); final IConstant<IV> val = entry.getValue(); final VariableUsageInfo usageInfo = VariableUsageInfo.merge( ancOrSelfVarUsageInfo, childVarUsageInfo); // add inline tasks for variable if (usageInfo.varUsed(var)) { inlineTasks.add( new InlineTasks(var, val, usageInfo.getVarUsages(var))); } } // just in case: can't handle, ignore // case 3.1: FILTER with unary IN expression } else if (functionURI.equals(FunctionRegistry.IN)) { final int arity = functionNode.arity(); // we're only interested in unary IN expressions if (arity==2) { final BOp varNodeCandidate = functionNode.get(0); if (varNodeCandidate instanceof VarNode) { final VarNode varNode = (VarNode)varNodeCandidate; final IVariable<IV> var = (IVariable<IV>)(varNode.getValueExpression()); final BOp valueBOp = functionNode.get(1); if (valueBOp instanceof ConstantNode) { final ConstantNode valueNode = (ConstantNode)valueBOp; final IConstant<IV> value = valueNode.getValueExpression(); if (value.get().isURI()) { final VariableUsageInfo usageInfo = VariableUsageInfo.merge( ancOrSelfVarUsageInfo, childVarUsageInfo); if (usageInfo.varUsed(var)) { inlineTasks.add(new InlineTasks( var, value, usageInfo.getVarUsages(var))); } } } } } } // else: there's nothing obvious we can do } if (child instanceof IBindingProducerNode && StaticAnalysis.isMinusOrOptional(child)) { sa.getMaybeProducedBindings( (IBindingProducerNode)child, optOrMinusVars, true); } } /** * Remove the children for which static bindings were extracted (they * were recorded in the prior iteration over the group. */ for (IGroupMemberNode node : toRemove) { while (group.removeArg(node)) { // repeat } } // recurse into the (remaining) childen for (IGroupMemberNode child : group) { if (child instanceof GroupNodeBase) { /** * Recursive application of optimization, starting out from what. 
*/ optimize(sa, (GroupNodeBase<?>) child, staticBindingInfo, ancOrSelfVarUsageInfo, inlineTasks); } else if (child instanceof SubqueryRoot) { /** * Apply to subquery, voiding all collected information to account * for the new scope induced by the subquery. */ optimize(sa, (SubqueryRoot) child, new StaticBindingInfo(), new VariableUsageInfo(), new HashSet<InlineTasks>()); } } } /** * Helper class used to record usage of a given variable, i.e. linking * variables to constructs in which they occur. */ public static class VariableUsageInfo { /** * Map recording variable usages */ final Map<IVariable<?>,List<IQueryNode>> usageMap; /** * Constructor creating an empty object (no usages). */ public VariableUsageInfo() { usageMap = new HashMap<IVariable<?>,List<IQueryNode>>(); } /** * Returns true if a usage record for the given variable inside a FILTER * or BIND/VALUES node has been recorded. The rationale of this check is * as follows: if a variable used inside a static binding is queries in * such an ancestor filter, the binding must not be considered global. * As an example, consider the following query: * * <pre> * SELECT * WHERE { * FILTER(!bound(?x)) * { * BIND(1 AS ?x) * } * } * * Expected result is the empty set, according to bottom-up semantics. * We must *not* rewrite this query as * * SELECT * WHERE { * FILTER(!bound(?x)) * } VALUES ?x { 1 } * * The reason is that in this case the FILTER will pass, while ?x in the * original query the BIND(1 AS ?x) is in scope and the filter fails. 
* * The same holds for queries where the FILTER is at the same level * as the BIND/VALUES clause, such as: * * SELECT * WHERE { * FILTER(!bound(?x)) * { * BIND(1 AS ?x) * } * } * * @param var * @return */ public boolean varUsedInFilterOrAssignment(IVariable<?> var) { if (!usageMap.containsKey(var)) { return false; } final List<IQueryNode> varOccurrences = usageMap.get(var); for (int i=0; i<varOccurrences.size(); i++) { final IQueryNode n = varOccurrences.get(i); if (n instanceof FilterNode || n instanceof AssignmentNode) { return true; } } return false; // no filter occurrence detected } /** * Returns true iff usage records for the var are available. * * @param var * @return */ public boolean varUsed(IVariable<?> var) { return usageMap.containsKey(var) && !usageMap.get(var).isEmpty(); } /** * Returns the list of variable usages. * * @param var * @return */ public List<IQueryNode> getVarUsages(IVariable<?> var) { return usageMap.get(var); } /** * Gets the map recoding all usages of all variables (internal data * structure maintained by the class). * * @return */ public Map<IVariable<?>,List<IQueryNode>> getUsageMap() { return usageMap; } /** * Extracts variable usage information (variables and the nodes they * occur in), investigating FILTERs, assignment nodes, and statement * pattern nodes, but /not/ recursing into children. 
* * @param group the group where to extract filter var info (non-recursively) */ public void extractVarUsageInfoSelf(final GroupNodeBase<?> group) { for (IQueryNode node : group) { // Note: there's no need to handle BindingsClause here, since // a BindingsClause always binds to constant values if (node instanceof FilterNode || node instanceof AssignmentNode) { final GroupMemberValueExpressionNodeBase filter = (GroupMemberValueExpressionNodeBase) node; final IValueExpressionNode vexpr = filter.getValueExpressionNode(); extractVarUsageInfo(node, (IValueExpressionNode)vexpr); } else if (node instanceof StatementPatternNode) { final StatementPatternNode spn = (StatementPatternNode)node; for (IVariable<?> spnVar : spn.getProducedBindings()) { registerVarToChildMappingInUsageMap(spnVar, node); } // BLZG-2042: inline BIND information into property path nodes } else if (node instanceof PropertyPathNode) { extractVarUsageInfoForPropertyPathNode((PropertyPathNode)node); } } } private void extractVarUsageInfoForPropertyPathNode( final PropertyPathNode ppNode) { // cover subject and object position variable if (ppNode != null && ppNode.arity() >= 3 /* should always be true, just in case */) { final BOp subjectNode = ppNode.get(0); if (subjectNode instanceof VarNode) { registerVarToChildMappingInUsageMap(((VarNode) subjectNode).getValueExpression(), ppNode); } final BOp objectNode = ppNode.get(2); if (objectNode instanceof VarNode) { registerVarToChildMappingInUsageMap(((VarNode) objectNode).getValueExpression(), ppNode); } } // cover context position variable, if defined if (ppNode.arity() >= 4) { final BOp contextNode = ppNode.get(3); if (contextNode instanceof VarNode) { registerVarToChildMappingInUsageMap(((VarNode) contextNode).getValueExpression(), ppNode); } } } /** * Extracts variable usage information from an {@link IValueExpressionNode}. 
* * @param node the "parent" node which is reported back * @param the value expression node to investigate */ private void extractVarUsageInfo( final IQueryNode node, final IValueExpressionNode vexpNode) { /** * If the vexpNode is a VarNode, we're done and add it to the map * (if it has not yet been recorded before). */ if (vexpNode instanceof VarNode) { final VarNode varNode = (VarNode)vexpNode; final IVariable<?> iVar = varNode.getValueExpression(); if (!usageMap.containsKey(iVar)) { usageMap.put(iVar, new ArrayList<IQueryNode>()); } // add node to list usageMap.get(iVar).add(node); return; } /** * Otherwise, we scan for recursively nested var nodes */ final BOp nodeAsBop = (BOp)vexpNode; final int arity = nodeAsBop.arity(); for (int i=0; i<arity; i++) { final BOp child = nodeAsBop.get(i); if (child instanceof IValueExpressionNode) { // recurse extractVarUsageInfo(node, (IValueExpressionNode)child); } } } /** * Extracts usage information for the variable from statement patterns * being direct children of the current node (ignoring the node itself). * * @param group the group node base in which we perform the lookup */ @SuppressWarnings("rawtypes") public void extractVarSPUsageInfoChildren(GroupNodeBase<?> group) { for (IQueryNode child : group) { if (child instanceof GroupNodeBase) { extractVarSPUsageInfoChildrenOrSelf((GroupNodeBase)child); } } } /** * Extracts usage information for the variable inside statement patterns * from the current node itself and its children (recursively). * * Note that we do not report variable usage in filters, as those * might be not in scope and inlining is not safe (interference with * {@link ASTBottomUpOptimizer#handleFiltersWithVariablesNotInScope})!). * This maybe somewhat too strict (leading to situations where we do * not inline static variable bindings), but is safe. 
* * @param group */ @SuppressWarnings("rawtypes") public void extractVarSPUsageInfoChildrenOrSelf(GroupNodeBase<?> group) { if (group==null) { return; } // abort for optional patterns if (group instanceof JoinGroupNode && ((JoinGroupNode) group).isOptional()) { return; } for (IQueryNode child : group) { if (child instanceof GroupNodeBase) { extractVarSPUsageInfoChildrenOrSelf((GroupNodeBase)child); } else if (child instanceof StatementPatternNode) { final StatementPatternNode spn = (StatementPatternNode)child; for (IVariable<?> spnVar : spn.getProducedBindings()) { registerVarToChildMappingInUsageMap(spnVar,child); } } else if (child instanceof GroupNodeBase) { extractVarSPUsageInfoChildrenOrSelf((GroupNodeBase)child); // BLZG-2042: inline BIND information into property path nodes } else if (child instanceof PropertyPathNode) { extractVarUsageInfoForPropertyPathNode((PropertyPathNode)child); } } } /** * Registers the mapping between the variable var and the child in the usage map. If var is null * or the child is null, no action will be taken. * * @param var the variable * @param child the child using the variable */ private void registerVarToChildMappingInUsageMap(final IVariable<?> var, final IQueryNode child) { if (var==null) return; if (!usageMap.containsKey(var)) { usageMap.put(var, new ArrayList<IQueryNode>()); } // add node to list usageMap.get(var).add(child); } /** * Merges two {@link VariableUsageInfo}, creating a new one containing * the merged information. The original objects are not modified. */ public static VariableUsageInfo merge( final VariableUsageInfo x, final VariableUsageInfo y) { final VariableUsageInfo merged = new VariableUsageInfo(); final Map<IVariable<?>, List<IQueryNode>> usageMap = merged.getUsageMap(); final Map<IVariable<?>, List<IQueryNode>> xUsageMap = x.getUsageMap(); for (IVariable<?> var : xUsageMap.keySet()) { // make sure there's an entry for the key ... 
if (!usageMap.containsKey(var)) usageMap.put(var, new ArrayList<IQueryNode>()); // ... and perform merge usageMap.get(var).addAll(xUsageMap.get(var)); } final Map<IVariable<?>, List<IQueryNode>> yUsageMap = y.getUsageMap(); for (IVariable<?> var : yUsageMap.keySet()) { // make sure there's an entry for the key ... if (!usageMap.containsKey(var)) usageMap.put(var, new ArrayList<IQueryNode>()); // ... and perform merge usageMap.get(var).addAll(yUsageMap.get(var)); } return merged; } } /** * Class that helps to keep track of static bindings that have been spotted * during query analysis. The class maintains a list of "produced" static * bindings, i.e. such derived from assignment nodes etc. */ public static class StaticBindingInfo { final List<List<IBindingSet>> produced; final IBindingSet[] queryInput; /** * Constructor with empty input binding set. */ public StaticBindingInfo() { this.produced = new ArrayList<List<IBindingSet>>(); this.queryInput = new IBindingSet[] { new ListBindingSet() }; } /** * Constructor with given input binding set. */ public StaticBindingInfo(IBindingSet[] queryInput) { this.produced = new ArrayList<List<IBindingSet>>(); this.queryInput = queryInput; } public void addProduced(IBindingSet bs) { produced.add(wrap(bs)); } public void addProduced(List<IBindingSet> bsList) { produced.add(bsList); } public List<List<IBindingSet>> getProduced() { return produced; } public IBindingSet[] joinProduced() { return join(produced); } private List<IBindingSet> wrap(IBindingSet bs) { final List<IBindingSet> bsList = new ArrayList<IBindingSet>(); bsList.add(bs); return bsList; } /** * Joins the staticBindings with the queryInput binding set, returning * the resulting binding set. 
*/ private IBindingSet[] join(List<List<IBindingSet>> staticBindings) { if (queryInput == null || queryInput.length == 0) { return queryInput; // nothing to be done } // we join everything together the statically derived bindings in a // nested loop fashion; typically, we may expect one binding, but this // may also result in multiple binding sets (or the empty binding set) // in the general case List<IBindingSet> leftBindingSets = Arrays.asList(queryInput); for (List<IBindingSet> staticBinding : staticBindings) { final List<IBindingSet> tmp = new LinkedList<IBindingSet>(); for (IBindingSet left : leftBindingSets) { final Iterator<IBindingSet> rightItr = staticBinding.iterator(); while (rightItr.hasNext()) { final IBindingSet right = rightItr.next(); final IBindingSet join = BOpContext.bind(left, right, null /* constraints */, null /* varsToKeep */); if (join != null) { tmp.add(join); } } } leftBindingSets = tmp; // prepare for next iteration } return leftBindingSets .toArray(new IBindingSet[leftBindingSets.size()]); } } /** * Task specifying the inlining opportunities for a given variable in the * form of a list of query nodes in which a statically derived value for * the given variable can be inlined. * * Can be applied by calling the {@link InlineTasks#apply()} method. */ public static class InlineTasks { @SuppressWarnings("rawtypes") final private IVariable var; @SuppressWarnings("rawtypes") final private IConstant<IV> val; /** * The nodes in which the variable can be inlined. 
*/ final private List<IQueryNode> nodes; /** * Construct an inline task * * @param var the variable to inline * @param val the known value for the variable * @param nodes the nodes in which inlining is valid */ @SuppressWarnings("rawtypes") public InlineTasks( final IVariable var, final IConstant<IV> val, final List<IQueryNode> nodes) { this.var = var; this.nodes = nodes; this.val = val; } @SuppressWarnings("rawtypes") public IVariable getVar() { return var; } /** * Applies the {@link InlineTasks} for the variable through the given * constant to the patterns specified in the task. */ @SuppressWarnings("rawtypes") public void apply() { final IV valIV = val.get(); for (IQueryNode node : nodes) { apply(valIV, node); } } /** * Applies the {@link InlineTasks} for the class variable with the * parameter val to the given query node. * * @param val * @param node */ @SuppressWarnings("rawtypes") private void apply(final IV val, IQueryNode node) { if (node instanceof FilterNode) { final FilterNode filter = (FilterNode)node; final IValueExpressionNode vexpr = filter.getValueExpressionNode(); applyToValueExpressionNode(val, vexpr); } else if (node instanceof AssignmentNode) { applyToAssignmentNode(val, (AssignmentNode)node); } else if (node instanceof StatementPatternNode) { applyToStatementPattern(val, (StatementPatternNode)node); // BLZG-2042: inline BIND information into property path nodes } else if (node instanceof PropertyPathNode) { applyToPropertyPathNode(val, (PropertyPathNode)node); } else { // other patterns have not been recorded throw new IllegalArgumentException("Unexpected node type for " + node); } } /** * Applies the {@link InlineTasks} for the class variable with the * parameter val to the given assignment node. 
* * @param val * @param an */ @SuppressWarnings("rawtypes") private void applyToAssignmentNode(final IV val, final AssignmentNode an) { final IValueExpressionNode vexpr = an.getValueExpressionNode(); applyToValueExpressionNode(val, vexpr); } /** * Applies the {@link InlineTasks} for the class variable with the * parameter val to the given statement pattern node. * * @param val * @param spn */ @SuppressWarnings("rawtypes") private void applyToStatementPattern( final IV val, final StatementPatternNode spn) { final TermNode s = spn.s(); final TermNode p = spn.p(); final TermNode o = spn.o(); final TermNode c = spn.c(); if (s!=null && s instanceof VarNode && s.get(0).equals(var)) { final VarNode sVar = (VarNode)s; final ConstantNode constNode = new ConstantNode( new Constant<IV>(sVar.getValueExpression(),val)); spn.setArg(0, constNode); } if (p!=null && p instanceof VarNode && p.get(0).equals(var)) { final VarNode pVar = (VarNode)p; final ConstantNode constNode = new ConstantNode( new Constant<IV>(pVar.getValueExpression(),val)); spn.setArg(1, constNode); } if (o!=null && o instanceof VarNode && o.get(0).equals(var)) { final VarNode oVar = (VarNode)o; final ConstantNode constNode = new ConstantNode( new Constant<IV>(oVar.getValueExpression(),val)); spn.setArg(2, constNode); } if (c!=null && c instanceof VarNode && c.get(0).equals(var)) { final VarNode cVar = (VarNode)c; final ConstantNode constNode = new ConstantNode( new Constant<IV>(cVar.getValueExpression(),val)); spn.setArg(3, constNode); } } /** * Applies the {@link InlineTasks} for the class variable with the * parameter val to the given statement pattern node. 
*
         * @param val the value to inline
         * @param ppn the node that is modified
         */
        @SuppressWarnings("rawtypes")
        private void applyToPropertyPathNode(
                final IV val, final PropertyPathNode ppn) {

            // Argument 1 is deliberately left untouched (presumably the path
            // expression -- confirm against PropertyPathNode).
            if (ppn.arity() >= 3 /* should always be true, just in case */) {
                inlineIntoPropertyPathArg(val, ppn, 0); // subject
                inlineIntoPropertyPathArg(val, ppn, 2); // object
            }

            if (ppn.arity() >= 4) {
                inlineIntoPropertyPathArg(val, ppn, 3); // context
            }
        }

        /**
         * Replaces the argument at the given index of the node by a
         * {@link ConstantNode} if that argument is a {@link VarNode} for the
         * task's variable; otherwise the node is left unchanged.
         *
         * @param val the value to inline
         * @param ppn the node that is modified
         * @param index the argument position to inspect
         */
        @SuppressWarnings("rawtypes")
        private void inlineIntoPropertyPathArg(
                final IV val, final PropertyPathNode ppn, final int index) {

            final BOp arg = ppn.get(index);

            // instanceof is false for null, so an unset argument is skipped
            if (arg instanceof VarNode && arg.get(0).equals(var)) {

                final VarNode varNode = (VarNode) arg;
                final ConstantNode constNode = new ConstantNode(
                        new Constant<IV>(varNode.getValueExpression(), val));
                ppn.setArg(index, constNode);
            }
        }

        /**
         * Applies the {@link InlineTasks} for the class variable with the
         * parameter val to the given value expression node. Only
         * {@link FunctionNode}s are processed: each argument that is a
         * {@link VarNode} for the task's variable is replaced by a
         * {@link ConstantNode}, every other argument is visited recursively.
         *
         * @param val the value to inline
         * @param vexpr the value expression node to rewrite; nothing happens
         *            if it is null or not a {@link FunctionNode}
         */
        @SuppressWarnings("rawtypes")
        private void applyToValueExpressionNode(
                final IV val, final IValueExpressionNode vexpr) {

            // also covers null, for which instanceof yields false
            if (!(vexpr instanceof FunctionNode)) {
                return;
            }

            final FunctionNode functionNode = (FunctionNode) vexpr;

            for (int i = 0; i < functionNode.arity(); i++) {

                final IValueExpressionNode cur =
                        (IValueExpressionNode) functionNode.get(i);

                if (cur instanceof VarNode && ((VarNode) cur).get(0).equals(var)) {

                    final VarNode varNode = (VarNode) cur;
                    final ConstantNode constNode = new ConstantNode(
                            new Constant<IV>(varNode.getValueExpression(), val));
                    functionNode.setArg(i, constNode);

                } else {

                    applyToValueExpressionNode(val, cur); // recurse
                }
            }
        }
    }
}