/** Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on Oct 20, 2011 */ package com.bigdata.rdf.sparql.ast; import java.util.Arrays; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Set; import org.apache.log4j.Logger; import com.bigdata.bop.BOp; import com.bigdata.bop.IVariable; import com.bigdata.rdf.sparql.ast.eval.IEvaluationContext; import com.bigdata.rdf.sparql.ast.ssets.ISolutionSetManager; /** * Class provides methods to determine if two {@link IJoinNode}s can join on a * shared variable and if they can join on an variable which is indirectly * shared through constraints which can be attached to that join. * <p> * Note: This is a port of com.bigdata.bop.joinGraph.PartitionedJoinGroup * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id: StaticAnalysis_CanJoin.java 5378 2011-10-20 20:37:08Z * thompsonbry $ */ public abstract class StaticAnalysis_CanJoin extends StaticAnalysisBase { private static final Logger log = Logger.getLogger(StaticAnalysis.class); /** * * @param queryRoot * The root of the query. We need to have this on hand in order * to resolve {@link NamedSubqueryInclude}s during static * analysis. * @param evaluationContext * The evaluation context provides access to the * {@link ISolutionSetStats} and the {@link ISolutionSetManager} for * named solution sets. */ public StaticAnalysis_CanJoin(final QueryRoot queryRoot, final IEvaluationContext evaluationContext) { super(queryRoot, evaluationContext); } /** * Return <code>true</code> iff two predicates can join on the basis of at * least one variable which is shared directly by those predicates. Only the * operands of the predicates are considered. * <p> * Note: This method will only identify joins where the predicates directly * share at least one variable. However, joins are also possible when the * predicates share variables via one or more constraint(s). Use * {@link canJoinUsingConstraints} to identify such joins. * <p> * Note: Any two predicates may join regardless of the presence of shared * variables. However, such joins will produce the full cross product of the * binding sets selected by each predicate. As such, they should be run last * and this method will not return <code>true</code> for such predicates. * * @param p1 * A predicate. * @param p2 * Another predicate. * * @return <code>true</code> iff the predicates share at least one variable * as an operand. * * @throws IllegalArgumentException * if the two either reference is <code>null</code>. */ public boolean canJoin(final IJoinNode p1, final IJoinNode p2) { if (p1 == null) throw new IllegalArgumentException(); if (p2 == null) throw new IllegalArgumentException(); final Set<IVariable<?>> set1 = getSpannedVariables((BOp) p1, false/* filters */, new LinkedHashSet<IVariable<?>>()); final Set<IVariable<?>> set2 = getSpannedVariables((BOp) p2, false/* filters */, new LinkedHashSet<IVariable<?>>()); // The difference gives us the shared variables. set1.retainAll(set2); final boolean nothingShared = set1.isEmpty(); final boolean canJoin = !nothingShared; if (log.isDebugEnabled()) { if (!nothingShared) { log.debug("No directly shared variables: p1=" + p1 + ", p2=" + p2); } else { log.debug("Can join: sharedVars=" + set1); } } return canJoin; } /** * Return <code>true</code> iff a predicate may be used to extend a join * path on the basis of at least one variable which is shared either * directly or via one or more constraints which may be attached to the * predicate when it is added to the join path. The join path is used to * decide which variables are known to be bound, which in turn decides which * constraints may be run. Unlike the case when the variable is directly * shared between the two predicates, a join involving a constraint requires * us to know which variables are already bound so we can know when the * constraint may be attached. * <p> * Note: Use {@link StaticAnalysis#canJoin(IJoinNode, IJoinNode)} instead to * identify joins based on a variable which is directly shared. * <p> * Note: Any two predicates may join regardless of the presence of shared * variables. However, such joins will produce the full cross product of the * binding sets selected by each predicate. As such, they should be run last * and this method will not return <code>true</code> for such predicates. * * @param path * A join path containing at least one predicate. * @param vertex * A predicate which is being considered as an extension of that * join path. * @param constraints * A set of zero or more constraints (optional). Constraints are * attached dynamically once the variables which they use are * bound. Hence, a constraint will always share a variable with * any predicate to which it is attached. If any constraints are * attached to the given vertex and they share a variable which * has already been bound by the join path, then the vertex may * join with the join path even if it does not directly bind that * variable. * * @return <code>true</code> iff the vertex can join with the join path via * a shared variable. * * @throws IllegalArgumentException * if the join path is <code>null</code>. * @throws IllegalArgumentException * if the join path is empty. * @throws IllegalArgumentException * if any element in the join path is <code>null</code>. * @throws IllegalArgumentException * if the vertex is <code>null</code>. * @throws IllegalArgumentException * if the vertex is already part of the join path. * @throws IllegalArgumentException * if any element in the optional constraints array is * <code>null</code>. */ public boolean canJoinUsingConstraints(final IJoinNode[] path, final IJoinNode vertex, final FilterNode[] constraints) { /* * Check arguments. */ if (path == null) throw new IllegalArgumentException(); if (vertex == null) throw new IllegalArgumentException(); // constraints MAY be null. if (path.length == 0) throw new IllegalArgumentException(); { for (IJoinNode p : path) { if (p == null) throw new IllegalArgumentException(); if (vertex == p) throw new IllegalArgumentException(); } } /* * Find the set of variables which are known to be bound because they * are referenced as operands of the predicates in the join path. */ final Set<IVariable<?>> knownBound = new LinkedHashSet<IVariable<?>>(); { for (IJoinNode p : path) { getSpannedVariables((BOp) p, false/* filters */, knownBound); } } /* * If the given predicate directly shares a variable with any of the * predicates in the join path, then we can return immediately. */ { final Set<IVariable<?>> vset = getSpannedVariables((BOp) vertex, new LinkedHashSet<IVariable<?>>()); vset.retainAll(knownBound); if (!vset.isEmpty()) { if (log.isDebugEnabled()) log.debug("Can join: sharedVars=" + vset + ", path=" + Arrays.toString(path) + ", vertex=" + vertex); return true; } } if (constraints == null) { // No opportunity for a constraint based join. if (log.isDebugEnabled()) log.debug("No directly shared variable: path=" + Arrays.toString(path) + ", vertex=" + vertex); return false; } /* * Find the set of constraints which can run with the vertex given the * join path. */ { // Extend the new join path. final IJoinNode[] newPath = new IJoinNode[path.length + 1]; System.arraycopy(path/* src */, 0/* srcPos */, newPath/* dest */, 0/* destPos */, path.length); newPath[path.length] = vertex; /* * Find the constraints that will run with each vertex of the new * join path. */ final FilterNode[][] constraintRunArray = getJoinGraphConstraints( newPath, constraints, null/*knownBound*/, true/*pathIsComplete*/ ); /* * Consider only the constraints attached to the last vertex in the * new join path. All of their variables will be bound since (by * definition) a constraint may not run until its variables are * bound. If any of the constraints attached to that last share any * variables which were already known to be bound in the caller's * join path, then the vertex can join (without of necessity being a * full cross product join). */ final FilterNode[] vertexConstraints = constraintRunArray[path.length]; for (FilterNode c : vertexConstraints) { // consider all variables spanned by the constraint. final Set<IVariable<?>> vset = getSpannedVariables(c, true/* filters */, new LinkedHashSet<IVariable<?>>()); vset.retainAll(knownBound); if (!vset.isEmpty()) { if (log.isDebugEnabled()) log.debug("Can join: sharedVars=" + vset + ", path=" + Arrays.toString(path) + ", vertex=" + vertex + ", constraint=" + c); return true; } } } if (log.isDebugEnabled()) log.debug("No shared variable: path=" + Arrays.toString(path) + ", vertex=" + vertex + ", constraints=" + Arrays.toString(constraints)); return false; } /** * Given a join path, return the set of constraints to be associated with * each join in that join path. Only those constraints whose variables are * known to be bound will be attached. * * @param path * The join path. * @param joinGraphConstraints * The constraints to be applied to the join path (optional). * @param knownBoundVars * Variables that are known to be bound as inputs to this join * graph (parent queries). * @param pathIsComplete * <code>true</code> iff the <i>path</i> represents a complete * join path. When <code>true</code>, any constraints which have * not already been attached will be attached to the last predicate * in the join path. * * @return The constraints to be paired with each element of the join path. * * @throws IllegalArgumentException * if the join path is <code>null</code>. * @throws IllegalArgumentException * if the join path is empty. * @throws IllegalArgumentException * if any element of the join path is <code>null</code>. * @throws IllegalArgumentException * if any element of the join graph constraints is * <code>null</code>. */ public FilterNode[][] getJoinGraphConstraints( final IJoinNode[] path,// final FilterNode[] joinGraphConstraints,// Set<IVariable<?>> boundVars,// final boolean pathIsComplete// ) { if (path == null) throw new IllegalArgumentException(); if (path.length == 0) throw new IllegalArgumentException(); if (boundVars == null) boundVars = new LinkedHashSet<IVariable<?>>(); // the set of constraints for each predicate in the join path. final FilterNode[][] ret = new FilterNode[path.length][]; /* * For each predicate in the path in the given order, figure out which * constraint(s) would attach to that predicate based on which variables * first become bound with that predicate. For the last predicate in the * given join path, we return that set of constraints. */ // the set of constraints which have been consumed. final Set<FilterNode> used = new LinkedHashSet<FilterNode>(); for (int i = 0; i < path.length; i++) { // a predicate in the path. final IJoinNode p = path[i]; if (p == null) throw new IllegalArgumentException(); // the constraints for the current predicate in the join path. final List<FilterNode> constraints = new LinkedList<FilterNode>(); // /* // * Visit the variables used by the predicate (and bound by it since // * it is not an optional predicate) and add them into the total set // * of variables which are bound at this point in the join path. // */ // getSpannedVariables((BOp) p, boundVars); // above does not work if p is a Union nor, I suspect, a Minus - jjc // tring this next line as an alternative - jjc. getDefinitelyProducedBindings(p, boundVars, true); if (joinGraphConstraints != null) { // consider each constraint. for (FilterNode c : joinGraphConstraints) { if (c == null) throw new IllegalArgumentException(); if (used.contains(c)) { /* * Skip constraints which were already assigned to * predicates before this one in the join path. */ continue; } boolean attach = false; if (pathIsComplete && i == path.length - 1) { // attach all unused constraints to last predicate attach = true; } else { /* * true iff all variables used by this constraint are * bound at this point in the join path. */ final boolean allVarsBound; // all variables used by the constraint. final Set<IVariable<?>> vset = getSpannedVariables(c, true/* filters */, new LinkedHashSet<IVariable<?>>()); // remove all bound variables. vset.removeAll(boundVars); // the constraint can be attached if [vset] is empty. allVarsBound = vset.isEmpty(); attach = allVarsBound; } if (attach) { /* * All variables have become bound for this constraint, * so add it to the set of "used" constraints. */ used.add(c); if (log.isDebugEnabled()) { log.debug("Constraint attached at index " + i + " of " + path.length + ", constraint=" + c); } constraints.add(c); } // if(allVarsBound) } // next constraint } // joinGraphConstraints != null; // store the constraint[] for that predicate. ret[i] = constraints.toArray(new FilterNode[constraints.size()]); } // next predicate in the join path. /* * Return the set of constraints associated with each predicate in the * join path. */ return ret; } /** * Return the set of variables which MUST be bound for solutions after the * evaluation of this group. A group will produce "MUST" bindings for * variables from its statement patterns and a LET based on an expression * whose variables are known bound. * <p> * The returned collection reflects "bottom-up" evaluation semantics. This * method does NOT consider variables which are already bound on entry to * the group. * <p> * Note: When invoked for an OPTIONAL or MINUS join group, the variables * which would become bound during the evaluation of the join group are * reported. Caller's who wish to NOT have variables reported for OPTIONAL * or MINUS groups MUST NOT invoke this method for those groups. * <p> * Note: The recursive analysis does not throw out variables when part of * the tree will provably fail to bind anything. It is the role of query * optimizers to identify those situations and prune the AST appropriately. * <p> * The class hierarchy is a little untidy at this point. * This method is defined in the only subclass of this abstract class. * Initially it was thought to not be needed here. * * @param node * The node to be analyzed. * @param vars * Where to store the "MUST" bound variables. * @param recursive * When <code>true</code>, the child groups will be recursively * analyzed. When <code>false</code>, only <i>this</i> group will * be analyzed. * * @return The argument. */ public abstract Set<IVariable<?>> getDefinitelyProducedBindings( final IBindingProducerNode node, final Set<IVariable<?>> vars, final boolean recursive); }