/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Sep 7, 2011
*/
package com.bigdata.rdf.sparql.ast.optimizers;
import java.util.LinkedHashSet;
import java.util.Set;
import com.bigdata.bop.BOp;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IVariable;
import com.bigdata.rdf.sparql.ast.AssignmentNode;
import com.bigdata.rdf.sparql.ast.ExistsNode;
import com.bigdata.rdf.sparql.ast.FilterNode;
import com.bigdata.rdf.sparql.ast.GraphPatternGroup;
import com.bigdata.rdf.sparql.ast.GroupMemberValueExpressionNodeBase;
import com.bigdata.rdf.sparql.ast.IGraphPatternContainer.Annotations;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.IQueryNode;
import com.bigdata.rdf.sparql.ast.IValueExpressionNode;
import com.bigdata.rdf.sparql.ast.NamedSubqueryRoot;
import com.bigdata.rdf.sparql.ast.NotExistsNode;
import com.bigdata.rdf.sparql.ast.ProjectionNode;
import com.bigdata.rdf.sparql.ast.QueryBase;
import com.bigdata.rdf.sparql.ast.QueryHints;
import com.bigdata.rdf.sparql.ast.QueryNodeWithBindingSet;
import com.bigdata.rdf.sparql.ast.QueryRoot;
import com.bigdata.rdf.sparql.ast.QueryType;
import com.bigdata.rdf.sparql.ast.StaticAnalysis;
import com.bigdata.rdf.sparql.ast.SubqueryFunctionNodeBase;
import com.bigdata.rdf.sparql.ast.SubqueryRoot;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
/**
* Optimizer identifies value expressions using EXISTS or NOT EXISTS and
* rewrites them in a form suitable for evaluation by the query engine. The main
* problem with (NOT) EXISTS is that the query engine is not written to evaluate
* graph patterns within value expressions. Therefore the graph pattern is
* extracted into a subquery which must be evaluated before the FILTER may be
* evaluated.
* <p>
* Like the FILTER in which the (NOT) EXISTS appears (and like MINUS), the order
* in which the graph pattern must be evaluated is determined by the same general
* principles which govern the attachment of FILTERs to joins. That is, when a
* variable on which the filter depends is bound by a required join, the graph
* pattern must not run before that required join. And, when a variable on which
* the filter depends is only bound by an optional join, then the graph pattern
* must not run until after that optional join.
* <p>
* For (NOT) EXISTS evaluation we need to bind the outcome of the graph pattern
* (that is, whether or not the graph pattern is satisfied) onto an anonymous
* variable (assigned by the SPARQL parser). It is the truth state of that
* anonymous variable which is tested by the filter. This is handled by special
* handshaking with the join in which we declare the anonymous variable to the
* join (this is the "ASK_VAR") and project only that anonymous variable out of
* the join.
* <p>
* All variables which are in scope when the (NOT) EXISTS graph pattern is
* evaluated must be projected into the subquery (since they must be visible to
* it), but the bindings produced by the subquery (other than the anonymous
* variable indicating whether or not the graph pattern "exists") must be
* discarded. Again, this is handled by projecting out only the "anonymous"
* variable and the join variables (which are bound on entry to the join and are
* used within the (NOT) EXISTS graph pattern). Other bindings are discarded.
* <p>
* The interpretation of the truth state of the variable by the FILTER provides
* the "exists" or "not exists" semantics. The hash index containing the ASK VAR
* bindings must remain visible until the corresponding FILTER has been
* evaluated.
* <p>
* Note: This rewrite must run relatively early to ensure that other optimizers
* are able to run against the graph pattern once it has been lifted out of the
* (NOT) EXISTS onto a subquery.
*
* @see ExistsNode
* @see NotExistsNode
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*/
public class ASTExistsOptimizer implements IASTOptimizer {
@Override
public QueryNodeWithBindingSet optimize(
final AST2BOpContext context, final QueryNodeWithBindingSet input) {
final IQueryNode queryNode = input.getQueryNode();
final IBindingSet[] bindingSets = input.getBindingSets();
if (!(queryNode instanceof QueryRoot))
return new QueryNodeWithBindingSet(queryNode, bindingSets);
final QueryRoot queryRoot = (QueryRoot) queryNode;
final StaticAnalysis sa = new StaticAnalysis(queryRoot, context);
final Set<IVariable<?>> exogenousVars = context.getSolutionSetStats()
.getUsedVars();
if (queryRoot.getNamedSubqueries() != null) {
for (NamedSubqueryRoot subqueryRoot : queryRoot
.getNamedSubqueries()) {
@SuppressWarnings("unchecked")
final GraphPatternGroup<IGroupMemberNode> whereClause = subqueryRoot
.getWhereClause();
rewrite(sa, exogenousVars, subqueryRoot, whereClause);
}
}
@SuppressWarnings("unchecked")
final GraphPatternGroup<IGroupMemberNode> whereClause = queryRoot
.getWhereClause();
if (whereClause == null) {
throw new RuntimeException("Missing where clause? : " + queryNode);
}
rewrite(sa, exogenousVars, queryRoot, whereClause);
return new QueryNodeWithBindingSet(queryRoot, bindingSets);
}
/**
* Look for FILTER.
*
* @param p
* The parent.
*/
@SuppressWarnings("unchecked")
private void rewrite(final StaticAnalysis sa,
final Set<IVariable<?>> exogenousVars,
final QueryBase query,
final GraphPatternGroup<IGroupMemberNode> p) {
final int arity = p.size();
for (int i = 0; i < arity; i++) {
final IGroupMemberNode child = (IGroupMemberNode) p.get(i);
if (child instanceof FilterNode) {
final FilterNode filter = (FilterNode) child;
/**
* BLZG-1475: there are cases where we have nested FILTER
* EXISTS or FILTER NOT EXISTS expressions; in such cases, we
* rewrite the inner expressions first
*/
final IValueExpressionNode vexp = filter.getValueExpressionNode();
if (vexp!=null) {
final Object gpGroup =
child.get(0).getProperty(Annotations.GRAPH_PATTERN, null);
if (gpGroup instanceof GraphPatternGroup) {
rewrite(sa, exogenousVars, query,
(GraphPatternGroup<IGroupMemberNode>) gpGroup);
}
}
// rewrite filter.
rewrite(sa, exogenousVars, query, p, filter, vexp);
}
if (child instanceof GraphPatternGroup<?>) {
// Recursion.
rewrite(sa, exogenousVars, query,
(GraphPatternGroup<IGroupMemberNode>) child);
}
if (child instanceof SubqueryRoot) {
// Recursion.
final SubqueryRoot subquery = (SubqueryRoot) child;
rewrite(sa, exogenousVars, subquery, subquery.getWhereClause());
}
/**
* https://jira.blazegraph.com/browse/BLZG-1267: Unable to bind result
* of EXISTS operator -> we also need to setup subqueries for value
* expression nodes in assignment nodes.
*/
if (child instanceof AssignmentNode) {
final AssignmentNode bind = (AssignmentNode)child;
/**
* BLZG-1475: there are cases where we have nested FILTER
* EXISTS or FILTER NOT EXISTS expressions; in such cases, we
* rewrite the inner expressions first
*/
final IValueExpressionNode vexp = bind.getValueExpressionNode();
if (vexp!=null) {
final Object gpGroup =
child.get(0).getProperty(Annotations.GRAPH_PATTERN, null);
if (gpGroup instanceof GraphPatternGroup) {
rewrite(sa, exogenousVars, query,
(GraphPatternGroup<IGroupMemberNode>) gpGroup);
}
}
// rewrite filter.
rewrite(sa, exogenousVars, query, p, bind, vexp);
}
}
}
/**
* Look for {@link ExistsNode} or {@link NotExistsNode} in FILTER. If we
* find such a node, we lift its group graph pattern onto the parent.
*
* @param p
* The group in which the filter was found (aka the parent).
* @param filterOrAssignment
* The FILTER or BIND node in which an {@link ExistsNode} or
* {@link NotExistsNode} might appears.
* @param ve
* Part of the value expression for that filter.
*/
private void rewrite(final StaticAnalysis sa,
final Set<IVariable<?>> exogenousVars,
final QueryBase query,
final GraphPatternGroup<IGroupMemberNode> p,
final GroupMemberValueExpressionNodeBase filterOrAssignment,
final IValueExpressionNode ve) {
if (ve instanceof SubqueryFunctionNodeBase) {
final SubqueryFunctionNodeBase subqueryFunction = (SubqueryFunctionNodeBase) ve;
final GraphPatternGroup<IGroupMemberNode> graphPattern = subqueryFunction
.getGraphPattern();
if (graphPattern != null) {
if ((subqueryFunction instanceof ExistsNode)
|| (subqueryFunction instanceof NotExistsNode)) {
final SubqueryRoot subquery = new SubqueryRoot(QueryType.ASK);
/**
* Propagate the FILTER EXISTS mode query hint to the ASK
* subquery.
*
* @see <a href="http://trac.blazegraph.com/ticket/988"> bad
* performance for FILTER EXISTS </a>
*/
subquery.setFilterExistsMode(subqueryFunction.getFilterExistsMode());
// delegate pipelined hash join annotation to subquery
final String pipelinedHashJoinHint =
filterOrAssignment.getQueryHint(QueryHints.PIPELINED_HASH_JOIN);
if (pipelinedHashJoinHint!=null) {
subquery.setQueryHint(
QueryHints.PIPELINED_HASH_JOIN, pipelinedHashJoinHint);
}
final ProjectionNode projection = new ProjectionNode();
subquery.setProjection(projection);
/*
* The anonymous variable used to communicate the outcome of
* the graph pattern.
*/
final VarNode anonVar = (VarNode) subqueryFunction.get(0);
subquery.setAskVar(anonVar.getValueExpression());
//projection.addProjectionVar((VarNode) subqueryFunction.get(0));
/*
* Anything which is visible in the scope in which the
* FILTER appears. All we need to know is anything
* exogenous, plus anything MAYBE incoming, plus anything
* MAYBE bound in the graphPattern, retaining anything used
* within the EXISTS graphPattern.
*/
final LinkedHashSet<IVariable<?>> vars = new LinkedHashSet<IVariable<?>>();
{
vars.addAll(exogenousVars);
sa.getMaybeIncomingBindings(p, vars);
sa.getMaybeProducedBindings(p, vars, true/* recursive */);
final Set<IVariable<?>> usedVars = sa
.getSpannedVariables(graphPattern,
new LinkedHashSet<IVariable<?>>());
vars.retainAll(usedVars);
}
// * Note: This may not be the best way to gather those
// * variables. First, [query] is not being passed into this
// * method in a manner which is sensitive to how we enter
// * SPARQL 1.1 subqueries nodes. Second, the order in which
// * the FILTER appears with respect to the other nodes in the
// * parent has not yet been settled, but getProjectedVars()
// * is paying attention to that order. Really,
//
// final Set<IVariable<?>> vars = sa.getProjectedVars(filter,
// graphPattern, query, exogenousVars,
// new LinkedHashSet<IVariable<?>>());
for(IVariable<?> var : vars) {
projection.addProjectionVar(new VarNode(var.getName()));
}
/*
* Note: This makes the anonymous variable appear as if it
* is used by the ASK subquery. That is important for the
* bottom up analysis, which will otherwise identify the
* anonymous variable as one which is provably not bound in
* the filter.
*/
projection.addProjectionVar(anonVar);
subquery.setWhereClause(graphPattern);
// lift the SubqueryRoot into the parent.
p.addChild(subquery);
}
}
}
final int arity = ((BOp) ve).arity();
for (int i = 0; i < arity; i++) {
final BOp child = ((BOp) ve).get(i);
if (child instanceof IValueExpressionNode) {
// Recursion.
rewrite(sa, exogenousVars, query, p, filterOrAssignment,
(IValueExpressionNode) child);
}
}
}
}