/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Sep 10, 2011
*/
package com.bigdata.rdf.sparql.ast.optimizers;
import org.openrdf.query.algebra.StatementPattern.Scope;
import com.bigdata.bop.IBindingSet;
import com.bigdata.rdf.model.BigdataURI;
import com.bigdata.rdf.sparql.ast.QueryNodeWithBindingSet;
import com.bigdata.rdf.sparql.ast.DatasetNode;
import com.bigdata.rdf.sparql.ast.FilterNode;
import com.bigdata.rdf.sparql.ast.FunctionNode;
import com.bigdata.rdf.sparql.ast.GlobalAnnotations;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.IGroupNode;
import com.bigdata.rdf.sparql.ast.IQueryNode;
import com.bigdata.rdf.sparql.ast.JoinGroupNode;
import com.bigdata.rdf.sparql.ast.NamedSubqueryRoot;
import com.bigdata.rdf.sparql.ast.ProjectionNode;
import com.bigdata.rdf.sparql.ast.QueryRoot;
import com.bigdata.rdf.sparql.ast.QueryType;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.SubqueryRoot;
import com.bigdata.rdf.sparql.ast.TermNode;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpJoins;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpUtility;
import com.bigdata.rdf.sparql.ast.eval.DataSetSummary;
import com.bigdata.rdf.sparql.ast.eval.IEvaluationContext;
import com.bigdata.rdf.sparql.ast.service.ServiceNode;
/**
* Handles a variety of special constructions related to graph groups.
*
* <dl>
* <dt>GRAPH ?foo</dt>
* <dd>
* Anything nested (even if a subquery) is constrained to be from
* <code>?foo</code>. All nested statement patterns must have <code>?foo</code>
* as their context, even if they occur within a subquery. (This is not true for
* a named subquery which just projects its solutions but does not inherit the
* parent's graph context. However, if we lifted the named subquery out, e.g.,
* for bottom up evaluation semantics, then we need to impose the GRAPH
* constraint on the named subquery which means running this optimizer before
* the one which lifts out the named subquery.)</dd>
* <dt>GRAPH ?foo { ... GRAPH ?bar { ... } ... }</dt>
* <dd>The easy way to enforce this constraint when there are nested graph
* patterns is with a <code>SameTerm(?foo,?bar)</code> constraint inside of the
* nested graph pattern.
* <p>
* The problem with this is that it does not enforce the constraint as soon as
* possible under some conditions. A rewrite of the variable would have that
* effect but the rewrite needs to be aware of variable scope rules so we do not
* rewrite the variable within a subquery if it is not projected by that
* subquery. We would also have to add a BIND(?foo AS ?bar) to make ?bar visible
* in the scope of parent groups.
* <p>
* However, there is an INCLUDE problem too. That could be handled by moving the
* INCLUDE into a subgroup with a BIND to rename the variable or by adding a
* "projection" to the INCLUDE so we could rename the variable there.
* <p>
* Since this construction of nested graph patterns is rare, and since it is
* complicated to make it more efficient, we are going with the SameTerm()
* constraint for now.</dd>
* <dt>GRAPH uri</dt>
* <dd>
* This is only allowed if the uri is in the named data set (or if no data set
* was given). Translation time error.</dd>
* <dt>GRAPH uri { ... GRAPH uri2 ... }</dt>
* <dd>It is a query error if a <code>GRAPH uri</code> is nested within another
* <code>GRAPH uri</code> for distinct IRIs.</dd>
* <dt>GRAPH uri { ... GRAPH ?foo ... }</dt>
* <dd>The outer graph imposes a constant constraint. The inner graph needs to
* inherit that constraint. Either a SameTerm() constraint must be added to the
* inner graph or context for the inner graph could be rewritten using
* Constant/2. Again, this is an optimization which may not contribute much
* value except in very rare cases. Unlike the case below, we do need to impose
* a SameTerm() constraint to make this case correct.</dd>
* <dt>GRAPH ?foo { ... GRAPH uri ... }</dt>
* <dd>If a constant is nested within a <i>non-optional</i>
* <code>GRAPH uri</code> then that constant could be lifted up and bound using
* Constant/2 on the outer graph pattern. Again, this is an optimization which
* may not contribute much value except in very rare cases. We do not need to do
* anything additional to make this case correct.</dd>
* <dt>GRAPH ?g {}</dt>
* <dd>This matches the distinct named graphs in the named graph portion of the
* data set (special case). There are several variations on this which need to
* be handled:
* <ul>
* <li>If ?g might be bound or is not bound:
* <ul>
* <li>If there is no data set, then this should be translated into
* sp(_,_,_,?g)[filter=distinct] that should be recognized and evaluated using a
* distinct term advancer on CSPO.</li>
* <li>If the named graphs are explicitly given, then annotate
* {@link StatementPatternNode} with an "IN" for <code>(?g,namedGraphs)</code>.</li>
* </ul>
* Either way, if there is a filter then apply the filter to the scan/list (this
* happens in AST2BOpUtility#toPredicate()).</li>
* <li>If <code>?g</code> is known bound coming into <code>graph ?g {}</code>
* then we want to test for the existence of at least one statement on the CSPO
* index for <code>?g</code>. This is basically ASK sp(_,_,_,uri) LIMIT 1, but we
* must run this for each binding on <code>?g</code>.</li>
* </ul>
* </dd>
* <dt>GRAPH uri {}</dt>
* <dd>This is an existence test for the graph. This is a CSPO iterator with C
* bound and a limit of one. Lift this into a named subquery since we only want
* to run it once. (This is basically ASK sp(_,_,_,uri) LIMIT 1.)</dd>
* </dl>
*
* Note: This optimizer MUST run before optimizers which lift out named
* subqueries in order to correctly impose the GRAPH constraints on the named
* subquery.
*
* @see ASTEmptyGroupOptimizer, which handles <code>{}</code> for non-GRAPH
* groups.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id: ASTEmptyGroupOptimizer.java 5177 2011-09-12 17:49:44Z
* thompsonbry $
*
* TODO If <code>?g</code> can be statically analyzed as being bound to
* a specific constant then we would rewrite <code>?g</code> using
* Constant/2 and then handle this as <code>GRAPH uri {}</code>
* <p>
* This is basically what {@link AST2BOpJoins} does when it follows the
* decision tree for named and default graphs. So, maybe that logic can
* be lifted into this class as a rewrite?
*/
public class ASTGraphGroupOptimizer implements IASTOptimizer {

//    private static final Logger log = Logger
//            .getLogger(ASTGraphGroupOptimizer.class);

    /**
     * Applies and verifies the GRAPH constraints for the WHERE clause of each
     * named subquery and for the top-level WHERE clause of the query.
     * <p>
     * If the query node carried by <code>input</code> is not a
     * {@link QueryRoot} nothing is visited. The AST is modified in place; the
     * returned object wraps the same query node and the same binding sets
     * that were passed in.
     *
     * @param context
     *            The evaluation context.
     * @param input
     *            The query node together with its exogenous binding sets.
     *
     * @return A new {@link QueryNodeWithBindingSet} wrapping the (possibly
     *         modified in place) query node and the original binding sets.
     */
    @Override
    public QueryNodeWithBindingSet optimize(
            final AST2BOpContext context, final QueryNodeWithBindingSet input) {

        final IQueryNode queryNode = input.getQueryNode();
        final IBindingSet[] bindingSets = input.getBindingSets();

        if (!(queryNode instanceof QueryRoot))
            return new QueryNodeWithBindingSet(queryNode, bindingSets);

        final QueryRoot queryRoot = (QueryRoot) queryNode;

        // The data set node (if any).
        final DatasetNode dataSet = queryRoot.getDataset();

        // WHERE clause for named subqueries.
        if (queryRoot.getNamedSubqueries() != null) {

            for (NamedSubqueryRoot namedSubquery : queryRoot
                    .getNamedSubqueries()) {

                // A named subquery starts without any enclosing GRAPH.
                visitGroups(context, dataSet, namedSubquery.getWhereClause(),
                        null/* graphContext */);

            }

        }

        // Top-level WHERE clause.
        visitGroups(context, dataSet, queryRoot.getWhereClause(),
                null/* graphContext */);

        return new QueryNodeWithBindingSet(queryNode, bindingSets);

    }

    /**
     * Visit groups, applying and verifying GRAPH constraints.
     * <p>
     * Note: This will NOT visit stuff inside of SERVICE calls. If those graph
     * patterns get rewritten it has to be by the SERVICE, not us.
     * <p>
     * Note: Recursion only descends into children which are themselves
     * {@link IGroupNode}s.
     *
     * @param context
     *            The evaluation context.
     * @param dataSet
     *            The data set declared for the query (may be
     *            <code>null</code>).
     * @param group
     *            The group to be visited.
     * @param graphContext
     *            The context of the closest enclosing GRAPH group established
     *            so far -or- <code>null</code> if this part of the query is
     *            not (yet) bounded by a GRAPH group.
     */
    @SuppressWarnings("unchecked")
    private void visitGroups(
            final IEvaluationContext context,//
            final DatasetNode dataSet,//
            final IGroupNode<IGroupMemberNode> group, //
            TermNode graphContext//
            ) {

        if (group instanceof JoinGroupNode && group.getContext() != null) {

            final TermNode innerGraphContext = group.getContext();

            if (innerGraphContext.isConstant()) {

                /*
                 * If there is a named graphs data set, then verify that the
                 * given URI is a member of that data set.
                 */
                assertGraphInNamedDataset(
                        (BigdataURI) innerGraphContext.getValue(), dataSet);

            }

            if (graphContext == null) {

                /*
                 * Top-most GRAPH group in this part of the query.
                 */
                graphContext = innerGraphContext;

            } else {

                /*
                 * There is an existing GRAPH context.
                 *
                 * Make sure the constraints are compatible and/or enforced.
                 */
                constrainNestedGraphContext(context, group, graphContext,
                        innerGraphContext);

            }

            /*
             * Handle the edge cases GRAPH ?g {} and GRAPH uri {}, which
             * require special handling (if not rewritten, they will return
             * wrong results in some cases).
             *
             * Note: This test is made AFTER a SameTerm() filter may have been
             * added to the group above.
             */
            if (group.isEmpty()) {

                rewriteEmptyGraphGroup(group, graphContext);

            }

        }

        /*
         * Visit all direct children of this group.
         *
         * Note: The group might not be a GRAPH, but the graph context will be
         * non-null if the group is bounded by a GRAPH.
         */
        for (IGroupMemberNode child : group) {

            if (child instanceof ServiceNode) {

                /*
                 * Do NOT translate SERVICE nodes.
                 */
                continue;

            }

            if (graphContext != null && child instanceof StatementPatternNode) {

                imposeGraphContext((StatementPatternNode) child, graphContext);

            }

            if (!(child instanceof IGroupNode<?>))
                continue;

            /*
             * Recursion.
             */
            visitGroups(context, dataSet,
                    (IGroupNode<IGroupMemberNode>) child, graphContext);

        }

    }

    /**
     * Verify and/or enforce that the context of a GRAPH group is compatible
     * with the context of the GRAPH group which encloses it.
     * <ul>
     * <li><code>GRAPH uri { ... GRAPH uri2 { ... } ... }</code> : both IRIs
     * must be the same (see {@link #assertSameURI(TermNode, TermNode)}).</li>
     * <li><code>GRAPH ?foo { ... GRAPH ?bar { ... } ... }</code> : a
     * <code>SameTerm(?foo,?bar)</code> filter is added to the inner group
     * when the variables differ.</li>
     * </ul>
     * Mixed constant / variable combinations are left untouched here.
     *
     * @param context
     *            The evaluation context (used to convert the new filter into
     *            a value expression).
     * @param group
     *            The inner GRAPH group.
     * @param graphContext
     *            The context of the enclosing GRAPH group.
     * @param innerGraphContext
     *            The context of the inner GRAPH group.
     */
    private void constrainNestedGraphContext(
            final IEvaluationContext context,
            final IGroupNode<IGroupMemberNode> group,
            final TermNode graphContext, final TermNode innerGraphContext) {

        if (graphContext.isConstant() && innerGraphContext.isConstant()) {

            /*
             * GRAPH uri { ... GRAPH uri { ... } ... }
             */
            assertSameURI(graphContext, innerGraphContext);

        } else if (graphContext.isVariable()
                && innerGraphContext.isVariable()
                && !graphContext.equals(innerGraphContext)) {

            /*
             * GRAPH ?foo { ... GRAPH ?bar { ... } ... }
             *
             * Adds a SameTerm(foo,bar) constraint to the inner GRAPH pattern.
             */
            final FilterNode filterNode = new FilterNode(
                    FunctionNode.sameTerm(graphContext, innerGraphContext));

            final GlobalAnnotations globals = new GlobalAnnotations(
                    context.getLexiconNamespace(), context.getTimestamp());

            // Convert the filter's AST into its value expression up front.
            AST2BOpUtility.toVE(context.getBOpContext(), globals,
                    filterNode.getValueExpressionNode());

            group.addChild(filterNode);

        }

        /*
         * TODO GRAPH ?foo { ... GRAPH uri ... } could be handled here
         * (optimization, not correctness).
         */

    }

    /**
     * Rewrite an empty GRAPH group so that it has something to evaluate.
     * <dl>
     * <dt>GRAPH ?g {}</dt>
     * <dd>A dummy statement pattern over fresh variables is added to the
     * group and annotated with <code>?g</code> as its distinct term scan
     * variable.</dd>
     * <dt>GRAPH uri {}</dt>
     * <dd>We need to verify that there is one or more statement in that named
     * graph. This is done by adding an ASK subquery which wraps a dummy
     * statement pattern for that graph.
     * <p>
     * Note that it is *not* safe to drop the whole construct, even if we
     * statically detect that the graph context IV is not in the dictionary.
     * As a counter example, consider the query
     *
     * <pre>
     * SELECT * {
     *   GRAPH &lt;http://uri.not.in.dictionary&gt; { }
     * }
     * </pre>
     *
     * The expected result is the empty set, but when removing the GRAPH
     * pattern completely, we get SELECT * WHERE {}, which gives the empty
     * binding set as result. Catching cases where the pattern can be dropped
     * seems not worth the effort, in particular considering that the ASK
     * query for the simple pattern should be quite efficient anyways.</dd>
     * </dl>
     *
     * @param group
     *            The empty GRAPH group.
     * @param graphContext
     *            The graph context which is in force for that group.
     */
    private void rewriteEmptyGraphGroup(
            final IGroupNode<IGroupMemberNode> group,
            final TermNode graphContext) {

        if (graphContext.isVariable()) {

            // our approach is to wrap around a dummy graph pattern with
            // a distinct term scan annotation
            final StatementPatternNode sp = new StatementPatternNode(
                    VarNode.freshVarNode(),
                    VarNode.freshVarNode(),
                    VarNode.freshVarNode(),
                    graphContext, Scope.NAMED_CONTEXTS);

            sp.setDistinctTermScanVar((VarNode) graphContext);

            group.addChild(sp);

        } else if (graphContext.isConstant()) {

            final StatementPatternNode sp = new StatementPatternNode(
                    VarNode.freshVarNode(),
                    VarNode.freshVarNode(),
                    VarNode.freshVarNode(),
                    graphContext, Scope.NAMED_CONTEXTS);

            final SubqueryRoot subquery = new SubqueryRoot(QueryType.ASK);
            final ProjectionNode projection = new ProjectionNode();
            subquery.setProjection(projection);
            subquery.addArg(new JoinGroupNode(sp));

            /*
             * Add the ASK subquery (NOT the bare statement pattern, which is
             * already owned by the subquery's join group) so the group
             * carries the existence test built above.
             */
            group.addChild(subquery);

        }

    }

    /**
     * Verify the scope of a statement pattern which is bounded by a GRAPH
     * group and impose the graph context on it if it does not have one.
     * <p>
     * All statement patterns within a GRAPH {...} MUST have a constraint on
     * [c] and MUST specify NAMED_CONTEXTS as their scope.
     *
     * @param sp
     *            The statement pattern.
     * @param graphContext
     *            The graph context in force (non-<code>null</code>).
     *
     * @throws AssertionError
     *             if the statement pattern has no scope or has the default
     *             contexts scope.
     */
    private void imposeGraphContext(final StatementPatternNode sp,
            final TermNode graphContext) {

        final Scope scope = sp.getScope();

        if (scope == null) {
            // This is a required annotation.
            throw new AssertionError("No scope? " + sp);
        }

        switch (scope) {
        case NAMED_CONTEXTS:
            break;
        case DEFAULT_CONTEXTS:
            throw new AssertionError(
                    "Statement pattern bounded by GRAPH but has default context scope: "
                            + sp);
        }

        if (sp.c() == null) {

            /*
             * Impose the context if it is missing.
             *
             * TODO Should it be an error if this is not bound? Who really has
             * responsibility for attaching the [c] constraint? The code
             * generating the SP or this code?
             */
            sp.setArg(3/* c */, graphContext);

        }

    }

    /**
     * Assert that the contexts are the same URI.
     *
     * @param context
     *            The (constant) context of the outer GRAPH group.
     * @param innerContext
     *            The (constant) context of the inner GRAPH group.
     *
     * @throws InvalidGraphContextException
     *             if the values of the two contexts are not equal.
     */
    private void assertSameURI(final TermNode context,
            final TermNode innerContext) {

        // GRAPH uri1 { ... GRAPH uri2 {...} ... }

        if (!context.getValue().equals(innerContext.getValue())) {

            // uri1 != uri2: report both IRIs in the same (value) form.
            throw new InvalidGraphContextException("Conflicting GRAPH IRIs: "
                    + context.getValue() + " and " + innerContext.getValue());

        }

    }

    /**
     * Check the given URI against the named graphs of the data set.
     * <p>
     * Note: As written this method never rejects a URI. The membership test
     * is still evaluated, but the exception which reported a miss is
     * disabled. NOTE(review): presumably a <code>GRAPH uri</code> whose URI
     * is outside of the named graphs should simply produce no solutions
     * rather than a translation time error -- confirm.
     *
     * @param uri
     *            A URI.
     * @param dataSet
     *            The dataset (may be <code>null</code>, in which case nothing
     *            is checked).
     *
     * @throws IllegalArgumentException
     *             if a data set was given and the <code>uri</code> is
     *             <code>null</code>.
     */
    private void assertGraphInNamedDataset(final BigdataURI uri,
            final DatasetNode dataSet) {

        if (dataSet == null) {

            /*
             * The data set was not explicitly specified.
             */
            return;

        }

        if (uri == null)
            throw new IllegalArgumentException();

        final DataSetSummary namedGraphs = dataSet.getNamedGraphs();

        if (namedGraphs == null) {

            /*
             * No constraint on the named graphs (or just a filter, which will
             * get applied at runtime).
             */
            return;

        }

        // GRAPH uri
        if (!namedGraphs.getGraphs().contains(uri.getIV())) {

            // uri is not in the named graphs (deliberately not an error).
            // throw new RuntimeException("URI not in named graphs: " + uri);

        }

    }

}