/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Sep 10, 2011
*/
package com.bigdata.rdf.sparql.ast.optimizers;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.openrdf.model.URI;
import com.bigdata.bop.BOp;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IValueExpression;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.sparql.ast.ArbitraryLengthPathNode;
import com.bigdata.rdf.sparql.ast.AssignmentNode;
import com.bigdata.rdf.sparql.ast.BindingsClause;
import com.bigdata.rdf.sparql.ast.GraphPatternGroup;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.IJoinNode;
import com.bigdata.rdf.sparql.ast.JoinGroupNode;
import com.bigdata.rdf.sparql.ast.NamedSubqueryInclude;
import com.bigdata.rdf.sparql.ast.PropertyPathUnionNode;
import com.bigdata.rdf.sparql.ast.QueryHints;
import com.bigdata.rdf.sparql.ast.QueryType;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.StaticAnalysis;
import com.bigdata.rdf.sparql.ast.SubqueryRoot;
import com.bigdata.rdf.sparql.ast.ZeroLengthPathNode;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpUtility;
import com.bigdata.rdf.sparql.ast.service.ServiceFactory;
import com.bigdata.rdf.sparql.ast.service.ServiceNode;
import com.bigdata.rdf.sparql.ast.service.ServiceRegistry;
/**
 * This optimizer simply puts each type of {@link IGroupMemberNode} within a
 * {@link JoinGroupNode} in the right order with respect to the other types.
 * <p>
 * TODO TEST SUITE!
 * <p>
 * This optimizer is deprecated. It can be enabled using the query hint
 * {@link QueryHints#DEFAULT_OLD_JOIN_ORDER_OPTIMIZER}. The new optimizer
 * replacing this one is the {@link ASTJoinGroupOrderOptimizer}.
 *
 * @deprecated Replaced by {@link ASTJoinGroupOrderOptimizer}.
 */
@Deprecated
public class ASTJoinOrderByTypeOptimizer extends AbstractJoinGroupOptimizer
        implements IASTOptimizer {

    /**
     * Get the group member nodes into the right order. The children of the
     * join group are re-arranged in place; no child is added or removed. The
     * resulting order is:
     *
     * <pre>
     *  1. VALUES clauses ({@link BindingsClause})
     *  2. Assignments with a constant
     *  3. Pre-filters
     *  4. In-filters
     *
     * Required joins:
     *
     *  5. Service calls with a constant service URI whose registered
     *     factory is flagged "run first" (e.g. Bigdata SEARCH)
     *  6. Named subquery includes
     *  7. Non-optional statement patterns and property path nodes
     *  8. Join filters
     *  9. SPARQL 1.1 subqueries (other than ASK subqueries)
     * 10. Non-optional subgroups
     * 11. Other service calls (constant service URI first, then variable
     *     service reference)
     * 12. ASK subqueries (FILTER EXISTS, FILTER NOT EXISTS)
     *
     * TODO: the placement of OPTIONALS should really be more complicated than this.
     *       e.g. consider interaction with SERVICE calls etc.
     * Optional joins:
     *
     * 13. Simple optionals and optional subgroups
     *
     * 14. Remaining assignments
     * 15. Post-conditionals
     * </pre>
     *
     * Most of this logic was lifted out of {@link AST2BOpUtility}.
     * <p>
     * Note: Join filters are now attached to {@link IJoinNode}s.
     *
     * @param ctx
     *            The evaluation context; consulted to decide whether the
     *            static optimizer applies to this group.
     * @param sa
     *            Static analysis helper which supplies the pre-, join- and
     *            post-filters of the group.
     * @param bSets
     *            The input binding sets (not used by this implementation).
     * @param joinGroup
     *            The join group whose children are reordered in place.
     *
     * @throws AssertionError
     *             if the number of ordered members differs from the arity of
     *             the join group.
     */
    @Override
    protected void optimizeJoinGroup(final AST2BOpContext ctx,
            final StaticAnalysis sa, final IBindingSet[] bSets,
            final JoinGroupNode joinGroup) {

        // Leave the group untouched unless the static optimizer applies.
        if (!ASTStaticJoinOptimizer.isStaticOptimizer(ctx, joinGroup)) {
            return;
        }

        // The group members in their new evaluation order.
        final List<IGroupMemberNode> ordered = new LinkedList<IGroupMemberNode>();

        /*
         * Working lists. Entries are removed from these as they are
         * scheduled, so whatever is left over gets scheduled later on.
         */
        final List<AssignmentNode> assignments = joinGroup.getAssignments();

        final List<ServiceNode> serviceNodes = joinGroup.getServiceNodes();

        // ASK subqueries are collected here and run after the required joins.
        final List<SubqueryRoot> askSubqueries = new LinkedList<SubqueryRoot>();

        // VALUES clauses come first.
        for (final BindingsClause values : joinGroup
                .getChildren(BindingsClause.class)) {

            ordered.add(values);

        }

        /*
         * Assignments for a constant.
         *
         * Note: This supports query engines which use BIND() to convey
         * a binding into a remote SPARQL end point (openrdf does this).
         * For example, see their service09 test.
         */
        {

            final Iterator<AssignmentNode> aitr = assignments.iterator();

            while (aitr.hasNext()) {

                final AssignmentNode n = aitr.next();

                @SuppressWarnings("rawtypes")
                final IValueExpression<? extends IV> valExpr = n
                        .getValueExpression();

                if (valExpr instanceof IConstant) {

                    ordered.add(n);

                    // Scheduled: do not add it again with the other
                    // assignments below.
                    aitr.remove();

                }

            }

        }

        /*
         * Add the pre-conditionals to the pipeline.
         *
         * TODO These filters should be lifted into the parent group (by a
         * rewrite rule) so we can avoid starting a subquery only to have it
         * failed by a filter. We will do less work if we fail the solution in
         * the parent group.
         */
        for (final IGroupMemberNode n : sa.getPreFilters(joinGroup)) {

            ordered.add(n);

        }

        /*
         * FIXME We need to move away from the DataSetJoin class and replace it
         * with an IPredicate to which we have attached an inline access path.
         * That transformation needs to happen in a rewrite rule, which means
         * that we will wind up removing the IN filter and replacing it with an
         * AST node for that inline AP (something conceptually similar to a
         * statement pattern but for a column projection of the variable for the
         * IN expression). That way we do not have to magically "subtract" the
         * known "IN" filters out of the join- and post- filters.
         *
         * @see https://sourceforge.net/apps/trac/bigdata/ticket/233 (Replace
         * DataSetJoin with an "inline" access path.)
         *
         * @see JoinGroupNode#getInFilters()
         */
        for (final IGroupMemberNode n : joinGroup.getInFilters()) {

            ordered.add(n);

        }

        /*
         * Required joins and non-optional subqueries.
         *
         * Note: SPARQL 1.1 style subqueries are currently always pipelined.
         * Like named subquery includes, they are also never optional. However,
         * there is no a-priori reason why we should run pipelined subqueries
         * before named subquery includes and, really, no reason why we can not
         * mix these with the required joins (above). I believe that this is
         * being done solely for expediency (because the static query optimizer
         * can not handle it).
         *
         * Also, note that named subquery includes are hash joins. We have an
         * index. If the operator supported cutoff evaluation then we could
         * easily reorder them with the other required joins using the RTO.
         *
         * Ditto for pipelined SPARQL 1.1 subquery. If it supported cutoff
         * evaluation, then the RTO could reorder them with the required joins.
         * This is even true when the subquery uses GROUP BY or ORDER BY, which
         * imply the use of at once operators. While we must fully materialize
         * the solutions for each evaluation of the subquery, the evaluation is
         * based on the as-bound solutions flowing into the subquery. If the
         * subquery is unselective, then clearly this will be painful and it
         * might be better to lift such unselective subqueries into named
         * subqueries in order to obtain a hash index over the entire subquery
         * solution set when evaluated with an empty source binding set.
         *
         * Note: This logic was originally constructed before we had required
         * joins other than on a statement pattern. This shaped how the FILTERs
         * were attached and how the materialization pipeline was generated in
         * order to have materialized RDF Values on hand for those FILTERs.
         *
         * We now have several kinds of required joins: pipelined statement
         * pattern joins, SPARQL 1.1 subquery, named subquery include, subquery
         * hash joins (when the subquery is optional), service call joins, etc.
         *
         * FIXME The code currently only handles the FILTER attachment and
         * materialization pipeline for the required statement pattern joins.
         * However, for efficiency, FILTERs MUST be attached to these joins as
         * appropriate for ALL CASES and variables MUST be materialized as
         * required for those filters to run.
         *
         * FIXME All of these joins can be reordered by either static analysis
         * of cardinality (which has not been extended to handle this yet) or by
         * the RTO. The filter attachment decisions (and the materialization
         * pipeline generation) needs to be deferred until we actually evaluate
         * the join graph (at least for the RTO).
         */
        { // begin required joins.

            /*
             * Run some service calls first (or as early as possible): those
             * with a constant service URI whose registered ServiceFactory
             * reports isRunFirst(). All other service calls stay in
             * [serviceNodes] and are scheduled at the end of the required
             * joins.
             */
            {

                final Iterator<ServiceNode> sitr = serviceNodes.iterator();

                while (sitr.hasNext()) {

                    final ServiceNode n = sitr.next();

                    if (n.getServiceRef().isConstant()) {

                        final URI serviceURI = (URI) n.getServiceRef()
                                .getValue();

                        final ServiceFactory f = ServiceRegistry.getInstance()
                                .get(serviceURI);

                        // A null factory leaves the service call in
                        // [serviceNodes] to be scheduled later.
                        if (f != null) {

                            /*
                             * Queue the service in the beginning when its
                             * service options say so. Note that the query
                             * hint can be used to override the service
                             * defaults.
                             */
                            if (f.getServiceOptions().isRunFirst()) {

                                ordered.add(n);

                                sitr.remove();

                            }

                        }

                    }

                }

            }

            /*
             * Add joins against named solution sets from WITH AS INCLUDE style
             * subqueries.
             *
             * Note: This corresponds to a very common use case where the named
             * subquery is used to constrain the remainder of the join group.
             * An include for which there are NO join variables will be a cross
             * product so we want to run it as early as possible.
             *
             * Note: If there ARE join variables then the named subquery include
             * MUST NOT be run until after the join variables have been bound.
             * Failure to observe this rule will cause the unbound variable to
             * be included when computing the hash code of a solution and the
             * join will not produce the correct solutions. [If it is desired to
             * always run named subqueries first then you need to make sure that
             * the join variables array is empty for the INCLUDE.]
             *
             * NOTE(review): the loop below schedules EVERY named subquery
             * include here; it does not inspect the join variables.
             */
            for (final IGroupMemberNode child : joinGroup) {

                if (child instanceof NamedSubqueryInclude) {

                    ordered.add(child);

                }

            }

            /*
             * Add required statement pattern joins and property path stuff.
             *
             * Note: Optional statement patterns are skipped here and are
             * scheduled with the optional joins below. This loop only
             * decides the ordering.
             */
            for (final IGroupMemberNode child : joinGroup) {

                if (child instanceof StatementPatternNode) {

                    final StatementPatternNode sp = (StatementPatternNode) child;

                    if (!sp.isOptional()) {

                        ordered.add(child);

                    }

                } else if (child instanceof ArbitraryLengthPathNode
                        || child instanceof ZeroLengthPathNode
                        || child instanceof PropertyPathUnionNode) {

                    ordered.add(child);

                }

            }

            /*
             * TODO Why is this here?!? It should either be empty or run
             * after the last required join, right?
             */
            for (final IGroupMemberNode n : sa.getJoinFilters(joinGroup)) {

                ordered.add(n);

            }

            /*
             * Add SPARQL 1.1 style subqueries which were not lifted out into
             * named subqueries.
             */
            for (final IGroupMemberNode child : joinGroup) {

                if (child instanceof SubqueryRoot) {

                    final SubqueryRoot subquery = (SubqueryRoot) child;

                    if (subquery.getQueryType() == QueryType.ASK) {

                        /*
                         * ASK subqueries are used for FILTER EXISTS and FILTER
                         * NOT EXISTS. They can not be run before the required
                         * join groups, so they are set aside for now.
                         *
                         * @see <a
                         * href="https://sourceforge.net/apps/trac/bigdata/ticket/515">
                         * Query with two "FILTER NOT EXISTS" expressions
                         * returns no results</a>
                         */
                        askSubqueries.add(subquery);

                        continue;

                    }

                    ordered.add(child);

                }

            }

            /*
             * Do the non-optional sub-groups (Join groups and UNION).
             */
            for (final IGroupMemberNode child : joinGroup) {

                if (!(child instanceof GraphPatternGroup<?>)) {
                    continue;
                }

                final GraphPatternGroup<?> subgroup = (GraphPatternGroup<?>) child;

                if (subgroup.isOptional()) {
                    continue;
                }

                // Already scheduled together with the statement patterns.
                if (subgroup instanceof PropertyPathUnionNode) {
                    continue;
                }

                ordered.add(subgroup);

            }

            /*
             * Run services which have constant URIs next.
             *
             * TODO These could be ordered by the #of unbound variables
             * or some such. Simple triple patterns for which we can use
             * ESTCARD could be ordered more precisely.
             */
            {

                final Iterator<ServiceNode> sitr = serviceNodes.iterator();

                while (sitr.hasNext()) {

                    final ServiceNode n = sitr.next();

                    if (!n.getServiceRef().isConstant()) {
                        continue;
                    }

                    sitr.remove();

                    ordered.add(n);

                }

            }

            /*
             * Run remaining service calls (those with a variable service
             * ref; everything else has been removed from the list above).
             */
            for (final ServiceNode n : serviceNodes) {

                ordered.add(n);

            }

        } // end of required joins.

        /*
         * Run the ASK subqueries (FILTER EXISTS, FILTER NOT EXISTS).
         *
         * TODO This should be (I think) a permissible placement for the ASK
         * subqueries. However, we might still run into problems if FILTER (NOT)
         * EXISTS is run for a variable which is only bound by an OPTIONAL.
         *
         * TODO There could also be a problem with the ordering of MINUS. Both
         * FILTER (NOT) EXISTS and MINUS need further inspection of the
         * constraints on when they may be evaluated, both in terms of
         * efficiency and correctness. I believe that the correct constraint for
         * FILTER (NOT) EXISTS is simply that for FILTER attachment: That is
         * (a) for variables bound by required joins, no sooner than their
         * filter variables are known to be bound; and (b) for variables only
         * bound by OPTIONALS, not until after the last point at which they
         * MIGHT be bound.
         *
         * Note: While the change for ticket 515 fixes that query, it
         * is possible that we still could get bad join orderings when the
         * variables used by the filter are only bound by OPTIONAL joins. It is
         * also possible that we could run the ASK subquery for FILTER (NOT)
         * EXISTS earlier if the filter variables are bound by required joins.
         * This is really identical to the join filter attachment problem. The
         * problem in the AST is that both the ASK subquery and the FILTER are
         * present. It seems that the best solution would be to attach the ASK
         * subquery to the FILTER and then to run it immediately before the
         * FILTER, letting the existing filter attachment logic decide where to
         * place the filter. We would also have to make sure that the FILTER was
         * never attached to a JOIN since the ASK subquery would have to be run
         * before the FILTER was evaluated.
         *
         * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/515">
         * Query with two "FILTER NOT EXISTS" expressions returns no
         * results</a>
         */
        for (final SubqueryRoot askSubquery : askSubqueries) {

            ordered.add(askSubquery);

        }

        /*
         * Next do the optional joins: optional statement patterns and
         * optional sub-groups, in the order in which they appear in the group.
         */
        for (final IGroupMemberNode child : joinGroup) {

            if (child instanceof StatementPatternNode) {

                final StatementPatternNode sp = (StatementPatternNode) child;

                if (sp.isOptional()) {

                    /*
                     * ASTSimpleOptionalOptimizer will recognize and lift out
                     * simple optionals into the parent join group. A simple
                     * optional is basically a single statement pattern in an
                     * optional join group. If there were any FILTERs in the
                     * simple optional join group, then they were lifted out as
                     * well and attached to this StatementPatternNode. Such
                     * FILTER(s) MUST NOT have materialization requirements for
                     * variables which were not already bound before the
                     * optional JOIN on this statement pattern.
                     */
                    ordered.add(sp);

                }

            }

            if (!(child instanceof GraphPatternGroup<?>)) {
                continue;
            }

            final GraphPatternGroup<?> subgroup = (GraphPatternGroup<?>) child;

            if (!subgroup.isOptional()) {
                continue;
            }

            // Already scheduled together with the statement patterns.
            if (subgroup instanceof PropertyPathUnionNode) {
                continue;
            }

            ordered.add(subgroup);

        }

        /*
         * Add the LET assignments to the pipeline (the constant assignments
         * were already removed from this list above).
         */
        for (final AssignmentNode n : assignments) {

            ordered.add(n);

        }

        /*
         * Add the post-conditionals to the pipeline.
         */
        for (final IGroupMemberNode n : sa.getPostFilters(joinGroup)) {

            ordered.add(n);

        }

        // Sanity check: every child must have been scheduled exactly once.
        final int arity = joinGroup.arity();

        if (ordered.size() != arity) {

            throw new AssertionError("should not be pruning any children");

        }

        /*
         * Replace the children with those in the [ordered] list.
         *
         * Note: [ordered] is a LinkedList, so it is walked with an iterator
         * and a running index rather than with get(i), which would make this
         * copy quadratic in the size of the group.
         */
        int index = 0;

        for (final IGroupMemberNode member : ordered) {

            joinGroup.setArg(index++, (BOp) member);

        }

    }

}