/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Sep 10, 2011
*/
package com.bigdata.rdf.sparql.ast.optimizers;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.joinGraph.fast.DefaultEvaluationPlan2;
import com.bigdata.rdf.sparql.ast.GraphPatternGroup;
import com.bigdata.rdf.sparql.ast.IBindingProducerNode;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.IJoinNode;
import com.bigdata.rdf.sparql.ast.IQueryNode;
import com.bigdata.rdf.sparql.ast.IReorderableNode;
import com.bigdata.rdf.sparql.ast.JoinGroupNode;
import com.bigdata.rdf.sparql.ast.NamedSubqueriesNode;
import com.bigdata.rdf.sparql.ast.NamedSubqueryRoot;
import com.bigdata.rdf.sparql.ast.QueryBase;
import com.bigdata.rdf.sparql.ast.QueryHints;
import com.bigdata.rdf.sparql.ast.QueryNodeWithBindingSet;
import com.bigdata.rdf.sparql.ast.QueryOptimizerEnum;
import com.bigdata.rdf.sparql.ast.QueryRoot;
import com.bigdata.rdf.sparql.ast.SubqueryRoot;
import com.bigdata.rdf.sparql.ast.UnionNode;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpBase;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
import com.bigdata.rdf.sparql.ast.eval.IEvaluationContext;
/**
* This is an AST optimizer port of the old "static" optimizer -
* {@link DefaultEvaluationPlan2}. This optimizer uses range counts and simple
* shared variable heuristics to order the statement patterns within a particular
* join group. This optimizer extends the old static optimizer in that child
* join groups consider the ordering of statement patterns in their parent and
* ancestors when deciding their own order.
* <p>
* We want to optimize all {@link JoinGroupNode}s recursively, from the top down.
* This is because the join group needs to take into account its ancestral join
* ordering when deciding its own join ordering. A statement pattern with a
* shared variable with the ancestral groups should be preferred over one with
* no shared variables.
*/
public class ASTStaticJoinOptimizer implements IASTOptimizer {
/** Logger used by this optimizer and by its nested group optimizer classes. */
public static final Logger log = Logger
.getLogger(ASTStaticJoinOptimizer.class);
/**
* Annotations recognized by the {@link ASTStaticJoinOptimizer}, in addition
* to those inherited from {@link AST2BOpBase.Annotations}.
*/
public interface Annotations extends AST2BOpBase.Annotations {
/**
* The value of this query hint determines how optimistic the optimizer
* will be in selecting the join cardinality for its joins. Basically
* when there is a join that has both shared and unshared variables, the
* join cardinality will be somewhere in between the cardinality of the
* two sides (range count for a statement pattern, previous join
* cardinality for a predecessor join). The default value is
* <code>1.0</code>, which is the historical behavior.
* <p>
* For a value of <code>0.67</code> the optimizer takes a mostly
* optimistic view - the join cardinality will be <code>0.67
* * the MIN + 0.33 * the MAX</code>. This setting will eliminate some
* of the worst possible outcomes (ones where we guess wrong and get a
* very bad join order as a result).
* <p>
* BSBM BI Q1 is a good example of a query that benefits from the
* pessimistic approach, and LUBM Q2 is a good example of a query that
* benefits from the optimistic approach.
* <p>
* The value is read from the join group being optimized (falling back to
* {@link #DEFAULT_OPTIMISTIC}) and handed to the static optimizer.
*/
String OPTIMISTIC = ASTStaticJoinOptimizer.class.getName()+".optimistic";
/**
* The default for {@link #OPTIMISTIC}, used when the join group does not
* carry that annotation.
*/
Double DEFAULT_OPTIMISTIC = 1.0d;
}
/**
 * Pick the exogenous solution that the optimizer will take into account.
 * <p>
 * Only the first of the given solutions is used. Using more than one
 * solution to estimate range counts would require summing across all
 * exogenous solutions and then somehow combining that information into a
 * decision on a single query plan which is "best" overall.
 * <p>
 * The simplifying assumption is that every solution has the same pattern of
 * bindings. That is not true of necessity, but it holds (for example) when
 * the BINDINGS come from the openrdf API (just one exogenous solution) or
 * when they are sent with a SERVICE call and were generated by some pattern
 * of non-optional JOINs.
 * <p>
 * This can get things wrong if there are variables which are only bound in
 * some of the solutions. The RTO is insensitive to that because it feeds all
 * source solutions into the first cutoff joins and thus captures the
 * estimated costs for the data, the query, and the source bindings.
 *
 * @param bindingSets
 *            The given solutions (optional).
 *
 * @return The first solution -or- <code>null</code> if the argument is
 *         <code>null</code> or empty.
 *
 * @see https://sourceforge.net/apps/trac/bigdata/ticket/412
 *      (StaticAnalysis#getDefinitelyBound() ignores exogenous variables.)
 */
static private IBindingSet getExogenousBindings(
        final IBindingSet[] bindingSets) {

    final boolean haveSolutions = bindingSets != null
            && bindingSets.length > 0;

    return haveSolutions ? bindingSets[0] : null;
}
/**
 * Apply the static join ordering to a query.
 * <p>
 * Only a {@link QueryRoot} is processed; any other kind of query node is
 * returned unchanged. For a query root, the WHERE clause of each named
 * subquery is optimized first, followed by the main WHERE clause. Each of
 * those groups starts with an empty ancestry. The join groups are modified
 * in place, so the returned wrapper carries the same query node and binding
 * sets that were passed in.
 *
 * @param context
 *            The evaluation context, passed through to the group
 *            optimizers.
 * @param input
 *            The query node together with its exogenous solutions.
 *
 * @return The query node paired with the (unchanged) binding sets.
 */
@Override
public QueryNodeWithBindingSet optimize(
        final AST2BOpContext context, final QueryNodeWithBindingSet input) {

    final IQueryNode queryNode = input.getQueryNode();
    final IBindingSet[] bindingSets = input.getBindingSets();

    // Nothing to do unless we were handed the root of a query.
    if (!(queryNode instanceof QueryRoot)) {
        return new QueryNodeWithBindingSet(queryNode, bindingSets);
    }

    if (log.isDebugEnabled()) {
        log.debug("before:\n"+queryNode);
    }

    final QueryRoot queryRoot = (QueryRoot) queryNode;
    final IBindingSet exogenousBindings = getExogenousBindings(bindingSets);

    // Named subqueries.
    final NamedSubqueriesNode namedSubqueries = queryRoot
            .getNamedSubqueries();
    if (namedSubqueries != null) {
        /*
         * NOTE(review): the list is visited with its iterator; this
         * presumes that optimizing a WHERE clause does not add or remove
         * named subqueries - confirm.
         */
        for (NamedSubqueryRoot namedSubquery : namedSubqueries) {
            @SuppressWarnings("unchecked")
            final GraphPatternGroup<IGroupMemberNode> subqueryWhere =
                    (GraphPatternGroup<IGroupMemberNode>) namedSubquery.getWhereClause();
            if (subqueryWhere == null) {
                continue;
            }
            optimize(context, exogenousBindings, queryRoot,
                    new IJoinNode[0], subqueryWhere);
        }
    }

    // Main WHERE clause.
    @SuppressWarnings("unchecked")
    final GraphPatternGroup<IGroupMemberNode> mainWhere =
            (GraphPatternGroup<IGroupMemberNode>) queryRoot.getWhereClause();
    if (mainWhere != null) {
        optimize(context, exogenousBindings, queryRoot,
                new IJoinNode[0], mainWhere);
    }

    if (log.isDebugEnabled()) {
        log.debug("after:\n"+queryNode);
    }

    return new QueryNodeWithBindingSet(queryNode, bindingSets);
}
/**
 * Return <code>true</code> if the static join optimizer is enabled for the
 * given join group.
 * <p>
 * The decision is based solely on the optimizer reported by the join group
 * itself.
 *
 * @param context
 *            The evaluation context (not consulted by this method).
 * @param joinGroup
 *            The join group whose optimizer setting is inspected.
 *
 * @return <code>true</code> iff the join group reports
 *         {@link QueryOptimizerEnum#Static}.
 */
static boolean isStaticOptimizer(final IEvaluationContext context,
        final JoinGroupNode joinGroup) {

    final QueryOptimizerEnum selected = joinGroup.getQueryOptimizer();

    return QueryOptimizerEnum.Static.equals(selected);
}
/**
 * Template for optimizing one graph pattern group: the group itself is
 * handled first ({@link #optimizeThisLevel()}) and then each child group or
 * subquery is optimized recursively. Subclasses decide what happens at this
 * level and how the ancestry evolves while the children are visited.
 *
 * @param <T>
 *            The kind of group handled by the concrete optimizer.
 */
abstract private class GroupNodeOptimizer<T extends GraphPatternGroup<?>> {

    /** The group being optimized. */
    final T op;

    /** The evaluation context. */
    final AST2BOpContext ctx;

    /** The exogenous bindings -or- <code>null</code> if there are none. */
    private final IBindingSet exogenousBindings;

    /** The root of the query in which the group appears. */
    final QueryRoot queryRoot;

    /**
     * @param ctx
     *            The evaluation context.
     * @param exogenousBindings
     *            The exogenous bindings -or- <code>null</code>.
     * @param queryRoot
     *            The root of the query.
     * @param ancestry
     *            The ancestry of the group. It is not stored by this base
     *            class; each subclass keeps its own representation and
     *            exposes it through {@link #getAncestry()}.
     * @param op
     *            The group to optimize.
     */
    public GroupNodeOptimizer(AST2BOpContext ctx,
            IBindingSet exogenousBindings, QueryRoot queryRoot,
            IBindingProducerNode[] ancestry, T op) {
        this.op = op;
        this.ctx = ctx;
        this.exogenousBindings = exogenousBindings;
        this.queryRoot = queryRoot;
    }

    /**
     * Optimize this group and then everything below it (top down).
     */
    public void optimizex() {
        optimizeThisLevel();
        optimizeRecursively();
    }

    /**
     * Recursion, but only into group nodes (including within subqueries).
     * {@link #afterOptimizingChild(BOp)} is invoked for every child, whether
     * or not we recursed into it.
     */
    private void optimizeRecursively() {
        for (int i = 0; i < op.arity(); i++) {
            final BOp child = op.get(i);
            if (child instanceof GraphPatternGroup<?>) {
                @SuppressWarnings("unchecked")
                final GraphPatternGroup<IGroupMemberNode> childGroup =
                        (GraphPatternGroup<IGroupMemberNode>) child;
                optimize(ctx, exogenousBindings, queryRoot, getAncestry(),
                        childGroup);
            } else if (child instanceof QueryBase) {
                optimizeSubquery((QueryBase) child);
            }
            afterOptimizingChild(child);
        }
    }

    /**
     * Optimize the WHERE clause of a subquery appearing in this group.
     */
    private void optimizeSubquery(final QueryBase subquery) {

        @SuppressWarnings("unchecked")
        final GraphPatternGroup<IGroupMemberNode> childGroup =
                (GraphPatternGroup<IGroupMemberNode>) subquery.getWhereClause();

        if (childGroup == null) {
            /*
             * Nothing to optimize. This mirrors the null check applied to
             * the top-level WHERE clauses; without it the recursion would
             * fail with a misleading "unexpected subclass" error.
             */
            return;
        }

        /*
         * Only the projected variables are in scope in the subquery.
         */
        final Set<IVariable<?>> projectedVars = subquery
                .getProjectedVars(new LinkedHashSet<IVariable<?>>());

        final IVariable<?>[] variablesToKeep = BOpUtility
                .toArray(projectedVars.iterator());

        final IBindingSet tmp = exogenousBindings == null ? null
                : exogenousBindings.copy(variablesToKeep);

        /**
         * See https://jira.blazegraph.com/browse/BLZG-1817:
         *
         * In the normal case, we pass in the current ancestry. There is one
         * exception to this rule though: whenever we're recursing into complex
         * subquery roots that will be translated into named subquery includes,
         * there is no ancestry at all (named subquery includes will be evaluated
         * independently, bottom up). This is, for instance, the case for subqueries
         * with aggregation or slices.
         */
        final boolean voidAncestry =
                subquery instanceof SubqueryRoot &&
                ASTSparql11SubqueryOptimizer.needsLifting((SubqueryRoot) subquery);

        final IBindingProducerNode[] ancestry =
                voidAncestry ? new IBindingProducerNode[0] : getAncestry();

        optimize(ctx, tmp, queryRoot, ancestry, childGroup);
    }

    /**
     * Hook invoked after each child of the group has been visited.
     *
     * @param child
     *            The child that was just visited.
     */
    abstract void afterOptimizingChild(BOp child);

    /**
     * The ancestry to pass down when recursing into the next child.
     */
    abstract IBindingProducerNode[] getAncestry();

    /**
     * Optimize the group itself (not its children).
     */
    abstract void optimizeThisLevel() ;
}
/**
 * Optimizer for a {@link JoinGroupNode}. The join group is reordered when the
 * static optimizer is enabled for it, and the ancestry grows as the children
 * of the group are visited.
 */
private class JoinGroupNodeOptimizer extends GroupNodeOptimizer<JoinGroupNode> {

    /**
     * The ancestry that will be handed to the next child of this group
     * (mutable).
     */
    final List<IBindingProducerNode> ancestry;

    public JoinGroupNodeOptimizer(AST2BOpContext ctx,
            IBindingSet exogenousBindings, QueryRoot queryRoot,
            IBindingProducerNode[] ancestry, JoinGroupNode joinGroup) {

        super(ctx, exogenousBindings, queryRoot, ancestry, joinGroup);

        // Start from a private copy of the inherited ancestry.
        this.ancestry = new LinkedList<IBindingProducerNode>();
        this.ancestry.addAll(Arrays.asList(ancestry));

        /*
         * Service calls and named subquery includes are added up front,
         * since they will get run before the statement pattern nodes.
         */
        // TODO: imho, both assumptions are not valid anymore with the given refactoring
        // -> we should compute the ancestry the same way we do in ASTJoinGroupOrderOptimizer
        addToAncestry(joinGroup.getServiceNodes(), "service node");
        addToAncestry(joinGroup.getNamedSubqueryIncludes(), "named subquery include");
    }

    /**
     * Append the given nodes to the ancestry, logging each one at DEBUG.
     *
     * @param moreAncestors
     *            The nodes to append.
     * @param dbgMessage
     *            A label for the kind of node (used in the log message only).
     */
    private void addToAncestry(List<? extends IBindingProducerNode> moreAncestors, String dbgMessage) {
        for (IBindingProducerNode node : moreAncestors) {
            if (log.isDebugEnabled()) {
                log.debug("adding a "+dbgMessage+" to ancestry:" +node);
            }
            ancestry.add(node);
        }
    }

    /**
     * Update the ancestry for recursion: a binding producer is appended
     * unless it is a join node that is optional or a MINUS, i.e. only the
     * nodes that we can count on to bind their variables are added.
     */
    @Override
    void afterOptimizingChild(BOp child) {

        if (!(child instanceof IBindingProducerNode)) {
            return;
        }

        if (child instanceof IJoinNode) {
            final IJoinNode join = (IJoinNode) child;
            final boolean bindsReliably = !join.isOptional() && !join.isMinus();
            if (!bindsReliably) {
                return;
            }
        }

        ancestry.add((IBindingProducerNode) child);
    }

    /**
     * A snapshot of the current ancestry as an array.
     */
    @Override
    IBindingProducerNode[] getAncestry() {
        final IBindingProducerNode[] snapshot =
                new IBindingProducerNode[ancestry.size()];
        return ancestry.toArray(snapshot);
    }

    /**
     * Reorder this join group, but only if the static optimizer is enabled
     * for it.
     */
    @Override
    void optimizeThisLevel() {
        if (!isStaticOptimizer(ctx, op)) {
            return;
        }
        optimizeJoinGroup(ctx, queryRoot, getAncestry(), op);
    }
}
/**
 * Optimizer for a {@link UnionNode}. The union itself is never reordered and
 * its ancestry is fixed; the only work done is the recursion into its
 * children, each of which receives the ancestry given to the union.
 */
private class UnionNodeOptimizer extends GroupNodeOptimizer<UnionNode> {

    /** The ancestry given to the union, passed unchanged to every child. */
    final IBindingProducerNode[] ancestry;

    public UnionNodeOptimizer(AST2BOpContext ctx,
            IBindingSet exogenousBindings, QueryRoot queryRoot,
            IBindingProducerNode[] ancestry, UnionNode op) {

        super(ctx, exogenousBindings, queryRoot, ancestry, op);

        this.ancestry = ancestry;
    }

    /** Visiting a child of the union does not change the ancestry. */
    @Override
    void afterOptimizingChild(BOp child) {
        // NOP
    }

    /** Always the ancestry that was supplied to the constructor. */
    @Override
    IBindingProducerNode[] getAncestry() {
        return this.ancestry;
    }

    /** The union node itself is left as it is. */
    @Override
    void optimizeThisLevel() {
        // NOP
    }
}
/**
 * Factory for the optimizer matching the concrete type of the group.
 *
 * @param ctx
 *            The evaluation context.
 * @param exogenousBindings
 *            The exogenous bindings -or- <code>null</code>.
 * @param queryRoot
 *            The root of the query.
 * @param ancestry
 *            The ancestry of the group.
 * @param op
 *            The group to be optimized.
 *
 * @return The optimizer for that group.
 *
 * @throws IllegalArgumentException
 *             if the group is neither a {@link JoinGroupNode} nor a
 *             {@link UnionNode}.
 */
private GroupNodeOptimizer<?> createGroupNodeOptimizer(final AST2BOpContext ctx,
        final IBindingSet exogenousBindings, final QueryRoot queryRoot,
        IBindingProducerNode[] ancestry, final GraphPatternGroup<?> op) {

    if (op instanceof JoinGroupNode) {
        final JoinGroupNode joinGroup = (JoinGroupNode) op;
        return new JoinGroupNodeOptimizer(ctx, exogenousBindings, queryRoot,
                ancestry, joinGroup);
    }

    if (op instanceof UnionNode) {
        final UnionNode union = (UnionNode) op;
        return new UnionNodeOptimizer(ctx, exogenousBindings, queryRoot,
                ancestry, union);
    }

    throw new IllegalArgumentException("Unexpected subclass of GraphPatternGroup");
}
/**
 * Optimize a graph pattern group and, recursively, the groups and subqueries
 * nested within it.
 *
 * @param ctx
 *            The evaluation context.
 * @param exogenousBindings
 *            The exogenous bindings -or- <code>null</code> iff there are
 *            none.
 * @param queryRoot
 *            The root of the query in which the group appears.
 * @param ancestry
 *            The nodes that are known to have already bound their
 *            variables.
 * @param op
 *            The group to optimize.
 */
private void optimize(final AST2BOpContext ctx,
        final IBindingSet exogenousBindings, final QueryRoot queryRoot,
        IBindingProducerNode[] ancestry, final GraphPatternGroup<?> op) {

    final GroupNodeOptimizer<?> groupOptimizer = createGroupNodeOptimizer(
            ctx, exogenousBindings, queryRoot, ancestry, op);

    groupOptimizer.optimizex();
}
/**
 * Reorder the reorderable children of a join group in place.
 * <p>
 * The positions ("slots") currently occupied by the reorderable children are
 * recorded and then refilled with the same nodes in the order computed by
 * the {@link StaticOptimizer}. The first reorderable child carrying the
 * {@link QueryHints#RUN_LAST} hint (if any) is excluded from the ordering and
 * placed into the last slot. Nothing happens when the group has no
 * reorderable children.
 *
 * @param ctx
 *            The evaluation context.
 * @param queryRoot
 *            The root of the query.
 * @param ancestry
 *            The nodes that are known to have already bound their
 *            variables.
 * @param joinGroup
 *            The join group to reorder.
 */
private void optimizeJoinGroup(final AST2BOpContext ctx,
        final QueryRoot queryRoot, IBindingProducerNode[] ancestry,
        final JoinGroupNode joinGroup) {

    /*
     * Let the optimizer handle the simple optionals too.
     */
    final List<IReorderableNode> nodes = joinGroup.getReorderableChildren();

    if (nodes.isEmpty()) {
        return;
    }

    /*
     * Find the "slots" where the reorderable nodes currently show up in
     * the join group. We will later fill in these slots with the same
     * nodes, but in a different (optimized) ordering.
     */
    final int[] slots = new int[nodes.size()];
    {
        int j = 0;
        for (int i = 0; i < joinGroup.arity() && j < nodes.size(); i++) {
            if (joinGroup.get(i) == nodes.get(j)) {
                slots[j++] = i;
            }
        }
    }

    final double optimistic = joinGroup.getProperty(
            Annotations.OPTIMISTIC,
            Annotations.DEFAULT_OPTIMISTIC);

    /*
     * Separate the (first) node marked to run last from the nodes whose
     * order is to be computed.
     */
    final List<IReorderableNode> required =
            new LinkedList<IReorderableNode>();

    IReorderableNode runLast = null;

    for (IReorderableNode sp : nodes) {
        if (runLast == null && sp.getProperty(QueryHints.RUN_LAST, false)) {
            runLast = sp;
        } else {
            required.add(sp);
        }
    }

    /*
     * Calculate the optimized join ordering for the required tails.
     */
    final StaticOptimizer opt = new StaticOptimizer(queryRoot,
            ctx, ancestry, required, optimistic);

    final int[] order = opt.getOrder();

    /*
     * Snapshot the required nodes into an array so that the permuted
     * lookups below are constant time; indexed access on the linked list
     * would make the reordering quadratic in the number of nodes.
     */
    final IReorderableNode[] requiredNodes = required
            .toArray(new IReorderableNode[required.size()]);

    /*
     * Reorder the statement pattern nodes within the join group.
     */
    int next = 0;
    for (int j = 0; j < requiredNodes.length; j++) {
        joinGroup.setArg(slots[next++], requiredNodes[order[j]]);
    }

    if (runLast != null) {
        joinGroup.setArg(slots[next++], runLast);
    }
}
// /**
// * Use the SPORelation from the database to grab the appropriate range
// * counts for the {@link StatementPatternNode}s. Only tries to attach them
// * if the annotation {@link Annotations#ESTIMATED_CARDINALITY} is not
// * already attached to the node. This makes it possible to write unit
// * tests without real data.
// */
// private final void attachRangeCounts(final AST2BOpContext ctx,
// final List<StatementPatternNode> spNodes,
// final IBindingSet exogenousBindings) {
//
// final AbstractTripleStore db = ctx.getAbstractTripleStore();
//
// for (StatementPatternNode sp : spNodes) {
//
// if (sp.getProperty(Annotations.ESTIMATED_CARDINALITY) == null) {
//
// final IV<?, ?> s = getIV(sp.s(), exogenousBindings);
// final IV<?, ?> p = getIV(sp.p(), exogenousBindings);
// final IV<?, ?> o = getIV(sp.o(), exogenousBindings);
// final IV<?, ?> c = getIV(sp.c(), exogenousBindings);
//
// final RangeNode rangeNode = sp.getRange();
// final RangeBOp range = rangeNode != null ? rangeNode.getRangeBOp() : null;
//
// final IAccessPath<?> ap = db.getAccessPath(s, p, o, c, range);
//
// final long cardinality = ap.rangeCount(false/* exact */);
//
// // Annotate with the fast range count.
// sp.setProperty(Annotations.ESTIMATED_CARDINALITY, cardinality);
//
// /*
// * Annotate with the index which would be used if we did not run
// * access path "as-bound". This is the index that will be used
// * if we wind up doing a hash join for this predicate.
// *
// * TODO It would make sense to lift this annotation into a
// * different AST optimizer so it is always present. An
// * optimization for index locality for as-bound evaluation
// * depends on the presence of this annotation.
// *
// * @see https://sourceforge.net/apps/trac/bigdata/ticket/150"
// * (Choosing the index for testing fully bound access paths
// * based on index locality)
// */
// sp.setProperty(Annotations.ORIGINAL_INDEX, ap.getKeyOrder());
//
// }
//
// }
//
// }
//
// /**
// * Helper method grabs the IV out of the TermNode, doing the appropriate
// * NULL and constant/var checks.
// *
// * @param term
// * @param exogenousBindings
// * The externally given bindings (optional).
// */
// @SuppressWarnings("rawtypes")
// private final IV getIV(final TermNode term,
// final IBindingSet exogenousBindings) {
//
// if (term != null && term.isVariable() && exogenousBindings != null) {
//
// @SuppressWarnings("unchecked")
// final IConstant<IV> c = (IConstant<IV>) exogenousBindings
// .get((IVariable) term.getValueExpression());
//
// if(c != null) {
//
// return c.get();
//
// }
//
// }
//
// if (term != null && term.isConstant()) {
//
// final IV iv = ((IConstant<IV>) term.getValueExpression()).get();
//
// if (iv == null) {
//
// throw new AssertionError("this optimizer cannot run with unknown IVs in statement patterns");
//
// }
//
// return iv;
//
// } else {
//
// return null;
//
// }
//
// }
//
}