/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on June 12, 2015
*/
package com.bigdata.rdf.sparql.ast.optimizers;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import com.bigdata.bop.BOp;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IVariable;
import com.bigdata.rdf.sparql.ast.AssignmentNode;
import com.bigdata.rdf.sparql.ast.BindingsClause;
import com.bigdata.rdf.sparql.ast.GroupNodeVarBindingInfo;
import com.bigdata.rdf.sparql.ast.GroupNodeVarBindingInfoMap;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.JoinGroupNode;
import com.bigdata.rdf.sparql.ast.StaticAnalysis;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
import com.bigdata.rdf.sparql.ast.explainhints.ExplainHint;
import com.bigdata.rdf.sparql.ast.explainhints.JoinOrderExplainHint;
import com.bigdata.rdf.sparql.ast.service.ServiceNode;
import com.bigdata.rdf.sparql.ast.service.ServiceRegistry;
/**
* This optimizer brings a join group node into a valid order according to the
* SPARQL 1.1 semantics and optimizes the order of the nodes in the join group
* using various heuristics.
*
* @author <a href="mailto:ms@metaphacts.com">Michael Schmidt</a>
* @version $Id$
*/
public class ASTJoinGroupOrderOptimizer extends AbstractJoinGroupOptimizer
implements IASTOptimizer {
/**
 * Flag selecting the "assert-correctness-only" mode. When {@code true},
 * {@link #optimizeJoinGroup} skips the cross-partition optimization and the
 * within-partition processing skips the type-based reordering step.
 */
private final boolean assertCorrectnessOnly;
/**
 * Default constructor, running the optimizer with optimizations turned on.
 * Delegates to {@link #ASTJoinGroupOrderOptimizer(boolean)} with
 * {@code assertCorrectnessOnly = false}.
 */
public ASTJoinGroupOrderOptimizer() {
this(false);
}
/**
 * Constructor allowing to run the optimizer in an "assert-correctness-only"
 * mode that makes only minor modifications to the join order.
 *
 * @param assertCorrectnessOnly if {@code true}, the optimization across
 *        partitions and the type-based reordering within partitions are
 *        skipped; FILTER placement is performed in either mode
 */
public ASTJoinGroupOrderOptimizer(final boolean assertCorrectnessOnly) {
this.assertCorrectnessOnly = assertCorrectnessOnly;
}
/**
 * Reorders the children of the given join group: partitions the non-FILTER
 * nodes, optionally optimizes across and within the partitions, places the
 * FILTER nodes, and finally writes the resulting node order back into the
 * join group.
 *
 * @param ctx the evaluation context, consulted to decide whether reordering
 *        is enabled for this group
 * @param sa the static analysis helper used to compute incoming bindings
 * @param bSets the input binding sets (not used by this method)
 * @param joinGroup the join group whose children are reordered in place
 */
@Override
protected void optimizeJoinGroup(AST2BOpContext ctx, StaticAnalysis sa,
        IBindingSet[] bSets, JoinGroupNode joinGroup) {

    final boolean reorderNodes =
        ASTStaticJoinOptimizer.isStaticOptimizer(ctx, joinGroup);

    /**
     * Initialize summary containers with a single pass over the children;
     * they allow for efficient lookup of required information in the
     * following.
     */

    // variables incoming externally (i.e. those variables definitely bound
    // when evaluating this join group)
    final Set<IVariable<?>> externallyIncoming =
        sa.getDefinitelyIncomingBindings(
            joinGroup, new HashSet<IVariable<?>>());

    // easy-access information to FILTER [NOT] EXISTS constructs in this group
    final ASTJoinGroupFilterExistsInfo fExInfo =
        new ASTJoinGroupFilterExistsInfo(joinGroup);

    // easy-access information about variable bindings for nodes in this group
    final GroupNodeVarBindingInfoMap bindingInfoMap =
        new GroupNodeVarBindingInfoMap(joinGroup, sa, fExInfo);

    /**
     * Setup helper class for proper placement of FILTER nodes in join group.
     */
    final ASTFilterPlacer filterPlacer =
        new ASTFilterPlacer(joinGroup, fExInfo);

    /**
     * Set up the partitions, ignoring the FILTER nodes. FILTER nodes apply
     * to the whole join group by semantics, so they are not considered here
     * but will be added in the end.
     */
    final ASTJoinGroupPartitions partitions =
        new ASTJoinGroupPartitions(
            filterPlacer.getNonFilterNodes(),
            bindingInfoMap, externallyIncoming);

    /**
     * First, optimize across the partitions, trying to move forward
     * non-optional non-minus patterns wherever possible. It is important to
     * do this first, before reordering within partitions, as it shifts
     * nodes around across them. See
     * https://docs.google.com/document/d/1Fu0QLj1ML6CdaysREDFsJt8fT048zBWp0ssMDITAYt8
     * for a formal explanation and justification of our approach.
     */
    if (reorderNodes && !assertCorrectnessOnly) {
        optimizeAcrossPartitions(
            joinGroup, partitions, bindingInfoMap, externallyIncoming);
    }

    /**
     * Second, optimize within the individual partitions. This optimization
     * is based on both hard constraints (w.r.t. the placement of nodes
     * that require bindings) and heuristics (e.g. an order based on node
     * types).
     *
     * Note: we may want to pass in (and use) information about existing
     * filters, which might be valuable information for the optimizer.
     */
    if (reorderNodes) {
        optimizeWithinPartitions(
            partitions, bindingInfoMap, assertCorrectnessOnly);
    }

    /**
     * Third, place the FILTERs at appropriate positions (across partitions).
     * Note that this is required for correctness, and hence done even if
     * reorderNodes is set to false (i.e., reordering is disabled through
     * the query hint).
     */
    filterPlacer.placeFiltersInPartitions(partitions);

    /**
     * Now, flatten the partitions again and replace the children of the
     * join group with the new list.
     */
    final LinkedList<IGroupMemberNode> nodeList =
        partitions.extractNodeList(true /* includeOptionalOrMinusNode */);

    // LinkedList#get(int) walks the list on every call; copy once into an
    // array-backed list so that the indexed write-back below is linear
    // instead of quadratic in the number of children. An index beyond the
    // end of the list still raises IndexOutOfBoundsException, as before.
    final List<IGroupMemberNode> indexedNodes =
        new ArrayList<IGroupMemberNode>(nodeList);
    for (int i = 0; i < joinGroup.arity(); i++) {
        joinGroup.setArg(i, (BOp) indexedNodes.get(i));
    }
}
/**
 * Moves non-optional non-minus nodes as far towards the beginning of the
 * join group as possible without violating the semantics. In particular,
 * this function takes care to not "skip" OPTIONAL and MINUS constructs when
 * it would change the outcome of the query.
 *
 * @param joinGroup the join group under optimization; an EXPLAIN hint is
 *        attached to it whenever the forward search for a node is stopped
 * @param partitions the partitions of the join group, modified in place
 * @param bindingInfoMap lookup for the variable binding info of the nodes
 * @param externallyKnownProduced variables known to be bound before the
 *        first partition is evaluated
 */
void optimizeAcrossPartitions(
        final JoinGroupNode joinGroup,
        final ASTJoinGroupPartitions partitions,
        final GroupNodeVarBindingInfoMap bindingInfoMap,
        final Set<IVariable<?>> externallyKnownProduced) {

    final List<ASTJoinGroupPartition> parts = partitions.getPartitionList();

    /**
     * producedBefore.get(p) holds the variables that are definitely
     * produced *before* partition p is evaluated: the external variables
     * plus everything definitely produced by partitions 0..p-1. Each entry
     * is an independent snapshot so it can be updated individually later.
     */
    final List<Set<IVariable<?>>> producedBefore =
        new ArrayList<Set<IVariable<?>>>(parts.size());
    final Set<IVariable<?>> running =
        new HashSet<IVariable<?>>(externallyKnownProduced);
    for (int p = 0; p < parts.size(); p++) {
        if (p > 0) {
            running.addAll(parts.get(p - 1).getDefinitelyProduced());
        }
        producedBefore.add(new HashSet<IVariable<?>>(running));
    }

    /**
     * Walk over the partitions (starting with the second one) and try to
     * shift each of their non-optional non-minus nodes to the firstmost
     * partition in which it can be safely placed.
     */
    for (int src = 1; src < parts.size(); src++) {

        final ASTJoinGroupPartition source = parts.get(src);
        final List<IGroupMemberNode> stayingNodes =
            new ArrayList<IGroupMemberNode>();

        for (IGroupMemberNode candidate : source.nonOptionalNonMinusNodes) {

            final GroupNodeVarBindingInfo candidateInfo =
                bindingInfoMap.get(candidate);

            // index of the earliest partition found so far, -1 if none
            int target = -1;
            for (int dst = src - 1; dst >= 0; dst--) {

                final ASTJoinGroupPartition destination = parts.get(dst);

                /**
                 * Conflicting vars: the maybe-produced vars of the OPTIONAL
                 * or MINUS bordering the destination partition (none if
                 * there is no such node), intersected with the
                 * maybe-produced vars of the candidate, minus the vars
                 * definitely produced before partition dst+1.
                 */
                final Set<IVariable<?>> conflicts =
                    destination.optionalOrMinus == null
                        ? new HashSet<IVariable<?>>()
                        : new HashSet<IVariable<?>>(
                            bindingInfoMap.get(
                                destination.optionalOrMinus).getMaybeProduced());
                conflicts.retainAll(candidateInfo.getMaybeProduced());
                conflicts.removeAll(producedBefore.get(dst + 1));

                final boolean movable =
                    conflicts.isEmpty()
                        && producedBefore.get(dst).containsAll(
                            candidateInfo.getRequiredBound());

                if (!movable) {
                    // Cannot go any further forward. Usually non-optional
                    // non-minus nodes can be executed before OPTIONAL or
                    // MINUS blocks, so reaching this point may indicate a
                    // problem with the query -> attach an EXPLAIN hint.
                    final ExplainHint hint =
                        new JoinOrderExplainHint(
                            JoinOrderExplainHint.
                                ACROSS_PARTITION_REORDERING_PROBLEM, candidate);
                    joinGroup.addExplainHint(hint);
                    break;
                }

                // remember this partition and keep looking further forward
                target = dst;
            }

            if (target < 0) {
                // no earlier partition found: node stays where it is
                stayingNodes.add(candidate);
                continue;
            }

            // add the node to its new partition; it is dropped from the
            // source partition once all candidates have been inspected
            parts.get(target).addNonOptionalNonMinusNodeToPartition(candidate);

            /**
             * The candidate now contributes its definitely produced vars to
             * every partition after the target, up to and including the
             * source partition (k <= src < parts.size(), so the index is
             * always valid). Later partitions carry this info already.
             */
            for (int k = target + 1; k <= src; k++) {
                producedBefore.get(k).addAll(
                    candidateInfo.getDefinitelyProduced());
            }
        }

        // only the nodes that could not be moved remain in this partition
        source.replaceNonOptionalNonMinusNodesWith(stayingNodes, true);
    }
}
/**
 * Optimizes the node order inside each partition, visiting the partitions
 * in list order. A single set of variables known to be bound from previously
 * processed partitions is shared across the calls; it starts out empty and
 * is extended by {@link #optimizeWithinPartition} after each partition.
 *
 * @param partitions the partitions to process
 * @param bindingInfoMap lookup for the variable binding info of the nodes
 * @param assertCorrectnessOnly passed through to the per-partition step
 */
void optimizeWithinPartitions(
        final ASTJoinGroupPartitions partitions,
        final GroupNodeVarBindingInfoMap bindingInfoMap,
        final boolean assertCorrectnessOnly) {

    // grows as partitions are processed one after the other
    final Set<IVariable<?>> boundSoFar = new HashSet<IVariable<?>>();

    for (final ASTJoinGroupPartition current : partitions.getPartitionList()) {
        optimizeWithinPartition(
            current, assertCorrectnessOnly, bindingInfoMap, boundSoFar);
    }
}
/**
 * Optimizes the order of nodes within the given partition. Nodes that need
 * special treatment (selected SERVICE nodes, BIND/assignment nodes, VALUES
 * clauses and other nodes with unsatisfied binding requirements) are taken
 * out of the partition, the remaining nodes are optionally reordered by
 * type, and the special nodes are then placed back one by one.
 *
 * @param partition the partition to optimize, modified in place
 * @param assertCorrectnessOnly if {@code true}, the type-based reordering of
 *        the remaining nodes is skipped
 * @param bindingInfoMap lookup for the variable binding info of the nodes
 * @param knownBoundFromPrevPartitions variables known to be bound by the
 *        previously processed partitions; extended with the variables
 *        definitely produced by this partition before returning
 */
void optimizeWithinPartition(
        final ASTJoinGroupPartition partition,
        final boolean assertCorrectnessOnly,
        final GroupNodeVarBindingInfoMap bindingInfoMap,
        final Set<IVariable<?>> knownBoundFromPrevPartitions) {

    final ASTTypeBasedNodeClassifier classifier =
        new ASTTypeBasedNodeClassifier(
            new Class<?>[] {
                ServiceNode.class,      /* restricted by condition A */
                AssignmentNode.class,
                BindingsClause.class,
                IGroupMemberNode.class  /* restricted by condition B */
            });

    /**
     * ### Condition A:
     *
     * Only special service nodes are considered for placement: services
     * whose responsible factory is not the default service factory, and
     * services whose service reference is not a constant. All other service
     * nodes are treated through a standard reorder.
     */
    classifier.addConstraintForType(
        ServiceNode.class,
        new ASTTypeBasedNodeClassifierConstraint() {
            @Override
            boolean appliesTo(final IGroupMemberNode node) {
                if (!(node instanceof ServiceNode)) {
                    return false;
                }
                final ServiceNode service = (ServiceNode) node;
                final boolean handledByDefaultFactory =
                    service.getResponsibleServiceFactory().equals(
                        ServiceRegistry.getInstance()
                            .getDefaultServiceFactory());
                // the service reference is only inspected for services
                // handled by the default factory
                return !handledByDefaultFactory
                    || !service.getServiceRef().isConstant();
            }
        });

    /**
     * ### Condition B:
     *
     * Additional nodes that have binding requirements (e.g.
     * { BIND ?x AS ?y } UNION { ... }) that can be satisfied through
     * this partition.
     */
    classifier.addConstraintForType(
        IGroupMemberNode.class,
        new ASTTypeBasedNodeClassifierConstraint() {
            @Override
            boolean appliesTo(final IGroupMemberNode node) {
                final GroupNodeVarBindingInfo info = bindingInfoMap.get(node);
                // required-bound vars that the node may not produce itself
                final Set<IVariable<?>> unsatisfied =
                    new HashSet<IVariable<?>>(info.getRequiredBound());
                unsatisfied.removeAll(info.getMaybeProduced());
                // anything left must be bound from outside the node
                return !unsatisfied.isEmpty();
            }
        });

    classifier.registerNodes(
        partition.extractNodeList(false /* includeOptionalOrMinus */));

    /**
     * Step 1: take all classified nodes out of the partition; they are
     * placed back individually further below.
     */
    final List<IGroupMemberNode> speciallyHandled =
        new ArrayList<IGroupMemberNode>();
    speciallyHandled.addAll(classifier.get(ServiceNode.class));
    speciallyHandled.addAll(classifier.get(AssignmentNode.class));
    speciallyHandled.addAll(classifier.get(BindingsClause.class));
    speciallyHandled.addAll(classifier.get(IGroupMemberNode.class));
    partition.removeNodesFromPartition(speciallyHandled);

    /**
     * Step 2: reorder what is left in the partition based on node type.
     * This is skipped in assert-correctness-only mode, in which case the
     * method only takes care of the placement of the special nodes.
     *
     * Note: this is the place where we want to integrate the
     * StaticOptimizer.
     */
    if (!assertCorrectnessOnly) {
        final IASTJoinGroupPartitionReorderer typeBasedReorderer =
            new TypeBasedASTJoinGroupPartitionReorderer();
        typeBasedReorderer.reorderNodes(partition);
    }

    // every special node except the VALUES clauses
    final List<IGroupMemberNode> pendingNonValueNodes =
        new LinkedList<IGroupMemberNode>();
    pendingNonValueNodes.addAll(classifier.get(AssignmentNode.class));
    pendingNonValueNodes.addAll(classifier.get(ServiceNode.class));
    pendingNonValueNodes.addAll(classifier.get(IGroupMemberNode.class));

    /**
     * Step 3: place the VALUES nodes. Generally, it is desirable to place a
     * VALUES clause at the first contributing position. However, in some
     * cases (see e.g. https://jira.blazegraph.com/browse/BLZG-1463) the
     * VALUES clause introduces variables that the pending non-VALUES nodes
     * still need (e.g., a subsequent BIND node, as in the ticket example),
     * and placing it early clears the way for placing those nodes earlier
     * in the query execution plan.
     *
     * We therefore intersect the variables left to be bound for the pending
     * nodes with the variables declared by the VALUES clause. If the
     * intersection is non-empty the clause goes to the first possible
     * position, otherwise to the first contributing position.
     */
    for (IGroupMemberNode valuesNode : classifier.get(BindingsClause.class)) {

        final Set<IVariable<?>> declaredVars =
            ((BindingsClause) valuesNode).getDeclaredVariables();

        final Set<IVariable<?>> neededFromValues =
            new HashSet<IVariable<?>>();
        for (final IGroupMemberNode pending : pendingNonValueNodes) {
            neededFromValues.addAll(
                bindingInfoMap.get(pending).leftToBeBound(
                    knownBoundFromPrevPartitions));
        }
        neededFromValues.retainAll(declaredVars);

        if (!neededFromValues.isEmpty()) {
            // case 2: dependencies -> early placement
            partition.placeAtFirstPossiblePosition(valuesNode,
                knownBoundFromPrevPartitions, false /* requires all bound */);
        } else {
            // case 1: no dependencies -> late placement
            partition.placeAtFirstContributingPosition(valuesNode,
                knownBoundFromPrevPartitions, false /* requires all bound */);
        }
    }

    /**
     * Step 4: place the remaining special nodes. They must be brought into
     * the right order first, e.g. if bind node 1 uses variables bound by
     * bind node 2, then node 2 has to be inserted first in order to be able
     * to place node 1 after it.
     *
     * NOTE(review): the ordering uses partition.bindingInfoMap rather than
     * the bindingInfoMap parameter -- presumably the same instance; confirm.
     */
    final Set<IVariable<?>> knownBoundSomewhere =
        new HashSet<IVariable<?>>(partition.definitelyProduced);
    final List<IGroupMemberNode> orderedNodes =
        orderNodesByDependencies(
            pendingNonValueNodes, partition.bindingInfoMap,
            knownBoundSomewhere);

    for (IGroupMemberNode node : orderedNodes) {

        // services flagged with the runFirst option are run as early as
        // possible; everything else goes to its first contributing position
        final boolean runFirst =
            node instanceof ServiceNode
                && ((ServiceNode) node).getResponsibleServiceFactory()
                    .getServiceOptions().isRunFirst();

        if (runFirst) {
            partition.placeAtFirstPossiblePosition(node,
                knownBoundFromPrevPartitions, false /* requiresAllBound */);
        } else {
            partition.placeAtFirstContributingPosition(node,
                knownBoundFromPrevPartitions, false /* requiresAllBound */);
        }
    }

    // make this partition's definite bindings visible to later partitions
    knownBoundFromPrevPartitions.addAll(partition.getDefinitelyProduced());
}
/**
 * Brings the nodes in a correct order according to the binding requirement
 * dependencies that they have. Note that this is a best effort approach,
 * which may fail in cases where we allow for liberal patterns that do not
 * strictly follow the restriction of SPARQL semantics (e.g., for cyclic
 * patterns such as BIND(?x AS ?y) . BIND(?y AS ?x)). We may want to check
 * the query for such patterns statically and throw a pre-runtime exception,
 * as the SPARQL 1.1 standard suggests.
 *
 * Failing means we return an order that is not guaranteed to be "safe",
 * which might give us unexpected results in some exceptional cases.
 * However, whenever the pattern is valid according to the SPARQL 1.1
 * semantics, this method should return nodes in a valid order.
 *
 * @param nodes the list of nodes to reorder; the list itself is not modified
 * @param bindingInfoMap map for binding info lookup
 * @param knownBoundSomewhere variables that are known to be bound somewhere
 *        in the node list in which we want to place the nodes; the set
 *        itself is not modified
 *
 * @return the ordered node list if a valid order exists, otherwise a
 *         "best effort" order; always contains every input node exactly as
 *         often as it occurs in {@code nodes}
 */
List<IGroupMemberNode> orderNodesByDependencies(
        final List<IGroupMemberNode> nodes,
        final GroupNodeVarBindingInfoMap bindingInfoMap,
        final Set<IVariable<?>> knownBoundSomewhere) {

    final List<IGroupMemberNode> ordered =
        new ArrayList<IGroupMemberNode>(nodes.size());

    /**
     * Initially, all nodes must be placed. The work list is array-backed
     * because it is scanned by index below; on a linked list each indexed
     * lookup would itself walk the list.
     */
    final List<IGroupMemberNode> toBePlaced =
        new ArrayList<IGroupMemberNode>(nodes);

    final Set<IVariable<?>> knownBound =
        new HashSet<IVariable<?>>(knownBoundSomewhere);

    // each pass of the outer loop places exactly one node, so it terminates
    while (!toBePlaced.isEmpty()) {

        for (int i = 0; i < toBePlaced.size(); i++) {

            final IGroupMemberNode node = toBePlaced.get(i);
            final GroupNodeVarBindingInfo nodeBindingInfo =
                bindingInfoMap.get(node);

            /**
             * The first condition is that the node can be safely placed
             * (nothing is left to be bound). The second condition is the
             * fallback: if no node of this pass qualified, the last
             * remaining node is picked even though it does not satisfy the
             * first condition.
             */
            if (nodeBindingInfo.leftToBeBound(knownBound).isEmpty() ||
                    i + 1 == toBePlaced.size()) {

                // add the node to the ordered list
                ordered.add(node);

                // remove it from the work list (removal by index)
                toBePlaced.remove(i);

                // add its bound variables to the known bound set
                knownBound.addAll(nodeBindingInfo.getDefinitelyProduced());

                // restart the scan from the beginning with the new bindings
                break;
            }
        }
    }

    return ordered;
}
}