/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on June 18, 2015
*/
package com.bigdata.rdf.sparql.ast.optimizers;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.bigdata.bop.IVariable;
import com.bigdata.rdf.sparql.ast.GroupNodeVarBindingInfo;
import com.bigdata.rdf.sparql.ast.GroupNodeVarBindingInfoMap;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
/**
 * Partition of a join group, defined by a (possibly empty) list of
 * non-optional non-minus nodes, possibly closed by a single optional
 * or minus node. Each partition maintains the set of variables that are
 * definitely bound *after* evaluating the partition. Note that this set is
 * made up of the externally bound variables plus the variables definitely
 * produced by the non-optional non-minus nodes of the partition (i.e., the
 * closing OPTIONAL/MINUS node does not contribute to it).
 *
 * @author <a href="mailto:ms@metaphacts.com">Michael Schmidt</a>
 * @version $Id$
 */
public class ASTJoinGroupPartition {

    /**
     * Lookup for the variable binding information of the nodes handled by
     * this partition. Every node contained in (or placed into) the partition
     * is looked up in this map.
     */
    final GroupNodeVarBindingInfoMap bindingInfoMap;

    /**
     * The ordered list of non-optional non-minus nodes of the partition.
     */
    final LinkedList<IGroupMemberNode> nonOptionalNonMinusNodes;

    /**
     * Variables that are bound externally, i.e. from prior parts of the query
     * plan. Note that this does not include variables that are bound by prior
     * partitions (in case this partition is part of an
     * {@link ASTJoinGroupPartitions} object). Once initialized, this should
     * never be changed.
     */
    final Set<IVariable<?>> externallyBound;

    /**
     * The optional or minus node marking the "border" (i.e., last node) of
     * the partition; null if the partition has no such node.
     */
    IGroupMemberNode optionalOrMinus;

    /**
     * The variables that are definitely produced by this partition, i.e.
     * can be assumed being bound in subsequent partitions. This includes
     * variables that are externally bound.
     */
    Set<IVariable<?>> definitelyProduced;

    /**
     * Constructs a new join group partition. The passed list is used as the
     * partition's internal node list (it is not copied), and the set of
     * definitely produced variables is computed from it right away.
     *
     * @param nonOptionalNonMinusNodes the ordered non-optional non-minus
     *           nodes of the partition
     * @param optionalOrMinus the optional or minus node closing the
     *           partition, may be null
     * @param bindingInfoMap binding information for the nodes
     * @param externallyBound the variables that are bound externally
     */
    ASTJoinGroupPartition(
            final LinkedList<IGroupMemberNode> nonOptionalNonMinusNodes,
            final IGroupMemberNode optionalOrMinus,
            final GroupNodeVarBindingInfoMap bindingInfoMap,
            final Set<IVariable<?>> externallyBound) {

        this.nonOptionalNonMinusNodes = nonOptionalNonMinusNodes;
        this.optionalOrMinus = optionalOrMinus;
        this.bindingInfoMap = bindingInfoMap;
        this.externallyBound = externallyBound;

        recomputeDefinitelyProduced();
    }

    /**
     * Returns a fresh list containing the nodes of the partition in order.
     *
     * @param includeOptionalOrMinusNode whether the closing optional or minus
     *           node (if present) is appended to the result
     *
     * @return the flat (ordered) list of nodes in the partition
     */
    public List<IGroupMemberNode> extractNodeList(
            final boolean includeOptionalOrMinusNode) {

        final List<IGroupMemberNode> nodeList =
            new ArrayList<IGroupMemberNode>(nonOptionalNonMinusNodes);

        if (includeOptionalOrMinusNode && optionalOrMinus != null) {
            nodeList.add(optionalOrMinus);
        }

        return nodeList;
    }

    /**
     * Adds a (non-optional non-minus) node at the end of the partition's
     * node list and updates the set of definitely produced variables
     * accordingly.
     *
     * @param node the node to append
     */
    public void addNonOptionalNonMinusNodeToPartition(IGroupMemberNode node) {
        placeAtPosition(node, null);
    }

    /**
     * @return the variables definitely produced by this partition (the
     *         internal set, not a copy)
     */
    public Set<IVariable<?>> getDefinitelyProduced() {
        return definitelyProduced;
    }

    /**
     * Replaces the non-optional non-minus nodes by the given ordered list.
     * If recomputeDefinitelyProduced is set to false, the definitely
     * produced variables will not be recomputed (this is a performance tweak
     * which can be exploited when reordering the nodes only, for instance).
     *
     * @param ordered the new ordered list of non-optional non-minus nodes
     * @param recomputeDefinitelyProduced whether to recompute the set of
     *           definitely produced variables
     */
    public void replaceNonOptionalNonMinusNodesWith(
            final List<IGroupMemberNode> ordered,
            final boolean recomputeDefinitelyProduced) {

        // Snapshot first: if the caller hands in the internal list itself (or
        // a view of it), clearing before copying would lose the nodes.
        final List<IGroupMemberNode> snapshot =
            new ArrayList<IGroupMemberNode>(ordered);

        nonOptionalNonMinusNodes.clear();
        nonOptionalNonMinusNodes.addAll(snapshot);

        if (recomputeDefinitelyProduced) {
            recomputeDefinitelyProduced();
        }
    }

    /**
     * Removes the given nodes and updates the internal data structures.
     * Null entries are ignored. A node that is not among the non-optional
     * non-minus nodes but equals the closing optional or minus node causes
     * the latter to be reset to null.
     *
     * @param nodesToRemove the nodes to remove
     */
    public void removeNodesFromPartition(List<IGroupMemberNode> nodesToRemove) {

        for (final IGroupMemberNode nodeToRemove : nodesToRemove) {

            if (nodeToRemove == null) {
                continue; // ignore
            }

            final boolean removedFromList =
                nonOptionalNonMinusNodes.remove(nodeToRemove);
            if (!removedFromList && nodeToRemove.equals(optionalOrMinus)) {
                optionalOrMinus = null;
            }
        }

        recomputeDefinitelyProduced();
    }

    /**
     * Places the given node at the first position where, for the subsequent
     * child, at least one of the variables bound through the node is used.
     * Also considers the fact that this node must not be placed *before*
     * its first possible position according to the binding requirements.
     * The set of definitely produced variables is updated accordingly.
     *
     * NOTE: requires the node to be contained in the partitions binding info map.
     *
     * @param node the node to place
     * @param additionalKnownBound additional variables that are known to be
     *           bound
     * @param requiresAllBound see
     *           {@link #getFirstPossiblePosition(IGroupMemberNode, Set, boolean)}
     */
    void placeAtFirstContributingPosition(
            final IGroupMemberNode node,
            final Set<IVariable<?>> additionalKnownBound,
            final boolean requiresAllBound) {

        final Integer firstPossiblePosition =
            getFirstPossiblePosition(node, additionalKnownBound, requiresAllBound);

        /*
         * Special case (which simplifies subsequent code, as it asserts that
         * firstPossiblePosition indeed exists; if not, we skip analysis).
         */
        if (firstPossiblePosition == null) {
            placeAtPosition(node, null); // place at end
            return;
        }

        /*
         * The variables that may be produced by the given node.
         */
        final Set<IVariable<?>> maybeProducedByNode =
            bindingInfoMap.get(node).getMaybeProduced();

        /*
         * If there is some overlap between the externally bound variables and
         * the maybe produced variables by this node, then it might be good to
         * place this node right at the beginning, as this implies a join
         * that could restrict the intermediate result set.
         */
        final Set<IVariable<?>> overlapWithExternallyBound =
            new HashSet<IVariable<?>>(externallyBound);
        overlapWithExternallyBound.retainAll(maybeProducedByNode);
        if (!overlapWithExternallyBound.isEmpty()) {
            placeAtPosition(node, firstPossiblePosition);
            return;
        }

        /*
         * If this is not the case, we watch out for the first construct using
         * one of the variables that may be produced by this node and place
         * the node right in front of it. This is a heuristics, of course,
         * which may be refined based on experiences that we make over time.
         */
        int position = 0;
        for (final IGroupMemberNode candidate : nonOptionalNonMinusNodes) {

            final Set<IVariable<?>> usedByCandidate =
                new HashSet<IVariable<?>>(
                    bindingInfoMap.get(candidate).getDesiredBound());
            usedByCandidate.retainAll(maybeProducedByNode);

            if (!usedByCandidate.isEmpty()) {
                /*
                 * The node must not be placed before its first possible
                 * position, so we take the maximum of both positions. We
                 * return right after modifying the list, so the running
                 * iteration is not continued.
                 */
                placeAtPosition(node, Math.max(position, firstPossiblePosition));
                return;
            }

            position++;
        }

        /*
         * As a fallback, we add the node at the end.
         */
        placeAtPosition(node, null);
    }

    /**
     * Places the given node at the first possible position in the non-optional
     * non-minus list of the partition, where the first possible position is
     * derived from the binding requirements of the node. If no such position
     * exists, the node is placed at the end. The set of definitely produced
     * variables is updated accordingly.
     *
     * NOTE: requires the node to be contained in the partitions binding info map.
     *
     * @param node the node to place
     * @param additionalKnownBound additional variables that are known to be
     *           bound
     * @param requiresAllBound see
     *           {@link #getFirstPossiblePosition(IGroupMemberNode, Set, boolean)}
     */
    void placeAtFirstPossiblePosition(
            final IGroupMemberNode node,
            final Set<IVariable<?>> additionalKnownBound,
            final boolean requiresAllBound) {

        placeAtPosition(node,
            getFirstPossiblePosition(
                node, additionalKnownBound, requiresAllBound));
    }

    /**
     * Places the node at the specified position in the list of non-optional
     * non-minus nodes. If the position is null, the node is added at the end
     * (this is, right in front of the bordering optional or minus node).
     * The variables definitely produced by the node are added to the set of
     * variables definitely produced by the partition, so that the set stays
     * in sync with the node list no matter which placement method was used.
     *
     * NOTE: requires the node to be contained in the partitions binding info
     * map (as is the case for every node in the list, see
     * {@link #recomputeDefinitelyProduced()}).
     *
     * @param node the node to place
     * @param positionToPlace the index at which to insert, or null for the end
     */
    void placeAtPosition(
            final IGroupMemberNode node, final Integer positionToPlace) {

        if (positionToPlace == null) {
            nonOptionalNonMinusNodes.addLast(node);
        } else {
            nonOptionalNonMinusNodes.add(positionToPlace.intValue(), node);
        }

        // update definitely produced
        definitelyProduced.addAll(
            bindingInfoMap.get(node).getDefinitelyProduced());
    }

    /**
     * Computes for the given node, the first possible position in the partition
     * according to its binding requirements. The first possible position is
     * either the first position where all required variables of the node are
     * known to be bound or where we know that none of its required variables
     * may be bound anymore. The flag requiresAllBound can be used to turn off
     * the last condition, requiring that all variables need to be bound (and
     * returning null in case this condition is never satisfied). This flag
     * is useful when distributing FILTERs across partitions.
     *
     * @param node the node to place
     * @param additionalKnownBound additional variables that are known to be
     *           bound; null is treated like an empty set
     * @param requiresAllBound requires that all variables are bound, i.e. when
     *           set to true it is not sufficient that unbound variables can't
     *           be bound anymore
     *
     * @return the position ID as integer, null if no matching position was found
     */
    Integer getFirstPossiblePosition(
            final IGroupMemberNode node,
            final Set<IVariable<?>> additionalKnownBound,
            final boolean requiresAllBound) {

        /*
         * The binding requirements for the given node
         */
        final GroupNodeVarBindingInfo bindingInfo = bindingInfoMap.get(node);

        /*
         * knownBound is the set of variables that are known to be bound at
         * a certain point in time. Initially, it contains the externallyBound
         * variables plus the additionalKnownBound variables which can be
         * passed in. Variables will be added as we iterate over the
         * non-optional non-minus nodes in the partition.
         */
        final HashSet<IVariable<?>> knownBound =
            new HashSet<IVariable<?>>(externallyBound);
        if (additionalKnownBound != null) {
            knownBound.addAll(additionalKnownBound);
        }

        /*
         * remainingMaybeBound is a multi set (where the integer represents
         * the cardinality) initially counting, for each variable, how often
         * it is maybe bound in the partition. Variable counts will be
         * decreased (and entries will be removed once we reach 0) as we
         * iterate over the non-optional non-minus nodes in the partition.
         */
        final Map<IVariable<?>, Integer> remainingMaybeBound =
            new HashMap<IVariable<?>, Integer>();
        if (!requiresAllBound) { // save initialization effort if not used

            // the non-optional non-minus nodes generate maybe bound mappings ...
            for (final IGroupMemberNode nonmNode : nonOptionalNonMinusNodes) {
                addMaybeProducedToMultiset(remainingMaybeBound, nonmNode);
            }

            // ... as well as the optional or minus node in the partition
            if (optionalOrMinus != null) {
                addMaybeProducedToMultiset(remainingMaybeBound, optionalOrMinus);
            }
        }

        /*
         * Now let's iterate over the non-optional and non-minus nodes and try
         * to identify the first possible position for the node based on its
         * binding requirements. We are allowed to place the node at the first
         * position for which we know that either all of the node's required
         * variables are bound or no more of its required variables can be
         * bound. If no such position exists, the method returns null (in that
         * case, only the end of the partition may be a safe place for the
         * node).
         */
        int position = 0;
        for (final IGroupMemberNode cur : nonOptionalNonMinusNodes) {

            if (canBePlacedAtPosition(requiresAllBound, bindingInfo,
                    knownBound, remainingMaybeBound, position)) {
                return position; // we're done
            }

            final GroupNodeVarBindingInfo curInfo = bindingInfoMap.get(cur);

            // update knownBound
            knownBound.addAll(curInfo.getDefinitelyProduced());

            // update remainingMaybeBound
            if (!requiresAllBound) {
                for (final IVariable<?> var : curInfo.getMaybeProduced()) {
                    final Integer count = remainingMaybeBound.get(var);
                    if (count == null) {
                        continue; // variable is not tracked
                    }
                    if (count <= 1) {
                        // counter reaches zero: fully remove the variable
                        remainingMaybeBound.remove(var);
                    } else {
                        // decrease counter
                        remainingMaybeBound.put(var, count - 1);
                    }
                }
            }

            position++;
        }

        /*
         * Check again for the last position (not covered by the for loop);
         * position now equals the size of the node list.
         */
        if (canBePlacedAtPosition(requiresAllBound, bindingInfo,
                knownBound, remainingMaybeBound, position)) {
            return position; // we're done
        }

        /*
         * No suitable position found:
         */
        return null;
    }

    /**
     * Internal helper function to check whether a node can be placed at
     * a given position if the passed parameter constellation is satisfied.
     *
     * @param requiresAllBound if true, the node can only be placed when no
     *           variables are left to be bound
     * @param bindingInfo the binding requirements of the node to place
     * @param knownBound the variables known to be bound at the position
     * @param remainingMaybeBound multi set of the variables that may still be
     *           bound from the position on (only consulted if
     *           requiresAllBound is false)
     * @param i the position under consideration (not evaluated by this check)
     *
     * @return true if the node can be placed
     */
    private boolean canBePlacedAtPosition(final boolean requiresAllBound,
            final GroupNodeVarBindingInfo bindingInfo,
            final HashSet<IVariable<?>> knownBound,
            final Map<IVariable<?>, Integer> remainingMaybeBound, int i) {

        // if no more variables need to be bound, we can place the node
        final Set<IVariable<?>> leftToBeBound =
            bindingInfo.leftToBeBound(knownBound);
        if (leftToBeBound.isEmpty()) {
            return true;
        }

        if (requiresAllBound) {
            return false;
        }

        // another case in which we can place the node is if *none* of the
        // remaining variables can be bound anymore; so we try to identify
        // a witness that *can* be bound
        for (final IVariable<?> leftToBeBoundVar : leftToBeBound) {
            if (remainingMaybeBound.containsKey(leftToBeBoundVar)) {
                return false; // witness found
            }
        }

        return true;
    }

    /**
     * Adds the variables that are maybe produced in the node to the multi set,
     * i.e. increases the counter of each such variable by one.
     *
     * @param multiset the multi set to update
     * @param node the node whose maybe produced variables are counted
     */
    private void addMaybeProducedToMultiset(
            final Map<IVariable<?>, Integer> multiset,
            IGroupMemberNode node) {

        final GroupNodeVarBindingInfo bi = bindingInfoMap.get(node);
        for (final IVariable<?> var : bi.getMaybeProduced()) {
            final Integer count = multiset.get(var);
            multiset.put(var, count == null ? 1 : count + 1);
        }
    }

    /**
     * Recompute the definitely produced variables by this partition from
     * scratch: the externally bound variables plus the variables definitely
     * produced by each of the non-optional non-minus nodes.
     */
    private void recomputeDefinitelyProduced() {

        final Set<IVariable<?>> recomputed =
            new HashSet<IVariable<?>>(externallyBound);
        for (final IGroupMemberNode node : nonOptionalNonMinusNodes) {
            recomputed.addAll(bindingInfoMap.get(node).getDefinitelyProduced());
        }

        definitelyProduced = recomputed;
    }
}