package com.bigdata.rdf.sparql.ast.optimizers;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.joinGraph.fast.DefaultEvaluationPlan2;
import com.bigdata.journal.ITx;
import com.bigdata.rdf.sparql.ast.IBindingProducerNode;
import com.bigdata.rdf.sparql.ast.IReorderableNode;
import com.bigdata.rdf.sparql.ast.QueryHints;
import com.bigdata.rdf.sparql.ast.QueryRoot;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.StaticAnalysis;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
import com.bigdata.rdf.sparql.ast.optimizers.ASTStaticJoinOptimizer.Annotations;
/**
* This is the old static optimizer code, taken directly from
* {@link DefaultEvaluationPlan2}, but lined up with the AST API instead of the
* Rule and IPredicate API.
*
*/
public final class StaticOptimizer {
private static final transient Logger log = ASTStaticJoinOptimizer.log;
private final StaticAnalysis sa;
private final IBindingProducerNode[] ancestry;
private final Set<IVariable<?>> ancestryVars;
private final List<IReorderableNode> nodes;
private final int arity;
/**
* This is computed by the optimizer, and is a guess!
*/
private final long cardinality;
private static final long NO_SHARED_VARS = Long.MAX_VALUE - 3;
/**
* The computed evaluation order. The elements in this array are the order
* in which each tail predicate will be evaluated. The index into the array
* is the index of the tail predicate whose evaluation order you want. So
* <code>[2,0,1]</code> says that the predicates will be evaluated in the
* order tail[2], then tail[0], then tail[1].
*/
private int[/* order */] order;
public int[] getOrder() {
if (order == null) {
/*
* This will happen if you try to use toString() during the ctor
* before the order has been computed.
*/
throw new IllegalStateException();
}
return order;
}
/**
* Cache of the computed range counts for the predicates in the tail. The
* elements of this array are initialized to -1L, which indicates that the
* range count has NOT been computed. Range counts are computed on demand
* and MAY be zero. Only an approximate range count is obtained. Such
* approximate range counts are an upper bound on the #of elements that are
* spanned by the access pattern. Therefore if the range count reports ZERO
* (0L) it is a real zero and the access pattern does not match anything in
* the data. The only other caveat is that the range counts are valid as of
* the commit point on which the access pattern is reading. If you obtain
* them for {@link ITx#READ_COMMITTED} or {@link ITx#UNISOLATED} views then
* they could be invalidated by concurrent writers.
*/
private long[/* tailIndex */] rangeCount;
private Tail[/* tailIndex */] tail;
/**
* Keeps track of which tails have been used already and which still need to
* be evaluated.
*/
private boolean[/* tailIndex */] used;
/**
* See {@link Annotations#OPTIMISTIC}.
*/
private final double optimistic;
public StaticOptimizer(StaticOptimizer parent, List<IReorderableNode> nodes) {
this(parent.sa, parent.ancestry, nodes, parent.optimistic);
}
StaticOptimizer(final QueryRoot queryRoot, final AST2BOpContext context,
final IBindingProducerNode[] ancestry,
final List<IReorderableNode> nodes, final double optimistic) {
this(new StaticAnalysis(queryRoot, context), ancestry, nodes,
optimistic);
}
private StaticOptimizer(final StaticAnalysis sa,
final IBindingProducerNode[] ancestry,
final List<IReorderableNode> nodes, final double optimistic) {
if (ancestry == null)
throw new IllegalArgumentException();
if (nodes == null)
throw new IllegalArgumentException();
this.sa = sa;
this.ancestry = ancestry;
this.ancestryVars = new LinkedHashSet<IVariable<?>>();
this.nodes = nodes;
this.arity = nodes.size();
if (log.isDebugEnabled()) {
log.debug("arity: " + arity);
for (int i = 0; i < arity; i++) {
final IReorderableNode node = nodes.get(i);
log.debug(node.getClass() +
", reorderable: " + node.isReorderable() +
", estcard: " + node.getEstimatedCardinality(this) +
", vars: " + getVars(i));
log.debug(node.getEstimatedCardinality(this));
log.debug(node.isReorderable());
}
}
this.optimistic = optimistic;
this.cardinality = calc();
if (ASTStaticJoinOptimizer.log.isDebugEnabled()) {
for (int i = 0; i < arity; i++) {
ASTStaticJoinOptimizer.log.debug(order[i]);
}
}
}
/**
* Computes and sets the evaluation order, and returns an estimated
* cardinality.
*/
private long calc() {
if (order != null)
throw new IllegalStateException(
"calc should only be called from the constructor");
order = new int[arity];
rangeCount = new long[arity];
used = new boolean[arity];
tail = new Tail[arity];
// clear arrays.
for (int i = 0; i < arity; i++) {
order[i] = -1; // -1 is used to detect logic errors.
rangeCount[i] = -1L; // -1L indicates no range count yet.
used[i] = false; // not yet evaluated
}
if (arity == 0) {
return 1l;
}
if (arity == 1) {
order[0] = 0;
return cardinality(0);
}
/*
* Seems like the easiest way to handle the ancestry is the exact same
* way we handle the "run first" statement patterns (text search), which
* is that we collect up the variables that are bound and then give
* preferential treatment to the predicates that can join on those
* variables.
*/
for (IBindingProducerNode join : ancestry) {
if (ASTStaticJoinOptimizer.log.isDebugEnabled()) {
ASTStaticJoinOptimizer.log
.debug("considering join node from ancestry: " + join);
}
sa.getDefinitelyProducedBindings(join, ancestryVars, true/* recursive */);
}
if (ASTStaticJoinOptimizer.log.isDebugEnabled()) {
ASTStaticJoinOptimizer.log.debug("bindings from ancestry: "
+ Arrays.toString(ancestryVars.toArray()));
}
/*
* See if there is a best tail to run first. The "run first" query hint
* gets priority, then the tails that share variables with the ancestry.
*/
int preferredFirstTail = -1;
for (int i = 0; i < arity; i++) {
/*
* We need the optimizer to play nice with the run first hint.
* Choose the first "run first" tail we see.
*/
if (nodes.get(i).getProperty(QueryHints.RUN_FIRST, false)) {
preferredFirstTail = i;
break;
}
}
/*
* Always choose a ZERO cardinality tail first, regardless of ancestry.
*/
for (int i = 0; i < arity; i++) {
if (cardinality(i) == 0) {
preferredFirstTail = i;
break;
}
}
/*
* If there was no "run first" query hint, then go to the ancestry.
*/
if (preferredFirstTail == -1)
preferredFirstTail = getNextTailThatSharesVarsWithAncestry();
if (ASTStaticJoinOptimizer.log.isDebugEnabled()) {
ASTStaticJoinOptimizer.log.debug("preferred first tail: "
+ preferredFirstTail);
}
// special case if there are only two tails
if (arity == 2) {
if (ASTStaticJoinOptimizer.log.isDebugEnabled())
ASTStaticJoinOptimizer.log.debug("two tails left");
if (preferredFirstTail != -1) {
order[0] = preferredFirstTail;
order[1] = preferredFirstTail == 0 ? 1 : 0;
} else {
if (cardinality(0) == cardinality(1) &&
nodes.get(0) instanceof StatementPatternNode &&
nodes.get(1) instanceof StatementPatternNode) {
final VarNode sid0 = ((StatementPatternNode) nodes.get(0)).sid();
final VarNode sid1 = ((StatementPatternNode) nodes.get(1)).sid();
if (sid0 != null && sid1 == null) {
order[0] = 1;
order[1] = 0;
}
}
if (order[0] == -1) {
order[0] = cardinality(0) <= cardinality(1) ? 0 : 1;
order[1] = cardinality(0) <= cardinality(1) ? 1 : 0;
}
}
return computeJoinCardinality(getTail(0), getTail(1));
}
/*
* There will be (tails-1) joins, we just need to figure out what they
* should be.
*/
StaticOptimizer.Join join;
if (preferredFirstTail == -1)
join = getFirstJoin();
else
join = getFirstJoin(preferredFirstTail);
long cardinality = join.cardinality;
int t1 = ((Tail) join.getD1()).getTailIndex();
int t2 = ((Tail) join.getD2()).getTailIndex();
if (preferredFirstTail == -1) {
order[0] = cardinality(t1) <= cardinality(t2) ? t1 : t2;
order[1] = cardinality(t1) <= cardinality(t2) ? t2 : t1;
} else {
order[0] = t1;
order[1] = t2;
}
used[order[0]] = true;
used[order[1]] = true;
for (int i = 2; i < arity; i++) {
join = getNextJoin(join);
order[i] = ((Tail) join.getD2()).getTailIndex();
used[order[i]] = true;
}
return cardinality;
}
/**
* Start by looking at every possible initial join. Take every tail and
* match it with every other tail to find the lowest possible cardinality.
* See
* {@link #computeJoinCardinality(com.bigdata.bop.joinGraph.fast.DefaultEvaluationPlan2.IJoinDimension, com.bigdata.bop.joinGraph.fast.DefaultEvaluationPlan2.IJoinDimension)}
* for more on this.
*/
private StaticOptimizer.Join getFirstJoin() {
if (ASTStaticJoinOptimizer.log.isDebugEnabled()) {
ASTStaticJoinOptimizer.log.debug("evaluating first join");
}
long minJoinCardinality = Long.MAX_VALUE;
long minTailCardinality = Long.MAX_VALUE;
long minOtherTailCardinality = Long.MAX_VALUE;
Tail minT1 = null;
Tail minT2 = null;
for (int i = 0; i < arity; i++) {
// only check unused tails
if (used[i]) {
continue;
}
Tail t1 = getTail(i);
long t1Cardinality = cardinality(i);
for (int j = 0; j < arity; j++) {
// check only non-same and unused tails
if (i == j || used[j]) {
continue;
}
Tail t2 = getTail(j);
long t2Cardinality = cardinality(j);
long joinCardinality = computeJoinCardinality(t1, t2);
long tailCardinality = Math.min(t1Cardinality, t2Cardinality);
long otherTailCardinality = Math.max(t1Cardinality,
t2Cardinality);
if (ASTStaticJoinOptimizer.log.isDebugEnabled())
ASTStaticJoinOptimizer.log.debug("evaluating " + i + " X "
+ j + ": cardinality= " + joinCardinality);
if (joinCardinality < minJoinCardinality) {
if (ASTStaticJoinOptimizer.log.isDebugEnabled())
ASTStaticJoinOptimizer.log.debug("found a new min: "
+ joinCardinality);
minJoinCardinality = joinCardinality;
minTailCardinality = tailCardinality;
minOtherTailCardinality = otherTailCardinality;
minT1 = t1;
minT2 = t2;
} else if (joinCardinality == minJoinCardinality) {
if (tailCardinality < minTailCardinality) {
if (ASTStaticJoinOptimizer.log.isDebugEnabled())
ASTStaticJoinOptimizer.log
.debug("found a new min: "
+ joinCardinality);
minJoinCardinality = joinCardinality;
minTailCardinality = tailCardinality;
minOtherTailCardinality = otherTailCardinality;
minT1 = t1;
minT2 = t2;
} else if (tailCardinality == minTailCardinality) {
if (otherTailCardinality < minOtherTailCardinality) {
if (ASTStaticJoinOptimizer.log.isDebugEnabled())
ASTStaticJoinOptimizer.log
.debug("found a new min: "
+ joinCardinality);
minJoinCardinality = joinCardinality;
minTailCardinality = tailCardinality;
minOtherTailCardinality = otherTailCardinality;
minT1 = t1;
minT2 = t2;
}
}
}
}
}
// the join variables is the union of the join dimensions' variables
Set<String> vars = new HashSet<String>();
vars.addAll(minT1.getVars());
vars.addAll(minT2.getVars());
return new Join(minT1, minT2, minJoinCardinality, vars);
}
private StaticOptimizer.Join getFirstJoin(final int preferredFirstTail) {
if (ASTStaticJoinOptimizer.log.isDebugEnabled()) {
ASTStaticJoinOptimizer.log.debug("evaluating first join");
}
long minJoinCardinality = Long.MAX_VALUE;
long minOtherTailCardinality = Long.MAX_VALUE;
Tail minT2 = null;
final int i = preferredFirstTail;
final Tail t1 = getTail(i);
for (int j = 0; j < arity; j++) {
// check only non-same and unused tails
if (i == j || used[j]) {
continue;
}
Tail t2 = getTail(j);
long t2Cardinality = cardinality(j);
long joinCardinality = computeJoinCardinality(t1, t2);
if (ASTStaticJoinOptimizer.log.isDebugEnabled())
ASTStaticJoinOptimizer.log.debug("evaluating " + i + " X " + j
+ ": cardinality= " + joinCardinality);
if (joinCardinality < minJoinCardinality) {
if (ASTStaticJoinOptimizer.log.isDebugEnabled())
ASTStaticJoinOptimizer.log.debug("found a new min: "
+ joinCardinality);
minJoinCardinality = joinCardinality;
minOtherTailCardinality = t2Cardinality;
minT2 = t2;
} else if (joinCardinality == minJoinCardinality) {
if (t2Cardinality < minOtherTailCardinality) {
if (ASTStaticJoinOptimizer.log.isDebugEnabled())
ASTStaticJoinOptimizer.log.debug("found a new min: "
+ joinCardinality);
minJoinCardinality = joinCardinality;
minOtherTailCardinality = t2Cardinality;
minT2 = t2;
}
}
}
// the join variables is the union of the join dimensions' variables
Set<String> vars = new HashSet<String>();
vars.addAll(t1.getVars());
vars.addAll(minT2.getVars());
return new Join(t1, minT2, minJoinCardinality, vars);
}
private Tail getTail(int tailIndex) {
if (tail[tailIndex] == null) {
tail[tailIndex] = new Tail(tailIndex, rangeCount(tailIndex),
getVars(tailIndex));
}
return tail[tailIndex];
}
/**
* Similar to {@link #getFirstJoin()}, but we have one join dimension
* already calculated.
*
* @param d1
* the first join dimension
* @return the new join with the lowest cardinality from the remaining tails
*/
private StaticOptimizer.Join getNextJoin(IJoinDimension d1) {
if (ASTStaticJoinOptimizer.log.isDebugEnabled()) {
ASTStaticJoinOptimizer.log.debug("evaluating next join");
}
long minJoinCardinality = Long.MAX_VALUE;
long minTailCardinality = Long.MAX_VALUE;
Tail minTail = null;
for (int i = 0; i < arity; i++) {
// only check unused tails
if (used[i]) {
continue;
}
Tail tail = getTail(i);
long tailCardinality = cardinality(i);
long joinCardinality = computeJoinCardinality(d1, tail);
if (ASTStaticJoinOptimizer.log.isDebugEnabled())
ASTStaticJoinOptimizer.log.debug("evaluating "
+ d1.toJoinString() + " X " + i + ": cardinality= "
+ joinCardinality);
if (joinCardinality < minJoinCardinality) {
if (ASTStaticJoinOptimizer.log.isDebugEnabled())
ASTStaticJoinOptimizer.log.debug("found a new min: "
+ joinCardinality);
minJoinCardinality = joinCardinality;
minTailCardinality = tailCardinality;
minTail = tail;
} else if (joinCardinality == minJoinCardinality) {
if (tailCardinality < minTailCardinality) {
if (ASTStaticJoinOptimizer.log.isDebugEnabled())
ASTStaticJoinOptimizer.log.debug("found a new min: "
+ joinCardinality);
minJoinCardinality = joinCardinality;
minTailCardinality = tailCardinality;
minTail = tail;
}
}
}
/*
* If we are at the "no shared variables" tails, the first thing we do
* is look to the ancestry for the next tail.
*/
if (minJoinCardinality == NO_SHARED_VARS) {
final int i = getNextTailThatSharesVarsWithAncestry();
if (i >= 0) {
final long tailCardinality = cardinality(i);
minJoinCardinality = tailCardinality;
minTail = getTail(i);
if (ASTStaticJoinOptimizer.log.isDebugEnabled())
ASTStaticJoinOptimizer.log.debug("found a new min: "
+ tailCardinality);
}
}
/*
* If we are still at the "no shared variables" state, then simply order
* by range count and choose the min.
*/
if (minJoinCardinality == NO_SHARED_VARS) {
minJoinCardinality = Long.MAX_VALUE;
for (int i = 0; i < arity; i++) {
// only check unused tails
if (used[i]) {
continue;
}
Tail tail = getTail(i);
long tailCardinality = cardinality(i);
if (tailCardinality < minJoinCardinality) {
if (ASTStaticJoinOptimizer.log.isDebugEnabled())
ASTStaticJoinOptimizer.log.debug("found a new min: "
+ tailCardinality);
minJoinCardinality = tailCardinality;
minTail = tail;
}
}
}
// the join variables is the union of the join dimensions' variables
Set<String> vars = new HashSet<String>();
vars.addAll(d1.getVars());
vars.addAll(minTail.getVars());
return new Join(d1, minTail, minJoinCardinality, vars);
}
/**
* Return the range count for the predicate, ignoring any bindings. The
* range count for the tail predicate is cached the first time it is
* requested and returned from the cache thereafter. The range counts are
* requested using the "non-exact" range count query, so the range counts
* are actually the upper bound. However, if the upper bound is ZERO (0)
* then the range count really is ZERO (0).
*
* @param tailIndex
* The index of the predicate in the tail of the rule.
*
* @return The range count for that tail predicate.
*/
public long rangeCount(final int tailIndex) {
if (rangeCount[tailIndex] == -1L) {
final long rangeCount = (long) nodes.get(tailIndex).getEstimatedCardinality(this);
this.rangeCount[tailIndex] = rangeCount;
}
return rangeCount[tailIndex];
}
/**
* Return the cardinality of a particular tail, which is the range count if
* not optional and infinite if optional.
*/
public long cardinality(final int tailIndex) {
return rangeCount(tailIndex);
}
public String toString() {
return Arrays.toString(getOrder());
}
/**
* This is the secret sauce. There are three possibilities for computing the
* join cardinality, which we are defining as the upper-bound for solutions
* for a particular join. First, if there are no shared variables then the
* cardinality will just be the simple product of the cardinality of each
* join dimension. If there are shared variables but no unshared variables,
* then the cardinality will be the minimum cardinality from the join
* dimensions. If there are shared variables but also some unshared
* variables, then the join cardinality will be the maximum cardinality from
* each join dimension.
* <p>
* TODO: Any join involving an optional will have infinite cardinality, so
* that optionals get placed at the end.
*
* @param d1
* the first join dimension
* @param d2
* the second join dimension
* @return the join cardinality
*/
protected long computeJoinCardinality(IJoinDimension d1, IJoinDimension d2) {
// // two optionals is worse than one
// if (d1.isOptional() && d2.isOptional()) {
// return BOTH_OPTIONAL;
// }
// if (d1.isOptional() || d2.isOptional()) {
// return ONE_OPTIONAL;
// }
final boolean sharedVars = hasSharedVars(d1, d2);
final boolean unsharedVars = hasUnsharedVars(d1, d2);
final long joinCardinality;
if (sharedVars == false) {
// no shared vars - take the sum
// joinCardinality = d1.getCardinality() + d2.getCardinality();
// different approach - give preference to shared variables
joinCardinality = NO_SHARED_VARS;
} else {
if (unsharedVars == false) {
// shared vars and no unshared vars - take the min
joinCardinality = Math.min(d1.getCardinality(),
d2.getCardinality());
} else {
// shared vars and unshared vars - take the max
/*
* This modification to the join planner results in
* significantly faster queries for the bsbm benchmark (3x - 5x
* overall). It takes a more optimistic perspective on the
* intersection of two statement patterns, predicting that this
* will constraint, rather than increase, the multiplicity of
* the solutions. However, this COULD lead to pathological cases
* where the resulting join plan is WORSE than it would have
* been otherwise. For example, this change produces a 3x to 5x
* improvement in the BSBM benchmark results. However, it has a
* negative effect on LUBM Q2.
*
* Update: Ok so just to go into a little detail - yesterday's
* change means we choose the join ordering based on an
* optimistic view of the cardinality of any particular join. If
* you have two triple patterns that share variables but that
* also have unshared variables, then technically the maximum
* cardinality of the join is the maximum range count of the two
* tails. But often the true cardinality of the join is closer
* to the minimum range count than the maximum. So yesterday we
* started assigning an expected cardinality for the join of the
* minimum range count rather than the maximum. What this means
* is that a lot of the time when those joins move toward the
* front of the line the query will do a lot better, but
* occasionally (LUBM 2), the query will do much much worse
* (when the true cardinality is closer to the max range count).
*
* Today we put in an extra tie-breaker condition. We already
* had one tie-breaker - if two joins have the same expected
* cardinality we chose the one with the lower minimum range
* count. But the new tie-breaker is that if two joins have the
* same expected cardinality and minimum range count, we now
* chose the one that has the minimum range count on the other
* tail (the minimum maximum if that makes sense).
*
* 11/14/2011: The static join order optimizer should consider
* the swing in stakes when choosing between either the MIN or
* the MAX of the cardinality of two join dimensions in order to
* decide which join to schedule next. Historically it took the
* MAX, but there are counter examples to that decision such as
* LUBM Q2. Subsequently it was modified to take the MIN, but
* BSBM BI Q1 is a counter example for that.
*
* Modify the static optimizer to consider the swing in stakes
* between the choice of MAX versus MIN. I believe that this
* boils down to something like "If an incorrect guess of MIN
* would cause us to suffer a very bad MAX, then choose based on
* the MAX to avoid paying that penalty."
*/
joinCardinality = (long) ((long) (optimistic * Math.min(
d1.getCardinality(), d2.getCardinality())) + ((1.0d - optimistic) * Math
.max(d1.getCardinality(), d2.getCardinality())));
}
}
return joinCardinality;
}
/**
* Get the named variables for a given tail. Is there a better way to do
* this?
*
* @param tail
* the tail
* @return the named variables
*/
protected Set<String> getVars(int tail) {
final IReorderableNode node = nodes.get(tail);
// if (ASTStaticJoinOptimizer.log.isDebugEnabled()) {
// ASTStaticJoinOptimizer.log.debug(node);
// }
final Set<IVariable<?>> vars = new LinkedHashSet<IVariable<?>>();
/*
* Changed recursive to true so that we get the right produced
* bindings out of UnionNodes so that they can be reordered
* correctly.
*/
sa.getDefinitelyProducedBindings(node, vars, true);
final Set<String> varNames = new LinkedHashSet<String>();
for (IVariable<?> v : vars)
varNames.add(v.getName());
return varNames;
}
/**
* Look for shared variables.
*
* @param d1
* the first join dimension
* @param d2
* the second join dimension
* @return true if there are shared variables, false otherwise
*/
protected boolean hasSharedVars(IJoinDimension d1, IJoinDimension d2) {
for (String var : d1.getVars()) {
if (d2.getVars().contains(var)) {
return true;
}
}
return false;
}
/**
* Check to see if the specified tail shares any variables with the
* ancestry.
*/
protected boolean sharesVarsWithAncestry(final int tail) {
final Set<IVariable<?>> tailVars = sa
.getDefinitelyProducedBindings(nodes.get(tail),
new LinkedHashSet<IVariable<?>>(), true/* recursive */);
return !Collections.disjoint(ancestryVars, tailVars);
}
/**
* Get the next tail (unused, non-optional) that shares a var with the
* ancestry. If there are many, choose the one with the lowest cardinality.
* Return -1 if none are found.
*/
protected int getNextTailThatSharesVarsWithAncestry() {
int nextTail = -1;
long minCardinality = Long.MAX_VALUE;
// give preferential treatment to a tail that shares variables with the
// ancestry. collect all of them up and then choose the one
// that has the lowest cardinality
for (int i = 0; i < arity; i++) {
// only check unused tails
if (used[i]) {
continue;
}
if (ASTStaticJoinOptimizer.log.isDebugEnabled()) {
ASTStaticJoinOptimizer.log.debug("considering tail: "
+ nodes.get(i));
}
if (ASTStaticJoinOptimizer.log.isDebugEnabled()) {
ASTStaticJoinOptimizer.log.debug("vars: "
+ Arrays.toString(getVars(i).toArray()));
}
if (sharesVarsWithAncestry(i)) {
/*
* We have a shared var with the ancestry.
*/
final long tailCardinality = cardinality(i);
if (tailCardinality < minCardinality) {
nextTail = i;
minCardinality = tailCardinality;
}
}
}
return nextTail;
}
/**
* Look for unshared variables.
*
* @param d1
* the first join dimension
* @param d2
* the second join dimension
* @return true if there are unshared variables, false otherwise
*/
protected boolean hasUnsharedVars(IJoinDimension d1, IJoinDimension d2) {
for (String var : d1.getVars()) {
if (d2.getVars().contains(var) == false) {
return true;
}
}
for (String var : d2.getVars()) {
if (d1.getVars().contains(var) == false) {
return true;
}
}
return false;
}
/**
* A join dimension can be either a tail, or a previous join. Either way we
* need to know its cardinality, its variables, and its tails.
*/
private interface IJoinDimension {
long getCardinality();
Set<String> getVars();
String toJoinString();
// boolean isOptional();
}
/**
* A join implementation of a join dimension. The join can consist of two
* tails, or one tail and another join. Theoretically it could be two joins
* as well, which might be a future optimization worth thinking about.
*/
private static class Join implements IJoinDimension {
private final IJoinDimension d1, d2;
private final long cardinality;
private final Set<String> vars;
public Join(IJoinDimension d1, IJoinDimension d2, long cardinality,
Set<String> vars) {
this.d1 = d1;
this.d2 = d2;
this.cardinality = cardinality;
this.vars = vars;
}
public IJoinDimension getD1() {
return d1;
}
public IJoinDimension getD2() {
return d2;
}
public Set<String> getVars() {
return vars;
}
public long getCardinality() {
return cardinality;
}
public String toJoinString() {
return d1.toJoinString() + " X " + d2.toJoinString();
}
}
/**
* A tail implementation of a join dimension.
*/
private class Tail implements IJoinDimension {
private final int tailIndex;
private final long cardinality;
private final Set<String> vars;
public Tail(int tail, long cardinality, Set<String> vars) {
this.tailIndex = tail;
this.cardinality = cardinality;
this.vars = vars;
}
public int getTailIndex() {
return tailIndex;
}
public long getCardinality() {
return cardinality;
}
public Set<String> getVars() {
return vars;
}
// public boolean isOptional() {
// return nodes.get(tail).isOptional();
// }
public String toJoinString() {
return String.valueOf(tailIndex);
}
}
public long getCardinality() {
return cardinality;
}
}