/*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is the Kowari Metadata Store.
*
* The Initial Developer of the Original Code is Plugged In Software Pty
* Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
* created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
* Plugged In Software Pty Ltd. All Rights Reserved.
*
* Contributor(s):
* DefinablePrefixAnnotation contributed by Netymon Pty Ltd on behalf of
* The Australian Commonwealth Government under contract 4500507038.
*
* [NOTE: The text of this Exhibit A may differ slightly from the text
* of the notices in the Source Code files of the Original Code. You
* should use the text of this Exhibit A rather than the text found in the
* Original Code Source Code for Your Modifications.]
*
*/
package org.mulgara.store.tuples;
// Java 2 standard packages
import java.util.*;
// Log4j
import org.apache.log4j.*;
// Local packages
import org.mulgara.query.*;
import org.mulgara.query.filter.Filter;
import org.mulgara.query.filter.Inverse;
import org.mulgara.query.filter.RDFTerm;
import org.mulgara.resolver.spi.*;
/**
* TQL answer. An answer is a set of solutions, where a solution is a mapping of
* {@link Variable}s to {@link Value}s.
*
* @created 2003-01-30
* @author <a href="http://staff.pisoftware.com/raboczi">Simon Raboczi</a>
* @company <A href="mailto:info@PIsoftware.com">Plugged In Software</A>
* @copyright © 2003 <A href="http://www.PIsoftware.com/">Plugged In Software Pty Ltd</A>
* @licence <a href="{@docRoot}/../../LICENCE">Mozilla Public License v1.1</a>
*/
public abstract class TuplesOperations {
/** Logger. This is named after the class. */
private final static Logger logger = Logger.getLogger(TuplesOperations.class.getName());
/** The factory used to generate new {@link Tuples} instances. */
private static TuplesFactory tuplesFactory = TuplesFactory.newInstance();
/**
* Create a proposition which is always false. This is the additive identity
* of the relational algebra: appending the empty value to a tuples leaves it
* unchanged. By duality, it's also the multiplicative zero: joining the empty
* value to a tuples generates an empty result.
*
* @return the expression which is never satisfied, no matter what value any
* variable takes
*/
public static StoreTuples empty() {
return EmptyTuples.getInstance();
}
/**
* Create a proposition which is always true.
*
* This is the multiplicative
* identity of the relational algebra: joining the unconstrained value to a
* tuples leaves it unchanged. By duality, it's also the additive zero:
* appending the unconstrained value to a tuples generates an unconstrained
* result.
*
* @return the expression which is always true, for any value of any variables
*/
public static StoreTuples unconstrained() {
return UnconstrainedTuples.getInstance();
}
/**
* Assign a value to a variable, representing the binding as a tuples with one
* row and one column.
*
* @param variable The variable to bind
* @param value The value in local space to bind the variable to
* @return A Tuples with the variable bound to the given value.
*/
public static Tuples assign(Variable variable, long value) {
return (value == Tuples.UNBOUND) ? (Tuples)unconstrained()
: new Assignment(variable, value);
}
/**
* This is approximately a disjunction.
*
* @param lhs The first Tuples to be used in the result.
* @param rhs The second Tuples to be used in the result.
* @return A new Tuples containing all of the bindings from the lhs and rhs parameters.
* @throws TuplesException if the append fails
*/
public static Tuples append(Tuples lhs, Tuples rhs) throws TuplesException {
return append(Arrays.asList(new Tuples[] { lhs, rhs }));
}
/**
* Creates a new Tuples which contains all of the bindings of the Tuples in the argument list.
* If any tuples contains variables not found in the other Tuples, then those values will
* remain unbound for the bindings of those Tuples missing those variables.
* @param args A list of the Tuples to be used in the result.
* @return A Tuples containing all of the bindings from the args list.
* @throws TuplesException If the data could not be appended.
*/
public static Tuples append(List<? extends Tuples> args) throws TuplesException {
if (logger.isDebugEnabled()) logger.debug("Appending " + args);
HashSet<Variable> variableSet = new HashSet<Variable>();
List<Variable> variables = new ArrayList<Variable>();
boolean unionCompat = true;
Variable[] leftVars = null;
List<Tuples> operands = new ArrayList<Tuples>();
Iterator<? extends Tuples> i = args.iterator();
while (i.hasNext()) {
Tuples operand = i.next();
if (operand.isUnconstrained()) {
closeOperands(operands);
if (logger.isDebugEnabled()) logger.debug("Returning unconstrained from append.");
return unconstrained();
} else if (operand.isEmpty()) {
if (logger.isDebugEnabled()) logger.debug("Ignoring empty append operand " + operand);
continue;
}
operands.add((Tuples)operand.clone());
Variable[] vars = operand.getVariables();
if (leftVars == null) leftVars = vars;
else unionCompat = unionCompat && Arrays.equals(leftVars, vars);
for (int j = 0; j < vars.length; j++) {
if (!variableSet.contains(vars[j])) {
variableSet.add(vars[j]);
variables.add(vars[j]);
}
}
}
if (logger.isDebugEnabled()) logger.debug("Operands after append-unification: " + operands);
if (operands.isEmpty()) {
if (logger.isDebugEnabled()) logger.debug("Returning empty from append.");
return empty();
}
if (operands.size() == 1) {
if (logger.isDebugEnabled()) logger.debug("Returning singleton from append.");
return operands.get(0);
}
if (unionCompat) {
if (logger.isDebugEnabled()) {
logger.debug("Columns are union-compatible");
logger.debug("Returning OrderedAppend from Union compatible append.");
}
Tuples result = new OrderedAppend(operands.toArray(new Tuples[operands.size()]));
closeOperands(operands);
return result;
} else {
List<Tuples> projected = new ArrayList<Tuples>();
for (Tuples operand: operands) {
Tuples proj = project(operand, variables, true);
Tuples sorted = sort(proj);
projected.add(sorted);
proj.close();
operand.close();
}
if (logger.isDebugEnabled()) logger.debug("Returning OrderedAppend from Non-Union compatible append.");
Tuples result = new OrderedAppend(projected.toArray(new Tuples[projected.size()]));
closeOperands(projected);
return result;
}
}
/**
* Creates a new Tuples which contains all of the bindings of the Tuples in the argument list.
* If any tuples contains variables not found in the other Tuples, then those values will
* remain unbound for the bindings of those Tuples missing those variables.
* @param args A list of the Tuples to be used in the result.
* @return A Tuples containing all of the bindings from the args list.
* @throws TuplesException If the data could not be appended.
*/
public static Tuples unorderedAppend(List<? extends Tuples> args) throws TuplesException {
if (logger.isDebugEnabled()) logger.debug("Appending " + args);
HashSet<Variable> variableSet = new HashSet<Variable>();
List<Variable> variables = new ArrayList<Variable>();
boolean unionCompat = true;
Variable[] leftVars = null;
List<Tuples> operands = new ArrayList<Tuples>();
Iterator<? extends Tuples> i = args.iterator();
while (i.hasNext()) {
Tuples operand = i.next();
if (operand.isUnconstrained()) {
closeOperands(operands);
if (logger.isDebugEnabled()) logger.debug("Returning unconstrained from append.");
return unconstrained();
} else if (operand.isEmpty()) {
if (logger.isDebugEnabled()) logger.debug("Ignoring append operand " + operand + " with rowcount = " + operand.getRowCount());
continue;
}
operands.add((Tuples)operand.clone());
Variable[] vars = operand.getVariables();
if (leftVars == null) leftVars = vars;
else unionCompat = unionCompat && Arrays.equals(leftVars, vars);
for (int j = 0; j < vars.length; j++) {
if (!variableSet.contains(vars[j])) {
variableSet.add(vars[j]);
variables.add(vars[j]);
}
}
}
if (logger.isDebugEnabled()) logger.debug("Operands after unordered-append-unification: " + operands);
if (operands.isEmpty()) {
if (logger.isDebugEnabled()) logger.debug("Returning empty from unorderedAppend.");
return empty();
}
if (operands.size() == 1) {
if (logger.isDebugEnabled()) logger.debug("Returning singleton from unorderedAppend.");
return operands.get(0);
}
if (unionCompat) {
if (logger.isDebugEnabled()) {
logger.debug("Columns are union-compatible");
logger.debug("Returning OrderedAppend from Union compatible append.");
}
Tuples result = new UnorderedAppend(operands.toArray(new Tuples[operands.size()]));
closeOperands(operands);
return result;
} else {
List<Tuples> projected = new ArrayList<Tuples>();
for (Tuples operand: operands) {
Tuples proj = project(operand, variables, false);
projected.add(proj);
operand.close();
}
if (logger.isDebugEnabled()) logger.debug("Returning OrderedAppend from Non-Union compatible unorderedAppend.");
Tuples result = new UnorderedAppend(projected.toArray(new Tuples[projected.size()]));
closeOperands(projected);
return result;
}
}
/**
* Creates a new Tuples which contains all of the bindings of the Tuples in the argument list.
* All tuples must have an identical pattern, and come directly from the store.
* @param args A list of the Tuples directly backed by the store to be used in the result.
* @return A StoreTuples containing all of the bindings from the args list.
* @throws TuplesException If the data could not be appended.
*/
public static StoreTuples appendCompatible(List<StoreTuples> args) throws TuplesException {
if (logger.isDebugEnabled()) logger.debug("Compatible append of " + args);
Variable[] vars = null;
List<StoreTuples> operands = new ArrayList<StoreTuples>();
for (StoreTuples arg: args) {
// test for empty or unconstrained data
if (arg.isUnconstrained()) {
closeOperands(operands);
if (logger.isDebugEnabled()) logger.debug("Returning unconstrained from append.");
return unconstrained();
} else if (arg.isEmpty()) {
if (logger.isDebugEnabled()) logger.debug("Ignoring empty append operand " + arg);
continue;
}
operands.add((StoreTuples)arg.clone());
// test for tuples compatibility
if (vars == null) vars = arg.getVariables();
else if (!Arrays.equals(vars, arg.getVariables())) {
throw new IllegalArgumentException("Incompatible arguments to appendCompatible");
}
}
if (logger.isDebugEnabled()) logger.debug("Operands after compatible-append-unification: " + operands);
if (operands.isEmpty()) {
if (logger.isDebugEnabled()) logger.debug("Returning empty from append.");
return empty();
}
if (operands.size() == 1) {
if (logger.isDebugEnabled()) logger.debug("Returning singleton from append.");
return operands.get(0);
}
StoreTuples result = new OrderedStoreAppend(operands.toArray(new StoreTuples[operands.size()]));
closeOperands(operands);
return result;
}
/**
* Convenience method for doing a binary {@link #join(List)}.
* @param lhs The first argument to be joined.
* @param rhs The second argument to be joined.
* @return A Tuples containing the conjunction of lhs and rhs.
*/
public static Tuples join(Tuples lhs, Tuples rhs) throws TuplesException {
return join(Arrays.asList(new Tuples[] { lhs, rhs }));
}
/**
* This is approximately a conjunction. Returns a set of bindings containing all the variables
* from both parameters. The only bindings returned are those where all the matching variables
* in each argument are bound to the same values.
* @param args The Tuples to be joined together.
* @return A Tuples containing the conjunction of all the arguments.
*/
public static Tuples join(List<? extends Tuples> args) throws TuplesException {
try {
if (logger.isDebugEnabled()) logger.debug(printArgs("Flattening args:", args));
List<Tuples> operands = flattenOperands(args);
if (logger.isDebugEnabled()) logger.debug(printArgs("Unifying args: ", operands));
List<Tuples> unified = unifyOperands(operands);
if (logger.isDebugEnabled()) logger.debug(printArgs("Sorting args:", unified));
List<Tuples> sorted = sortOperands(unified);
if (logger.isDebugEnabled()) logger.debug(printArgs("Preparing result: ", sorted));
switch (sorted.size()) {
case 0:
if (logger.isDebugEnabled()) logger.debug("Short-circuit empty");
return empty();
case 1:
if (logger.isDebugEnabled()) logger.debug("Short-circuit singleton");
return sorted.get(0);
default:
if (logger.isDebugEnabled()) logger.debug("return UnboundJoin");
Tuples result = new UnboundJoin(sorted.toArray(new Tuples[sorted.size()]));
closeOperands(sorted);
return result;
}
} catch (RuntimeException re) {
logger.warn("RuntimeException thrown in join", re);
throw re;
} catch (TuplesException te) {
logger.warn("TuplesException thrown in join", te);
throw te;
}
}
/**
* This is approximately a subtraction. The subtrahend is matched against the minuend in the same
* way as a conjunction, and the matching lines removed from the minuend. The remaining lines in
* the minuend are the result. Parameters are not closed during this operation.
* @param minuend The tuples to subtract from.
* @param subtrahend The tuples to match against the minuend for removal.
* @return The contents from the minuend, excluding those rows which match against the subtrahend.
* @throws TuplesException If there are no matching variables between the minuend and subtrahend.
*/
public static Tuples subtract(Tuples minuend, Tuples subtrahend) throws TuplesException {
try {
if (logger.isDebugEnabled()) logger.debug("subtracting " + subtrahend + " from " + minuend);
// get the matching columns
Set<Variable> matchingVars = getMatchingVars(minuend, subtrahend);
if (matchingVars.isEmpty()) {
// check to see if the subtrahend is empty
if (subtrahend.getVariables().length == 0 || minuend.getVariables().length == 0) {
return (Tuples)minuend.clone();
}
throw new TuplesException("Unable to subtract: no common variables.");
}
// double check that the variables are not equal
if (subtrahend.isEmpty() || minuend.isEmpty()) {
logger.debug("Found an empty Tuples with bound variables");
return (Tuples)minuend.clone();
}
// reorder the subtrahend as necessary
Tuples sortedSubtrahend;
// check if there are variables which should not be considered when sorting
if (checkForExtraVariables(subtrahend, matchingVars)) {
// yes, there are extra variables
logger.debug("removing extra variables not needed in subtraction");
// project out the extra variables (sorting happens in projection)
sortedSubtrahend = project(subtrahend, new ArrayList<Variable>(matchingVars), true);
} else {
// there were no extra variables in the subtrahend
logger.debug("All variables needed");
// check if the data is already sorted
sortedSubtrahend = (null == subtrahend.getComparator()) ? sort(subtrahend) : subtrahend;
}
// return the difference
try {
return new Difference(minuend, sortedSubtrahend);
} finally {
if (sortedSubtrahend != subtrahend) sortedSubtrahend.close();
}
} catch (RuntimeException re) {
logger.warn("RuntimeException thrown in subtraction", re);
throw re;
} catch (TuplesException te) {
logger.warn("TuplesException thrown in subtraction", te);
throw te;
}
}
/**
* Does a left-outer-join between two tuples. Parameters are not closed during this
* operation.
* @param standard The standard pattern that appears in all results.
* @param optional The optional pattern that may or may not be bound in each result
* @param context The query evaluation context to evaluate any nested fitlers in.
* @return A Tuples containing variables from both parameters. All variables from
* <em>standard</em> will be bound at all times. Variables from <em>optional</em>
* may or may not be bound.
*/
public static Tuples optionalJoin(Tuples standard, Tuples optional, Filter filter, QueryEvaluationContext context) throws TuplesException {
try {
if (logger.isDebugEnabled()) logger.debug("optional join of " + standard + " optional { " + optional + " }");
// get the matching columns
Set<Variable> matchingVars = getMatchingVars(standard, optional);
// check for empty parameters
if (logger.isDebugEnabled() && standard.getRowCardinality() == Cursor.ZERO) logger.debug("Nothing to the left of an optional");
// Checks if there is nothing on the RHS of the optional join
if (optional.isEmpty() || optional.getRowCardinality() == 0) {
// need to return standard, projected out to the extra variables
if (optional.getNumberOfVariables() == 0) {
// This may be empty due to having zero rows (since the columns are truncated in this case)
return (Tuples)standard.clone();
} else {
return project(standard, optional.getVariables());
}
}
// If the Optional clause does not have matching variables with the LHS
// then this is the equivalent to a normal join as a cartesian product
if (matchingVars.isEmpty()) {
// get the cartesian product
Tuples filteredProduct = filter(join(standard, optional), filter, context);
Tuples invertedStd;
if (intersects(optional.getVariables(), filter.getVariables())) {
invertedStd = new LeftFiltered(standard, optional, filter, context);
} else {
// this is correct, since 'optional' is independent of the filter
invertedStd = filter(standard, new Inverse(filter), context);
}
return append(filteredProduct, invertedStd);
}
// check if there are variables which should not be considered when sorting
if (!checkForExtraVariables(optional, matchingVars)) {
// there were no extra variables in the optional
logger.debug("All variables needed");
// if all variables match, then the result is the same as the LHS
return (Tuples)standard.clone();
}
// yes, there are extra variables
if (logger.isDebugEnabled()) logger.debug("sorting on the common variables: " + matchingVars);
// re-sort the optional according to the matching variables
// reorder the optional as necessary
Tuples sortedOptional = reSort(optional, new ArrayList<Variable>(matchingVars));
// return the difference
try {
return new LeftJoin(standard, sortedOptional, filter, context);
} finally {
sortedOptional.close();
}
} catch (RuntimeException re) {
logger.warn("RuntimeException thrown in optional", re);
throw re;
} catch (TuplesException te) {
logger.warn("TuplesException thrown in optional", te);
throw te;
}
}
/**
* Convenience method to see if an array and a collection share any elements in common.
* @param <T> The type of elements in both containers.
* @param lhs An array of elements.
* @param rhs A collection of elements.
* @return <code>true</code> iff there is 1 or more elements present in both containers.
*/
private static final <T> boolean intersects(T[] lhs, Collection<T> rhs) {
for (T elt: lhs) if (rhs.contains(elt)) return true;
return false;
}
/**
* Flattens any nested joins to allow polyadic join operations.
* @param operands A list of Tuples which may in turn be nested operations.
* @return A flattened list of flattened Tuples.
*/
private static List<Tuples> flattenOperands(List<? extends Tuples> operands) throws TuplesException {
List<Tuples> result = new ArrayList<Tuples>();
for (Tuples operand: operands) result.addAll(flattenOperand(operand));
return result;
}
/**
* Flattens a Tuples into a list of Tuples. This means that joins will be expanded into their components.
* @param operand The Tuples to flatten
* @return A flattened list.
* @throws TuplesException If the Tuples could not be accessed.
*/
private static List<Tuples> flattenOperand(Tuples operand) throws TuplesException {
List<Tuples> operands = new ArrayList<Tuples>();
if (operand instanceof UnboundJoin) {
for (Tuples op: operand.getOperands()) operands.add((Tuples)op.clone());
} else {
operands.add((Tuples)operand.clone());
}
return operands;
}
/**
* Unifies bound variables in operands.
* Prepends a LiteralTuples containing constrained variable bindings.
* If any operand returns 0-rows returns EmptyTuples.
* @param operands List of Tuples to unify. Consumed by this function.
* @return List of operands remaining after full unification.
*/
private static List<Tuples> unifyOperands(List<Tuples> operands) throws TuplesException {
Map<Variable,Long> bindings = new HashMap<Variable,Long>();
if (!bindSingleRowOperands(bindings, operands)) {
closeOperands(operands);
logger.debug("Returning empty due to shortcircuiting initial bindSingleRowOperands");
return new ArrayList<Tuples>(Collections.singletonList(empty()));
}
List<Tuples> result = extractNonReresolvableTuples(operands);
// operands is now effectively a List<ReresolvableResolution>
List<ReresolvableResolution> reresolved;
do {
reresolved = resolveNewlyBoundFreeNames(operands, bindings);
if (!bindSingleRowOperands(bindings, reresolved)) {
closeOperands(operands);
closeOperands(result);
closeOperands(reresolved);
logger.debug("Returning empty due to shortcircuiting progressive bindSingleRowOperands");
// wrap in an Array list to convert the generic type
return new ArrayList<Tuples>(Collections.singletonList(empty()));
}
operands.addAll(reresolved);
} while (reresolved.size() != 0);
result.addAll(operands);
result.add(createTuplesFromBindings(bindings));
return result;
}
/**
* Extracts all bound names from workingSet into bindings.
*/
private static boolean bindSingleRowOperands(Map<Variable,Long> bindings, List<? extends Tuples> workingSet) throws TuplesException {
Iterator<? extends Tuples> iter = workingSet.iterator();
while (iter.hasNext()) {
Tuples tuples = iter.next();
if (tuples.isEmpty()) return false;
switch ((int)tuples.getRowCardinality()) {
case Cursor.ZERO:
return false;
case Cursor.ONE:
Variable[] vars = tuples.getVariables();
tuples.beforeFirst();
if (tuples.next()) {
for (int i = 0; i < vars.length; i++) {
Long value = new Long(tuples.getColumnValue(tuples.getColumnIndex(vars[i])));
Long oldValue = (Long)bindings.put(vars[i], value);
if (oldValue != null && !value.equals(oldValue)) return false;
}
} else {
// This should not happen.
// If the call to getRowCardinality returns > 0 then beforeFirst,
// and then next should return true too.
logger.error("No rows but getRowCardinality returned Cursor.ONE: (class=" +
tuples.getClass().getName() + ") " + tuples.toString());
throw new AssertionError("No rows but getRowCardinality returned Cursor.ONE");
}
iter.remove();
tuples.close();
break;
case Cursor.MANY:
continue;
default:
throw new TuplesException("getRowCardinality() returned other than ZERO, ONE, or MANY");
}
}
return true;
}
private static List<Tuples> extractNonReresolvableTuples(List<Tuples> workingSet) throws TuplesException {
List<Tuples> nonReresolvable = new ArrayList<Tuples>(workingSet.size());
Iterator<Tuples> iter = workingSet.iterator();
while (iter.hasNext()) {
Tuples operand = iter.next();
if (!(operand instanceof ReresolvableResolution)) {
nonReresolvable.add(operand);
iter.remove();
}
}
return nonReresolvable;
}
/**
* Compares the free names in the working-set against the current bindings
* and resolves any constraints found with bindings.
* @param workingSet A set of ReresolvableResolution, though it will be represented as a set of Tuples
* @return List of ConstrainedTuples resulting from any resolutions required.
*/
private static List<ReresolvableResolution> resolveNewlyBoundFreeNames(List<Tuples> workingSet, Map<Variable,Long> bindings) throws TuplesException {
List<ReresolvableResolution> reresolved = new ArrayList<ReresolvableResolution>();
Iterator<Tuples> iter = workingSet.iterator();
while (iter.hasNext()) {
ReresolvableResolution tuples = (ReresolvableResolution)iter.next();
ReresolvableResolution updated = tuples.reresolve(bindings);
if (updated != null) {
reresolved.add(updated);
tuples.close();
iter.remove();
}
}
return reresolved;
}
private static Tuples createTuplesFromBindings(Map<Variable,Long> bindings) throws TuplesException {
if (bindings.isEmpty()) return unconstrained();
Set<Variable> keys = bindings.keySet();
Variable[] vars = keys.toArray(new Variable[keys.size()]);
long[] values = new long[vars.length];
for (int i = 0; i < values.length; i++) values[i] = bindings.get(vars[i]);
LiteralTuples tuples = new LiteralTuples(vars);
tuples.appendTuple(values);
return tuples;
}
/**
* Calls close on all tuples in operands list.
*/
private static void closeOperands(List<? extends Tuples> operands) throws TuplesException {
for (Tuples op: operands) op.close();
}
/**
* Sorts operands by weighted row count in-place.
* Each row count is discounted by the number of free-names bound to its left.
* Weighted-row-count = row-count ^ (free-after-binding / free-before-binding)
*/
private static List<Tuples> sortOperands(List<Tuples> operands) throws TuplesException {
Set<Variable> boundVars = new HashSet<Variable>();
List<Tuples> result = new ArrayList<Tuples>();
while (!operands.isEmpty()) {
Tuples bestTuples = removeBestTuples(operands, boundVars);
DefinablePrefixAnnotation definable =
(DefinablePrefixAnnotation)bestTuples.getAnnotation(DefinablePrefixAnnotation.class);
if (definable != null) definable.definePrefix(boundVars);
// Add all variables that don't contain UNBOUND to boundVars set.
// Note: the inefficiency this introduces for distributed results
// can only be eliminated by propagating isColumnEverUnbound through Answer.
// Note: this is required to ensure that a subsequent operand will not
// rely on this variable when selecting an index as if it is UNBOUND in a
// left-operand it becomes unprefixed.
Variable[] vars = bestTuples.getVariables();
for (int i = 0; i < vars.length; i++) {
if (!bestTuples.isColumnEverUnbound(i)) boundVars.add(vars[i]);
}
result.add(bestTuples);
}
return result;
}
// FIXME: Method too long. Refactor.
private static Tuples removeBestTuples(List<Tuples> operands, Set<Variable> boundVars) throws TuplesException {
ListIterator<Tuples> iter = operands.listIterator();
Tuples minTuples = null;
double minRowCount = Double.MAX_VALUE;
int minIndex = -1;
assert(iter.hasNext());
logger.debug("removeBestTuples");
while (iter.hasNext()) {
Tuples tuples = (Tuples)iter.next();
if (logger.isDebugEnabled()) logger.debug("tuples: " + tuplesSummary(tuples));
// Check tuples meets any mandatory left bindings.
MandatoryBindingAnnotation bindingRequirements =
(MandatoryBindingAnnotation)tuples.getAnnotation(MandatoryBindingAnnotation.class);
if (bindingRequirements != null && !bindingRequirements.meetsRequirement(boundVars)) continue;
Variable[] vars = tuples.getVariables();
int numLeftBindings = calculateNumberOfLeftBindings(tuples, boundVars);
if (logger.isDebugEnabled()) logger.debug("numLeftBindings: " + numLeftBindings);
// Basic formula assumes uniform distribution. So number of rows is the
// product of the length of each variable taken seperately, hence expected
// row count for n from m bindings is expected(0 from m)**((m - n) / m).
// This fails to consider the effect on performance of worst case so we
// incorporate weighted terms to allow for possible skew on each column.
// We assume a reducing probability of compounded failure so weight each
// term by 100**term (0-indexed), this is a fudge factor that needs proper
// analysis.
double weightedRowCount = 0.0;
for (int weight = 0; weight < numLeftBindings + 1; weight++) {
double term = vars.length > 0
? Math.pow(tuples.getRowExpectedCount(), (double)(vars.length - (numLeftBindings - weight)) / vars.length)
: tuples.getRowExpectedCount();
weightedRowCount += term / Math.pow(100.0, weight);
}
if (logger.isDebugEnabled()) {
logger.debug("weightedRowCount: " + weightedRowCount);
logger.debug("minRowCount: " + minRowCount);
}
if (weightedRowCount < minRowCount) {
minRowCount = weightedRowCount;
minTuples = tuples;
minIndex = iter.nextIndex() - 1;
}
}
if (minTuples == null) {
if (logger.isDebugEnabled()) {
logger.debug("Unable to meet ordering constraints with bindings: " + boundVars);
for (Tuples op: operands) logger.debug(" Operand: " + tuplesSummary(op));
}
throw new TuplesException("Unable to meet ordering constraints");
}
if (logger.isDebugEnabled()) logger.debug("Selected: " + tuplesSummary(minTuples) + " with weightedRowCount: " + minRowCount);
operands.remove(minIndex);
return minTuples;
}
private static int calculateNumberOfLeftBindings(Tuples tuples, Set<Variable> boundVars) throws TuplesException {
int numLeftBindings = 0;
Variable[] vars = tuples.getVariables();
// If the tuples supports defining a prefix then
if (tuples.getAnnotation(DefinablePrefixAnnotation.class) != null) {
for (int i = 0; i < vars.length; i++) {
if (boundVars.contains(vars[i])) numLeftBindings++;
}
} else {
for (int i = 0; i < vars.length; i++) {
if (boundVars.contains(vars[i])) numLeftBindings++;
else break;
}
}
return numLeftBindings;
}
/**
* Relational projection. This eliminates any columns not in the specified
* list, and eliminates any duplicate rows that result.
*
* @param tuples The original tuples to project
* @param variableList the list of {@link Variable}s to project on
* @param distinct indicates that duplicate rows should be removed
* @return The tuples, with only the required columns, and possibly with duplicates removed
* @throws TuplesException if the projection operation fails
*/
public static Tuples project(Tuples tuples, List<Variable> variableList, boolean distinct) throws TuplesException {
try {
boolean noVariables = (variableList == null) || (variableList.size() == 0);
if (tuples.isUnconstrained() || (noVariables && tuples.getRowCardinality() != Cursor.ZERO)) {
return unconstrained();
} else if (tuples.isEmpty()) {
return empty();
}
if (logger.isDebugEnabled()) logger.debug("Projecting to " + variableList);
// Perform the actual projection
Tuples originalTuples = tuples;
tuples = new UnorderedProjection(tuples, variableList);
assert tuples != originalTuples;
// Test whether creating an unordered projects has removed variables.
if (tuples.isUnconstrained()) {
tuples.close();
return TuplesOperations.unconstrained();
}
// Eliminate any duplicates
if (distinct) {
Tuples oldTuples = tuples;
tuples = removeDuplicates(tuples);
assert tuples != oldTuples;
if (tuples == oldTuples) {
logger.warn("removeDuplicates does not change the underlying tuples");
} else {
oldTuples.close();
}
assert tuples.hasNoDuplicates();
}
return tuples;
} catch (TuplesException e) {
throw new TuplesException("Couldn't perform projection", e);
}
}
/**
* Project a tuples out to extra columns that will always be unbound.
* @param tuples The original tuples to expand.
* @param expansionVars The new set of variables to expand to.
* These may intersect the existing variables, but this is unexpected.
* @return A Tuples with the original bindings, plus any specified new columns that will be unbound.
*/
public static Tuples project(Tuples tuples, Variable[] expansionVars) {
if (tuples == null) throw new IllegalArgumentException("Projection on Null \"tuples\"");
if (expansionVars == null) throw new IllegalArgumentException("Projection with Null expansion variables");
// test if no expansion, and short circuit if there isn't one
if (expansionVars.length == 0) return (Tuples)tuples.clone();
// test for overlapping variables
Variable[] opVars = tuples.getVariables();
List<Variable> newVars = new ArrayList<Variable>();
for (Variable v: expansionVars) newVars.add(v);
for (Variable v: opVars) newVars.remove(v);
// test again for no expansion, and short circuit if there isn't one
if (newVars.isEmpty()) return (Tuples)tuples.clone();
return new ExpandedProjection(tuples, newVars);
}
/**
* Creates a new restriction tuples, based on a normal Tuples and a restriction predicate.
* @param tuples The tuples to restrict.
* @param pred The predicate describing the restriction.
* @return A new Tuples whose bindings only match the restriction.
* @throws TuplesException If the Tuples could not be accessed.
*/
public static Tuples restrict(Tuples tuples, RestrictPredicate pred) throws TuplesException {
return new RestrictionTuples(tuples, pred);
}
/**
* Filter a Tuples according to a {@link org.mulgara.query.filter.Filter} test.
* @param tuples The Tuples to be filtered.
* @param filter The Filter to apply to the tuples.
* @param context The context in which the Filter is to be resolved. This can go beyond
* what has already been determined for the tuples parameter.
* @return A new Tuples which is a subset of the provided Tuples.
* @throws IllegalArgumentException If tuples is <code>null</code>
*/
public static Tuples filter(Tuples tuples, Filter filter, QueryEvaluationContext context) {
// The incoming context needs to be updated for the tuples, so that clones are not inadvertantly used
return new FilteredTuples(tuples, filter, context);
}
/**
* Assign a variable to an expression, with variables coming from a provided tuples.
* @param tuples The Tuples to provide the variable context.
* @param var The variable to be bound.
* @param expr The expression to bind the variable to.
* @param context The context in which the expression is to be resolved. This can go beyond
* what has already been determined for the tuples parameter.
* @return A new Tuples which expands the provided Tuples to include the new variable.
* @throws IllegalArgumentException If tuples is <code>null</code>
*/
public static Tuples assign(Tuples tuples, Variable var, RDFTerm expr, QueryEvaluationContext context) {
return new LetTuples(tuples, var, expr, context);
}
/**
* Sort into default order, based on the columns and local node numbers.
* @param tuples the tuples to sort
* @return A new Tuples with the bindings sorted.
* @throws TuplesException if the sorting can't be accomplished
*/
public static Tuples sort(Tuples tuples) throws TuplesException {
if (tuples.getComparator() == null) {
if (tuples.isUnconstrained()) {
return TuplesOperations.unconstrained();
} else if (tuples.isEmpty()) {
tuples = empty();
} else {
if (logger.isDebugEnabled()) logger.debug("Sorting " + tuples.getRowCount() + " rows");
tuples = tuplesFactory.newTuples(tuples);
assert tuples.getComparator() != null;
}
if (logger.isDebugEnabled()) logger.debug("Sorted " + tuples.getRowCount() + " rows");
return tuples;
} else {
return (Tuples) tuples.clone();
}
}
/**
* Sort into a specified order.
*
* @param tuples the tuples to sort
* @param rowComparator the ordering
* @return A Tuples with bindings sorted according to the rowComparator.
* @throws TuplesException if the sorting can't be accomplished
*/
public static Tuples sort(Tuples tuples, RowComparator rowComparator) throws TuplesException {
if (!rowComparator.equals(tuples.getComparator())) {
tuples = tuplesFactory.newTuples(tuples, rowComparator);
if (logger.isDebugEnabled()) logger.debug("Sorted: " + tuples + " (using supplied row comparator)");
return tuples;
} else {
return (Tuples) tuples.clone();
}
}
/**
* Sort into an order given by the list of variables. The parameter is not closed, and this
* method will create and return a new tuples.
*
* @param tuples The parameter to sort. This will be not be closed.
* @param variableList the list of {@link Variable}s to sort by
* @return A {@link Tuples} that meets the sort criteria. This may be the original tuples parameter.
* @throws TuplesException if the sort operation fails
*/
public static Tuples reSort(Tuples tuples, List<Variable> variableList) throws TuplesException {
try {
// if there is nothing to sort on, then tuples meets the criteria
if ((variableList == null) || (variableList.size() == 0)) return (Tuples)tuples.clone();
// if there is nothing to sort, then just return that nothing
if (tuples.isUnconstrained()) {
if (logger.isDebugEnabled()) logger.debug("Returning Unconstrained Tuples.");
return TuplesOperations.unconstrained();
} else if (tuples.isEmpty()) {
return empty();
}
// initialise the mapping of column names to tuples columns.
int[] varMap = new int[variableList.size()];
boolean sortNeeded = false;
// iterate over the variables to do the mapping
for (int varCol = 0; varCol < variableList.size(); varCol++) {
Variable var = variableList.get(varCol);
// get the index of the variable in the tuples
int ti = tuples.getColumnIndex(var);
// check that it is within the prefix columns. If not, then sorting is needed
if (ti >= varMap.length) sortNeeded = true;
// map the tuples index of the variable to the column index
varMap[varCol] = ti;
}
if (!sortNeeded) {
if (logger.isDebugEnabled()) logger.debug("No sort needed on tuples.");
return (Tuples)tuples.clone();
}
if (logger.isDebugEnabled()) logger.debug("Sorting on " + variableList);
// append the remaining variables to the list of variables to sort on
List<Variable> fullVarList = new ArrayList<Variable>(variableList);
for (Variable v: tuples.getVariables()) {
if (!variableList.contains(v)) fullVarList.add(v);
}
assert fullVarList.containsAll(Arrays.asList(tuples.getVariables()));
// Reorder the columns - the projection here does not remove any columns
Tuples projectedTuples = new UnorderedProjection(tuples, fullVarList);
assert projectedTuples != tuples;
// Perform the actual sort
Tuples sortedTuples = tuplesFactory.newTuples(projectedTuples);
assert sortedTuples != projectedTuples;
projectedTuples.close();
return sortedTuples;
} catch (TuplesException e) {
throw new TuplesException("Couldn't perform projection", e);
}
}
/**
* Truncate a tuples to have no more than a specified number of rows. This
* method removes rows from the end of the tuples; to remove rows from the
* start of the tuples, the {@link #offset} method can be used. If the limit
* is larger than number of rows, the result is unchanged.
*
* @param tuples the instance to limit
* @param rowCount the number of leading rows to retain
* @return the truncated tuples
* @throws TuplesException If there was an error accessing the Tuples.
*/
public static Tuples limit(Tuples tuples, long rowCount) throws TuplesException {
return new LimitedTuples((Tuples) tuples.clone(), rowCount);
}
/**
* If a tuples is virtual, evaluate and store it.
*
* @param tuples the instance to materialize
* @return A set of Tuples with any virtual bindings converted into actual bindings.
* @throws TuplesException If there was an error evaluating the virtual bindings
*/
public static Tuples materialize(Tuples tuples) throws TuplesException {
if (tuples.isMaterialized()) {
return (Tuples)tuples.clone();
} else {
return tuplesFactory.newTuples(tuples);
}
}
/**
* Skip a specified number of rows from the beginning of a tuples. This method
* removes rows from the beginning of the tuples; to remove rows from the end
* of the tuples, the {@link #limit} method can be used. If more rows are
* removed than are present, an empty tuples is produced.
*
* @param tuples the instance to offset
* @param rowCount the number of leading rows to remove
* @return the remaining rows, if any
* @throws TuplesException If there was an error accessing the tuples.
*/
public static Tuples offset(Tuples tuples, long rowCount) throws TuplesException {
return new OffsetTuples((Tuples)tuples.clone(), rowCount);
}
/**
* Filter out duplicate rows.
*
* @param tuples The tuples to filter.
* @return An equivalent Tuples, but with duplicate bindings removed.
* @throws TuplesException If there was an error accessing the tuples.
*/
public static Tuples removeDuplicates(Tuples tuples) throws TuplesException {
if (tuples.hasNoDuplicates()) {
if (logger.isDebugEnabled()) logger.debug("Didn't need to remove duplicates");
return (Tuples)tuples.clone();
}
if (logger.isDebugEnabled()) logger.debug("Removing duplicates");
if (tuples.getComparator() == null) {
Tuples oldTuples = tuples;
tuples = sort(tuples);
assert tuples != oldTuples;
// leave the original tuples. We may not touch it.
if (!tuples.hasNoDuplicates()) {
oldTuples = tuples;
tuples = new DistinctTuples(tuples);
assert tuples != oldTuples;
oldTuples.close();
}
return tuples;
} else {
if (logger.isDebugEnabled()) logger.debug("Already sorted: " + tuples);
Tuples result = new DistinctTuples(tuples);
return result;
}
}
public static String formatTuplesTree(Tuples tuples) {
return indentedTuplesTree(tuples, "").toString();
}
public static StringBuilder tuplesSummary(Tuples tuples) {
StringBuilder buff = new StringBuilder();
buff.append(tuples.getClass().toString());
buff.append("<" + System.identityHashCode(tuples) + ">");
buff.append("[");
if (!tuples.isMaterialized()) buff.append("~");
else buff.append("=");
try {
buff.append(tuples.getRowUpperBound());
buff.append(" (~").append(tuples.getRowExpectedCount());
buff.append(")]");
} catch (TuplesException et) {
buff.append(et.toString()).append("]");
}
buff.append(" {");
Variable[] vars = tuples.getVariables();
if (vars.length > 0) {
buff.append(vars[0].toString());
for (int i = 1; i < vars.length; i++) buff.append(", " + vars[i].toString());
}
buff.append("}");
try {
MandatoryBindingAnnotation mba = (MandatoryBindingAnnotation)tuples.getAnnotation(MandatoryBindingAnnotation.class);
if (mba != null) buff.append(" :: MBA{ " + mba.requiredVariables() + " }");
} catch (TuplesException et) {
logger.error("Failed to obtain annotation", et);
}
return buff;
}
/**
* Calculates a consistent hash code for a tuples.
* @param t The tuples to get the hash code for.
* @return The hash code value.
*/
public static int hashCode(Tuples t) {
t = (Tuples)t.clone();
int result = t.getVariables().hashCode();
try {
t.beforeFirst();
int cols = t.getNumberOfVariables();
while (t.next()) {
for (int i = 0; i < cols; i++) {
long val = t.getColumnValue(i);
result ^= (int)(val ^ (val >>> 32));
}
}
} catch (TuplesException e) {
throw new RuntimeException(e.toString(), e);
} finally {
try {
if (t != null) t.close();
} catch (TuplesException ex) {
throw new RuntimeException(ex.toString(), ex);
}
}
return result;
}
/**
* Find the list of variables which appear in both the lhs and rhs tuples.
*
* @param lhs The first tuples to check the variables of.
* @param rhs The second tuples to check the variables of.
* @return A set containing all of the shared variables from lhs and rhs.
*/
static Set<Variable> getMatchingVars(Tuples lhs, Tuples rhs) {
// get all the variables from the lhs
Set<Variable> commonVarSet = new HashSet<Variable>(Arrays.asList(lhs.getVariables()));
// get all the variables from the rhs
Set<Variable> rhsVars = new HashSet<Variable>(Arrays.asList(rhs.getVariables()));
// find the intersecting set of variables
commonVarSet.retainAll(rhsVars);
return commonVarSet;
}
/**
* Compares a tuples' variables to a set of variables.
*
* @param tuples The tuples to check the variables of.
* @param vars The variables to check for.
* @return <code>true</code> when all of the tuples' variables are in <code>vars</code>.
*/
private static boolean checkForExtraVariables(Tuples tuples, Collection<Variable> vars) {
// get the variable list
Variable[] sv = tuples.getVariables();
for (int i = 0; i < sv.length; i++) {
if (!vars.contains(sv[i])) return true; // extra variable
}
return false;
}
/**
* Convert a list of Tuples into a string.
* @param header The header for the returned string.
* @param args The tuples to print.
* @return The string containing the full list of tuples.
*/
private static String printArgs(String header, List<? extends Tuples> args) {
StringBuilder buff = new StringBuilder(header);
buff.append("[");
boolean first = true;
for (Tuples arg: args) {
if (!first) {
buff.append(", ");
first = false;
}
buff.append(tuplesSummary(arg));
}
buff.append("]");
return buff.toString();
}
private static StringBuilder indentedTuplesTree(Tuples tuples, String indent) {
StringBuilder buff = new StringBuilder();
buff.append("\n").append(indent).append("(").append(tuplesSummary(tuples));
for (Tuples t: tuples.getOperands()) buff.append(" ").append(indentedTuplesTree(t, indent + ". "));
buff.append(")");
return buff;
}
}