/* * The contents of this file are subject to the Mozilla Public License * Version 1.1 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See * the License for the specific language governing rights and limitations * under the License. * * The Original Code is the Kowari Metadata Store. * * The Initial Developer of the Original Code is Plugged In Software Pty * Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions * created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002 * Plugged In Software Pty Ltd. All Rights Reserved. * * Contributor(s): * Various bug fixes copyright Netymon Pty Ltd (info@netymon.com) under * contract to The Topaz Foundation (info@topazproject.org) * * [NOTE: The text of this Exhibit A may differ slightly from the text * of the notices in the Source Code files of the Original Code. You * should use the text of this Exhibit A rather than the text found in the * Original Code Source Code for Your Modifications.] * */ package org.mulgara.store.tuples; // Java 2 standard packages import java.math.BigInteger; import java.util.*; // Third party packages import org.apache.log4j.Logger; // Locally written packages import org.mulgara.query.TuplesException; import org.mulgara.query.Variable; import org.mulgara.store.tuples.AbstractTuples; /** * Logical conjunction implemented as a relational join operation. * * The join is performed using a series of nested loops, with the * lower-indexed elements of the {@link #operands} array forming the outer loops * and the higher-indexed forming the inner loops. If the sort ordering of the * operand columns is such that it can be taken advantage of, this can be very * efficient. If not, it degrades to the equivalent of an inner-outer loop * join (Cartesian product). This class is not responsible for optimizing the * order of the operands presented to it; that responsibility falls to * {@link TuplesOperations#join}. * * @created 2003-09-01 * * @author <a href="http://staff.pisoftware.com/raboczi">Simon Raboczi</a> * * @version $Revision: 1.10 $ * * @modified $Date: 2005/03/07 19:42:40 $ * * @company <A href="mailto:info@PIsoftware.com">Plugged In Software</A> * * @copyright © 2003 <A href="http://www.PIsoftware.com/">Plugged In * Software Pty Ltd</A> * * @licence <a href="{@docRoot}/../../LICENCE">Mozilla Public License v1.1</a> */ public class UnboundJoin extends AbstractTuples { /** Logger. */ private static final Logger logger = Logger.getLogger(UnboundJoin.class.getName()); /** * Version of {@link #operandBinding}} including only columns to the left of * the first unbound column. */ protected long[][] operandBindingPrefix; /** * For each column of the joined result, which operand contains the first * occurrence of that variable. */ protected int[] mapOperand; /** * For each column of the joined result, which column of the operand * determined by {@link #mapOperand} contains the first occurrence of that * variable. */ protected int[] mapColumn; /** * Magic value within the {@link #fooOperand} array, indicating that a column * is bound to one of the columns of the <var>prefix</var> parameter to * {@link #next}. */ protected static final int PREFIX = -1; /** * For each column of each operand, which operand contains the first * occurrence of that variable, or {@link #PREFIX} if the prefix specified * to {@link #next} contains the occurrence. */ protected int[][] fooOperand; /** * For each column of each operand, which column of the operand determined by * {@link #fooOperand} contains the first occurrence of that variable, or if * the corresponding value of {@link #fooOperand} is {@link #PREFIX}, which * column of the prefix specified to {@link #next} contains the occurrence. */ protected int[][] fooColumn; /** * Whether each column of this instance might contain {@link #UNBOUND} rows. */ protected boolean[] columnEverUnbound; /** * The propositions to conjoin. */ protected Tuples[] operands; /** * The required values of the columns of each operand. A value of {@link * Tuples#UNBOUND} indicates that the column is free to vary. */ protected long[][] operandBinding; /** * For each operand, for each variable, which output column contains the same variable. */ protected int[][] operandOutputMap; /** * Do any of the operands with variables matching this output variable contain UNBOUND? */ protected boolean[][] columnOperandEverUnbound; /** * Flag indicating that the cursor is before the first row. */ protected boolean isBeforeFirst = true; /** * Flag indicating that the cursor is after the last row. */ protected boolean isAfterLast = false; /** * Do any of the operands contain duplicates. Used to shortcircuit hasNoDuplicates. */ protected boolean operandsContainDuplicates; /** * The prefix of the index. */ protected long[] prefix = null; /** * The variable groups formed in this operation. If more than one there will be a cartesian product in the result. */ protected List<VarGroup> varGroups = null; /** * Conjoin a list of propositions. * * @param operands the propositions to conjoin; the order affects efficiency, * but not the logical value of the result * @throws IllegalArgumentException if <var>operands</var> is * <code>null</code> * @throws TuplesException EXCEPTION TO DO */ UnboundJoin(Tuples[] operands) throws TuplesException { // Validate "operands" parameter if (operands == null) { throw new IllegalArgumentException("Null \"operands\" parameter"); } // Initialize fields this.operands = clone(operands); operandBinding = new long[operands.length][]; operandBindingPrefix = new long[operands.length][]; this.operandsContainDuplicates = false; for (int i = 0; i < operands.length; i++) { // Debug if (logger.isDebugEnabled()) { logger.debug("Operands " + i + " : " + operands[i]); logger.debug("Operands variables " + i + " : " + Arrays.asList(operands[i].getVariables())); logger.debug("Operands types " + i + " : " + operands[i].getClass()); } operandBinding[i] = new long[operands[i].getVariables().length]; if (!operands[i].hasNoDuplicates()) { this.operandsContainDuplicates = true; } } fooOperand = new int[operands.length][]; fooColumn = new int[operands.length][]; operandOutputMap = new int[operands.length][]; // Calculate the variables present and their mappings from operand // columns to result columns List<Variable> variableList = new ArrayList<Variable>(); List<Integer> mapOperandList = new ArrayList<Integer>(); List<Integer> mapColumnList = new ArrayList<Integer>(); List<Integer> fooOperandList = new ArrayList<Integer>(); List<Integer> fooColumnList = new ArrayList<Integer>(); for (int i = 0; i < operands.length; i++) { fooOperandList.clear(); fooColumnList.clear(); Variable[] operandVariables = operands[i].getVariables(); operandOutputMap[i] = new int[operandVariables.length]; for (int j = 0; j < operandVariables.length; j++) { int k = variableList.indexOf(operandVariables[j]); if (k == -1) { mapOperandList.add(new Integer(i)); mapColumnList.add(new Integer(j)); fooOperandList.add(new Integer(PREFIX)); fooColumnList.add(new Integer(variableList.size())); variableList.add(operandVariables[j]); operandOutputMap[i][j] = j; } else { fooOperandList.add(mapOperandList.get(k)); fooColumnList.add(mapColumnList.get(k)); operandOutputMap[i][j] = k; } } // Convert per-operand lists into arrays assert fooOperandList.size() == fooColumnList.size(); fooOperand[i] = new int[fooOperandList.size()]; fooColumn[i] = new int[fooColumnList.size()]; for (int j = 0; j < fooOperand[i].length; j++) { fooOperand[i][j] = ((Integer) fooOperandList.get(j)).intValue(); fooColumn[i][j] = ((Integer) fooColumnList.get(j)).intValue(); } } // Convert column mappings from lists to arrays setVariables(variableList); mapOperand = new int[mapOperandList.size()]; mapColumn = new int[mapColumnList.size()]; for (int i = 0; i < mapOperand.length; i++) { mapOperand[i] = ((Integer) mapOperandList.get(i)).intValue(); mapColumn[i] = ((Integer) mapColumnList.get(i)).intValue(); } // Determine which columns are ever unbound columnEverUnbound = new boolean[variableList.size()]; columnOperandEverUnbound = new boolean[operands.length][variableList.size()]; Arrays.fill(columnEverUnbound, true); for (int i = 0; i < operands.length; i++) { Arrays.fill(columnOperandEverUnbound[i], false); Variable[] variables = operands[i].getVariables(); for (int j = 0; j < variables.length; j++) { if (!operands[i].isColumnEverUnbound(j)) { columnEverUnbound[getColumnIndex(variables[j])] = false; } else { columnOperandEverUnbound[i][getColumnIndex(variables[j])] = true; } } } buildVarGroups(); } /** * @return {@inheritDoc} This occurs if and only if every one of the * {@link #operands} is unconstrained. * @throws TuplesException {@inheritDoc} */ public boolean isUnconstrained() throws TuplesException { for (int i = 0; i < operands.length; i++) { if (!operands[i].isUnconstrained()) { return false; } } return true; } public List<Tuples> getOperands() { return Arrays.asList(operands); } public void beforeFirst(long[] prefix, int suffixTruncation) throws TuplesException { if (prefix == null) { throw new IllegalArgumentException("Null \"prefix\" parameter"); } if (suffixTruncation != 0) { throw new TuplesException("Suffix truncation not implemented"); } assert operands != null; assert operandBinding != null; isBeforeFirst = true; isAfterLast = false; this.prefix = prefix; } // // Methods implementing Tuples // /** * @param column {@inheritDoc} * @return {@inheritDoc} * @throws TuplesException {@inheritDoc} */ public long getColumnValue(int column) throws TuplesException { if ((column < 0) || (column >= getNumberOfVariables())) { throw new TuplesException("Invalid column: " + column); } long result = operands[mapOperand[column]].getColumnValue(mapColumn[column]); if (result != Tuples.UNBOUND) { return result; } // Brute force search for a bound instance of variable in operands. // Note: No operands to the left of the mapOperand[column] contain desired variable. Variable desired = getVariables()[column]; for (int i = mapOperand[column] + 1; i < operands.length; i++) { Variable[] v = operands[i].getVariables(); for (int j = 0; j < v.length; j++) { if (v[j].equals(desired)) { result = operands[i].getColumnValue(j); if (result != Tuples.UNBOUND) { return result; } } } } return Tuples.UNBOUND; } /** * @return {@inheritDoc} This is estimated as the size of the Cartesian * product, by multiplying the row counts of all the {@link #operands}. * @throws TuplesException {@inheritDoc} */ public long getRowUpperBound() throws TuplesException { if (operands.length == 0) return 0; if (operands.length == 1) return operands[0].getRowUpperBound(); BigInteger rowCount = BigInteger.valueOf(operands[0].getRowUpperBound()); for (int i = 1; i < operands.length; i++) { rowCount = rowCount.multiply(BigInteger.valueOf(operands[i].getRowUpperBound())); if (rowCount.bitLength() > 63) return Long.MAX_VALUE; } return rowCount.longValue(); } /** * @return {@inheritDoc} This is estimated as the size of the minumum * of the row counts of all the {@link #operands}. * @throws TuplesException {@inheritDoc} */ public long getRowExpectedCount() throws TuplesException { if (operands.length == 0) return 0; if (operands.length == 1) return operands[0].getRowExpectedCount(); // simple joined group. Get the minimum as a guess. if (varGroups.size() == 1) { long result = operands[0].getRowExpectedCount(); for (int i = 1; i < operands.length; i++) { result = Math.min(result, operands[i].getRowExpectedCount()); } return result; } else { // cartesian product. Get the simple joins, and multiply. BigInteger rowCount = null; for (VarGroup vg: varGroups) { // calculate the size of this group List<Integer> ops = vg.getOps(); long groupResult = operands[ops.get(0)].getRowExpectedCount(); for (int i = 1; i < ops.size(); i++) { groupResult = Math.min(groupResult, operands[ops.get(i)].getRowExpectedCount()); } // merge the current group into the running total if (rowCount == null) rowCount = BigInteger.valueOf(groupResult); else rowCount = rowCount.multiply(BigInteger.valueOf(groupResult)); } return (rowCount == null) ? 0L : rowCount.longValue(); } } public boolean isEmpty() throws TuplesException { for (Tuples op : operands) { if (op.isEmpty()) return true; } return false; } public boolean isColumnEverUnbound(int column) throws TuplesException { try { return columnEverUnbound[column]; } catch (ArrayIndexOutOfBoundsException e) { throw new TuplesException("No such column " + column, e); } } /** * @return {@inheritDoc} * @throws TuplesException {@inheritDoc} */ public boolean next() throws TuplesException { // Validate parameters if (prefix == null) { throw new IllegalArgumentException("Null \"prefix\" parameter"); } // Short-circuit execution if this tuples' cursor is after the last row if (isAfterLast) { return false; } if (isBeforeFirst) { // Flag that we're no longer before the first row isBeforeFirst = false; // The first row has to be advanced from leftmost to rightmost operand in // order to initialize the leftward dependencies of the operand prefixes for (int i = 0; i < operands.length; i++) { updateOperandPrefix(i); operands[i].beforeFirst(operandBindingPrefix[i], 0); if (!advance(i)) { return false; } } return true; } else { // We know at this point that we're on a row satisfying the current // prefix. Advance the rightmost operand and let rollover do any // right-to-left advancement required boolean b = advance(operands.length - 1); assert b || isAfterLast; return b; } } public boolean hasNoDuplicates() { return operandsContainDuplicates == false; } /** * Closes all the {@link #operands}. * * @throws TuplesException if any of the {@link #operands} can't be closed */ public void close() throws TuplesException { close(operands); } /** * @return {@inheritDoc} */ public Object clone() { UnboundJoin cloned = (UnboundJoin)super.clone(); // Copy immutable fields by reference cloned.operandBinding = operandBinding; cloned.operandBindingPrefix = operandBindingPrefix; cloned.mapOperand = mapOperand; cloned.mapColumn = mapColumn; cloned.fooOperand = fooOperand; cloned.fooColumn = fooColumn; cloned.prefix = prefix; // Copy mutable fields by value cloned.operands = clone(operands); cloned.isBeforeFirst = isBeforeFirst; cloned.isAfterLast = isAfterLast; return cloned; } /** * Get the number of groups in this join, based on their shared variables. * This indicates the number of cartesian products required. * @return The number of variable groups discovered between the operands */ int getNrGroups() { return varGroups.size(); } // // Internal methods // /** * Calculate the correct value for one of the elements of {@link * #operandBinding} and its corresponding {@link #operandBindingPrefix}. This * method has no return value, only side-effects upon {@link #operandBinding} * and {@link #operandBindingPrefix}. * * @param i the index of the element in the {@link #operandBinding} array to * calculate * @throws TuplesException if the {@link #operands} can't be accessed */ private void updateOperandPrefix(int i) throws TuplesException { assert i >= 0; assert i < operandBinding.length; for (int j = 0; j < operandBinding[i].length; j++) { if (fooOperand[i][j] == PREFIX) { // Variable first bound to a next method parameter prefix column passed to beforeFirst. operandBinding[i][j] = (fooColumn[i][j] < prefix.length) ? prefix[fooColumn[i][j]] : Tuples.UNBOUND; } else { // Variable first bound to a leftward operand column operandBinding[i][j] = operands[fooOperand[i][j]].getColumnValue(fooColumn[i][j]); } } // Determine the length of the advancement prefix int prefixLength = 0; while ((prefixLength < operandBinding[i].length) && (operandBinding[i][prefixLength] != Tuples.UNBOUND) && (columnOperandEverUnbound[i][operandOutputMap[i][prefixLength]] == false)) { prefixLength++; } assert prefixLength >= 0; assert prefixLength <= operandBinding[i].length; // Generate the advancement prefix assert operandBindingPrefix != null; if ((operandBindingPrefix[i] == null) || (operandBindingPrefix[i].length != prefixLength)) { operandBindingPrefix[i] = new long[prefixLength]; } System.arraycopy(operandBinding[i], 0, operandBindingPrefix[i], 0, prefixLength); } /** * Advance one of the joined operands. * * @param i the index of the operand to advance * @return whether a row was found to satisfy * @throws TuplesException if the {@link #operands} can't be accessed */ private final boolean advance(int i) throws TuplesException { assert i >= 0; assert i < operands.length; assert!isAfterLast; B:while (true) { if (!operands[i].next()) { // Roll this column... if (i == 0) { isAfterLast = true; prefix = null; return false; } else { // roll the leftward row if (!advance(i - 1)) { return false; } // reset the current row updateOperandPrefix(i); operands[i].beforeFirst(operandBindingPrefix[i], 0); continue B; } } // Check that any suffix conditions are satisfied for (int j = operandBindingPrefix[i].length; j < operandBinding[i].length; j++) { if ((operandBinding[i][j] != Tuples.UNBOUND) && (operandBinding[i][j] != operands[i].getColumnValue(j)) && (operands[i].getColumnValue(j) != Tuples.UNBOUND)) { continue B; } } return true; } } /** * Creates the groupings of variables formed during this join. * An inner join will form if there is just one group. */ void buildVarGroups() { varGroups = new LinkedList<VarGroup>(); // go over all the operands G: for (int i = 0; i < operands.length; i++) { Variable[] vars = operands[i].getVariables(); // test if any group already matches this operand for (VarGroup v: varGroups) { if (v.joinsTo(vars)) { // found a match, so add it v.addOperand(i); // this may join in other groups, so test Iterator<VarGroup> vgi = varGroups.iterator(); while (vgi.hasNext()) { VarGroup ov = vgi.next(); // don't test if this group joins to itself if (ov == v) continue; if (v.joinsTo(ov)) { // groups join, so merge them v.merge(ov); vgi.remove(); } } // we've matched this operand in, so move to the next operand continue G; } } // no matches, so create a new group varGroups.add(new VarGroup(i)); } } /** * A class to record a group of variables and the operands they are associated with. */ class VarGroup { /** The variables for the group */ HashSet<Variable> variables = new HashSet<Variable>(); /** The operands this group's variables can be found in */ ArrayList<Integer> opList = new ArrayList<Integer>(); /** * Create a group, starting with a given operand. * @param opIndex The index of the operand to seed the group with. */ public VarGroup(int opIndex) { addOperand(opIndex); } /** * Adds a new operand's variables to the group. * @param i The index of the operand to add. */ public void addOperand(int i) { assert !opList.contains(operands[i]); opList.add(i); for (Variable v: operands[i].getVariables()) { variables.add(v); } } /** * Adds another group to this one, based on shared variables. * @param v The other variable group to merge. */ @SuppressWarnings("unchecked") public void merge(VarGroup v) { // check that some variables are shared assert ((HashSet<Variable>)variables.clone()).removeAll(v.variables); // check that no operands are shared assert !((ArrayList<Integer>)opList.clone()).removeAll(v.opList); variables.addAll(v.variables); opList.addAll(v.opList); } /** * Tests if this group joins to a given set of variables. * @param vars An array of variables to test. * @return <code>true</code> if the variables join to this group. */ public boolean joinsTo(Variable[] vars) { for (Variable v: vars) { if (variables.contains(v)) return true; } return false; } /** * Tests if this group joins to another group. * @param og The other group. * @return <code>true</code> if the groups share variables. */ @SuppressWarnings("unchecked") public boolean joinsTo(VarGroup og) { return ((HashSet<Variable>)variables.clone()).removeAll(og.variables); } /** * Get the list of operands for this group. * @return The operand list. */ public List<Integer> getOps() { return opList; } } }