/*
* Copyright 2008 Fedora Commons
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.mulgara.store.tuples;
// Java 2 standard packages
import java.util.*;
// Third party packages
import org.apache.log4j.*;
// Locally written packages
import org.mulgara.query.Constraint;
import org.mulgara.query.TuplesException;
import org.mulgara.query.Variable;
import org.mulgara.store.tuples.AbstractTuples;
/**
* Difference operation.
*
* This difference is defined as all the rows in the minuend which are not matched
* by rows in the subtrahend. Matching is defined by rows where each pair of shared
* variables is bound to the same pair of values.
*
* The difference is evaluated by iterating over the minuend, and searching on the
* subtrahend for matching rows. For efficient searching, the subtrahend must be
* ordered according to the matching variables. This class is not responsible for
* ensuring the sort order of the subtrahend; that responsibility falls to
* {@link TuplesOperations#subtract}.
*
* @created March, 2005
* @author Paula Gearon
* @licence <a href="{@docRoot}/../LICENCE.txt">Apache License, Version 2.0</a>
*/
public class Difference extends AbstractTuples {

  /** Logger for this class. Not currently used by any method here. */
  @SuppressWarnings("unused")
  private static final Logger logger = Logger.getLogger(Difference.class.getName());

  /**
   * This is a factor that we can expect the subtrahend to match on the minuend.
   * 1.0 indicates that the subtrahend is a subset of the minuend. 0.0 indicates
   * there is no match at all.
   * TODO: update this value statistically, rather than using a constant value.
   */
  private static final double MATCH_RATIO = 0.25;

  /** The set of tuples to subtract from. */
  protected Tuples minuend;

  /** The set of tuples describing the rows to remove from the minuend. */
  protected Tuples subtrahend;

  /** The set of variables common to both the minuend and the subtrahend. */
  protected Set<Variable> commonVars;

  /** An array of the matching variables' columns within the minuend, indexed by the subtrahend position. */
  protected int[] varMap;

  /**
   * Configure a subtraction operation for lazy evaluation.
   *
   * Both operands are cloned, so this object owns (and {@link #close() closes}) its own
   * copies. The clones are only taken once all validation has passed, so that a failed
   * construction does not leave unclosed clones behind.
   *
   * @param minuend The original tuples, including the rows to be removed.
   * @param subtrahend The tuples defining the rows to be removed from the minuend.
   * @throws IllegalArgumentException If the <var>minuend</var> and <var>subtrahend</var>
   *         contain no variables in common, or if a common variable is not found within the
   *         leading columns of the <var>subtrahend</var>.
   * @throws TuplesException If a column index cannot be obtained from either operand.
   */
  Difference(Tuples minuend, Tuples subtrahend) throws TuplesException, IllegalArgumentException {
    // get the variables to subtract with. TODO: get the correct type back from getMatchingVars
    @SuppressWarnings("unchecked")
    Set<Variable> matching = (Set<Variable>)TuplesOperations.getMatchingVars(minuend, subtrahend);
    Set<Variable> common = Collections.unmodifiableSet(matching);
    if (common.isEmpty()) {
      throw new IllegalArgumentException("tuples must have variables in common for subtraction to occur");
    }

    // build the mapping of subtrahend columns to minuend columns
    int[] map = new int[common.size()];
    for (Variable var : common) {
      // get the index of the variable in the subtrahend
      int si = subtrahend.getColumnIndex(var);
      // check that it is within the prefix columns. If not, then the subtrahend is not properly sorted.
      if (si >= map.length) {
        // usually this would be an assertion, but it's too important to miss
        throw new IllegalArgumentException(
            "Subtracted tuples not sorted correctly: " + describeMisorder(common, var, si, subtrahend));
      }
      // map the subtrahend index of the variable to the minuend index
      map[si] = minuend.getColumnIndex(var);
    }

    // Everything checked out: only now take ownership of cloned operands.
    this.commonVars = common;
    this.varMap = map;
    this.minuend = (Tuples)minuend.clone();
    this.subtrahend = (Tuples)subtrahend.clone();
  }

  //
  // Methods implementing Tuples
  //

  /**
   * @param column {@inheritDoc}
   * @return {@inheritDoc} The value is read from the current row of the minuend.
   * @throws TuplesException {@inheritDoc}
   */
  public long getColumnValue(int column) throws TuplesException {
    return minuend.getColumnValue(column);
  }

  /**
   * @return {@inheritDoc} This is estimated as the size of the minuend,
   *         though it will probably be smaller.
   * @throws TuplesException {@inheritDoc}
   */
  public long getRowUpperBound() throws TuplesException {
    return minuend.getRowUpperBound();
  }

  /**
   * @return {@inheritDoc} The first guess is the expected count of the minuend, less
   *         {@link #MATCH_RATIO} times the expected count of the subtrahend. If that guess
   *         is not more than half of the minuend's expected count, then {@link #MATCH_RATIO}
   *         times the minuend's expected count is returned instead.
   * @throws TuplesException {@inheritDoc}
   */
  public long getRowExpectedCount() throws TuplesException {
    long minCount = minuend.getRowExpectedCount();
    long subCount = subtrahend.getRowExpectedCount();
    long guess = minCount - (long)(MATCH_RATIO * subCount);
    // if the guess is large enough (by some fudge factor), then we'll use it
    if (guess > minCount / 2) {
      return guess;
    }
    return (long)(minCount * MATCH_RATIO);
  }

  /**
   * {@inheritDoc} Relies on the minuend of the difference.
   */
  public boolean isColumnEverUnbound(int column) throws TuplesException {
    return minuend.isColumnEverUnbound(column);
  }

  /**
   * {@inheritDoc} These are the variables of the minuend.
   */
  public Variable[] getVariables() {
    return minuend.getVariables();
  }

  /**
   * {@inheritDoc} The index is the column index within the minuend.
   */
  public int getColumnIndex(Variable variable) throws TuplesException {
    return minuend.getColumnIndex(variable);
  }

  /**
   * {@inheritDoc}
   * @return Always <code>false</code>.
   */
  public boolean isMaterialized() {
    return false;
  }

  /**
   * {@inheritDoc} Relies on the minuend of the difference.
   *
   * NOTE(review): only the minuend is consulted, so a non-empty minuend whose rows are
   * all matched by the subtrahend is still reported as non-empty. Confirm that callers
   * treat a <code>false</code> result as "possibly non-empty".
   */
  public boolean isEmpty() throws TuplesException {
    return minuend.isEmpty();
  }

  /**
   * {@inheritDoc} Relies on the minuend of the difference.
   */
  public boolean hasNoDuplicates() throws TuplesException {
    return minuend.hasNoDuplicates();
  }

  /**
   * {@inheritDoc} This is the comparator of the minuend.
   */
  public RowComparator getComparator() {
    return minuend.getComparator();
  }

  /**
   * {@inheritDoc}
   * @return An unmodifiable list holding the minuend followed by the subtrahend.
   */
  public List<Tuples> getOperands() {
    return Collections.unmodifiableList(Arrays.asList(minuend, subtrahend));
  }

  /**
   * {@inheritDoc} Relies on the minuend of the difference.
   */
  public boolean isUnconstrained() throws TuplesException {
    return minuend.isUnconstrained();
  }

  /**
   * {@inheritDoc} Only the minuend is renamed.
   */
  public void renameVariables(Constraint constraint) {
    minuend.renameVariables(constraint);
  }

  /**
   * {@inheritDoc} The positioning is delegated to the minuend.
   */
  public void beforeFirst(long[] prefix, int suffixTruncation) throws TuplesException {
    minuend.beforeFirst(prefix, suffixTruncation);
  }

  /**
   * Advances the minuend until it reaches a row with no match in the subtrahend.
   *
   * @return {@inheritDoc}
   * @throws TuplesException {@inheritDoc}
   */
  public boolean next() throws TuplesException {
    // step through the minuend, skipping every row that the subtrahend matches
    while (minuend.next()) {
      if (!findMatch()) {
        return true;
      }
    }
    // the minuend's cursor is after its last row
    return false;
  }

  /**
   * Closes all the operands. The subtrahend is closed even when closing the minuend fails.
   * If both fail, the exception from the subtrahend is the one that propagates.
   *
   * @throws TuplesException If either the minuend or the subtrahend can't be closed.
   */
  public void close() throws TuplesException {
    try {
      minuend.close();
    } finally {
      subtrahend.close();
    }
  }

  /**
   * @return {@inheritDoc} The clone holds its own clones of the minuend and subtrahend.
   *         The common variable set and the column map are shared with the original.
   */
  public Object clone() {
    Difference cloned = (Difference)super.clone();
    // Copy mutable fields by value
    cloned.minuend = (Tuples)minuend.clone();
    cloned.subtrahend = (Tuples)subtrahend.clone();
    return cloned;
  }

  //
  // Internal methods
  //

  /**
   * Searches for an entry in the subtrahend that matches the current row in the minuend.
   *
   * @return <code>true</code> if there is a row in the subtrahend that matches the minuend
   *         for all the variables. <code>false</code> otherwise.
   * @throws TuplesException If the minuend can't be read or the subtrahend can't be searched.
   */
  private boolean findMatch() throws TuplesException {
    long[] prefix = new long[varMap.length];
    // copy the variables from the current minuend row into the prefix
    for (int i = 0; i < varMap.length; i++) {
      prefix[i] = minuend.getColumnValue(varMap[i]);
    }
    // find the entry in the subtrahend
    subtrahend.beforeFirst(prefix, 0);
    // return true if the search found anything
    return subtrahend.next();
  }

  /**
   * Builds the detail text for the "not sorted correctly" error raised by the constructor.
   *
   * @param common The variables shared by both operands.
   * @param var The variable found outside of the leading columns of the subtrahend.
   * @param si The column index of <var>var</var> within the subtrahend.
   * @param subtrahend The tuples whose variables are listed in the text.
   * @return A description of the common variables, the offending variable and its index,
   *         and the variables of the subtrahend.
   */
  private static String describeMisorder(Set<Variable> common, Variable var, int si, Tuples subtrahend) {
    StringBuilder op = new StringBuilder("common= ").append(common);
    op.append("; var= ").append(var).append("; index in sub= ").append(si).append("; subtrahend= [ ");
    for (Variable v : subtrahend.getVariables()) {
      op.append(v).append(" ");
    }
    return op.append("]").toString();
  }
}