/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.xxql;
import java.util.Iterator;
import xxl.core.cursors.AbstractCursor;
import xxl.core.cursors.Cursor;
import xxl.core.cursors.Cursors;
import xxl.core.functions.Function;
import xxl.core.predicates.Equal;
import xxl.core.predicates.Predicate;
/**
* A nested-loops implementation of the intersection operator. The nested-loops
* intersection operator is based on a loop iteration and therefore it has no
* special conditions with regard to the order of the elements contained in the
* two input iterations. The input iteration <code>input0</code> is traversed
* in the "outer" loop (only for one time) and the input iteration
* <code>input1</code> is repeatedly consumed in the "inner" loop (for a
* maximum of times determined by the elements of input iteration
* <code>input0</code>). An user defined predicate is used to decide whether
* two elements of the input iterations are equal concerning their values.
*
* <p><b>Note:</b> When the given input iteration only implements the interface
* {@link Iterator} it is wrapped to a cursor by a call to the static method
* {@link Cursors#wrap(Iterator) wrap}.</p>
*
* <p><b>Example usage (1):</b>
* <code><pre>
* Intersection<Integer> intersection = new Intersection<Integer>(
* Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10).iterator(),
* Arrays.asList(0, 2, 4, 6, 8, 10).iterator(),
* new Function<Object, Iterator<Integer>>() {
* public Iterator<Integer> invoke() {
* return Arrays.asList(0, 2, 4, 6, 8, 10).iterator();
* }
* }
* );
*
* intersection.open();
*
* Cursors.println(intersection);
*
* intersection.close();
* </pre></code>
* The input iterations for the intersection operation are given by two
* list iterators. The first one contains all integer elements of the
* interval [0, 10]. The second one delivers only the even elements of the same
* interval. So the intersection operator should return the same integer values
* as contained in the second input cursor. Because an iterator is not
* resettable, a function resetting the second input cursor has to be provided:
* the second input cursor is traversed in the inner loop and has to be
* reset as often as the first input iterator contains elements. The resetting
* function has to implement the parameterless <code>invoke</code> method and
* in this case it delivers a new list iterator iterating over the second input
* list.</p>
*
* <p><b>Example usage (2):</b>
* <code><pre>
* intersection = new Intersection<Integer>(
* Arrays.asList(2, 2).iterator(),
* Arrays.asList(2, 2, 2).iterator(),
* new Function<Object, Iterator<Integer>>() {
* public Iterator<Integer> invoke() {
* return Arrays.asList(2, 2, 2).iterator();
* }
* }
* );
*
* intersection.open();
*
* Cursors.println(intersection);
*
* intersection.close();
* </pre></code>
* The second example usage computes the intersection of the input iterations
* {2,2} and {2,2,2}. The intersection is created in the same way as in the
* previous example. Conforming with the correct duplicate handling the result
* will be {2,2}.</p>
*
* @param <E> the type of the elements returned by this intersection.
* @see java.util.Iterator
* @see xxl.core.cursors.Cursor
* @see xxl.core.cursors.AbstractCursor
* @see xxl.core.cursors.intersections.SortBasedIntersection
* @see xxl.core.relational.cursors.NestedLoopsIntersection
*/
public class Intersection<E> extends AbstractCursor<E> {

	/**
	 * The first (or "outer") input iteration of the intersection operator.
	 */
	protected Cursor<? extends E> input0;

	/**
	 * The second (or "inner") input iteration of the intersection operator.
	 */
	protected Cursor<? extends E> input1;

	/**
	 * A parameterless function that resets the "inner" loop (the iteration
	 * <code>input1</code>). Such a function must be specified, if the "inner"
	 * iteration is not resettable, i.e., the <code>reset</code> method of
	 * <code>input1</code> will cause an
	 * {@link java.lang.UnsupportedOperationException}. A call to the
	 * <code>invoke</code> method of this function must deliver the "inner"
	 * iteration again, if it has to be traversed for another time. May be
	 * <code>null</code>, in which case <code>input1.reset()</code> is used.
	 */
	protected Function<?, ? extends Iterator<? extends E>> resetInput1;

	/**
	 * A binary predicate that selects the matching tuples, i.e., the tuples
	 * that will be returned when the <code>next</code> method is called.
	 * Therefore the predicate is evaluated for an element of each input
	 * iteration. Only an element of <code>input0</code> for which the
	 * predicate evaluates to <code>true</code> together with some element of
	 * <code>input1</code> qualifies as a result of the intersection
	 * operation.
	 */
	protected Predicate<? super E> equals;

	/**
	 * A boolean flag indicating whether the second input iteration has to be
	 * reset before it is traversed for the next element of the "outer" loop.
	 * It is <code>false</code> only while <code>input1</code> is still
	 * untouched, i.e., before the first "inner" traversal and directly after
	 * a call to {@link #reset()}.
	 */
	protected boolean reset = false;

	// NOTE: an earlier revision kept a bit vector with one bit per element of
	// input1 to remember which inner elements had already been matched. That
	// bookkeeping is disabled; no such state is maintained by this class.

	/**
	 * The position of the "inner" loop. Within this class the field is only
	 * set back to <code>0</code> whenever <code>input1</code> is reset; it is
	 * not advanced during the traversal.
	 */
	protected int position = 0;

	/**
	 * Signals that {@link #lookupNext()} has already located the next result
	 * and stored it in <code>next</code>, but that it has not yet been handed
	 * out by {@link #nextObject()}.
	 */
	private boolean nextComputed = false;

	/**
	 * Creates a new nested-loops intersection backed on two iterations using a
	 * user defined predicate to decide whether two tuples match. This
	 * constructor also supports the handling of a non-resettable input
	 * iteration <code>input1</code>, because a parameterless function can be
	 * defined that returns this input iteration again.
	 *
	 * @param input0 the first input iteration that is traversed in the "outer"
	 *        loop.
	 * @param input1 the second input iteration that is traversed in the
	 *        "inner" loop.
	 * @param resetInput1 a parameterless function that delivers the second
	 *        input iteration again, when it cannot be reset, i.e., the
	 *        <code>reset</code> method of <code>input1</code> will cause a
	 *        {@link java.lang.UnsupportedOperationException}. If the second
	 *        input iteration supports the <code>reset</code> operation this
	 *        argument can be set to <code>null</code>.
	 * @param equals the binary predicate evaluated for each tuple of elements
	 *        backed on one element of each input iteration in order to select
	 *        them. Only these tuples where the predicate's evaluation result
	 *        is <code>true</code> have been qualified to be a result of the
	 *        intersection operation.
	 */
	public Intersection(Iterator<? extends E> input0, Iterator<? extends E> input1, Function<?, ? extends Iterator<? extends E>> resetInput1, Predicate<? super E> equals) {
		this.input0 = Cursors.wrap(input0);
		this.input1 = Cursors.wrap(input1);
		this.resetInput1 = resetInput1;
		this.equals = equals;
	}

	/**
	 * Creates a new nested-loops intersection backed on two iterations using a
	 * default {@link Equal equality} predicate to decide whether two tuples
	 * match. This constructor also supports the handling of a non-resettable
	 * input iteration <code>input1</code>, because a parameterless function
	 * can be defined that returns this input iteration again.
	 *
	 * @param input0 the first input iteration that is traversed in the "outer"
	 *        loop.
	 * @param input1 the second input iteration that is traversed in the
	 *        "inner" loop.
	 * @param resetInput1 a parameterless function that delivers the second
	 *        input iteration again, when it cannot be reset, i.e., the
	 *        <code>reset</code> method of <code>input1</code> will cause a
	 *        {@link java.lang.UnsupportedOperationException}. If the second
	 *        input iteration supports the <code>reset</code> operation this
	 *        argument can be set to <code>null</code>.
	 */
	public Intersection(Iterator<? extends E> input0, Iterator<? extends E> input1, Function<?, ? extends Iterator<? extends E>> resetInput1) {
		this(input0, input1, resetInput1, Equal.DEFAULT_INSTANCE);
	}

	/**
	 * Creates a new nested-loops intersection backed on two iterations using a
	 * user defined predicate to decide whether two tuples match. This
	 * constructor does not support the handling of a non-resettable input
	 * iteration <code>input1</code>, so the <code>reset</code> operation must
	 * be guaranteed.
	 *
	 * @param input0 the first input iteration that is traversed in the "outer"
	 *        loop.
	 * @param input1 the second input iteration that is traversed in the
	 *        "inner" loop.
	 * @param equals the binary predicate evaluated for each tuple of elements
	 *        backed on one element of each input iteration in order to select
	 *        them. Only these tuples where the predicate's evaluation result
	 *        is <code>true</code> have been qualified to be a result of the
	 *        intersection operation.
	 */
	public Intersection(Iterator<? extends E> input0, Cursor<? extends E> input1, Predicate<? super E> equals) {
		this(input0, input1, null, equals);
	}

	/**
	 * Creates a new nested-loops intersection backed on two iterations using a
	 * default {@link Equal equality} predicate to decide whether two tuples
	 * match. This constructor does not support the handling of a non-resettable
	 * input iteration <code>input1</code>, so the <code>reset</code> operation
	 * must be guaranteed.
	 *
	 * @param input0 the first input iteration that is traversed in the "outer"
	 *        loop.
	 * @param input1 the second input iteration that is traversed in the
	 *        "inner" loop.
	 */
	public Intersection(Iterator<? extends E> input0, Cursor<? extends E> input1) {
		this(input0, input1, null, Equal.DEFAULT_INSTANCE);
	}

	/**
	 * Resets the "inner" loop (the iteration <code>input1</code>) of the
	 * nested-loops intersection operator. If the function
	 * <code>resetInput1</code> is specified, it is invoked and its result is
	 * wrapped to become the new <code>input1</code>, else the
	 * <code>reset</code> method of <code>input1</code> is called. In both
	 * cases <code>position</code> is set back to <code>0</code>.
	 */
	private void resetInput1() {
		if (resetInput1 != null)
			input1 = Cursors.wrap(resetInput1.invoke());
		else
			input1.reset();
		position = 0;
	}

	/**
	 * Opens the intersection operator, i.e., signals the cursor to reserve
	 * resources, open the input iterations, etc. Multiple calls to
	 * <code>open</code> do not have any effect: if the operator is already
	 * opened the method returns immediately, otherwise the operator itself
	 * and both input iterations are opened.
	 */
	public void open() {
		if (isOpened)
			return;
		super.open();
		input0.open();
		input1.open();
	}

	/**
	 * Closes the intersection operator, i.e., signals the cursor to clean up
	 * resources, close the input iterations, etc. Multiple calls to
	 * <code>close</code> do not have any effect: if the operator is already
	 * closed the method returns immediately, otherwise the operator itself
	 * and both (current) input iterations are closed.
	 */
	public void close() {
		if (isClosed)
			return;
		super.close();
		input0.close();
		input1.close();
	}

	/**
	 * Returns <code>true</code> if the iteration has more elements. (In other
	 * words, returns <code>true</code> if <code>next</code> or
	 * <code>peek</code> would return an element rather than throwing an
	 * exception.)
	 *
	 * <p>If a result has already been located and not yet been consumed, it
	 * is reported again without touching the inputs. Otherwise the search
	 * proceeds as follows:
	 * <pre>
	 *     while (input0.hasNext()) {
	 *         E next0 = input0.next();
	 *         if (reset)
	 *             resetInput1();
	 *         reset = true;
	 *         while (input1.hasNext()) {
	 *             if (equals.invoke(next0, input1.next())) {
	 *                 next = next0;
	 *                 return true;
	 *             }
	 *         }
	 *     }
	 *     return false;
	 * </pre>
	 * The second input iteration is checked in the inner loop against each
	 * object of the first input iteration until a match is found. No record
	 * of already matched elements of <code>input1</code> is kept, so an
	 * element of <code>input0</code> is reported whenever at least one
	 * matching element exists in <code>input1</code>.</p>
	 *
	 * @return <code>true</code> if the intersection operator has more
	 *         elements.
	 */
	protected boolean hasNextObject() {
		if (!nextComputed)
			lookupNext();
		return nextComputed;
	}

	/**
	 * Advances the "outer" loop until an element is found that matches some
	 * element of the "inner" loop. On success the element is stored in
	 * <code>next</code> and <code>nextComputed</code> is set; if
	 * <code>input0</code> is exhausted without a match,
	 * <code>nextComputed</code> is cleared.
	 */
	private void lookupNext() {
		while (input0.hasNext()) {
			E candidate = input0.next();
			if (reset)
				resetInput1();
			// input1 is about to be (at least partially) consumed, so it must
			// be rewound for the following outer element -- regardless of
			// whether this candidate finds a partner. Arming the flag only on
			// a match would leave input1 exhausted after an unmatched
			// candidate and silently drop every later result.
			reset = true;
			while (input1.hasNext()) {
				if (equals.invoke(candidate, input1.next())) {
					next = candidate;
					nextComputed = true;
					return;
				}
			}
		}
		nextComputed = false;
	}

	/**
	 * Returns the next element in the iteration, i.e., the next element of
	 * <code>input0</code> for which a matching element of <code>input1</code>
	 * was found. A result that was located by a preceding call to
	 * <code>hasNextObject</code> is handed out directly; otherwise the search
	 * is performed now.
	 *
	 * @return the next element in the iteration.
	 * @throws java.util.NoSuchElementException if the iteration has no more
	 *         elements.
	 */
	protected E nextObject() {
		if (!nextComputed)
			lookupNext();
		// Never hand out a stale value of 'next' when the search came up empty.
		if (!nextComputed)
			throw new java.util.NoSuchElementException("the intersection has no more elements");
		nextComputed = false;
		return next;
	}

	/**
	 * Resets the intersection operator to its initial state such that the
	 * caller is able to traverse the intersection again without constructing
	 * a new intersection operator (optional operation). Both inputs are
	 * reset and any result that was located but not yet consumed is
	 * discarded.
	 *
	 * <p>Note, that this operation is optional and might not work for all
	 * cursors.</p>
	 *
	 * @throws UnsupportedOperationException if the <code>reset</code>
	 *         operation is not supported by the intersection operator.
	 */
	public void reset() throws UnsupportedOperationException {
		super.reset();
		input0.reset();
		resetInput1();
		// input1 is fresh again, so the first inner traversal needs no reset.
		reset = false;
		// Drop a pending look-ahead; it belongs to the previous traversal.
		nextComputed = false;
	}

	/**
	 * Returns <code>true</code> if the <code>reset</code> operation is
	 * supported by the intersection operator. Otherwise it returns
	 * <code>false</code>. Resetting requires that <code>input0</code> can be
	 * reset and that <code>input1</code> can be restored, either through the
	 * function <code>resetInput1</code> or through its own <code>reset</code>
	 * method.
	 *
	 * @return <code>true</code> if the <code>reset</code> operation is
	 *         supported by the intersection operator, otherwise
	 *         <code>false</code>.
	 */
	public boolean supportsReset() {
		return input0.supportsReset()
			&& (resetInput1 != null || input1.supportsReset());
	}
}