/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.cursors.intersections;
import java.util.Comparator;
import java.util.Iterator;
import xxl.core.collections.sweepAreas.SortMergeEquiJoinSA;
import xxl.core.collections.sweepAreas.SweepAreaImplementor;
import xxl.core.cursors.AbstractCursor;
import xxl.core.cursors.Cursor;
import xxl.core.cursors.Cursors;
import xxl.core.predicates.Equal;
import xxl.core.predicates.Predicate;
/**
* A sort-based implementation of the intersection operator. The sort-based
* intersection operator is based on a step-by-step processing of the two input
* iterations in consideration of their sort-order. The sweep-line status
* structure, here called sweep-area, consists of a bag with an additional
* method for reorganisation and is used to store the elements of the first
* input iteration. When an element of the second input iteration is processed,
* it is used to query the sweep-area for matching elements that can be
* returned as result of the intersection operator. Therefore a user-defined
* predicate is used to decide whether two elements of the input iterations are
* equal concerning their values.
*
* <p><b>Precondition:</b> The input cursors have to be sorted!</p>
*
* <p><b>Note:</b> When the given input iteration only implements the interface
* {@link Iterator} it is wrapped to a cursor by a call to the static method
* {@link Cursors#wrap(Iterator) wrap}.</p>
*
* <p><b>Example usage (1):</b>
* <code><pre>
* SortBasedIntersection<Integer> intersection = new SortBasedIntersection<Integer>(
* Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10).iterator(),
* Arrays.asList(0, 2, 4, 6, 8, 10).iterator(),
* new ListSAImplementor<Integer>(),
* ComparableComparator.INTEGER_COMPARATOR
* );
*
* intersection.open();
*
* Cursors.println(intersection);
*
* intersection.close();
* </pre></code>
* The input iterations for the intersection operation are given by two
* list-iterators. The first one contains all integer elements of the
* interval [0, 10]. The second one delivers only the even elements of the same
* interval. So the intersection operator should return the same integer values
* as contained in the second input cursor.</p>
*
* <p><b>Example usage (2):</b>
* <code><pre>
* intersection = new SortBasedIntersection<Integer>(
* Arrays.asList(1, 2, 2).iterator(),
* Arrays.asList(2, 2, 2).iterator(),
* new ListSAImplementor<Integer>(),
* ComparableComparator.INTEGER_COMPARATOR
* );
*
* intersection.open();
*
* Cursors.println(intersection);
*
* intersection.close();
* </pre></code>
* The second example usage computes the intersection of the input iterations
* {1,2,2} and {2,2,2}. The intersection is created in the same way as in the
* previous example. Conforming with the correct duplicate handling the result
* will be {2,2}.</p>
*
* @param <E> the type of the elements returned by this iteration.
* @see java.util.Iterator
* @see xxl.core.cursors.Cursor
* @see xxl.core.cursors.intersections.NestedLoopsIntersection
* @see xxl.core.relational.cursors.SortBasedIntersection
* @see xxl.core.collections.sweepAreas.SweepArea
*/
public class SortBasedIntersection<E> extends AbstractCursor<E> {

	/**
	 * The two sorted input iterations of the sort-based intersection operator.
	 * <code>inputs[0]</code> is the input whose elements are stored in the
	 * sweep-area, <code>inputs[1]</code> is the input whose elements are used
	 * to probe it.
	 */
	@SuppressWarnings("unchecked") // only object of type Cursor<? extends E> are stored inside the array
	protected Cursor<? extends E>[] inputs = new Cursor[2];

	/**
	 * The sweep-area that is used for storing the elements of the first input
	 * iteration (<code>inputs[0]</code>) and that is probed with elements of
	 * the second input iteration (<code>inputs[1]</code>).
	 */
	protected SortMergeEquiJoinSA<E> sweepArea;

	/**
	 * The comparator used to compare the elements of the two input iterations.
	 * It decides which input delivers the next element to be processed.
	 */
	protected Comparator<? super E> comparator;

	/**
	 * A binary predicate evaluated for each tuple of elements backed on one
	 * element of each input iteration in order to select them. Only these
	 * tuples where the predicate's evaluation result is <code>true</code> have
	 * been qualified to be a result of the intersection operation. Within
	 * this class the predicate is only handed over to the sweep-area.
	 */
	protected Predicate<? super E> equals;

	/**
	 * Creates a new sort-based intersection operator backed on two sorted
	 * input iterations using the given sweep-area implementor to store the
	 * elements of the first input iteration and probe with the elements of
	 * the second one for matchings.
	 *
	 * <p><b>Precondition:</b> The input iterations have to be sorted!</p>
	 *
	 * <p>The given binary predicate to decide whether two tuples match will be
	 * used along with the sweep-area implementor to create a new sweep-area.
	 * Every iterator given to this constructor is wrapped to a cursor.</p>
	 *
	 * @param sortedInput0 the first sorted input iteration to be intersected.
	 * @param sortedInput1 the second sorted input iteration to be intersected.
	 * @param impl the sweep-area implementor used for storing elements of the
	 *        first sorted input iteration (<code>sortedInput0</code>).
	 * @param comparator the comparator that is used for comparing elements of
	 *        the two sorted input iterations.
	 * @param equals the binary predicate evaluated for each tuple of elements
	 *        backed on one element of each input iteration in order to select
	 *        them. Only these tuples where the predicate's evaluation result
	 *        is <code>true</code> have been qualified to be a result of the
	 *        intersection operation.
	 */
	public SortBasedIntersection(Iterator<? extends E> sortedInput0, Iterator<? extends E> sortedInput1, SweepAreaImplementor<E> impl, Comparator<? super E> comparator, Predicate<? super E> equals) {
		this.inputs[0] = Cursors.wrap(sortedInput0);
		this.inputs[1] = Cursors.wrap(sortedInput1);
		// NOTE(review): the arguments 0 and 2 are presumably the ID of the
		// input stored in the sweep-area and the number of inputs - confirm
		// against the SortMergeEquiJoinSA constructor documentation.
		this.sweepArea = new SortMergeEquiJoinSA<E>(impl, 0, 2, equals);
		this.comparator = comparator;
		this.equals = equals;
	}

	/**
	 * Creates a new sort-based intersection operator backed on two sorted
	 * input iterations using the given sweep-area implementor to store the
	 * elements of the first input iteration and probe with the elements of
	 * the second one for matchings.
	 *
	 * <p><b>Precondition:</b> The input iterations have to be sorted!</p>
	 *
	 * <p>A default {@link xxl.core.predicates.Equal equality} predicate to
	 * decide whether two tuples match will be used along with the given
	 * sweep-area implementor to create a new sweep-area. Every iterator given
	 * to this constructor is wrapped to a cursor.</p>
	 *
	 * @param sortedInput0 the first sorted input iteration to be intersected.
	 * @param sortedInput1 the second sorted input iteration to be intersected.
	 * @param impl the sweep-area implementor used for storing elements of the
	 *        first sorted input iteration (<code>sortedInput0</code>).
	 * @param comparator the comparator that is used for comparing elements of
	 *        the two sorted input iterations.
	 */
	public SortBasedIntersection(Iterator<? extends E> sortedInput0, Iterator<? extends E> sortedInput1, SweepAreaImplementor<E> impl, Comparator<? super E> comparator) {
		this(sortedInput0, sortedInput1, impl, comparator, Equal.DEFAULT_INSTANCE);
	}

	/**
	 * Opens the cursor, i.e., signals the cursor to reserve resources, open
	 * files, etc. Before a cursor has been opened calls to methods like
	 * <code>next</code> or <code>peek</code> are not guaranteed to yield
	 * proper results. Therefore <code>open</code> must be called before a
	 * cursor's data can be processed. Multiple calls to <code>open</code> do
	 * not have any effect, i.e., if <code>open</code> was called the cursor
	 * remains in the state <i>opened</i> until its <code>close</code> method
	 * is called.
	 *
	 * <p>This implementation opens this cursor first and both input
	 * iterations afterwards.</p>
	 *
	 * <p>Note, that a call to the <code>open</code> method of a closed cursor
	 * usually does not open it again because of the fact that its state
	 * generally cannot be restored when resources are released respectively
	 * files are closed.</p>
	 */
	public void open() {
		if (isOpened)
			return;
		super.open();
		inputs[0].open();
		inputs[1].open();
	}

	/**
	 * Closes the cursor, i.e., signals the cursor to clean up resources, close
	 * files, etc. When a cursor has been closed calls to methods like
	 * <code>next</code> or <code>peek</code> are not guaranteed to yield
	 * proper results. Multiple calls to <code>close</code> do not have any
	 * effect, i.e., if <code>close</code> was called the cursor remains in the
	 * state <i>closed</i>.
	 *
	 * <p>This implementation closes both input iterations and the sweep-area.
	 * Each of them is closed even if closing a previous one fails with an
	 * exception; in that case the exception is propagated to the caller after
	 * the remaining resources have been released.</p>
	 *
	 * <p>Note, that a closed cursor usually cannot be opened again because of
	 * the fact that its state generally cannot be restored when resources are
	 * released respectively files are closed.</p>
	 */
	public void close() {
		if (isClosed)
			return;
		super.close();
		// nested try/finally guarantees that a failure while closing one
		// resource does not leak the resources that are closed after it
		try {
			inputs[0].close();
		}
		finally {
			try {
				inputs[1].close();
			}
			finally {
				sweepArea.close();
			}
		}
	}

	/**
	 * Returns <code>true</code> if the iteration has more elements. (In other
	 * words, returns <code>true</code> if <code>next</code> or
	 * <code>peek</code> would return an element rather than throwing an
	 * exception.)
	 *
	 * <p>The inputs are processed step-by-step according to their sort-order.
	 * In every step the index <code>j</code> of the input iteration that
	 * delivers the next object to be processed is determined: the second
	 * input is chosen if the first one is exhausted, otherwise the input
	 * holding the smaller element according to the comparator is chosen (on
	 * ties the first input is preferred, so that equal elements are inserted
	 * before they are probed). The object taken from this input,
	 * <code>queryObject</code>, is used to reorganize the sweep-area.
	 * Finally <code>queryObject</code> will be inserted into the sweep-area,
	 * if it comes from the first input iteration, or it will be used to query
	 * the sweep-area for matches, if it comes from the second one. The first
	 * match found is stored as next result and removed from the sweep-area,
	 * i.e., every element of the first input iteration is reported at most
	 * once.</p>
	 *
	 * <p>Results are only produced while probing with elements of the second
	 * input iteration. Therefore the processing stops as soon as the second
	 * input iteration is exhausted; remaining elements of the first input
	 * iteration are not consumed because they cannot contribute to the
	 * result any more.</p>
	 *
	 * @return <code>true</code> if the intersection operator has more
	 *         elements.
	 */
	protected boolean hasNextObject() {
		// only a probe with an element of inputs[1] can deliver a result, so
		// there is nothing left to do once inputs[1] is exhausted
		while (inputs[1].hasNext()) {
			int j = !inputs[0].hasNext() ?
				1 :
				comparator.compare(inputs[0].peek(), inputs[1].peek()) <= 0 ?
					0 :
					1;
			E queryObject = inputs[j].next();
			sweepArea.reorganize(queryObject, j);
			if (j == 0)
				sweepArea.insert(queryObject);
			else {
				Iterator<? extends E> results = sweepArea.query(queryObject, j);
				if (results.hasNext()) {
					next = results.next();
					// remove the match in order to report it only once
					results.remove();
					return true;
				}
			}
		}
		return false;
	}

	/**
	 * Returns the next element in the iteration. This element will be
	 * accessible by some of the cursor's methods, e.g., <code>update</code> or
	 * <code>remove</code>, until a call to <code>next</code> or
	 * <code>peek</code> occurs. This is calling <code>next</code> or
	 * <code>peek</code> proceeds the iteration and therefore its previous
	 * element will not be accessible any more.
	 *
	 * <p>The element returned is the match that has been determined by the
	 * preceding call to <code>hasNextObject</code>.</p>
	 *
	 * @return the next element in the iteration.
	 */
	protected E nextObject() {
		return next;
	}

	/**
	 * Resets the cursor to its initial state such that the caller is able to
	 * traverse the underlying data structure again without constructing a new
	 * cursor (optional operation). The modifications, removes and updates
	 * concerning the underlying data structure, are still persistent.
	 *
	 * <p>This implementation resets this cursor and both input iterations
	 * and clears the sweep-area afterwards. Hence, the operation can only
	 * succeed if both input iterations can be reset, see
	 * {@link #supportsReset()}.</p>
	 *
	 * @throws UnsupportedOperationException if the <code>reset</code>
	 *         operation is not supported by the cursor, e.g., because one of
	 *         the input iterations does not support it.
	 */
	public void reset() throws UnsupportedOperationException {
		super.reset();
		inputs[0].reset();
		inputs[1].reset();
		sweepArea.clear();
	}

	/**
	 * Returns <code>true</code> if the <code>reset</code> operation is
	 * supported by the cursor. Otherwise it returns <code>false</code>.
	 *
	 * <p>The intersection operator supports the <code>reset</code> operation
	 * if and only if both of its input iterations support it.</p>
	 *
	 * @return <code>true</code> if the <code>reset</code> operation is
	 *         supported by both input iterations, otherwise
	 *         <code>false</code>.
	 */
	public boolean supportsReset() {
		return inputs[0].supportsReset() && inputs[1].supportsReset();
	}
}