/* XXL: The eXtensible and fleXible Library for data processing Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger Head of the Database Research Group Department of Mathematics and Computer Science University of Marburg Germany This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; If not, see <http://www.gnu.org/licenses/>. http://code.google.com/p/xxl/ */ package xxl.core.cursors.differences; import java.util.Comparator; import java.util.Iterator; import xxl.core.cursors.AbstractCursor; import xxl.core.cursors.Cursor; import xxl.core.cursors.Cursors; /** * A sort-based implementation of the difference operator * (<code>input1 - input2</code>). This operation can be performed in two * different ways, namely the first realization removes an element of * <code>input1</code> if the same element exists in <code>input2</code>. The * second way of processing removes all elements of <code>input1</code> that * match with an element of <code>input2</code>. This second approch implies * that no duplicates will be returned by the difference operator, whereas the * first solution may contain duplicates if the number of equal elements in * cursor <code>input1</code> is greater than that of <code>input2</code>. * * <p>The boolean flag <code>all</code> signals if all elements of cursor * <code>input1</code> that are equal to an element of <code>input2</code> will * be removed. In contrast to setting the flag to <code>false</code>, only one * element will be removed. So depending on this flag the result of the * difference operation can be a <i>set</i>, i.e., if <code>all</code> is * <code>true</code> all duplicates will be removed in the output of this * cursor, otherwise only one element is removed and the result may be a * <i>multi-set</i>, i.e., duplicates may occur in the output.</p> * * <p><b>Precondition:</b> The input cursors have to be sorted!</p> * * <p><b>Note:</b> If an input iteration is given by an object of the class * {@link Iterator}, i.e., it does not support the <code>peek</code> operation, * it is internally wrapped to a cursor.</p> * * <p><b>Example usage (1):</b> * <code><pre> * SortBasedDifference<Integer> difference = new SortBasedDifference<Integer>( * new Enumerator(21), * new Filter<Integer>( * new Enumerator(21), * new Predicate<Integer>() { * public boolean invoke(Integer next) { * return next % 2 == 0; * } * } * ), * ComparableComparator.INTEGER_COMPARATOR, * true, * true * ); * * difference.open(); * * while (difference.hasNext()) * System.out.println(difference.next()); * * difference.close(); * </pre></code> * This example shows how to remove all even numbers from a given enumerator * with range [0, 20]. A default instance of a default * {@link xxl.core.comparators.ComparableComparator#INTEGER_COMPARATOR comparator} * for integers is used to compare the elements of the two inputs. This kind of * comparator is also be chosen, if no comparator has been specified. The flag * <code>all</code>, which is set to <code>true</code> does not have any effect * in this case due to unique input elements.</p> * * <p><b>Example usage (2):</b> * <code><pre> * difference = new SortBasedDifference<Integer>( * new ArrayCursor<Integer>(1, 2, 2, 2, 3), * new ArrayCursor<Integer>(1, 2, 2, 3), * ComparableComparator.INTEGER_COMPARATOR, * false, * true * ); * * difference.open(); * * while (difference.hasNext()) * System.out.println(difference.next()); * * difference.close(); * </pre></code> * The first input cursor contains the elements {1, 2, 2, 2, 3}. The second * cursor, that is to be subtracted, delivers the elements {1, 2, 2, 3}. So, in * this case the flag <code>all</code> plays an important role. If it is * <code>false</code>, as shown above, the sort-based difference operator * delivers the element {2} as the only result. If it has been set to * <code>true</code> the sort-based difference operator returns no * elements.</p> * * @param <E> the type of the elements returned by this iteration. * @see java.util.Iterator * @see xxl.core.cursors.Cursor * @see java.util.Comparator * @see xxl.core.comparators.ComparableComparator * @see xxl.core.cursors.differences.NestedLoopsDifference * @see xxl.core.relational.cursors.SortBasedDifference */ public class SortBasedDifference<E> extends AbstractCursor<E> { /** * The first (or left) input cursor of the difference operator. */ protected Cursor<E> input1; /** * The second (or right) input cursor of the difference operator. */ protected Cursor<? extends E> input2; /** * The comparator used to compare the elements of the two input cursors. */ protected Comparator<? super E> comparator; /** * A flag signaling if all matches returned by the comparator should be * removed or only one element will be removed. So depending on this flag * the result of difference operation can be a set, i.e., if * <code>all</code> is <code>true</code> all duplicates will be removed in * the resulting cursor, otherwise only one is removed and the result may * be a multi-set, i.e., duplicates may occur in the resulting cursor. */ protected boolean all; /** * A flag showing if the input cursors have been sorted in ascending or * descending order. */ protected boolean asc; /** * Creates a new instance of the sort-based difference operator. Every * iterator given to this constructor is wrapped to a cursor. * * @param sortedInput1 the first input iterator where the elements have to * be subtracted from. * @param sortedInput2 the second input iterator containing the elements * that have to be subtracted. * @param comparator a comparator comparing the elements of the two input * cursors. * @param all a boolean flag signaling if all elements of cursor * <code>input1</code> that are equal to an element of * <code>input2</code> will be removed, otherwise only one element * is removed. * @param asc a flag showing if the input cursors have been sorted * ascending or descending. */ public SortBasedDifference(Iterator<E> sortedInput1, Iterator<? extends E> sortedInput2, Comparator<? super E> comparator, boolean all, boolean asc) { this.input1 = Cursors.wrap(sortedInput1); this.input2 = Cursors.wrap(sortedInput2); this.comparator = comparator; this.all = all; this.asc = asc; } /** * Opens the sort-based difference operator, i.e., signals the cursor to * reserve resources, open input iterations, etc. Before a cursor has been * opened calls to methods like <code>next</code> or <code>peek</code> are * not guaranteed to yield proper results. Therefore <code>open</code> must * be called before a cursor's data can be processed. Multiple calls to * <code>open</code> do not have any effect, i.e., if <code>open</code> was * called the cursor remains in the state <i>opened</i> until its * <code>close</code> method is called. * * <p>Note, that a call to the <code>open</code> method of a closed cursor * usually does not open it again because of the fact that its state * generally cannot be restored when resources are released respectively * files are closed.</p> */ public void open() { if (isOpened) return; super.open(); input1.open(); input2.open(); } /** * Closes the sort-based difference operator, i.e., signals the cursor to * clean up resources and close its input cursors. When a cursor has been * closed calls to methods like <code>next</code> or <code>peek</code> are * not guaranteed to yield proper results. Multiple calls to * <code>close</code> do not have any effect, i.e., if <code>close</code> * was called the cursor remains in the state <i>closed</i>. * * <p>Note, that a closed cursor usually cannot be opened again because of * the fact that its state generally cannot be restored when resources are * released respectively files are closed.</p> */ public void close() { if (isClosed) return; super.close(); input1.close(); input2.close(); } /** * Returns <code>true</code> if the iteration has more elements. (In other * words, returns <code>true</code> if <code>next</code> or * <code>peek</code> would return an element rather than throwing an * exception.) * * <p>If <code>input2</code> has no further elements all remaining elements * of <code>input1</code> can be returned. If the input cursors are sorted * ascending, also the elements of <code>input1</code> are returned, if * they are smaller than the next element of <code>input2</code>. If the * next element of <code>input1</code> and <code>input2</code> are equal, * i.e., the given comparator returns 0, when comparing them, this element * will not be returned. Depending on the flag <code>all</code>, all * elements that are equal to the skipped element are skipped, too. So, if * <code>all</code> is <code>true</code> the resulting cursor contains no * duplicates. If the next element of <code>input2</code> is larger than * the next one of <code>input1</code>, <code>input2</code> is consumed as * long as this condition is fulfilled. If the input cursors are sorted * descending the conditions explained above are negated and the * computation runs the same way. * * @return <code>true</code> if the sort-based difference operator has more * elements. */ protected boolean hasNextObject() { boolean exit; do { exit = true; if (!input2.hasNext()) if (input1.hasNext()) { next = input1.next(); return true; } else return false; else if (input1.hasNext()) { int res = comparator.compare(input1.peek(), input2.peek()); if ((asc && res < 0) || (!asc && res > 0)) { next = input1.next(); return true; } else if (res == 0) { input1.next(); if (all) // remove duplicates while(input1.hasNext() && comparator.compare(input1.peek(), input2.peek()) == 0) input1.next(); else input2.next(); exit = false; } else { // (asc && res > 0) || (!asc && res < 0) input2.next(); while(input2.hasNext() && ((asc && comparator.compare(input1.peek(), input2.peek()) > 0) || (!asc && comparator.compare(input1.peek(), input2.peek()) < 0))) input2.next(); exit = false; } } else return false; } while (!exit); return false; } /** * Returns the next element in the iteration. This element will be removed * from the iteration, if <code>next</code> is called. * * @return the next element in the iteration. */ protected E nextObject() { return next; } /** * Removes from the underlying data structure the last element returned by * the sort-based difference operator (optional operation). This method can * be called only once per call to <code>next</code> or <code>peek</code> * and removes the element returned by this method. Note, that between a * call to <code>next</code> and <code>remove</code> the invocation of * <code>peek</code> or <code>hasNext</code> is forbidden. The behaviour of * a cursor is unspecified if the underlying data structure is modified * while the iteration is in progress in any way other than by calling this * method. * * @throws IllegalStateException if the <code>next</code> or * <code>peek</code> method has not yet been called, or the * <code>remove</code> method has already been called after the * last call to the <code>next</code> or <code>peek</code> method. * @throws UnsupportedOperationException if the <code>remove</code> * operation is not supported by the sort-based difference * operator. */ public void remove() throws IllegalStateException, UnsupportedOperationException { super.remove(); input1.remove(); } /** * Returns <code>true</code> if the <code>remove</code> operation is * supported by the sort-based difference operator. Otherwise it returns * <code>false</code>. * * @return <code>true</code> if the <code>remove</code> operation is * supported by the cursor, otherwise <code>false</code>. */ public boolean supportsRemove() { return input1.supportsRemove(); } /** * Replaces the object that was returned by the last call to * <code>next</code> or <code>peek</code> (optional operation). This * operation must not be called after a call to <code>hasNext</code>. It * should follow a call to <code>next</code> or <code>peek</code>. This * method should be called only once per call to <code>next</code> or * <code>peek</code>. The behaviour of a sort-based difference operator is * unspecified if the underlying data structure is modified while the * iteration is in progress in any way other than by calling this method. * * @param object the object that replaces the object returned by the last * call to <code>next</code> or <code>peek</code>. * @throws IllegalStateException if the <code>next</code> or * <code>peek</code> method has not yet been called, or the * <code>update</code> method has already been called after the * last call to the <code>next</code> or <code>peek</code> method. * @throws UnsupportedOperationException if the <code>update</code> * operation is not supported by the sort-based difference * operator. */ public void update(E object) throws IllegalStateException, UnsupportedOperationException { super.update(object); input1.update(object); } /** * Returns <code>true</code> if the <code>update</code> operation is * supported by the sort-based difference operator. Otherwise it returns * <code>false</code>. * * @return <code>true</code> if the <code>update</code> operation is * supported by the cursor, otherwise <code>false</code>. */ public boolean supportsUpdate() { return input1.supportsUpdate(); } /** * Resets the sort-based difference operator to its initial state (optional * operation). So the caller is able to traverse the underlying data * structure again. The modifications, removes and updates concerning the * underlying data structure, are still persistent. This method also resets * the input iterations. * * @throws UnsupportedOperationException if the <code>reset</code> * operation is not supported by the sort-based difference * operator. */ public void reset() throws UnsupportedOperationException { super.reset(); input1.reset(); input2.reset(); } /** * Returns <code>true</code> if the <code>reset</code> operation is * supported by the sort-based difference operator. Otherwise it returns * <code>false</code>. * * @return <code>true</code> if the <code>reset</code> operation is * supported by the cursor, otherwise <code>false</code>. */ public boolean supportsReset() { return input1.supportsReset() && input2.supportsReset(); } }