/* XXL: The eXtensible and fleXible Library for data processing Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger Head of the Database Research Group Department of Mathematics and Computer Science University of Marburg Germany This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; If not, see <http://www.gnu.org/licenses/>. http://code.google.com/p/xxl/ */ package xxl.core.cursors.mappers; import java.util.Iterator; import java.util.NoSuchElementException; import xxl.core.cursors.AbstractCursor; import xxl.core.cursors.Cursor; import xxl.core.cursors.Cursors; import xxl.core.math.functions.AggregationFunction; /** * The aggregator incrementally computes one or even more aggregates for an * input iteration. Due to the fact that an aggregator has to be initialized, * the user defined aggregation-function has to handle the case that the * aggregate is <code>null</code>. If the initialization of the aggregate is * finished, in the next step the aggregate-function is applied on the * aggregate and the input iteration's (a given input iterator is internally * wrapped to a cursor} <i>peek</i>-element, i.e., the element returned by the * iteration's <code>peek</code> method. In order to indicate that the * aggregation-function has not yet become initialized, <code>null</code> is * returned. The following code fragment shows this behaviour: * <code><pre> * aggregate = function.invoke(aggregate, input.peek()); * if (aggregate != null) * initialized = true; * return aggregate; * </pre></code> * If the aggregate has been initialized, the further computation is * demand-driven, so a call to the <code>next</code> method will set the * aggregate as follows: * <code><pre> * aggregate = function.invoke(aggregate, input.next()); * </pre></code> * This incremental computation with the help of a binary aggregation-function * implies that the absolute aggregate's value is first being determined when * the last element of the underlying iteration has been consumed and the * aggregation function has been applied. If the user is not interested in the * incremental computation of the aggregate during the demand-driven * computation, the final aggregation value can be delivered directly calling * the method <code>last</code>. * * <p>Futhermore the aggregator offers the possibility to define more than one * binary aggregation function, i.e., the user is able to compute a sum and an * average of the same data set in only one iteration process. This kind of * usage is often needed for SQL queries on relations. For further information * concerning the usage of multi-aggregate functions see * {@link xxl.core.relational.cursors.Aggregator}. Another very impressive use * of this class is given by the * {@link xxl.core.cursors.mappers.ReservoirSampler reservoir-sampler} which * uses a reservoir sampling function with a given strategy for <i>on-line * sampling</i>.</p> * * <p><b>Note:</b> When the given input iteration only implements the interface * {@link java.util.Iterator} it is wrapped to a cursor by a call to the static * method {@link xxl.core.cursors.Cursors#wrap(Iterator) wrap}.</p> * * <p><b>Example usage:</b> * <code><pre> * Aggregator<Integer, Integer> aggregator = new Aggregator<Integer, Integer>( * new DiscreteRandomNumber(new JavaDiscreteRandomWrapper(100), 50), * new AggregationFunction<Integer, Integer>() { * public Integer invoke(Integer aggregate, Integer next) { * if (aggregate == null) * return next; * return aggregate = Math.max(aggregate, next); * } * } * ); * * aggregator.open(); * * System.out.print("The result of the maximum aggregation is: " + aggregator.last()); * * aggregator.close(); * </pre></code> * This example determines the maximum of 50 random numbers with the * restriction that the value of a random number is not greater than 99. A new * function for the aggregation is defined that compares the value of the * current aggregate with the value of the next object and returns the object * with the maximum value. Furthermore the first two lines of the * <code>invoke</code> method show the initialization of this instance of an * aggregator. Because the aggregator works demand-driven the absolute maximum * is definitively detected if all elements were consumed. Therefore the method * <code>last</code> is used generating the final output. At last the * aggregator is closed with the intention to release resources.</p> * * <p>In order to compute various aggregation-functions simultanously, they * must be wrapped by a special * {@link xxl.core.math.Maths#multiDimAggregateFunction(xxl.core.math.functions.AggregationFunction[]) aggregation-function} * that calls successively all given functions. The wrapping * aggregation-function is initialized if and only if all specified functions * are initialized, meaning <code>null</code> will be returned by calling the * <code>next</code> method as long as all functions will return objects that * are not <code>null</code>! After initialization a list containing the * aggregation values given by the corresponding functions will be returned * every time calling the <code>next</code> method! * <code><pre> * Aggregator<Integer, List<Integer>> aggregator2 = new Aggregator<Integer, List<Integer>>( * new DiscreteRandomNumber(new JavaDiscreteRandomWrapper(100), 50), // the input cursor * Maths.multiDimAggregateFunction( * new AggregationFunction<Integer, Integer>() { // the aggregation function * public Integer invoke(Integer aggregate, Integer next) { * if (aggregate == null) * return next; * return Maths.max(aggregate, next); * } * }, * new AggregationFunction<Integer, Integer>() { // the second aggregation function * public Integer invoke(Integer aggregate, Integer next) { * if (aggregate == null) * return next; * return aggregate + next; * } * } * ) * ); * </pre></code> * </p> * * @param <E> the type of the elements returned by the iteration to be * aggregated. * @param <A> the type of the elements returned by the aggregated iteration. * @see java.util.Iterator * @see xxl.core.cursors.Cursor * @see xxl.core.cursors.AbstractCursor * @see xxl.core.relational.cursors.Aggregator */ public class Aggregator<E, A> extends AbstractCursor<A> { /** * The input iteration holding the data to be aggregated. */ protected Cursor<? extends E> input; /** * The function used for the aggregation. This binary function is invoked * with the prior aggregate and the next element of the input iteration. * When the aggregate is not yet initialized, a <code>null</code> value is * given to the aggregation-function. */ protected AggregationFunction<? super E, A> function; /** * The current aggregate of the processed input iteration. */ protected A aggregate; /** * A boolean flag to detect if an result-object for the aggregation has * been specified, i.e., the aggregate is already initialized. */ protected boolean initialized; /** * Creates a new aggregator backed on an input iteration and an aggregation * function. Every iterator given to this constructor is wrapped to a * cursor. * * @param iterator the input iteration holding the data to be aggragated. * @param function an aggregation-function. */ public Aggregator(Iterator<? extends E> iterator, final AggregationFunction<? super E, A> function) { this.input = Cursors.wrap(iterator); this.function = function; this.aggregate = null; this.initialized = false; } /** * Opens the aggregator, i.e., signals the cursor to reserve resources, * open the input iteration, etc. Before a cursor has been opened calls to * methods like <code>next</code> or <code>peek</code> are not guaranteed * to yield proper results. Therefore <code>open</code> must be called * before a cursor's data can be processed. Multiple calls to * <code>open</code> do not have any effect, i.e., if <code>open</code> was * called the cursor remains in the state <i>opened</i> until its * <code>close</code> method is called. * * <p>Note, that a call to the <code>open</code> method of a closed cursor * usually does not open it again because of the fact that its state * generally cannot be restored when resources are released respectively * files are closed.</p> */ public void open() { if (isOpened) return; super.open(); input.open(); } /** * Closes the aggregator, i.e., signals the cursor to clean up resources, * close the input iterations, etc. When a cursor has been closed calls to * methods like <code>next</code> or <code>peek</code> are not guaranteed * to yield proper results. Multiple calls to <code>close</code> do not * have any effect, i.e., if <code>close</code> was called the cursor * remains in the state <i>closed</i>. * * <p>Note, that a closed cursor usually cannot be opened again because of * the fact that its state generally cannot be restored when resources are * released respectively files are closed.</p> */ public void close () { if (isClosed) return; super.close(); input.close(); } /** * Returns <code>true</code> if the iteration has more elements. (In other * words, returns <code>true</code> if <code>next</code> or * <code>peek</code> would return an element rather than throwing an * exception.) * * @return <code>true</code> if the aggregator has more elements, otherwise * <code>false</code>. */ protected boolean hasNextObject() { if (input.hasNext()) { aggregate = function.invoke(aggregate, input.next()); if (!initialized && aggregate != null) initialized = true; return true; } return false; } /** * Returns the next element in the iteration. This element will be * accessible by some of the aggregator's methods, e.g., * <code>update</code> or <code>remove</code>, until a call to * <code>next</code> or <code>peek</code> occurs. This is calling * <code>next</code> or <code>peek</code> proceeds the iteration and * therefore its previous element will not be accessible any more. * * @return the next element in the iteration. */ protected A nextObject() { return aggregate; } /** * Returns the last element of this aggregator. This element represents the * final aggregation value. * * @return the last element of the aggregator. * @throws NoSuchElementException if a last element does not exist, i.e., * the input iteration does not hold enough elements to initialize * the aggregate. */ public A last() throws NoSuchElementException { try { return Cursors.last(this); } catch (NoSuchElementException nsee) { if (!initialized) throw nsee; return aggregate; } } /** * Resets the aggregator to its initial state such that the caller is able * to traverse the aggregation again without constructing a new aggregator * (optional operation). * * <p>Note, that this operation is optional and might not work for all * cursors.</p> * * @throws UnsupportedOperationException if the <code>reset</code> * operation is not supported by the aggregator. */ public void reset() throws UnsupportedOperationException { super.reset(); input.reset(); aggregate = null; initialized = false; } /** * Returns <code>true</code> if the <code>reset</code> operation is * supported by the aggregator. Otherwise it returns <code>false</code>. * * @return <code>true</code> if the <code>reset</code> operation is * supported by the aggregator, otherwise <code>false</code>. */ public boolean supportsReset() { return input.supportsReset(); } }