Aggregators.java example

Explorer
xxl-master
/* XXL: The eXtensible and fleXible Library for data processing

Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
                        Head of the Database Research Group
                        Department of Mathematics and Computer Science
                        University of Marburg
                        Germany

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library;  If not, see <http://www.gnu.org/licenses/>. 

    http://code.google.com/p/xxl/

*/

package xxl.core.math.statistics.nonparametric.aggregates;

import java.util.Iterator;
import java.util.List;

import xxl.core.cursors.mappers.Aggregator;
import xxl.core.cursors.mappers.ReservoirSampler;
import xxl.core.functions.Constant;
import xxl.core.functions.Function;
import xxl.core.math.Maths;
import xxl.core.math.functions.AdaptiveAggregationFunction;
import xxl.core.math.functions.AggregationFunction;
import xxl.core.math.functions.RealFunction;
import xxl.core.math.functions.SplineCompressedFunctionAggregateFunction;
import xxl.core.math.statistics.nonparametric.EmpiricalCDF;
import xxl.core.math.statistics.nonparametric.kernels.KernelBasedBlockEstimatorAggregationFunction;
import xxl.core.math.statistics.nonparametric.kernels.KernelFunction;
import xxl.core.math.statistics.parametric.aggregates.LastN;
import xxl.core.math.statistics.parametric.aggregates.Maximum;
import xxl.core.math.statistics.parametric.aggregates.Minimum;
import xxl.core.math.statistics.parametric.aggregates.ReservoirSample;
import xxl.core.math.statistics.parametric.aggregates.StatefulVariance;
import xxl.core.predicates.EveryNth;

/**
 * This class provides some static methods showing how one is able to use aggregation functions
 * and online aggregation functions of higher orders, i.e., initial statistical functions and
 * aggregation functions based on different aggregation functions. In regard to more complex applications,
 * preimplementations of kernel based methods are particularly provided.
 * 
 * @see xxl.core.cursors.mappers.Aggregator
 * @see xxl.core.math.functions.AdaptiveAggregationFunction
 * @see xxl.core.cursors.mappers.ReservoirSampler
 * @see xxl.core.math.functions.AdaptiveWeightFunctions
 * @see xxl.core.math.functions.SplineCompressedFunctionAggregateFunction
 * @see xxl.core.math.statistics.nonparametric.aggregates
 * @see xxl.core.math.statistics.nonparametric.kernels
 */

public class Aggregators {

	/**
	 * The default constructor has private access in order to ensure
	 * non-instantiability.
	 */
	private Aggregators() {
		// only static factory methods are offered; there is nothing to initialize
	}

	/** Returns an {@link java.util.Iterator iterator} of type {@link xxl.core.cursors.mappers.Aggregator aggregator}
	 * delivering a {@link xxl.core.math.statistics.nonparametric.kernels.NativeKernelDensityEstimator native kernel density estimator}
	 * as result of the aggregation.
	 * The aggregates base on an input iterator delivering data of type <tt>Object</tt>. While consuming 
	 * the iterator, an <tt>iid</tt> sample of the previously seen data is maintained that in turn is used
	 * for establishing a new native kernel density estimator.   
	 * <br>
	 * Generally, the following steps are required:<br>
	 * <P>1. Use a {@link xxl.core.math.statistics.parametric.aggregates.ReservoirSample reservoir sampling
	 * function} (or any other online sampling algorithm) to obtain
	 * an {@link java.util.Iterator iterator} delivering samples of
	 * an input iterator.
	 * <BR>
	 * 2. Use an aggregation function delivering an estimation
	 * of the spread (e.g. standard deviation, interquartile range, ...) of the data.
	 * <BR>
	 * 3. Combine the
	 * aggregation functions above within a new aggregator and use the tuples delivered
	 * by this iteration as input for an aggregation function of higher order.
	 * 
	 * </P>
	 * 
	 * <br><br>
	 * <code><pre>
	 * return new Aggregator(
	 *	new Aggregator( input,
	 *			 Maths.multiDimAggregateFunction( new AggregationFunction [] {
	 *			 	mapSamplingStrategy( sampleSize, samplingType),
	 *				new StatefulVariance()})
	 *			),
	 *	new NKDEAggregateFunction( kf, bandwidthType)
	 * );
	 * </pre></code>
	 *
	 *
	 * @param input data used to obtain an estimation of the pdf
	 * @param kf used kernel function to obtain an estimator
	 * @param sampleSize used sample size
	 * @param samplingType used type of sampling
	 * @param bandwidthType used bandwidth strategy
	 * @throws IllegalArgumentException if the given samplingType is not known
	 * @return an {@link xxl.core.cursors.mappers.Aggregator aggregator} delivering density estimators based on an
	 * input iterator
	 */
	public static Aggregator getNKDEAggregator(
		Iterator input,
		KernelFunction kf,
		int sampleSize,
		int samplingType,
		int bandwidthType)
		throws IllegalArgumentException {

		// Statistics computed side by side for every input element: the sampling
		// aggregate selected via samplingType and a StatefulVariance aggregate.
		AggregationFunction[] statistics = {
			mapSamplingStrategy(sampleSize, samplingType),
			new StatefulVariance()
		};
		// Inner stage: consumes the raw input and delivers the combined statistics.
		Aggregator statisticsStage = new Aggregator(input, Maths.multiDimAggregateFunction(statistics));
		// Outer stage: higher-order aggregation turning the statistics into the estimator.
		return new Aggregator(statisticsStage, new NKDEAggregateFunction(kf, bandwidthType));
	}

	/** Returns an {@link java.util.Iterator iterator} of type {@link xxl.core.cursors.mappers.Aggregator aggregator}
	 * delivering a {@link xxl.core.math.statistics.nonparametric.kernels.NativeKernelCDF native kernel cdf}
	 * as result of the aggregation.
	 * The aggregates base on an input iterator delivering data of type <tt>Object</tt>. While consuming 
	 * the iterator, an <tt>iid</tt> sample of the previously seen data is maintained that in turn is used
	 * for establishing a new native kernel cdf.   
	 * <br>
	 * Generally, the following steps are required:<br>
	 * <P>1. Use a {@link xxl.core.math.statistics.parametric.aggregates.ReservoirSample reservoir sampling
	 * function} (or any other online sampling algorithm) to obtain
	 * an {@link java.util.Iterator iterator} delivering samples of
	 * an input iterator.
	 * <BR>
	 * 2. Use an aggregation function delivering an estimation
	 * of the spread (e.g. standard deviation, interquartile range, ...) of the data.
	 * <BR>
	 * 3. Combine the
	 *  aggregation functions above within a new aggregator and use the tuples delivered
	 * by this iteration as input for an aggregation function of higher order.
	 * 
	 * </P>
	 * <code><pre>
	 * return new Aggregator(
			new Aggregator(
				input,
				Maths.multiDimAggregateFunction(new AggregationFunction[] {
					mapSamplingStrategy(sampleSize, samplingType),
					new StatefulVariance()})),
			new NativeKernelCDFAggregateFunction(kf, bandwidthType));
	 * </pre></code> 
	 * 
	 * @param input data used to obtain an estimation
	 * @param kf used kernel function to obtain an estimator
	 * @param sampleSize used sample size
	 * @param samplingType used type of sampling
	 * @param bandwidthType used bandwidth strategy
	 * @throws IllegalArgumentException if the given samplingType is not known
	 * @return an {@link xxl.core.cursors.mappers.Aggregator aggregator} delivering native kernel cdf's based on an
	 * input iterator
	 */
	public static Aggregator getNKCDFAggregator(
		Iterator input,
		KernelFunction kf,
		int sampleSize,
		int samplingType,
		int bandwidthType)
		throws IllegalArgumentException {

		// Statistics computed side by side for every input element: the sampling
		// aggregate selected via samplingType and a StatefulVariance aggregate.
		AggregationFunction[] statistics = {
			mapSamplingStrategy(sampleSize, samplingType),
			new StatefulVariance()
		};
		// Inner stage: consumes the raw input and delivers the combined statistics.
		Aggregator statisticsStage = new Aggregator(input, Maths.multiDimAggregateFunction(statistics));
		// Outer stage: higher-order aggregation turning the statistics into the kernel cdf.
		return new Aggregator(statisticsStage, new NativeKernelCDFAggregateFunction(kf, bandwidthType));
	}

	/** Returns an {@link java.util.Iterator iterator} of type {@link xxl.core.cursors.mappers.Aggregator aggregator}
	 * delivering an {@link xxl.core.math.statistics.nonparametric.kernels.ReflectionKernelDensityEstimator reflection kernel density estimator}
	 * using reflection as the result of the aggregation. 
	 * The aggregates base on an input iterator delivering data of type <tt>Object</tt>. While consuming 
	 * the iterator, an <tt>iid</tt> sample of the previously seen data is maintained that in turn is used
	 * for establishing a new reflection kernel density estimator.   
	 * <br>
	 * Generally, the following steps are required:<br>
	 * <P>1. Use a {@link xxl.core.math.statistics.parametric.aggregates.ReservoirSample reservoir sampling
	 * function} (or any other online sampling algorithm) to obtain
	 * an {@link java.util.Iterator iterator} delivering samples of
	 * an input iterator.
	 * <BR>
	 * 2. Use an aggregation function delivering an estimation
	 * of the spread (e.g. standard deviation, interquartile range, ...) of the data.
	 * <BR>
	 * 3. Combine the
	 *  aggregation functions above within a new aggregator and use the tuples delivered
	 * by this iteration as input for an aggregation function of higher order.
	 * 
	 * </P>
	 * 
	 * <br><br>
	 * <code><pre>
	 * return new Aggregator(
	 *	new Aggregator( input,
	 *			 Maths.multiDimAggregateFunction( new AggregationFunction [] {
	 *			 	mapSamplingStrategy( sampleSize, samplingType),
	 *				new StatefulVariance(),
	 *				 new Minimum(),
	 *				 new Maximum()})
	 *			),
	 *	new RKDEAggregateFunction( kf, bandwidthType)
	 * );
	 * </pre></code>
	 * 
	 * @param input data used to obtain an estimation
	 * @param kf used kernel function to obtain an estimator
	 * @param sampleSize used sample size
	 * @param samplingType used type of sampling
	 * @param bandwidthType used bandwidth strategy
	 * @throws IllegalArgumentException if the given samplingType is not known
	 * @return an {@link xxl.core.cursors.mappers.Aggregator aggregator} delivering reflection kernel 
	 * density estimators based on an
	 * input iterator
	 */
	public static Aggregator getRKDEAggregator(
		Iterator input,
		KernelFunction kf,
		int sampleSize,
		int samplingType,
		int bandwidthType)
		throws IllegalArgumentException {

		// Statistics computed side by side for every input element, in this order:
		// sampling aggregate, StatefulVariance, Minimum, Maximum.
		AggregationFunction[] statistics = {
			mapSamplingStrategy(sampleSize, samplingType),
			new StatefulVariance(),
			new Minimum(),
			new Maximum()
		};
		// Inner stage: consumes the raw input and delivers the combined statistics.
		Aggregator statisticsStage = new Aggregator(input, Maths.multiDimAggregateFunction(statistics));
		// Outer stage: higher-order aggregation turning the statistics into the estimator.
		return new Aggregator(statisticsStage, new RKDEAggregateFunction(kf, bandwidthType));
	}

	/** Returns an {@link java.util.Iterator iterator} of type {@link xxl.core.cursors.mappers.Aggregator aggregator}
	 * delivering a {@link xxl.core.math.statistics.nonparametric.kernels.ReflectionKernelCDF reflection kernel cdf}
	 * as result of the aggregation.
	 * The aggregates base on an input iterator delivering data of type <tt>Object</tt>. While consuming 
	 * the iterator, an <tt>iid</tt> sample of the previously seen data is maintained that in turn is used
	 * for establishing a new reflection kernel cdf.   
	 * <br>
	 * Generally, the following steps are required:<br>
	 * <P>1. Use a {@link xxl.core.math.statistics.parametric.aggregates.ReservoirSample reservoir sampling
	 * function} (or any other on-line sampling algorithm) to obtain
	 * an {@link java.util.Iterator iterator} delivering samples of
	 * an input iterator.
	 * <BR>
	 * 2. Use an aggregation function delivering an estimation
	 * of the spread (e.g. standard deviation, interquartile range, ...) of the data.
	 * <BR>
	 * 3. Combine the
	 *  aggregation functions above within a new aggregator and use the tuples delivered
	 * by this iteration as input for an aggregation function of higher order.
	 *
	 * </P>
	 * <code><pre>
	 * return new Aggregator(
			new Aggregator(
				input,
				Maths.multiDimAggregateFunction(new AggregationFunction[] {
					mapSamplingStrategy(sampleSize, samplingType),
					new StatefulVariance(),
					new Minimum(),
					new Maximum()
				})
			),
			new ReflectionKernelCDFAggregateFunction(kf, bandwidthType)
		);
	 * </pre></code>
	 * 
	 * @param input data used to obtain an estimation
	 * @param kf used kernel function to obtain an estimator
	 * @param sampleSize used sample size
	 * @param samplingType used type of sampling
	 * @param bandwidthType used bandwidth strategy
	 * @throws IllegalArgumentException if the given samplingType is not known
	 * @return an {@link xxl.core.cursors.mappers.Aggregator aggregator} delivering reflection kernel cdf's based on an
	 * input iterator
	 */
	public static Aggregator getRKCDFAggregator(
		Iterator input,
		KernelFunction kf,
		int sampleSize,
		int samplingType,
		int bandwidthType)
		throws IllegalArgumentException {

		// Statistics computed side by side for every input element, in this order:
		// sampling aggregate, StatefulVariance, Minimum, Maximum.
		AggregationFunction[] statistics = {
			mapSamplingStrategy(sampleSize, samplingType),
			new StatefulVariance(),
			new Minimum(),
			new Maximum()
		};
		// Inner stage: consumes the raw input and delivers the combined statistics.
		Aggregator statisticsStage = new Aggregator(input, Maths.multiDimAggregateFunction(statistics));
		// Outer stage: higher-order aggregation turning the statistics into the kernel cdf.
		return new Aggregator(statisticsStage, new ReflectionKernelCDFAggregateFunction(kf, bandwidthType));
	}

	/** Returns an {@link java.util.Iterator iterator} of type {@link xxl.core.cursors.mappers.Aggregator aggregator}
	 * delivering estimators
	 * as result of the aggregation. They rely on a user-defined aggregate function for building
	 * estimators based on a sample and statistical values (e.g. variance).
	 * The aggregates base on an input iterator delivering data of type <tt>Object</tt>. While consuming 
	 * the iterator, an <tt>iid</tt> sample of the previously seen data is maintained that in turn is used
	 * for establishing a new kernel based estimator.   
	 * <br>
	 * Generally, the following steps are required:<br>
	 * <P>1. Use a {@link xxl.core.math.statistics.parametric.aggregates.ReservoirSample reservoir sampling
	 * function} (or any other online sampling algorithm) to obtain
	 * an {@link java.util.Iterator iterator} delivering samples of
	 * an input iterator.
	 * <BR>
	 * 2. Use an aggregation function delivering an estimation
	 * of the spread (e.g. standard deviation, interquartile range, ...) of the data.
	 * <BR>
	 * 3. Combine the
	 *  aggregation functions above within a new aggregator and use the tuples delivered
	 * by this iteration as input for an aggregation function of higher order.
	 * 
	 * </P>
	 * 
	 * <br><br>
	 * <code><pre>
	 * return new Aggregator(
			new Aggregator(
				input,
				Maths.multiDimAggregateFunction(new AggregationFunction[] {
					mapSamplingStrategy(sampleSize, samplingType),
					new StatefulVariance(),
					new Minimum(),
					new Maximum()
				})
			),
			kernelBasedAggregateFunction
		);
	 * </pre></code>
	 * 
	 * @param input data used to obtain an estimation
	 * @param kernelBasedAggregateFunction aggregation function
	 * @param sampleSize used sample size
	 * @param samplingType used type of sampling
	 * @throws IllegalArgumentException if the given samplingType is not known
	 * @return an {@link xxl.core.cursors.mappers.Aggregator aggregator} delivering the estimators built by the
	 * given aggregation function based on an input iterator
	 */
	public static Aggregator getReservoirBasedKernelEstimatorAggregator(
		Iterator input,
		AggregationFunction kernelBasedAggregateFunction,
		int sampleSize,
		int samplingType)
		throws IllegalArgumentException {

		// Statistics computed side by side for every input element, in this order:
		// sampling aggregate, StatefulVariance, Minimum, Maximum.
		AggregationFunction[] statistics = {
			mapSamplingStrategy(sampleSize, samplingType),
			new StatefulVariance(),
			new Minimum(),
			new Maximum()
		};
		// Inner stage: consumes the raw input and delivers the combined statistics.
		Aggregator statisticsStage = new Aggregator(input, Maths.multiDimAggregateFunction(statistics));
		// Outer stage: the caller-supplied function builds the estimator from them.
		return new Aggregator(statisticsStage, kernelBasedAggregateFunction);
	}

	/** Returns an {@link java.util.Iterator iterator} of type {@link xxl.core.cursors.mappers.Aggregator aggregator}
	 * delivering estimators as result of the aggregation. An iterator containing Objects of type <TT>Number</TT> 
	 * is partitioned into blocks of a predefined size.
	 * While consuming the iterator, separate kernel based estimators with a chosen bandwidth strategy 
	 * are established for each block. In each step, after the new estimator is built, a convex linear combination
	 * of the 'old' and the 'new' estimator will be returned as the actual aggregation result. There exist different strategies
	 * for the weights of the estimators. They are provided in {@link xxl.core.math.functions.AdaptiveWeightFunctions
	 * AdaptiveWeightFunctions}. The construction of the aggregates according to the current step and weights
	 * is realized in {@link xxl.core.math.functions.AdaptiveAggregationFunction AdaptiveAggregationFunction}.  
	 * </P>
	 * 
	 * <br>
	 * <code><pre>
	 * return new Aggregator(
			KernelBasedBlockEstimatorAggregationFunction.inputCursor(input, blocksize),
			new AdaptiveAggregationFunction(
				new KernelBasedBlockEstimatorAggregationFunction(factory, kf, bandwidthType),
				weights,
				realMode
			)
		);
	 * </pre></code>
	 *
	 * @param factory factory delivering kernel based estimators for each block 
	 * @param input data stream to build an adaptive estimator (must contain Objects of type <TT>Number</TT>)
	 * @param kf used {@link xxl.core.math.statistics.nonparametric.kernels.KernelFunction kernel function} to build up the block based estimator
	 * @param bandwidthType type of bandwidth used by each block estimator
	 * @param blocksize size of each block performed separately and used to build an estimator
	 * @param weights RealFunction delivering weights used to combine the blockestimators
	 * @param realMode indicates that the adaptive aggregation function assumes to combine Objects of type {@link xxl.core.math.functions.RealFunction}
	 * instead of Objects of type {@link xxl.core.functions.Function} consuming Objects of type <TT>Number</TT>.
	 * @throws IllegalArgumentException may be propagated from the underlying cursor and aggregation functions; this method itself performs no argument checks
	 * @return an {@link xxl.core.cursors.mappers.Aggregator aggregator} delivering kernel based estimators that 
	 * are iteratively combined
	 */
	public static Aggregator getAdaptiveKernelBasedAggregator(
		Function factory,
		Iterator input,
		KernelFunction kf,
		int bandwidthType,
		int blocksize,
		RealFunction weights,
		boolean realMode)
		throws IllegalArgumentException {

		// Pure composition of library pieces: no argument is validated in this method.
		return new Aggregator(
			// the input is first wrapped by the block estimator's own inputCursor helper
			KernelBasedBlockEstimatorAggregationFunction.inputCursor(input, blocksize),
			// adaptive wrapper combining successive block estimators using the given weights
			new AdaptiveAggregationFunction(
				// block estimator built from the factory, kernel function and bandwidth type
				new KernelBasedBlockEstimatorAggregationFunction(factory, kf, bandwidthType),
				weights,
				realMode
			)
		);
	}

	/** Returns an {@link java.util.Iterator iterator} of type {@link xxl.core.cursors.mappers.Aggregator aggregator}
	 * delivering kernel based estimators as result of the aggregation. An iterator containing Objects of type <TT>Number</TT> 
	 * is partitioned into blocks of a predefined size.
	 * While consuming the iterator, separate kernel based estimators with a chosen bandwidth strategy 
	 * are established for each block. In each step, after the new estimator is built, a convex linear combination
	 * of the 'old' and the 'new' estimator will be returned as the actual aggregation result. There exist different strategies
	 * for the weights of the estimators. They are provided in {@link xxl.core.math.functions.AdaptiveWeightFunctions
	 * AdaptiveWeightFunctions}. The construction of the aggregates according to the current step and weights
	 * is realized in {@link xxl.core.math.functions.AdaptiveAggregationFunction AdaptiveAggregationFunction}.
	 * In regard to the limited memory resources, the current aggregate is compressed. Therefore, the {@link
	 * xxl.core.math.numerics.splines.CubicBezierSpline cubic Bezier-Spline interpolate} for the aggregate 
	 * is computed, whereas a predefined number of function values of the aggregate on an interval is computed.
	 * The compression of a new aggregate is realized in 
	 * {@link xxl.core.math.functions.SplineCompressedFunctionAggregateFunction}. The compression range, i.e.,
	 * the interval for the compression, has to be known.
	 * </P>
	 * 
	 * <br>
	 * <code><pre>
	 * return new Aggregator(
			KernelBasedBlockEstimatorAggregationFunction.inputCursor(input, blocksize),
			new SplineCompressedFunctionAggregateFunction(
				new AdaptiveAggregationFunction(
					new KernelBasedBlockEstimatorAggregationFunction(factory, kf, bandwidthType),
					weights,
					realMode
				),
				new EveryNth(blocksize),
				left,
				right,
				n,
				cdfMode
			)
		);
	 * </pre></code> 
	 *
	 * @param factory factory delivering kernel based estimators for each block 
	 * @param input data stream to build an adaptive estimator (must contain Objects of type <TT>Number</TT>)
	 * @param kf used {@link xxl.core.math.statistics.nonparametric.kernels.KernelFunction kernel function} to build up the block based estimator
	 * @param bandwidthType type of bandwidth used by each block estimator
	 * @param blocksize size of each block performed separately and used to build up an estimator
	 * @param weights RealFunction delivering weights used to combine the block estimators
	 * @param left left border of the valid compression range
	 * @param right right border of the valid compression range
	 * @param n number of points in the compression interval
	 * @param realMode indicates that the adaptive aggregation function assumes to combine Objects of type {@link xxl.core.math.functions.RealFunction}
	 * instead of Objects of type {@link xxl.core.functions.Function} consuming Objects of type <TT>Number</TT>.
	 * @param cdfMode indicates spline is in cdf mode, i.e., evaluating the spline at x > maximum causes the spline
	 * to return 1.0 instead of 0.0
	 * @throws IllegalArgumentException may be propagated from the underlying cursor and aggregation functions; this method itself performs no argument checks
	 * @return an {@link xxl.core.cursors.mappers.Aggregator aggregator} delivering kernel based estimators that 
	 * are iteratively combined and compressed in each aggregation step
	 */
	public static Aggregator getSplineCompressedAdaptiveKernelBasedAggregator(
		Function factory,
		Iterator input,
		KernelFunction kf,
		int bandwidthType,
		int blocksize,
		RealFunction weights,
		double left,
		double right,
		int n,
		boolean realMode,
		boolean cdfMode)
		throws IllegalArgumentException {
		// Pure composition of library pieces: no argument is validated in this method.
		return new Aggregator(
			// the input is first wrapped by the block estimator's own inputCursor helper
			KernelBasedBlockEstimatorAggregationFunction.inputCursor(input, blocksize),
			// spline compression applied on top of the adaptive aggregation
			new SplineCompressedFunctionAggregateFunction(
				// adaptive wrapper combining successive block estimators using the given weights
				new AdaptiveAggregationFunction(
					new KernelBasedBlockEstimatorAggregationFunction(factory, kf, bandwidthType),
					weights,
					realMode
				),
				// NOTE(review): presumably makes the compression fire once per block - confirm
				// against SplineCompressedFunctionAggregateFunction
				new EveryNth(blocksize),
				// fixed compression interval [left, right] sampled with n points
				left,
				right,
				n,
				cdfMode
			)
		);
	}

	/** Returns an {@link java.util.Iterator iterator} of type {@link xxl.core.cursors.mappers.Aggregator aggregator}
	 * delivering kernel based estimators as result of the aggregation. An iterator containing Objects of type <TT>Number</TT> 
	 * is partitioned into blocks of a predefined size.
	 * While consuming the iterator, separate kernel based estimators with a chosen bandwidth strategy 
	 * are established for each block. In each step, after the new estimator is built, a convex linear combination
	 * of the 'old' and the 'new' estimator will be returned as the actual aggregation result. There exist different strategies
	 * for the weights of the estimators. They are provided in {@link xxl.core.math.functions.AdaptiveWeightFunctions
	 * AdaptiveWeightFunctions}. The construction of the aggregates according to the current step and weights
	 * is realized in {@link xxl.core.math.functions.AdaptiveAggregationFunction AdaptiveAggregationFunction}.
	 * In regard to the limited memory resources, the current aggregate is compressed. Therefore, the {@link
	 * xxl.core.math.numerics.splines.CubicBezierSpline cubic Bezier-Spline interpolate} for the aggregate 
	 * is computed, whereas a predefined number of function values of the aggregate on an interval is computed.
	 * The compression of a new aggregate is realized in 
	 * {@link xxl.core.math.functions.SplineCompressedFunctionAggregateFunction}. The compression range in turn relies on 
	 * the current extrema. 
	 * </P>
	 * 
	 * <br>
	 * <code><pre>
	 * return new Aggregator(
			KernelBasedBlockEstimatorAggregationFunction.inputCursor(input, blocksize),
			new SplineCompressedFunctionAggregateFunction(
				new AdaptiveAggregationFunction(
					new KernelBasedBlockEstimatorAggregationFunction(factory, kf, bandwidthType),
					weights,
					realMode
				),
				new EveryNth(blocksize),
				KernelBasedBlockEstimatorAggregationFunction.accessValue(
					KernelBasedBlockEstimatorAggregationFunction.MINIMUM),
				KernelBasedBlockEstimatorAggregationFunction.accessValue(
					KernelBasedBlockEstimatorAggregationFunction.MAXIMUM),
				new Constant(new Double(n)),
				cdfMode
			)
		);
	 * </pre></code> 
	 *
	 * @param factory factory delivering kernel based estimators for each block 
	 * @param input data stream to build an adaptive estimator (must contain Objects of type <TT>Number</TT>)
	 * @param kf used {@link xxl.core.math.statistics.nonparametric.kernels.KernelFunction kernel function} to build up the block based estimator
	 * @param bandwidthType type of bandwidth used by each block estimator
	 * @param blocksize size of each block performed separately and used to build up an estimator
	 * @param weights RealFunction delivering weights used to combine the block estimators
	 * @param n number of points in the compression interval
	 * @param realMode indicates that the adaptive aggregation function assumes to combine Objects of type {@link xxl.core.math.functions.RealFunction}
	 * instead of Objects of type {@link xxl.core.functions.Function} consuming Objects of type <TT>Number</TT>.
	 * @param cdfMode indicates spline is in cdf mode, i.e., evaluating the spline at x > maximum causes the spline
	 * to return 1.0 instead of 0.0
	 * @throws IllegalArgumentException may be propagated from the underlying cursor and aggregation functions; this method itself performs no argument checks
	 * @return an {@link xxl.core.cursors.mappers.Aggregator aggregator} delivering kernel based estimators that 
	 * are iteratively combined and compressed in each aggregation step
	 */
	public static Aggregator getSplineCompressedAdaptiveKernelBasedAggregator(
		Function factory,
		Iterator input,
		KernelFunction kf,
		int bandwidthType,
		int blocksize,
		RealFunction weights,
		int n,
		boolean realMode,
		boolean cdfMode)
		throws IllegalArgumentException {
		// Pure composition of library pieces: no argument is validated in this method.
		return new Aggregator(
			// the input is first wrapped by the block estimator's own inputCursor helper
			KernelBasedBlockEstimatorAggregationFunction.inputCursor(input, blocksize),
			// spline compression applied on top of the adaptive aggregation
			new SplineCompressedFunctionAggregateFunction(
				// adaptive wrapper combining successive block estimators using the given weights
				new AdaptiveAggregationFunction(
					new KernelBasedBlockEstimatorAggregationFunction(factory, kf, bandwidthType),
					weights,
					realMode
				),
				new EveryNth(blocksize),
				// compression borders are not fixed here: they are read through the
				// MINIMUM / MAXIMUM accessors of the block estimator function
				KernelBasedBlockEstimatorAggregationFunction.accessValue(
					KernelBasedBlockEstimatorAggregationFunction.MINIMUM),
				KernelBasedBlockEstimatorAggregationFunction.accessValue(
					KernelBasedBlockEstimatorAggregationFunction.MAXIMUM),
				// number of points as a constant function; Double.valueOf replaces the
				// deprecated Double(double) constructor and yields an equal Double value
				new Constant(Double.valueOf(n)),
				cdfMode
			)
		);
	}

	/** Returns an {@link java.util.Iterator iterator} of type {@link xxl.core.cursors.mappers.Aggregator aggregator}
	 * delivering estimators based on a FACTORY as result of the aggregation. An iterator containing Objects of type <TT>Number</TT> 
	 * is partitioned into blocks of a predefined size.
	 * While consuming the iterator, separate estimators 
	 * are established for each block. In each step, after the new estimator is built, a convex linear combination
	 * of the 'old' and the 'new' estimator will be returned as the actual aggregation result. There exist different strategies
	 * for the weights of the estimators. They are provided in {@link xxl.core.math.functions.AdaptiveWeightFunctions
	 * AdaptiveWeightFunctions}. The construction of the aggregates according to the current step and weights
	 * is realized in {@link xxl.core.math.functions.AdaptiveAggregationFunction AdaptiveAggregationFunction}.
	 * In regard to the limited memory resources, the current aggregate is compressed. Therefore, the {@link
	 * xxl.core.math.numerics.splines.CubicBezierSpline cubic Bezier-Spline interpolate} for the aggregate 
	 * is computed, whereas a predefined number of function values of the aggregate on an interval is computed.
	 * The compression of a new aggregate is realized in 
	 * {@link xxl.core.math.functions.SplineCompressedFunctionAggregateFunction}. The compression range, i.e.,
	 * the interval for the compression, has to be known.
	 * </P>
	 * 
	 * <br>
	 * <code><pre>
	 * return new Aggregator(
			input,
			new SplineCompressedFunctionAggregateFunction(
				new AdaptiveAggregationFunction(
					new AggregationFunction&lt;List,Object&gt;() {
						int c = 0;
						public Object invoke(Object old, List next) {
							c++;
							if (next == null)
								return null;
							if (c % blocksize == 0)
								return factory.invoke(next);
							else
								return old;
					}
				}, 
				weights, realMode), new EveryNth(blocksize), left, right, n, cdfMode
			)
		);
	 * </pre></code> 
	 *
	 * @param factory factory delivering estimators for each block 
	 * @param input data stream to build an adaptive estimator (must contain Objects of type <TT>Number</TT>)
	  * @param blocksize size of each block performed separately and used to build up an estimator
	 * @param weights RealFunction delivering weights used to combine the block estimators
	 * @param left left border of valid compression range
	 * @param right right border of valid compression range
	 * @param n number of points in the compression interval
	 * @param realMode indicates that the adaptive aggregation function assumes to combine Objects of type {@link xxl.core.math.functions.RealFunction}
	 * instead of Objects of type {@link xxl.core.functions.Function} consuming Objects of type <TT>Number</TT>.
	 * @param cdfMode indicates spline is in cdf mode, i.e., evaluating the spline at x > maximum causes the spline
	 * to return 1.0 instead of 0.0
	 * @throws IllegalArgumentException may be propagated from the underlying aggregation functions; this method itself performs no argument checks
	 * @return an {@link xxl.core.cursors.mappers.Aggregator aggregator} delivering estimators that 
	 * are iteratively combined and compressed in each aggregation step
	 */
	public static Aggregator getSplineCompressedAdaptiveAggregator(
		final Function factory,
		Iterator input,
		final int blocksize,
		RealFunction weights,
		double left,
		double right,
		int n,
		boolean realMode,
		boolean cdfMode)
		throws IllegalArgumentException {

		// Block-wise estimator builder: every call is counted, and only on each
		// blocksize-th call is the factory asked for a new estimator; on all other
		// calls the previous aggregate is handed back unchanged.
		AggregationFunction<List,Object> blockEstimator = new AggregationFunction<List,Object>() {
			int calls = 0;
			public Object invoke(Object old, List next) {
				calls++; // the call is counted even when next is null
				if (next == null)
					return null;
				return (calls % blocksize == 0) ? factory.invoke(next) : old;
			}
		};
		// Adaptive wrapper combining old and new estimators using the given weights.
		AdaptiveAggregationFunction adaptive = new AdaptiveAggregationFunction(blockEstimator, weights, realMode);
		// Spline compression over the fixed interval [left, right] with n points.
		return new Aggregator(
			input,
			new SplineCompressedFunctionAggregateFunction(adaptive, new EveryNth(blocksize), left, right, n, cdfMode));
	}

	/** Returns an {@link java.util.Iterator iterator} of type {@link xxl.core.cursors.mappers.Aggregator aggregator}
	 * delivering an {@link xxl.core.math.statistics.nonparametric.EmpiricalCDF empirical cdf}
	 * as result of the aggregation.
	 * The aggregates base on an input iterator delivering data of type <tt>Object</tt>. While consuming 
	 * the iterator, an <tt>iid</tt> sample of the previously seen data is maintained that in turn is used
	 * for establishing a new empirical cdf.   
	 * <br>
	 * Generally, the following steps are required:<br>
	 * <P>1. Use a {@link xxl.core.math.statistics.parametric.aggregates.ReservoirSample reservoir sampling
	 * function} (or any other online sampling algorithm) to obtain
	 * an {@link java.util.Iterator iterator} delivering samples of
	 * an input iterator.
	 * <BR>
	 * 2. Use an aggregation function delivering an estimation
	 * of the spread (e.g. standard deviation, interquartile range, ...) of the data.
	 * <BR>
	 * 3. Combine the
	 * aggregation functions above within a new aggregator and use the tuples delivered
	 * by this iteration as input for an aggregation function of higher order.
	 * 
	 * </P>
	 * 
	 * <br><br>
	 * <code><pre>
	 * return new Aggregator(
			new Aggregator(input, mapSamplingStrategy(sampleSize, samplingType)),
			new EmpiricalCDFAggregateFunction()
		);
	 * </pre></code>
	 * 
	 * @param input data used to obtain an estimation of the cdf
	 * @param sampleSize used sample size
	 * @param samplingType used type of sampling
	 * @throws IllegalArgumentException if the given samplingType is not known
	 * @return an {@link xxl.core.cursors.mappers.Aggregator aggregator} delivering the empirical cdf
	 */
	public static Aggregator getEmpiricalCDFAggregator(Iterator input, int sampleSize, int samplingType)
		throws IllegalArgumentException {

		// Stage 1: resolve the requested sampling strategy (unknown or unsupported
		// types are rejected here) and let an aggregator apply it to the raw input.
		AggregationFunction samplingStrategy = mapSamplingStrategy(sampleSize, samplingType);
		Aggregator sampledInput = new Aggregator(input, samplingStrategy);

		// Stage 2: feed the output of the sampling aggregator into the
		// aggregation function that builds the empirical cdf.
		return new Aggregator(sampledInput, new EmpiricalCDFAggregateFunction());
	}

	/** Returns an {@link java.util.Iterator iterator} of type {@link xxl.core.cursors.mappers.Aggregator aggregator}
	 * delivering empirical cdf's as result of the aggregation. An iterator containing Objects of type <TT>Number</TT> 
	 * is partitioned into blocks of a predefined size.
	 * While consuming the iterator, separate empirical cdf's
	 * are established for each block. In each step, after the new estimator is built, a convex linear combination
	 * of the 'old' and the 'new' estimator will be returned as the actual aggregation result. There exist different strategies
	 * for the weights of the estimators. They are provided in {@link xxl.core.math.functions.AdaptiveWeightFunctions
	 * AdaptiveWeightFunctions}. The construction of the aggregates according to the current step and weights
	 * is realized in {@link xxl.core.math.functions.AdaptiveAggregationFunction AdaptiveAggregationFunction}.
	 * In regard to the limited memory resources, the current aggregate is compressed. Therefore, the {@link
	 * xxl.core.math.numerics.splines.CubicBezierSpline cubic Bezier-Spline interpolate} for the aggregate 
	 * is computed, whereas a predefined number of function values of the aggregate on an interval is computed.
	 * The compression of a new aggregate is realized in 
	 * {@link xxl.core.math.functions.SplineCompressedFunctionAggregateFunction}. The compression range relies on 
	 * the current extrema. 
	 *
	 * 
	 * <br>
	 * <code><pre>
	 * return new Aggregator(
			new Aggregator(input, new Function[] { new LastN(blocksize), new Minimum(), new Maximum()}),
			new SplineCompressedFunctionAggregateFunction(
				new AdaptiveAggregationFunction(
					new AbstractFunction() {
						int c = 0;
						public Object invoke(Object old, Object next) {
							c++;
							if (next == null)
								return null;
							if (c % blocksize == 0)
								return EmpiricalCDF.FACTORY.invoke(((Object[]) next)[0]);
							else
								return old;
						}
					},
					weights, true // real mode
				),
				new EveryNth(blocksize),
				KernelBasedBlockEstimatorAggregationFunction.accessValue(1),
				KernelBasedBlockEstimatorAggregationFunction.accessValue(2),
				new Constant(new Double(n)),
				true // cdf mode
			)
		);
	 * </pre></code> 
	 *
	 * @param input data stream to build an adaptive estimator (must contain Objects of type <TT>Number</TT>)
	 * @param blocksize size of each block performed separately and used to build up an estimator
	 * @param weights RealFunction delivering weights used to combine the block estimators
	 * @param n number of points in the compression interval
	 * @throws IllegalArgumentException if one of the given arguments is invalid
	 * @return an {@link xxl.core.cursors.mappers.Aggregator aggregator} delivering empirical cdf's that 
	 * are iteratively combined and compressed in each aggregation step
	 */
	public static Aggregator getSplineCompressedAdaptiveEmpiricalCDFAggregator(
		Iterator input,
		final int blocksize,
		RealFunction weights,
		int n)
		throws IllegalArgumentException {

		// Fail fast: the step counter below is reduced modulo blocksize, so a blocksize of 0
		// would otherwise only surface as an ArithmeticException while the result is iterated.
		if (blocksize <= 0) {
			throw new IllegalArgumentException("blocksize must be positive, but was " + blocksize);
		}

		return new Aggregator(
			// Inner aggregator: combines LastN(blocksize), Minimum and Maximum into one
			// multi-dimensional aggregate; the outer functions read it by position (0, 1, 2).
			new Aggregator(input, Maths.multiDimAggregateFunction(new AggregationFunction[] { new LastN(blocksize), new Minimum(), new Maximum()})),
			new SplineCompressedFunctionAggregateFunction(
				new AdaptiveAggregationFunction(
					new AggregationFunction<List,Object>() {
						// number of aggregation steps seen so far
						int c = 0;
						public Object invoke(Object old, List next) {
							c++;
							if (next == null)
								return null;
							// a new empirical cdf is built only when a block has been completed;
							// in between, the previous estimator is passed through unchanged
							if (c % blocksize == 0)
								return EmpiricalCDF.FACTORY.invoke(next.get(0));
							else
								return old;
						}
					},
					weights, true // real mode
				),
				new EveryNth(blocksize),
				// positions 1 and 2 of the inner aggregate hold the results of Minimum and Maximum
				KernelBasedBlockEstimatorAggregationFunction.accessValue(1),
				KernelBasedBlockEstimatorAggregationFunction.accessValue(2),
				// Double.valueOf replaces the deprecated Double(double) constructor
				new Constant(Double.valueOf(n)),
				true // cdf mode
			)
		);
	}

	/** The method returns a {@link xxl.core.functions.Function function} representing a
	 * strategy used with the {@link xxl.core.cursors.mappers.ReservoirSampler} cursor.
	 *
	 * @param sampleSize size of the sample
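	 * @param type type of the reservoir sampling strategy; one of the constants <tt>RTYPE</tt>, <tt>XTYPE</tt> or
	 * <tt>ZTYPE</tt> of {@link xxl.core.cursors.mappers.ReservoirSampler} (<tt>YTYPE</tt> is not supported)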
	 * @throws IllegalArgumentException if an unknown or not supported strategy has been given
	 * @return a function representing a reservoir sampling strategy
	 */
	public static AggregationFunction mapSamplingStrategy(int sampleSize, int type) throws IllegalArgumentException {
		// Every branch either returns the strategy or throws, so no nullable local is needed.
		switch (type) {
			case ReservoirSampler.RTYPE :
				return new ReservoirSample(sampleSize, new ReservoirSample.RType(sampleSize));
			case ReservoirSampler.XTYPE :
				return new ReservoirSample(sampleSize, new ReservoirSample.XType(sampleSize));
			case ReservoirSampler.ZTYPE :
				return new ReservoirSample(sampleSize, new ReservoirSample.ZType(sampleSize));
			case ReservoirSampler.YTYPE :
				// the message names the ReservoirSample class that is actually used by this file
				throw new IllegalArgumentException("Type y is not supported so far. See javadoc xxl.core.math.statistics.parametric.aggregates.ReservoirSample for details!");
			default :
				// report the offending value to ease debugging
				throw new IllegalArgumentException("unknown sampling strategy given: " + type);
		}
	}
}