// Aggregator.java example (flink-master)
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.flink.api.common.aggregators;

import java.io.Serializable;

import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.types.Value;

/**
 * Aggregators are a means of aggregating values across parallel instances of a function. Aggregators  
 * collect simple statistics (such as the number of processed elements) about the actual work performed in a function.
 * Aggregators are specific to iterations and are commonly used to check the convergence of an iteration by using a
 * {@link ConvergenceCriterion}. In contrast to the {@link org.apache.flink.api.common.accumulators.Accumulator} (whose result is available at the end of a job),
 * the aggregators are computed once per iteration superstep. Their value can be used to check for convergence (at the end
 * of the iteration superstep) and it can be accessed in the next iteration superstep.
 * <p>
 * Aggregators must be registered, via a registration method, at the iteration inside which they are used. In the Java API,
 * that method is {@code IterativeDataSet.registerAggregator(...)}, or {@code IterativeDataSet.registerAggregationConvergenceCriterion(...)}
 * when using the aggregator together with a convergence criterion. Aggregators are always registered under a name. That
 * name can be used to access the aggregator at runtime from within a function. The following code snippet shows a typical
 * case. Here, the aggregator counts, across all parallel instances, how many elements are filtered out by a function.
 * 
 * <pre>{@code
 * // the user-defined function
 * public class MyFilter extends RichFilterFunction<Double> {
 *     private LongSumAggregator agg;
 *
 *     public void open(Configuration parameters) {
 *         agg = getIterationRuntimeContext().getIterationAggregator("numFiltered");
 *     }
 *
 *     public boolean filter(Double value) {
 *         if (value > 1000000.0) {
 *             agg.aggregate(1);
 *             return false;
 *         }
 *
 *         return true;
 *     }
 * }
 *
 * // the iteration where the aggregator is registered
 * IterativeDataSet<Double> iteration = input.iterate(100).registerAggregator("numFiltered", new LongSumAggregator());
 * ...
 * DataSet<Double> filtered = someIntermediateResult.filter(new MyFilter());
 * ...
 * DataSet<Double> result = iteration.closeWith(filtered);
 * ...
 * }</pre>
 * 
 * <p>
 * Aggregators must be <i>distributive</i>: An aggregator must be able to pre-aggregate values and it must be able
 * to aggregate these pre-aggregated values to form the final aggregate. Many aggregation functions fulfill this
 * condition (sum, min, max) and others can be brought into that form: one can express <i>count</i> as a sum over
 * values of one, and one can express <i>average</i> through a sum and a count.
 * 
 * @param <T> The type of the aggregated value.
 */
@PublicEvolving
public interface Aggregator<T extends Value> extends Serializable {

	/**
	 * Gets the aggregator's current aggregate.
	 *
	 * @return The aggregator's current aggregate.
	 */
	T getAggregate();

	/**
	 * Aggregates the given element into the current aggregate. In the case of a <i>sum</i>
	 * aggregator, for example, this method adds the given value to the sum.
	 *
	 * @param element The element to aggregate.
	 */
	void aggregate(T element);

	/**
	 * Resets the internal state of the aggregator. After this call, the aggregator must be in
	 * the same state as if it were newly initialized.
	 */
	void reset();
}