/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.etl.api;
import co.cask.cdap.api.annotation.Beta;
import java.util.Iterator;
/**
 * An aggregation stage: input objects are bucketed by group key, and each bucket is then
 * aggregated as a whole. Any two input objects for which the same group key was emitted
 * end up in the same group.
 *
 * @param <GROUP_KEY> the type of the key that identifies a group
 * @param <GROUP_VALUE> the type of the objects being grouped
 * @param <OUT> the type of the objects produced by aggregating a group
 */
@Beta
public interface Aggregator<GROUP_KEY, GROUP_VALUE, OUT> {

  /**
   * Decides which group(s) an input value belongs to by emitting its group key(s).
   *
   * <ul>
   *   <li>Emitting nothing filters the input value out.</li>
   *   <li>Emitting one key places the input value in that key's group.</li>
   *   <li>Emitting several keys places the input value in each of those groups.</li>
   * </ul>
   *
   * @param groupValue the input value to be grouped
   * @param emitter receives the zero or more group keys chosen for {@code groupValue}
   * @throws Exception if the group key(s) could not be determined
   */
  void groupBy(GROUP_VALUE groupValue, Emitter<GROUP_KEY> emitter) throws Exception;

  /**
   * Reduces every object of a single group to zero or more output objects.
   *
   * @param groupKey the key shared by all objects in this group
   * @param groupValues iterates over all input objects that have the group key {@code groupKey}
   * @param emitter receives the aggregate values produced for the group
   * @throws Exception if the aggregation fails
   */
  void aggregate(GROUP_KEY groupKey,
                 Iterator<GROUP_VALUE> groupValues,
                 Emitter<OUT> emitter) throws Exception;
}