/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.api.java.record.operators;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.Validate;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.Ordering;
import eu.stratosphere.api.common.operators.RecordOperator;
import eu.stratosphere.api.common.operators.base.GroupReduceOperatorBase;
import eu.stratosphere.api.common.operators.util.UserCodeClassWrapper;
import eu.stratosphere.api.common.operators.util.UserCodeObjectWrapper;
import eu.stratosphere.api.common.operators.util.UserCodeWrapper;
import eu.stratosphere.api.java.record.functions.FunctionAnnotation;
import eu.stratosphere.api.java.record.functions.ReduceFunction;
import eu.stratosphere.types.Key;
import eu.stratosphere.types.Record;
/**
* ReduceOperator evaluating a {@link ReduceFunction} over each group of records that share the same key.
*
* @see ReduceFunction
*/
public class ReduceOperator extends GroupReduceOperatorBase<Record, Record, ReduceFunction> implements RecordOperator {
private static final String DEFAULT_NAME = "<Unnamed Reducer>"; // the default name for contracts
/**
* The types of the keys that the contract operates on.
*/
private final Class<? extends Key<?>>[] keyTypes;
// --------------------------------------------------------------------------------------------
/**
* Creates a Builder with the provided {@link ReduceFunction} implementation.
*
* @param udf The {@link ReduceFunction} implementation for this Reduce contract.
*/
public static Builder builder(ReduceFunction udf) {
return new Builder(new UserCodeObjectWrapper<ReduceFunction>(udf));
}
/**
* Creates a Builder with the provided {@link ReduceFunction} implementation.
*
* @param udf The {@link ReduceFunction} implementation for this Reduce contract.
* @param keyClass The class of the key data type.
* @param keyColumn The position of the key.
*/
public static Builder builder(ReduceFunction udf, Class<? extends Key<?>> keyClass, int keyColumn) {
return new Builder(new UserCodeObjectWrapper<ReduceFunction>(udf), keyClass, keyColumn);
}
/**
* Creates a Builder with the provided {@link ReduceFunction} implementation.
*
* @param udf The {@link ReduceFunction} implementation for this Reduce contract.
*/
public static Builder builder(Class<? extends ReduceFunction> udf) {
return new Builder(new UserCodeClassWrapper<ReduceFunction>(udf));
}
/**
* Creates a Builder with the provided {@link ReduceFunction} implementation.
*
* @param udf The {@link ReduceFunction} implementation for this Reduce contract.
* @param keyClass The class of the key data type.
* @param keyColumn The position of the key.
*/
public static Builder builder(Class<? extends ReduceFunction> udf, Class<? extends Key<?>> keyClass, int keyColumn) {
return new Builder(new UserCodeClassWrapper<ReduceFunction>(udf), keyClass, keyColumn);
}
/**
* The private constructor that only gets invoked from the Builder.
* @param builder
*/
protected ReduceOperator(Builder builder) {
super(builder.udf, OperatorInfoHelper.unary(), builder.getKeyColumnsArray(), builder.name);
this.keyTypes = builder.getKeyClassesArray();
if (builder.inputs != null && !builder.inputs.isEmpty()) {
setInput(Operator.createUnionCascade(builder.inputs));
}
setGroupOrder(builder.secondaryOrder);
setBroadcastVariables(builder.broadcastInputs);
setSemanticProperties(FunctionAnnotation.readSingleConstantAnnotations(builder.udf));
}
// --------------------------------------------------------------------------------------------
@Override
public Class<? extends Key<?>>[] getKeyClasses() {
return this.keyTypes;
}
// --------------------------------------------------------------------------------------------
@Override
public boolean isCombinable() {
return super.isCombinable() || getUserCodeWrapper().getUserCodeAnnotation(Combinable.class) != null;
}
/**
* This annotation marks reduce stubs as eligible for the usage of a combiner.
*
* The following code excerpt shows how to make a simple reduce stub combinable (assuming here that
* the reducer function and combiner function do the same):
*
* <code>
* \@Combinable
* public static class CountWords extends ReduceFunction<StringValue>
* {
* private final IntValue theInteger = new IntValue();
*
* \@Override
* public void reduce(StringValue key, Iterator<Record> records, Collector out) throws Exception
* {
* Record element = null;
* int sum = 0;
* while (records.hasNext()) {
* element = records.next();
* element.getField(1, this.theInteger);
* // we could have equivalently used IntValue i = record.getField(1, IntValue.class);
*
* sum += this.theInteger.getValue();
* }
*
* element.setField(1, this.theInteger);
* out.collect(element);
* }
*
* public void combine(StringValue key, Iterator<Record> records, Collector out) throws Exception
* {
* this.reduce(key, records, out);
* }
* }
* </code>
*/
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public static @interface Combinable {};
// --------------------------------------------------------------------------------------------
/**
* Builder pattern, straight from Joshua Bloch's Effective Java (2nd Edition).
*/
public static class Builder {
/* The required parameters */
private final UserCodeWrapper<ReduceFunction> udf;
private final List<Class<? extends Key<?>>> keyClasses;
private final List<Integer> keyColumns;
/* The optional parameters */
private Ordering secondaryOrder = null;
private List<Operator<Record>> inputs;
private Map<String, Operator<Record>> broadcastInputs;
private String name = DEFAULT_NAME;
/**
* Creates a Builder with the provided {@link ReduceFunction} implementation.
*
* @param udf The {@link ReduceFunction} implementation for this Reduce contract.
*/
private Builder(UserCodeWrapper<ReduceFunction> udf) {
this.udf = udf;
this.keyClasses = new ArrayList<Class<? extends Key<?>>>();
this.keyColumns = new ArrayList<Integer>();
this.inputs = new ArrayList<Operator<Record>>();
this.broadcastInputs = new HashMap<String, Operator<Record>>();
}
/**
* Creates a Builder with the provided {@link ReduceFunction} implementation.
*
* @param udf The {@link ReduceFunction} implementation for this Reduce contract.
* @param keyClass The class of the key data type.
* @param keyColumn The position of the key.
*/
private Builder(UserCodeWrapper<ReduceFunction> udf, Class<? extends Key<?>> keyClass, int keyColumn) {
this.udf = udf;
this.keyClasses = new ArrayList<Class<? extends Key<?>>>();
this.keyClasses.add(keyClass);
this.keyColumns = new ArrayList<Integer>();
this.keyColumns.add(keyColumn);
this.inputs = new ArrayList<Operator<Record>>();
this.broadcastInputs = new HashMap<String, Operator<Record>>();
}
private int[] getKeyColumnsArray() {
int[] result = new int[keyColumns.size()];
for (int i = 0; i < keyColumns.size(); ++i) {
result[i] = keyColumns.get(i);
}
return result;
}
@SuppressWarnings("unchecked")
private Class<? extends Key<?>>[] getKeyClassesArray() {
return keyClasses.toArray(new Class[keyClasses.size()]);
}
/**
* Adds additional key field.
*
* @param keyClass The class of the key data type.
* @param keyColumn The position of the key.
*/
public Builder keyField(Class<? extends Key<?>> keyClass, int keyColumn) {
keyClasses.add(keyClass);
keyColumns.add(keyColumn);
return this;
}
/**
* Sets the order of the elements within a group.
*
* @param order The order for the elements in a group.
*/
public Builder secondaryOrder(Ordering order) {
this.secondaryOrder = order;
return this;
}
/**
* Sets the input.
*
* @param input The input.
*/
public Builder input(Operator<Record> input) {
Validate.notNull(input, "The input must not be null");
this.inputs.clear();
this.inputs.add(input);
return this;
}
/**
* Sets one or several inputs (union).
*
* @param inputs
*/
public Builder input(Operator<Record>...inputs) {
this.inputs.clear();
for (Operator<Record> c : inputs) {
this.inputs.add(c);
}
return this;
}
/**
* Sets the inputs.
*
* @param inputs
*/
public Builder inputs(List<Operator<Record>> inputs) {
this.inputs = inputs;
return this;
}
/**
* Binds the result produced by a plan rooted at {@code root} to a
* variable used by the UDF wrapped in this operator.
*/
public Builder setBroadcastVariable(String name, Operator<Record> input) {
this.broadcastInputs.put(name, input);
return this;
}
/**
* Binds multiple broadcast variables.
*/
public Builder setBroadcastVariables(Map<String, Operator<Record>> inputs) {
this.broadcastInputs.clear();
this.broadcastInputs.putAll(inputs);
return this;
}
/**
* Sets the name of this operator.
*
* @param name
*/
public Builder name(String name) {
this.name = name;
return this;
}
/**
* Creates and returns a ReduceOperator from using the values given
* to the builder.
*
* @return The created operator
*/
public ReduceOperator build() {
if (name == null) {
name = udf.getUserCodeClass().getName();
}
return new ReduceOperator(this);
}
}
}