/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.api.java.record.operators;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.Validate;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.Ordering;
import eu.stratosphere.api.common.operators.RecordOperator;
import eu.stratosphere.api.common.operators.base.CoGroupOperatorBase;
import eu.stratosphere.api.common.operators.util.UserCodeClassWrapper;
import eu.stratosphere.api.common.operators.util.UserCodeObjectWrapper;
import eu.stratosphere.api.common.operators.util.UserCodeWrapper;
import eu.stratosphere.api.java.record.functions.CoGroupFunction;
import eu.stratosphere.api.java.record.functions.FunctionAnnotation;
import eu.stratosphere.types.Key;
import eu.stratosphere.types.Record;
/**
* CoGroupOperator that applies a {@link CoGroupFunction} to groups of records sharing
* the same key (one group per input).
*
* @see CoGroupFunction
*/
public class CoGroupOperator extends CoGroupOperatorBase<Record, Record, Record, CoGroupFunction> implements RecordOperator {

	/**
	 * The types of the keys that the operator groups on.
	 */
	private final Class<? extends Key<?>>[] keyTypes;

	// --------------------------------------------------------------------------------------------

	/**
	 * Creates a Builder with the provided {@link CoGroupFunction} instance.
	 *
	 * @param udf The {@link CoGroupFunction} instance for this CoGroup operator.
	 * @param keyClass The class of the key data type.
	 * @param keyColumn1 The position of the key in the first input's records.
	 * @param keyColumn2 The position of the key in the second input's records.
	 * @return A new Builder holding the given function and the first key field.
	 */
	public static Builder builder(CoGroupFunction udf, Class<? extends Key<?>> keyClass, int keyColumn1, int keyColumn2) {
		return new Builder(new UserCodeObjectWrapper<CoGroupFunction>(udf), keyClass, keyColumn1, keyColumn2);
	}

	/**
	 * Creates a Builder with the provided {@link CoGroupFunction} class.
	 *
	 * @param udf The class of the {@link CoGroupFunction} implementation for this CoGroup operator.
	 * @param keyClass The class of the key data type.
	 * @param keyColumn1 The position of the key in the first input's records.
	 * @param keyColumn2 The position of the key in the second input's records.
	 * @return A new Builder holding the given function class and the first key field.
	 */
	public static Builder builder(Class<? extends CoGroupFunction> udf, Class<? extends Key<?>> keyClass,
			int keyColumn1, int keyColumn2)
	{
		return new Builder(new UserCodeClassWrapper<CoGroupFunction>(udf), keyClass, keyColumn1, keyColumn2);
	}

	/**
	 * Creates the operator from the values collected by the given builder. This constructor is
	 * protected; within this class it is only invoked from {@link Builder#build()}.
	 *
	 * @param builder The builder holding the user function, key fields, inputs, broadcast variables,
	 *                secondary orders and name.
	 */
	protected CoGroupOperator(Builder builder) {
		super(builder.udf, OperatorInfoHelper.binary(), builder.getKeyColumnsArray1(), builder.getKeyColumnsArray2(), builder.name);
		this.keyTypes = builder.getKeyClassesArray();

		// The builder's input lists are never null; an empty list means "input not set".
		if (!builder.inputs1.isEmpty()) {
			setFirstInput(Operator.createUnionCascade(builder.inputs1));
		}
		if (!builder.inputs2.isEmpty()) {
			setSecondInput(Operator.createUnionCascade(builder.inputs2));
		}

		setBroadcastVariables(builder.broadcastInputs);
		setGroupOrderForInputOne(builder.secondaryOrder1);
		setGroupOrderForInputTwo(builder.secondaryOrder2);
		setSemanticProperties(FunctionAnnotation.readDualConstantAnnotations(builder.udf));
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * Returns the key types this operator groups on, in the order in which the key fields were
	 * added to the builder.
	 *
	 * @return The array of key classes held by this operator (not a copy).
	 */
	@Override
	public Class<? extends Key<?>>[] getKeyClasses() {
		return this.keyTypes;
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * Builder pattern, straight from Joshua Bloch's Effective Java (2nd Edition).
	 */
	public static class Builder {

		/* The required parameters */
		private final UserCodeWrapper<CoGroupFunction> udf;
		private final List<Class<? extends Key<?>>> keyClasses;
		private final List<Integer> keyColumns1;
		private final List<Integer> keyColumns2;

		/* The optional parameters */
		private final List<Operator<Record>> inputs1;
		private final List<Operator<Record>> inputs2;
		private final Map<String, Operator<Record>> broadcastInputs;
		private Ordering secondaryOrder1;
		private Ordering secondaryOrder2;
		private String name;

		/**
		 * Creates a Builder with the provided {@link CoGroupFunction} implementation and a first key field.
		 *
		 * @param udf The {@link CoGroupFunction} implementation for this CoGroup operator.
		 * @param keyClass The class of the key data type.
		 * @param keyColumn1 The position of the key in the first input's records.
		 * @param keyColumn2 The position of the key in the second input's records.
		 */
		protected Builder(UserCodeWrapper<CoGroupFunction> udf, Class<? extends Key<?>> keyClass,
				int keyColumn1, int keyColumn2)
		{
			this(udf);
			this.keyClasses.add(keyClass);
			this.keyColumns1.add(keyColumn1);
			this.keyColumns2.add(keyColumn2);
		}

		/**
		 * Creates a Builder with the provided {@link CoGroupFunction} implementation and no key
		 * fields. This constructor is intended for special case sub-types only; at least one key
		 * field must be added via {@link #keyField(Class, int, int)} before {@link #build()} is called.
		 *
		 * @param udf The {@link CoGroupFunction} implementation for this CoGroup operator.
		 */
		protected Builder(UserCodeWrapper<CoGroupFunction> udf) {
			this.udf = udf;
			this.keyClasses = new ArrayList<Class<? extends Key<?>>>();
			this.keyColumns1 = new ArrayList<Integer>();
			this.keyColumns2 = new ArrayList<Integer>();
			this.inputs1 = new ArrayList<Operator<Record>>();
			this.inputs2 = new ArrayList<Operator<Record>>();
			this.broadcastInputs = new HashMap<String, Operator<Record>>();
		}

		/** Returns the key positions for the first input as a primitive array. */
		private int[] getKeyColumnsArray1() {
			return toIntArray(keyColumns1);
		}

		/** Returns the key positions for the second input as a primitive array. */
		private int[] getKeyColumnsArray2() {
			return toIntArray(keyColumns2);
		}

		/** Unboxes the given list of positions into a new {@code int} array of the same length and order. */
		private static int[] toIntArray(List<Integer> positions) {
			final int[] result = new int[positions.size()];
			int idx = 0;
			for (Integer position : positions) {
				result[idx++] = position;
			}
			return result;
		}

		/** Returns the key classes as an array, in the order in which they were added. */
		@SuppressWarnings("unchecked")
		private Class<? extends Key<?>>[] getKeyClassesArray() {
			// Generic array creation is not possible, hence the raw array and the suppressed warning.
			return keyClasses.toArray(new Class[keyClasses.size()]);
		}

		/**
		 * Replaces the contents of a builder-owned input list with a copy of the given inputs.
		 * The builder never keeps a reference to the caller's list, so later calls that clear or
		 * extend the builder's inputs cannot modify (or fail on) a list owned by the caller.
		 *
		 * @param target The builder-owned list to overwrite.
		 * @param source The new inputs; {@code null} is treated as "no inputs".
		 */
		private static void replaceInputs(List<Operator<Record>> target, List<Operator<Record>> source) {
			target.clear();
			if (source != null) {
				target.addAll(source);
			}
		}

		/**
		 * Adds an additional key field.
		 *
		 * @param keyClass The class of the key data type.
		 * @param keyColumn1 The position of the key in the first input's records.
		 * @param keyColumn2 The position of the key in the second input's records.
		 * @return This builder.
		 */
		public Builder keyField(Class<? extends Key<?>> keyClass, int keyColumn1, int keyColumn2) {
			keyClasses.add(keyClass);
			keyColumns1.add(keyColumn1);
			keyColumns2.add(keyColumn2);
			return this;
		}

		/**
		 * Sets the order of the elements within a group for the first input.
		 *
		 * @param order The order for the elements in a group.
		 * @return This builder.
		 */
		public Builder secondaryOrder1(Ordering order) {
			this.secondaryOrder1 = order;
			return this;
		}

		/**
		 * Sets the order of the elements within a group for the second input.
		 *
		 * @param order The order for the elements in a group.
		 * @return This builder.
		 */
		public Builder secondaryOrder2(Ordering order) {
			this.secondaryOrder2 = order;
			return this;
		}

		/**
		 * Sets the input operator for input 1, replacing any previously set inputs.
		 *
		 * @param input The input operator for input 1; must not be {@code null}.
		 * @return This builder.
		 */
		public Builder input1(Operator<Record> input) {
			Validate.notNull(input, "The input must not be null");
			this.inputs1.clear();
			this.inputs1.add(input);
			return this;
		}

		/**
		 * Sets one or several inputs (union) for input 1, replacing any previously set inputs.
		 *
		 * @param inputs The input operators for input 1; the array itself must not be {@code null}.
		 * @return This builder.
		 */
		public Builder input1(Operator<Record>...inputs) {
			// Check before clearing so that a failing call leaves the builder unchanged.
			Validate.notNull(inputs, "The inputs must not be null");
			this.inputs1.clear();
			for (Operator<Record> c : inputs) {
				this.inputs1.add(c);
			}
			return this;
		}

		/**
		 * Sets the input operator for input 2, replacing any previously set inputs.
		 *
		 * @param input The input operator for input 2; must not be {@code null}.
		 * @return This builder.
		 */
		public Builder input2(Operator<Record> input) {
			Validate.notNull(input, "The input must not be null");
			this.inputs2.clear();
			this.inputs2.add(input);
			return this;
		}

		/**
		 * Sets one or several inputs (union) for input 2, replacing any previously set inputs.
		 *
		 * @param inputs The input operators for input 2; the array itself must not be {@code null}.
		 * @return This builder.
		 */
		public Builder input2(Operator<Record>...inputs) {
			// Check before clearing so that a failing call leaves the builder unchanged.
			Validate.notNull(inputs, "The inputs must not be null");
			this.inputs2.clear();
			for (Operator<Record> c : inputs) {
				this.inputs2.add(c);
			}
			return this;
		}

		/**
		 * Sets the first inputs, replacing any previously set inputs. The given list is copied;
		 * the builder does not keep a reference to it.
		 *
		 * @param inputs The input operators for input 1; {@code null} or an empty list leaves input 1 unset.
		 * @return This builder.
		 */
		public Builder inputs1(List<Operator<Record>> inputs) {
			replaceInputs(this.inputs1, inputs);
			return this;
		}

		/**
		 * Sets the second inputs, replacing any previously set inputs. The given list is copied;
		 * the builder does not keep a reference to it.
		 *
		 * @param inputs The input operators for input 2; {@code null} or an empty list leaves input 2 unset.
		 * @return This builder.
		 */
		public Builder inputs2(List<Operator<Record>> inputs) {
			replaceInputs(this.inputs2, inputs);
			return this;
		}

		/**
		 * Binds the result produced by a plan rooted at {@code input} to a
		 * variable used by the UDF wrapped in this operator.
		 *
		 * @param name The name under which the broadcast variable is registered.
		 * @param input The root of the plan producing the broadcast variable.
		 * @return This builder.
		 */
		public Builder setBroadcastVariable(String name, Operator<Record> input) {
			this.broadcastInputs.put(name, input);
			return this;
		}

		/**
		 * Binds multiple broadcast variables, replacing all previously bound ones.
		 *
		 * @param inputs The broadcast variables, keyed by name; must not be {@code null}.
		 * @return This builder.
		 */
		public Builder setBroadcastVariables(Map<String, Operator<Record>> inputs) {
			// Check before clearing so that a failing call leaves the existing bindings intact.
			Validate.notNull(inputs, "The broadcast inputs must not be null");
			this.broadcastInputs.clear();
			this.broadcastInputs.putAll(inputs);
			return this;
		}

		/**
		 * Sets the name of this operator.
		 *
		 * @param name The name of the operator; if left {@code null}, {@link #build()} falls back
		 *             to the name of the user code class.
		 * @return This builder.
		 */
		public Builder name(String name) {
			this.name = name;
			return this;
		}

		/**
		 * Creates and returns a CoGroupOperator using the values given
		 * to the builder.
		 *
		 * @return The created operator
		 * @throws IllegalStateException If no key field has been set.
		 */
		public CoGroupOperator build() {
			if (keyClasses.isEmpty()) {
				throw new IllegalStateException("At least one key attribute has to be set.");
			}
			if (name == null) {
				// Default the operator name to the class name of the user function.
				name = udf.getUserCodeClass().getName();
			}
			return new CoGroupOperator(this);
		}
	}
}