/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.api.java.operators; import org.apache.flink.annotation.Internal; import org.apache.flink.annotation.Public; import org.apache.flink.annotation.PublicEvolving; import org.apache.flink.api.common.functions.ReduceFunction; import org.apache.flink.api.common.operators.Keys; import org.apache.flink.api.common.operators.Operator; import org.apache.flink.api.common.operators.SingleInputSemanticProperties; import org.apache.flink.api.common.operators.UnaryOperatorInformation; import org.apache.flink.api.common.operators.base.ReduceOperatorBase; import org.apache.flink.api.common.operators.base.ReduceOperatorBase.CombineHint; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.functions.SemanticPropUtil; import org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys; import org.apache.flink.api.java.operators.translation.PlanUnwrappingReduceOperator; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.api.java.DataSet; /** * This operator represents the application of a "reduce" function on a data set, and the * result data set produced by the function. * * @param <IN> The type of the data set reduced by the operator. * * @see org.apache.flink.api.common.functions.ReduceFunction */ @Public public class ReduceOperator<IN> extends SingleInputUdfOperator<IN, IN, ReduceOperator<IN>> { private final ReduceFunction<IN> function; private final Grouping<IN> grouper; private final String defaultName; // should be null in case of an all reduce private CombineHint hint; /** * * This is the case for a reduce-all case (in contrast to the reduce-per-group case). * * @param input * @param function */ public ReduceOperator(DataSet<IN> input, ReduceFunction<IN> function, String defaultName) { super(input, input.getType()); this.function = function; this.grouper = null; this.defaultName = defaultName; this.hint = null; } public ReduceOperator(Grouping<IN> input, ReduceFunction<IN> function, String defaultName) { super(input.getInputDataSet(), input.getInputDataSet().getType()); this.function = function; this.grouper = input; this.defaultName = defaultName; this.hint = CombineHint.OPTIMIZER_CHOOSES; UdfOperatorUtils.analyzeSingleInputUdf(this, ReduceFunction.class, defaultName, function, grouper.keys); } @Override protected ReduceFunction<IN> getFunction() { return function; } @Override @Internal public SingleInputSemanticProperties getSemanticProperties() { SingleInputSemanticProperties props = super.getSemanticProperties(); // offset semantic information by extracted key fields if(props != null && this.grouper != null && this.grouper.keys instanceof SelectorFunctionKeys) { int offset = ((SelectorFunctionKeys<?,?>) this.grouper.keys).getKeyType().getTotalFields(); if(this.grouper instanceof SortedGrouping) { offset += ((SortedGrouping<?>) this.grouper).getSortSelectionFunctionKey().getKeyType().getTotalFields(); } props = SemanticPropUtil.addSourceFieldOffset(props, this.getInputType().getTotalFields(), offset); } return props; } @Override protected org.apache.flink.api.common.operators.SingleInputOperator<?, IN, ?> translateToDataFlow(Operator<IN> input) { String name = getName() != null ? getName() : "Reduce at "+defaultName; // distinguish between grouped reduce and non-grouped reduce if (grouper == null) { // non grouped reduce UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getInputType()); ReduceOperatorBase<IN, ReduceFunction<IN>> po = new ReduceOperatorBase<>(function, operatorInfo, new int[0], name); po.setInput(input); // the parallelism for a non grouped reduce can only be 1 po.setParallelism(1); return po; } if (grouper.getKeys() instanceof SelectorFunctionKeys) { // reduce with key selector function @SuppressWarnings("unchecked") SelectorFunctionKeys<IN, ?> selectorKeys = (SelectorFunctionKeys<IN, ?>) grouper.getKeys(); org.apache.flink.api.common.operators.SingleInputOperator<?, IN, ?> po = translateSelectorFunctionReducer(selectorKeys, function, getInputType(), name, input, getParallelism(), hint); ((PlanUnwrappingReduceOperator<?, ?>) po.getInput()).setCustomPartitioner(grouper.getCustomPartitioner()); return po; } else if (grouper.getKeys() instanceof Keys.ExpressionKeys) { // reduce with field positions int[] logicalKeyPositions = grouper.getKeys().computeLogicalKeyPositions(); UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getInputType()); ReduceOperatorBase<IN, ReduceFunction<IN>> po = new ReduceOperatorBase<>(function, operatorInfo, logicalKeyPositions, name); po.setCustomPartitioner(grouper.getCustomPartitioner()); po.setInput(input); po.setParallelism(getParallelism()); po.setCombineHint(hint); return po; } else { throw new UnsupportedOperationException("Unrecognized key type."); } } /** * Sets the strategy to use for the combine phase of the reduce. * * If this method is not called, then the default hint will be used. * ({@link org.apache.flink.api.common.operators.base.ReduceOperatorBase.CombineHint#OPTIMIZER_CHOOSES}) * * @param strategy The hint to use. * @return The ReduceOperator object, for function call chaining. */ @PublicEvolving public ReduceOperator<IN> setCombineHint(CombineHint strategy) { this.hint = strategy; return this; } // -------------------------------------------------------------------------------------------- private static <T, K> org.apache.flink.api.common.operators.SingleInputOperator<?, T, ?> translateSelectorFunctionReducer( SelectorFunctionKeys<T, ?> rawKeys, ReduceFunction<T> function, TypeInformation<T> inputType, String name, Operator<T> input, int parallelism, CombineHint hint) { @SuppressWarnings("unchecked") final SelectorFunctionKeys<T, K> keys = (SelectorFunctionKeys<T, K>) rawKeys; TypeInformation<Tuple2<K, T>> typeInfoWithKey = KeyFunctions.createTypeWithKey(keys); Operator<Tuple2<K, T>> keyedInput = KeyFunctions.appendKeyExtractor(input, keys); PlanUnwrappingReduceOperator<T, K> reducer = new PlanUnwrappingReduceOperator<>(function, keys, name, inputType, typeInfoWithKey); reducer.setInput(keyedInput); reducer.setParallelism(parallelism); reducer.setCombineHint(hint); return KeyFunctions.appendKeyRemover(reducer, keys); } }