/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.api.java.operators; import org.apache.flink.annotation.Internal; import org.apache.flink.annotation.Public; import org.apache.flink.api.common.functions.GroupCombineFunction; import org.apache.flink.api.common.operators.Keys; import org.apache.flink.api.common.operators.Operator; import org.apache.flink.api.common.operators.Order; import org.apache.flink.api.common.operators.Ordering; import org.apache.flink.api.common.operators.SingleInputSemanticProperties; import org.apache.flink.api.common.operators.UnaryOperatorInformation; import org.apache.flink.api.common.operators.base.GroupCombineOperatorBase; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.functions.SemanticPropUtil; import org.apache.flink.api.java.operators.translation.PlanUnwrappingGroupCombineOperator; import org.apache.flink.api.java.operators.translation.PlanUnwrappingSortedGroupCombineOperator; import org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.api.java.tuple.Tuple3; /** * This operator behaves like the GroupReduceOperator with Combine but only runs the Combine part which reduces all data * locally in their partitions. The combine part can return an arbitrary data type. This is useful to pre-combine values * into an intermediate representation before applying a proper reduce operation. * * @param <IN> The type of the data set consumed by the operator. * @param <OUT> The type of the data set created by the operator. */ @Public public class GroupCombineOperator<IN, OUT> extends SingleInputUdfOperator<IN, OUT, GroupCombineOperator<IN, OUT>> { private final GroupCombineFunction<IN, OUT> function; private final Grouping<IN> grouper; private final String defaultName; /** * Constructor for a non-grouped reduce (all reduce). * * @param input The input data set to the groupReduce function. * @param resultType The type information for the resulting type. * @param function The user-defined GroupReduce function. * @param defaultName The operator's name. */ public GroupCombineOperator(DataSet<IN> input, TypeInformation<OUT> resultType, GroupCombineFunction<IN, OUT> function, String defaultName) { super(input, resultType); this.function = function; this.grouper = null; this.defaultName = defaultName; } /** * Constructor for a grouped reduce. * * @param input The grouped input to be processed group-wise by the groupReduce function. * @param function The user-defined GroupReduce function. */ public GroupCombineOperator(Grouping<IN> input, TypeInformation<OUT> resultType, GroupCombineFunction<IN, OUT> function, String defaultName) { super(input != null ? input.getInputDataSet() : null, resultType); this.function = function; this.grouper = input; this.defaultName = defaultName; } @Override protected GroupCombineFunction<IN, OUT> getFunction() { return function; } @Override @Internal public SingleInputSemanticProperties getSemanticProperties() { SingleInputSemanticProperties props = super.getSemanticProperties(); // offset semantic information by extracted key fields if(props != null && this.grouper != null && this.grouper.keys instanceof SelectorFunctionKeys) { int offset = ((SelectorFunctionKeys<?,?>) this.grouper.keys).getKeyType().getTotalFields(); if(this.grouper instanceof SortedGrouping) { offset += ((SortedGrouping<?>) this.grouper).getSortSelectionFunctionKey().getKeyType().getTotalFields(); } props = SemanticPropUtil.addSourceFieldOffset(props, this.getInputType().getTotalFields(), offset); } return props; } // -------------------------------------------------------------------------------------------- // Translation // -------------------------------------------------------------------------------------------- @Override protected GroupCombineOperatorBase<?, OUT, ?> translateToDataFlow(Operator<IN> input) { String name = getName() != null ? getName() : "GroupCombine at " + defaultName; // distinguish between grouped reduce and non-grouped reduce if (grouper == null) { // non grouped reduce UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType()); GroupCombineOperatorBase<IN, OUT, GroupCombineFunction<IN, OUT>> po = new GroupCombineOperatorBase<>(function, operatorInfo, new int[0], name); po.setInput(input); // the parallelism for a non grouped reduce can only be 1 po.setParallelism(1); return po; } if (grouper.getKeys() instanceof SelectorFunctionKeys) { @SuppressWarnings("unchecked") SelectorFunctionKeys<IN, ?> selectorKeys = (SelectorFunctionKeys<IN, ?>) grouper.getKeys(); if (grouper instanceof SortedGrouping) { SortedGrouping<IN> sortedGrouping = (SortedGrouping<IN>) grouper; SelectorFunctionKeys<IN, ?> sortKeys = sortedGrouping.getSortSelectionFunctionKey(); Ordering groupOrder = sortedGrouping.getGroupOrdering(); PlanUnwrappingSortedGroupCombineOperator<IN, OUT, ?, ?> po = translateSelectorFunctionSortedReducer(selectorKeys, sortKeys, groupOrder, function, getResultType(), name, input); po.setParallelism(this.getParallelism()); return po; } else { PlanUnwrappingGroupCombineOperator<IN, OUT, ?> po = translateSelectorFunctionReducer( selectorKeys, function, getResultType(), name, input); po.setParallelism(this.getParallelism()); return po; } } else if (grouper.getKeys() instanceof Keys.ExpressionKeys) { int[] logicalKeyPositions = grouper.getKeys().computeLogicalKeyPositions(); UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType()); GroupCombineOperatorBase<IN, OUT, GroupCombineFunction<IN, OUT>> po = new GroupCombineOperatorBase<>(function, operatorInfo, logicalKeyPositions, name); po.setInput(input); po.setParallelism(getParallelism()); // set group order if (grouper instanceof SortedGrouping) { SortedGrouping<IN> sortedGrouper = (SortedGrouping<IN>) grouper; int[] sortKeyPositions = sortedGrouper.getGroupSortKeyPositions(); Order[] sortOrders = sortedGrouper.getGroupSortOrders(); Ordering o = new Ordering(); for(int i=0; i < sortKeyPositions.length; i++) { o.appendOrdering(sortKeyPositions[i], null, sortOrders[i]); } po.setGroupOrder(o); } return po; } else { throw new UnsupportedOperationException("Unrecognized key type."); } } // -------------------------------------------------------------------------------------------- @SuppressWarnings("unchecked") private static <IN, OUT, K> PlanUnwrappingGroupCombineOperator<IN, OUT, K> translateSelectorFunctionReducer( SelectorFunctionKeys<IN, ?> rawKeys, GroupCombineFunction<IN, OUT> function, TypeInformation<OUT> outputType, String name, Operator<IN> input) { final SelectorFunctionKeys<IN, K> keys = (SelectorFunctionKeys<IN, K>) rawKeys; TypeInformation<Tuple2<K, IN>> typeInfoWithKey = KeyFunctions.createTypeWithKey(keys); Operator<Tuple2<K, IN>> keyedInput = KeyFunctions.appendKeyExtractor(input, keys); PlanUnwrappingGroupCombineOperator<IN, OUT, K> reducer = new PlanUnwrappingGroupCombineOperator<>(function, keys, name, outputType, typeInfoWithKey); reducer.setInput(keyedInput); return reducer; } @SuppressWarnings("unchecked") private static <IN, OUT, K1, K2> PlanUnwrappingSortedGroupCombineOperator<IN, OUT, K1, K2> translateSelectorFunctionSortedReducer( SelectorFunctionKeys<IN, ?> rawGroupingKey, SelectorFunctionKeys<IN, ?> rawSortingKeys, Ordering groupOrder, GroupCombineFunction<IN, OUT> function, TypeInformation<OUT> outputType, String name, Operator<IN> input) { final SelectorFunctionKeys<IN, K1> groupingKey = (SelectorFunctionKeys<IN, K1>) rawGroupingKey; final SelectorFunctionKeys<IN, K2> sortingKey = (SelectorFunctionKeys<IN, K2>)rawSortingKeys; TypeInformation<Tuple3<K1, K2, IN>> typeInfoWithKey = KeyFunctions.createTypeWithKey(groupingKey, sortingKey); Operator<Tuple3<K1, K2, IN>> inputWithKey = KeyFunctions.appendKeyExtractor(input, groupingKey, sortingKey); PlanUnwrappingSortedGroupCombineOperator<IN, OUT, K1, K2> reducer = new PlanUnwrappingSortedGroupCombineOperator<>(function, groupingKey, sortingKey, name, outputType, typeInfoWithKey); reducer.setInput(inputWithKey); reducer.setGroupOrder(groupOrder); return reducer; } }