/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.api.java.operators; import org.apache.flink.annotation.Internal; import org.apache.flink.annotation.Public; import org.apache.flink.api.common.functions.CombineFunction; import org.apache.flink.api.common.functions.GroupCombineFunction; import org.apache.flink.api.common.functions.GroupReduceFunction; import org.apache.flink.api.common.functions.RichGroupReduceFunction; import org.apache.flink.api.java.operators.translation.CombineToGroupCombineWrapper; import org.apache.flink.api.common.operators.Operator; import org.apache.flink.api.common.operators.Order; import org.apache.flink.api.common.operators.Ordering; import org.apache.flink.api.common.operators.SingleInputSemanticProperties; import org.apache.flink.api.common.operators.UnaryOperatorInformation; import org.apache.flink.api.common.operators.base.GroupReduceOperatorBase; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.functions.SemanticPropUtil; import org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys; import org.apache.flink.api.common.operators.Keys.ExpressionKeys; import org.apache.flink.api.java.operators.translation.PlanUnwrappingReduceGroupOperator; import org.apache.flink.api.java.operators.translation.PlanUnwrappingSortedReduceGroupOperator; import org.apache.flink.api.java.operators.translation.RichCombineToGroupCombineWrapper; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.api.java.tuple.Tuple3; import org.apache.flink.api.java.DataSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.lang.reflect.ParameterizedType; import java.lang.reflect.Type; /** * This operator represents the application of a "reduceGroup" function on a data set, and the * result data set produced by the function. * * @param <IN> The type of the data set consumed by the operator. * @param <OUT> The type of the data set created by the operator. */ @Public public class GroupReduceOperator<IN, OUT> extends SingleInputUdfOperator<IN, OUT, GroupReduceOperator<IN, OUT>> { private static final Logger LOG = LoggerFactory.getLogger(GroupReduceOperator.class); private GroupReduceFunction<IN, OUT> function; private final Grouping<IN> grouper; private final String defaultName; private boolean combinable; /** * Constructor for a non-grouped reduce (all reduce). * * @param input The input data set to the groupReduce function. * @param function The user-defined GroupReduce function. */ public GroupReduceOperator(DataSet<IN> input, TypeInformation<OUT> resultType, GroupReduceFunction<IN, OUT> function, String defaultName) { super(input, resultType); this.function = function; this.grouper = null; this.defaultName = defaultName; this.combinable = checkCombinability(); } /** * Constructor for a grouped reduce. * * @param input The grouped input to be processed group-wise by the groupReduce function. * @param function The user-defined GroupReduce function. */ public GroupReduceOperator(Grouping<IN> input, TypeInformation<OUT> resultType, GroupReduceFunction<IN, OUT> function, String defaultName) { super(input != null ? input.getInputDataSet() : null, resultType); this.function = function; this.grouper = input; this.defaultName = defaultName; this.combinable = checkCombinability(); UdfOperatorUtils.analyzeSingleInputUdf(this, GroupReduceFunction.class, defaultName, function, grouper.keys); } private boolean checkCombinability() { if (function instanceof GroupCombineFunction || function instanceof CombineFunction) { // check if the generic types of GroupCombineFunction and GroupReduceFunction match, i.e., // GroupCombineFunction<IN, IN> and GroupReduceFunction<IN, OUT>. // This is a best effort check. If the check cannot be done, we might fail at runtime. Type[] reduceTypes = null; Type[] combineTypes = null; Type[] genInterfaces = function.getClass().getGenericInterfaces(); for (Type genInterface : genInterfaces) { if (genInterface instanceof ParameterizedType) { // get parameters of GroupReduceFunction if (((ParameterizedType) genInterface).getRawType().equals(GroupReduceFunction.class)) { reduceTypes = ((ParameterizedType) genInterface).getActualTypeArguments(); // get parameters of GroupCombineFunction } else if ((((ParameterizedType) genInterface).getRawType().equals(GroupCombineFunction.class)) || (((ParameterizedType) genInterface).getRawType().equals(CombineFunction.class))) { combineTypes = ((ParameterizedType) genInterface).getActualTypeArguments(); } } } if (reduceTypes != null && reduceTypes.length == 2 && combineTypes != null && combineTypes.length == 2) { if (reduceTypes[0].equals(combineTypes[0]) && reduceTypes[0].equals(combineTypes[1])) { return true; } else { LOG.warn("GroupCombineFunction cannot be used as combiner for GroupReduceFunction. " + "Generic types are incompatible."); return false; } } else if (reduceTypes == null || reduceTypes.length != 2) { LOG.warn("Cannot check generic types of GroupReduceFunction. " + "Enabling combiner but combine function might fail at runtime."); return true; } else { LOG.warn("Cannot check generic types of GroupCombineFunction. " + "Enabling combiner but combine function might fail at runtime."); return true; } } return false; } @Override protected GroupReduceFunction<IN, OUT> getFunction() { return function; } // -------------------------------------------------------------------------------------------- // Properties // -------------------------------------------------------------------------------------------- @Internal public boolean isCombinable() { return combinable; } public GroupReduceOperator<IN, OUT> setCombinable(boolean combinable) { if(combinable) { // sanity check that the function is a subclass of the combine interface if (!checkCombinability()) { throw new IllegalArgumentException("Either the function does not implement a combine interface, " + "or the types of the combine() and reduce() methods are not compatible."); } this.combinable = true; } else { this.combinable = false; } return this; } @Override @Internal public SingleInputSemanticProperties getSemanticProperties() { SingleInputSemanticProperties props = super.getSemanticProperties(); // offset semantic information by extracted key fields if(props != null && this.grouper != null && this.grouper.keys instanceof SelectorFunctionKeys) { int offset = ((SelectorFunctionKeys<?,?>) this.grouper.keys).getKeyType().getTotalFields(); if(this.grouper instanceof SortedGrouping) { offset += ((SortedGrouping<?>) this.grouper).getSortSelectionFunctionKey().getKeyType().getTotalFields(); } props = SemanticPropUtil.addSourceFieldOffset(props, this.getInputType().getTotalFields(), offset); } return props; } // -------------------------------------------------------------------------------------------- // Translation // -------------------------------------------------------------------------------------------- @Override @SuppressWarnings("unchecked") protected GroupReduceOperatorBase<?, OUT, ?> translateToDataFlow(Operator<IN> input) { String name = getName() != null ? getName() : "GroupReduce at " + defaultName; // wrap CombineFunction in GroupCombineFunction if combinable if (combinable && function instanceof CombineFunction<?, ?>) { this.function = function instanceof RichGroupReduceFunction<?, ?> ? new RichCombineToGroupCombineWrapper((RichGroupReduceFunction<?, ?>) function) : new CombineToGroupCombineWrapper((CombineFunction<?, ?>) function); } // distinguish between grouped reduce and non-grouped reduce if (grouper == null) { // non grouped reduce UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType()); GroupReduceOperatorBase<IN, OUT, GroupReduceFunction<IN, OUT>> po = new GroupReduceOperatorBase<>(function, operatorInfo, new int[0], name); po.setCombinable(combinable); po.setInput(input); // the parallelism for a non grouped reduce can only be 1 po.setParallelism(1); return po; } if (grouper.getKeys() instanceof SelectorFunctionKeys) { @SuppressWarnings("unchecked") SelectorFunctionKeys<IN, ?> selectorKeys = (SelectorFunctionKeys<IN, ?>) grouper.getKeys(); if (grouper instanceof SortedGrouping) { SortedGrouping<IN> sortedGrouping = (SortedGrouping<IN>) grouper; SelectorFunctionKeys<IN, ?> sortKeys = sortedGrouping.getSortSelectionFunctionKey(); Ordering groupOrder = sortedGrouping.getGroupOrdering(); PlanUnwrappingSortedReduceGroupOperator<IN, OUT, ?, ?> po = translateSelectorFunctionSortedReducer( selectorKeys, sortKeys, groupOrder, function, getResultType(), name, input, isCombinable() ); po.setParallelism(this.getParallelism()); po.setCustomPartitioner(grouper.getCustomPartitioner()); return po; } else { PlanUnwrappingReduceGroupOperator<IN, OUT, ?> po = translateSelectorFunctionReducer( selectorKeys, function, getResultType(), name, input, isCombinable()); po.setParallelism(this.getParallelism()); po.setCustomPartitioner(grouper.getCustomPartitioner()); return po; } } else if (grouper.getKeys() instanceof ExpressionKeys) { int[] logicalKeyPositions = grouper.getKeys().computeLogicalKeyPositions(); UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType()); GroupReduceOperatorBase<IN, OUT, GroupReduceFunction<IN, OUT>> po = new GroupReduceOperatorBase<>(function, operatorInfo, logicalKeyPositions, name); po.setCombinable(combinable); po.setInput(input); po.setParallelism(getParallelism()); po.setCustomPartitioner(grouper.getCustomPartitioner()); // set group order if (grouper instanceof SortedGrouping) { SortedGrouping<IN> sortedGrouper = (SortedGrouping<IN>) grouper; int[] sortKeyPositions = sortedGrouper.getGroupSortKeyPositions(); Order[] sortOrders = sortedGrouper.getGroupSortOrders(); Ordering o = new Ordering(); for(int i=0; i < sortKeyPositions.length; i++) { o.appendOrdering(sortKeyPositions[i], null, sortOrders[i]); } po.setGroupOrder(o); } return po; } else { throw new UnsupportedOperationException("Unrecognized key type."); } } // -------------------------------------------------------------------------------------------- @SuppressWarnings("unchecked") private static <IN, OUT, K> PlanUnwrappingReduceGroupOperator<IN, OUT, K> translateSelectorFunctionReducer( SelectorFunctionKeys<IN, ?> rawKeys, GroupReduceFunction<IN, OUT> function, TypeInformation<OUT> outputType, String name, Operator<IN> input, boolean combinable) { SelectorFunctionKeys<IN, K> keys = (SelectorFunctionKeys<IN, K>) rawKeys; TypeInformation<Tuple2<K, IN>> typeInfoWithKey = KeyFunctions.createTypeWithKey(keys); Operator<Tuple2<K, IN>> keyedInput = KeyFunctions.appendKeyExtractor(input, keys); PlanUnwrappingReduceGroupOperator<IN, OUT, K> reducer = new PlanUnwrappingReduceGroupOperator(function, keys, name, outputType, typeInfoWithKey, combinable); reducer.setInput(keyedInput); return reducer; } @SuppressWarnings("unchecked") private static <IN, OUT, K1, K2> PlanUnwrappingSortedReduceGroupOperator<IN, OUT, K1, K2> translateSelectorFunctionSortedReducer( SelectorFunctionKeys<IN, ?> rawGroupingKey, SelectorFunctionKeys<IN, ?> rawSortingKey, Ordering groupOrdering, GroupReduceFunction<IN, OUT> function, TypeInformation<OUT> outputType, String name, Operator<IN> input, boolean combinable) { final SelectorFunctionKeys<IN, K1> groupingKey = (SelectorFunctionKeys<IN, K1>) rawGroupingKey; final SelectorFunctionKeys<IN, K2> sortingKey = (SelectorFunctionKeys<IN, K2>) rawSortingKey; TypeInformation<Tuple3<K1, K2, IN>> typeInfoWithKey = KeyFunctions.createTypeWithKey(groupingKey,sortingKey); Operator<Tuple3<K1, K2, IN>> inputWithKey = KeyFunctions.appendKeyExtractor(input, groupingKey, sortingKey); PlanUnwrappingSortedReduceGroupOperator<IN, OUT, K1, K2> reducer = new PlanUnwrappingSortedReduceGroupOperator<>( function, groupingKey, sortingKey, name, outputType, typeInfoWithKey, combinable); reducer.setInput(inputWithKey); reducer.setGroupOrder(groupOrdering); return reducer; } }