/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.test.javaApiOperators; import java.util.Collection; import java.util.Date; import java.util.List; import org.apache.flink.api.common.functions.GroupReduceFunction; import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.api.common.functions.ReduceFunction; import org.apache.flink.api.common.operators.base.ReduceOperatorBase.CombineHint; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.ExecutionEnvironment; import org.apache.flink.api.java.functions.KeySelector; import org.apache.flink.api.common.functions.RichReduceFunction; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.api.java.tuple.Tuple3; import org.apache.flink.api.java.tuple.Tuple5; import org.apache.flink.configuration.Configuration; import org.apache.flink.test.javaApiOperators.util.CollectionDataSets; import org.apache.flink.test.javaApiOperators.util.CollectionDataSets.CustomType; import org.apache.flink.test.javaApiOperators.util.CollectionDataSets.PojoWithDateAndEnum; import org.apache.flink.test.util.MultipleProgramsTestBase; import org.apache.flink.util.Collector; import org.junit.Assert; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @SuppressWarnings("serial") @RunWith(Parameterized.class) public class ReduceITCase extends MultipleProgramsTestBase { public ReduceITCase(TestExecutionMode mode){ super(mode); } @Test public void testReduceOnTuplesWithKeyFieldSelector() throws Exception { /* * Reduce on tuples with key field selector */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env); DataSet<Tuple3<Integer, Long, String>> reduceDs = ds. groupBy(1).reduce(new Tuple3Reduce("B-)")); List<Tuple3<Integer, Long, String>> result = reduceDs.collect(); String expected = "1,1,Hi\n" + "5,2,B-)\n" + "15,3,B-)\n" + "34,4,B-)\n" + "65,5,B-)\n" + "111,6,B-)\n"; compareResultAsTuples(result, expected); } @Test public void testReduceOnTupleWithMultipleKeyFieldSelectors() throws Exception{ /* * Reduce on tuples with multiple key field selectors */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env); DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds. groupBy(4,0).reduce(new Tuple5Reduce()); List<Tuple5<Integer, Long, Integer, String, Long>> result = reduceDs .collect(); String expected = "1,1,0,Hallo,1\n" + "2,3,2,Hallo Welt wie,1\n" + "2,2,1,Hallo Welt,2\n" + "3,9,0,P-),2\n" + "3,6,5,BCD,3\n" + "4,17,0,P-),1\n" + "4,17,0,P-),2\n" + "5,11,10,GHI,1\n" + "5,29,0,P-),2\n" + "5,25,0,P-),3\n"; compareResultAsTuples(result, expected); } @Test public void testReduceOnTuplesWithKeyExtractor() throws Exception { /* * Reduce on tuples with key extractor */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env); DataSet<Tuple3<Integer, Long, String>> reduceDs = ds. groupBy(new KeySelector1()).reduce(new Tuple3Reduce("B-)")); List<Tuple3<Integer, Long, String>> result = reduceDs.collect(); String expected = "1,1,Hi\n" + "5,2,B-)\n" + "15,3,B-)\n" + "34,4,B-)\n" + "65,5,B-)\n" + "111,6,B-)\n"; compareResultAsTuples(result, expected); } public static class KeySelector1 implements KeySelector<Tuple3<Integer,Long,String>, Long> { private static final long serialVersionUID = 1L; @Override public Long getKey(Tuple3<Integer, Long, String> in) { return in.f1; } } @Test public void testReduceOnCustomTypeWithKeyExtractor() throws Exception { /* * Reduce on custom type with key extractor */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env); DataSet<CustomType> reduceDs = ds. groupBy(new KeySelector2()).reduce(new CustomTypeReduce()); List<CustomType> result = reduceDs.collect(); String expected = "1,0,Hi\n" + "2,3,Hello!\n" + "3,12,Hello!\n" + "4,30,Hello!\n" + "5,60,Hello!\n" + "6,105,Hello!\n"; compareResultAsText(result, expected); } public static class KeySelector2 implements KeySelector<CustomType, Integer> { private static final long serialVersionUID = 1L; @Override public Integer getKey(CustomType in) { return in.myInt; } } @Test public void testAllReduceForTuple() throws Exception { /* * All-reduce for tuple */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env); DataSet<Tuple3<Integer, Long, String>> reduceDs = ds. reduce(new AllAddingTuple3Reduce()); List<Tuple3<Integer, Long, String>> result = reduceDs.collect(); String expected = "231,91,Hello World\n"; compareResultAsTuples(result, expected); } @Test public void testAllReduceForCustomTypes() throws Exception { /* * All-reduce for custom types */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env); DataSet<CustomType> reduceDs = ds. reduce(new AllAddingCustomTypeReduce()); List<CustomType> result = reduceDs.collect(); String expected = "91,210,Hello!"; compareResultAsText(result, expected); } @Test public void testReduceWithBroadcastSet() throws Exception { /* * Reduce with broadcast set */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env); DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env); DataSet<Tuple3<Integer, Long, String>> reduceDs = ds. groupBy(1).reduce(new BCTuple3Reduce()).withBroadcastSet(intDs, "ints"); List<Tuple3<Integer, Long, String>> result = reduceDs.collect(); String expected = "1,1,Hi\n" + "5,2,55\n" + "15,3,55\n" + "34,4,55\n" + "65,5,55\n" + "111,6,55\n"; compareResultAsTuples(result, expected); } @Test public void testReduceATupleReturningKeySelector() throws Exception { /* * Reduce with a Tuple-returning KeySelector */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env); DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds . groupBy(new KeySelector3()).reduce(new Tuple5Reduce()); List<Tuple5<Integer, Long, Integer, String, Long>> result = reduceDs .collect(); String expected = "1,1,0,Hallo,1\n" + "2,3,2,Hallo Welt wie,1\n" + "2,2,1,Hallo Welt,2\n" + "3,9,0,P-),2\n" + "3,6,5,BCD,3\n" + "4,17,0,P-),1\n" + "4,17,0,P-),2\n" + "5,11,10,GHI,1\n" + "5,29,0,P-),2\n" + "5,25,0,P-),3\n"; compareResultAsTuples(result, expected); } public static class KeySelector3 implements KeySelector<Tuple5<Integer,Long,Integer,String,Long>, Tuple2<Integer, Long>> { private static final long serialVersionUID = 1L; @Override public Tuple2<Integer, Long> getKey(Tuple5<Integer,Long,Integer,String,Long> t) { return new Tuple2<Integer, Long>(t.f0, t.f4); } } @Test public void testReduceOnTupleWithMultipleKeyExpressions() throws Exception { /* * Case 2 with String-based field expression */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env); DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds. groupBy("f4","f0").reduce(new Tuple5Reduce()); List<Tuple5<Integer, Long, Integer, String, Long>> result = reduceDs .collect(); String expected = "1,1,0,Hallo,1\n" + "2,3,2,Hallo Welt wie,1\n" + "2,2,1,Hallo Welt,2\n" + "3,9,0,P-),2\n" + "3,6,5,BCD,3\n" + "4,17,0,P-),1\n" + "4,17,0,P-),2\n" + "5,11,10,GHI,1\n" + "5,29,0,P-),2\n" + "5,25,0,P-),3\n"; compareResultAsTuples(result, expected); } @Test public void testReduceOnTupleWithMultipleKeyExpressionsWithHashHint() throws Exception { /* * Case 2 with String-based field expression */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env); DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds. groupBy("f4","f0").reduce(new Tuple5Reduce()).setCombineHint(CombineHint.HASH); List<Tuple5<Integer, Long, Integer, String, Long>> result = reduceDs .collect(); String expected = "1,1,0,Hallo,1\n" + "2,3,2,Hallo Welt wie,1\n" + "2,2,1,Hallo Welt,2\n" + "3,9,0,P-),2\n" + "3,6,5,BCD,3\n" + "4,17,0,P-),1\n" + "4,17,0,P-),2\n" + "5,11,10,GHI,1\n" + "5,29,0,P-),2\n" + "5,25,0,P-),3\n"; compareResultAsTuples(result, expected); } @Test public void testSupportForDataAndEnumSerialization() throws Exception { /** * Test support for Date and enum serialization */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<PojoWithDateAndEnum> ds = env.generateSequence(0,2).map(new Mapper1()); ds = ds.union(CollectionDataSets.getPojoWithDateAndEnum(env)); DataSet<String> res = ds.groupBy("group").reduceGroup(new GroupReducer1()); List<String> result = res.collect(); String expected = "ok\nok"; compareResultAsText(result, expected); } public static class Mapper1 implements MapFunction<Long, PojoWithDateAndEnum> { @Override public PojoWithDateAndEnum map(Long value) throws Exception { int l = value.intValue(); switch (l) { case 0: PojoWithDateAndEnum one = new PojoWithDateAndEnum(); one.group = "a"; one.date = new Date(666); one.cat = CollectionDataSets.Category.CAT_A; return one; case 1: PojoWithDateAndEnum two = new PojoWithDateAndEnum(); two.group = "a"; two.date = new Date(666); two.cat = CollectionDataSets.Category.CAT_A; return two; case 2: PojoWithDateAndEnum three = new PojoWithDateAndEnum(); three.group = "b"; three.date = new Date(666); three.cat = CollectionDataSets.Category.CAT_B; return three; } throw new RuntimeException("Unexpected value for l=" + l); } } public static class GroupReducer1 implements GroupReduceFunction<CollectionDataSets.PojoWithDateAndEnum, String> { private static final long serialVersionUID = 1L; @Override public void reduce(Iterable<PojoWithDateAndEnum> values, Collector<String> out) throws Exception { for(PojoWithDateAndEnum val : values) { if(val.cat == CollectionDataSets.Category.CAT_A) { Assert.assertEquals("a", val.group); } else if(val.cat == CollectionDataSets.Category.CAT_B) { Assert.assertEquals("b", val.group); } else { Assert.fail("error. Cat = "+val.cat); } Assert.assertEquals(666, val.date.getTime()); } out.collect("ok"); } } public static class Tuple3Reduce implements ReduceFunction<Tuple3<Integer, Long, String>> { private static final long serialVersionUID = 1L; private final Tuple3<Integer, Long, String> out = new Tuple3<Integer, Long, String>(); private final String f2Replace; public Tuple3Reduce() { this.f2Replace = null; } public Tuple3Reduce(String f2Replace) { this.f2Replace = f2Replace; } @Override public Tuple3<Integer, Long, String> reduce( Tuple3<Integer, Long, String> in1, Tuple3<Integer, Long, String> in2) throws Exception { if(f2Replace == null) { out.setFields(in1.f0+in2.f0, in1.f1, in1.f2); } else { out.setFields(in1.f0+in2.f0, in1.f1, this.f2Replace); } return out; } } public static class Tuple5Reduce implements ReduceFunction<Tuple5<Integer, Long, Integer, String, Long>> { private static final long serialVersionUID = 1L; private final Tuple5<Integer, Long, Integer, String, Long> out = new Tuple5<Integer, Long, Integer, String, Long>(); @Override public Tuple5<Integer, Long, Integer, String, Long> reduce( Tuple5<Integer, Long, Integer, String, Long> in1, Tuple5<Integer, Long, Integer, String, Long> in2) throws Exception { out.setFields(in1.f0, in1.f1+in2.f1, 0, "P-)", in1.f4); return out; } } public static class CustomTypeReduce implements ReduceFunction<CustomType> { private static final long serialVersionUID = 1L; private final CustomType out = new CustomType(); @Override public CustomType reduce(CustomType in1, CustomType in2) throws Exception { out.myInt = in1.myInt; out.myLong = in1.myLong + in2.myLong; out.myString = "Hello!"; return out; } } public static class AllAddingTuple3Reduce implements ReduceFunction<Tuple3<Integer, Long, String>> { private static final long serialVersionUID = 1L; private final Tuple3<Integer, Long, String> out = new Tuple3<Integer, Long, String>(); @Override public Tuple3<Integer, Long, String> reduce( Tuple3<Integer, Long, String> in1, Tuple3<Integer, Long, String> in2) throws Exception { out.setFields(in1.f0+in2.f0, in1.f1+in2.f1, "Hello World"); return out; } } public static class AllAddingCustomTypeReduce implements ReduceFunction<CustomType> { private static final long serialVersionUID = 1L; private final CustomType out = new CustomType(); @Override public CustomType reduce(CustomType in1, CustomType in2) throws Exception { out.myInt = in1.myInt + in2.myInt; out.myLong = in1.myLong + in2.myLong; out.myString = "Hello!"; return out; } } public static class BCTuple3Reduce extends RichReduceFunction<Tuple3<Integer, Long, String>> { private static final long serialVersionUID = 1L; private final Tuple3<Integer, Long, String> out = new Tuple3<Integer, Long, String>(); private String f2Replace = ""; @Override public void open(Configuration config) { Collection<Integer> ints = this.getRuntimeContext().getBroadcastVariable("ints"); int sum = 0; for(Integer i : ints) { sum += i; } f2Replace = sum+""; } @Override public Tuple3<Integer, Long, String> reduce( Tuple3<Integer, Long, String> in1, Tuple3<Integer, Long, String> in2) throws Exception { out.setFields(in1.f0+in2.f0, in1.f1, this.f2Replace); return out; } } }