/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.api.java.operator; import java.io.Serializable; import java.util.ArrayList; import java.util.List; import org.apache.flink.api.common.InvalidProgramException; import org.apache.flink.api.common.operators.Order; import org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo; import org.apache.flink.api.common.typeinfo.BasicTypeInfo; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.ExecutionEnvironment; import org.apache.flink.api.java.functions.KeySelector; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.api.java.tuple.Tuple4; import org.apache.flink.api.java.tuple.Tuple5; import org.apache.flink.api.java.typeutils.TupleTypeInfo; import org.apache.flink.api.java.typeutils.TypeExtractor; import org.junit.Assert; import org.junit.Test; public class GroupingTest { // TUPLE DATA private final List<Tuple5<Integer, Long, String, Long, Integer>> emptyTupleData = new ArrayList<Tuple5<Integer, Long, String, Long, Integer>>(); private final TupleTypeInfo<Tuple5<Integer, Long, String, Long, Integer>> tupleTypeInfo = new TupleTypeInfo<Tuple5<Integer, Long, String, Long, Integer>>( BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO ); private final TupleTypeInfo<Tuple4<Integer, Long, CustomType, Long[]>> tupleWithCustomInfo = new TupleTypeInfo<Tuple4<Integer, Long, CustomType, Long[]>>( BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, TypeExtractor.createTypeInfo(CustomType.class), BasicArrayTypeInfo.LONG_ARRAY_TYPE_INFO ); // LONG DATA private final List<Long> emptyLongData = new ArrayList<Long>(); private final List<CustomType> customTypeData = new ArrayList<CustomType>(); private final List<Tuple4<Integer, Long, CustomType, Long[]>> tupleWithCustomData = new ArrayList<Tuple4<Integer, Long, CustomType, Long[]>>(); private final List<Tuple2<byte[], byte[]>> byteArrayData = new ArrayList<Tuple2<byte[], byte[]>>(); @Test public void testGroupByKeyFields1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { tupleDs.groupBy(0); } catch(Exception e) { Assert.fail(); } } @Test(expected = InvalidProgramException.class) public void testGroupByKeyFields2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Long> longDs = env.fromCollection(emptyLongData, BasicTypeInfo.LONG_TYPE_INFO); // should not work: groups on basic type longDs.groupBy(0); } @Test(expected = InvalidProgramException.class) public void testGroupByKeyFields3() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should not work: groups on custom type customDs.groupBy(0); } @Test(expected = IndexOutOfBoundsException.class) public void testGroupByKeyFields4() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, key out of tuple bounds tupleDs.groupBy(5); } @Test(expected = IndexOutOfBoundsException.class) public void testGroupByKeyFields5() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, negative field position tupleDs.groupBy(-1); } @Test public void testGroupByKeyFieldsOnPrimitiveArray() { this.byteArrayData.add(new Tuple2(new byte[]{0}, new byte[]{1})); final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<byte[], byte[]>> tupleDs = env.fromCollection(byteArrayData); tupleDs.groupBy(0); } @Test public void testGroupByKeyExpressions1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); DataSet<CustomType> ds = env.fromCollection(customTypeData); // should work try { ds.groupBy("myInt"); } catch(Exception e) { Assert.fail(); } } @Test(expected = InvalidProgramException.class) public void testGroupByKeyExpressions2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Long> longDs = env.fromCollection(emptyLongData, BasicTypeInfo.LONG_TYPE_INFO); // should not work: groups on basic type longDs.groupBy("myInt"); } @Test(expected = InvalidProgramException.class) public void testGroupByKeyExpressions3() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should not work: tuple selector on custom type customDs.groupBy(0); } @Test(expected = IllegalArgumentException.class) public void testGroupByKeyExpressions4() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<CustomType> ds = env.fromCollection(customTypeData); // should not work, key out of tuple bounds ds.groupBy("myNonExistent"); } @Test public void testGroupByKeyExpressions1Nested() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); DataSet<CustomType> ds = env.fromCollection(customTypeData); // should work try { ds.groupBy("nested.myInt"); } catch(Exception e) { Assert.fail(); } } @Test(expected = IllegalArgumentException.class) public void testGroupByKeyExpressions2Nested() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<CustomType> ds = env.fromCollection(customTypeData); // should not work, key out of tuple bounds ds.groupBy("nested.myNonExistent"); } @Test @SuppressWarnings("serial") public void testGroupByKeySelector1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); try { DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should work customDs.groupBy( new KeySelector<GroupingTest.CustomType, Long>() { @Override public Long getKey(CustomType value) { return value.myLong; } }); } catch(Exception e) { Assert.fail(); } } @Test @SuppressWarnings("serial") public void testGroupByKeySelector2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); try { DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should work customDs.groupBy( new KeySelector<GroupingTest.CustomType, Tuple2<Integer, Long>>() { @Override public Tuple2<Integer,Long> getKey(CustomType value) { return new Tuple2<Integer, Long>(value.myInt, value.myLong); } }); } catch(Exception e) { Assert.fail(); } } @Test @SuppressWarnings("serial") public void testGroupByKeySelector3() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); try { DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should not work customDs.groupBy( new KeySelector<GroupingTest.CustomType, CustomType>() { @Override public CustomType getKey(CustomType value) { return value; } }); } catch(Exception e) { Assert.fail(); } } @Test @SuppressWarnings("serial") public void testGroupByKeySelector4() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); try { DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should not work customDs.groupBy( new KeySelector<GroupingTest.CustomType, Tuple2<Integer, GroupingTest.CustomType>>() { @Override public Tuple2<Integer, CustomType> getKey(CustomType value) { return new Tuple2<Integer, CustomType>(value.myInt, value); } }); } catch(Exception e) { Assert.fail(); } } @Test(expected = InvalidProgramException.class) @SuppressWarnings("serial") public void testGroupByKeySelector5() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should not work customDs.groupBy( new KeySelector<GroupingTest.CustomType, CustomType2>() { @Override public CustomType2 getKey(CustomType value) { return new CustomType2(); } }); } @Test public void testGroupSortKeyFields1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { tupleDs.groupBy(0).sortGroup(0, Order.ASCENDING); } catch(Exception e) { Assert.fail(); } } @Test(expected = IndexOutOfBoundsException.class) public void testGroupSortKeyFields2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, field index out of bounds tupleDs.groupBy(0).sortGroup(5, Order.ASCENDING); } @Test(expected = InvalidProgramException.class) public void testGroupSortKeyFields3() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Long> longDs = env.fromCollection(emptyLongData, BasicTypeInfo.LONG_TYPE_INFO); // should not work: sorted groups on groupings by key selectors longDs.groupBy(new KeySelector<Long, Long>() { private static final long serialVersionUID = 1L; @Override public Long getKey(Long value) { return value; } }).sortGroup(0, Order.ASCENDING); } @Test(expected = InvalidProgramException.class) public void testGroupSortKeyFields4() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs = env.fromCollection(tupleWithCustomData, tupleWithCustomInfo); // should not work tupleDs.groupBy(0) .sortGroup(2, Order.ASCENDING); } @Test(expected = InvalidProgramException.class) public void testGroupSortKeyFields5() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs = env.fromCollection(tupleWithCustomData, tupleWithCustomInfo); // should not work tupleDs.groupBy(0) .sortGroup(3, Order.ASCENDING); } @Test public void testChainedGroupSortKeyFields() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { tupleDs.groupBy(0).sortGroup(0, Order.ASCENDING).sortGroup(2, Order.DESCENDING); } catch(Exception e) { Assert.fail(); } } @Test public void testGroupSortByKeyExpression1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs = env.fromCollection(tupleWithCustomData, tupleWithCustomInfo); // should work try { tupleDs.groupBy("f0").sortGroup("f1", Order.ASCENDING); } catch(Exception e) { Assert.fail(); } } @Test public void testGroupSortByKeyExpression2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs = env.fromCollection(tupleWithCustomData, tupleWithCustomInfo); // should work try { tupleDs.groupBy("f0").sortGroup("f2.myString", Order.ASCENDING); } catch(Exception e) { Assert.fail(); } } @Test public void testGroupSortByKeyExpression3() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs = env.fromCollection(tupleWithCustomData, tupleWithCustomInfo); // should work try { tupleDs.groupBy("f0") .sortGroup("f2.myString", Order.ASCENDING) .sortGroup("f1", Order.DESCENDING); } catch(Exception e) { Assert.fail(); } } @Test(expected = InvalidProgramException.class) public void testGroupSortByKeyExpression4() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs = env.fromCollection(tupleWithCustomData, tupleWithCustomInfo); // should not work tupleDs.groupBy("f0") .sortGroup("f2", Order.ASCENDING); } @Test(expected = InvalidProgramException.class) public void testGroupSortByKeyExpression5() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs = env.fromCollection(tupleWithCustomData, tupleWithCustomInfo); // should not work tupleDs.groupBy("f0") .sortGroup("f1", Order.ASCENDING) .sortGroup("f2", Order.ASCENDING); } @Test(expected = InvalidProgramException.class) public void testGroupSortByKeyExpression6() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs = env.fromCollection(tupleWithCustomData, tupleWithCustomInfo); // should not work tupleDs.groupBy("f0") .sortGroup("f3", Order.ASCENDING); } @SuppressWarnings("serial") @Test public void testGroupSortByKeySelector1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs = env.fromCollection(tupleWithCustomData, tupleWithCustomInfo); // should not work tupleDs.groupBy( new KeySelector<Tuple4<Integer,Long,CustomType,Long[]>, Long>() { @Override public Long getKey(Tuple4<Integer, Long, CustomType, Long[]> value) throws Exception { return value.f1; } }) .sortGroup( new KeySelector<Tuple4<Integer, Long, CustomType, Long[]>, Integer>() { @Override public Integer getKey(Tuple4<Integer, Long, CustomType, Long[]> value) throws Exception { return value.f0; } }, Order.ASCENDING); } @SuppressWarnings("serial") @Test(expected = InvalidProgramException.class) public void testGroupSortByKeySelector2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs = env.fromCollection(tupleWithCustomData, tupleWithCustomInfo); // should not work tupleDs.groupBy( new KeySelector<Tuple4<Integer,Long,CustomType,Long[]>, Long>() { @Override public Long getKey(Tuple4<Integer, Long, CustomType, Long[]> value) throws Exception { return value.f1; } }) .sortGroup( new KeySelector<Tuple4<Integer, Long, CustomType, Long[]>, CustomType>() { @Override public CustomType getKey(Tuple4<Integer, Long, CustomType, Long[]> value) throws Exception { return value.f2; } }, Order.ASCENDING); } @SuppressWarnings("serial") @Test(expected = InvalidProgramException.class) public void testGroupSortByKeySelector3() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs = env.fromCollection(tupleWithCustomData, tupleWithCustomInfo); // should not work tupleDs.groupBy( new KeySelector<Tuple4<Integer,Long,CustomType,Long[]>, Long>() { @Override public Long getKey(Tuple4<Integer, Long, CustomType, Long[]> value) throws Exception { return value.f1; } }) .sortGroup( new KeySelector<Tuple4<Integer, Long, CustomType, Long[]>, Long[]>() { @Override public Long[] getKey(Tuple4<Integer, Long, CustomType, Long[]> value) throws Exception { return value.f3; } }, Order.ASCENDING); } @Test public void testGroupingAtomicType() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Integer> dataSet = env.fromElements(0, 1, 1, 2, 0, 0); dataSet.groupBy("*"); } @Test(expected = InvalidProgramException.class) public void testGroupAtomicTypeWithInvalid1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Integer> dataSet = env.fromElements(0, 1, 2, 3); dataSet.groupBy("*", "invalidField"); } @Test(expected = InvalidProgramException.class) public void testGroupAtomicTypeWithInvalid2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Integer> dataSet = env.fromElements(0, 1, 2, 3); dataSet.groupBy("invalidField"); } @Test(expected = InvalidProgramException.class) public void testGroupAtomicTypeWithInvalid3() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<ArrayList<Integer>> dataSet = env.fromElements(new ArrayList<Integer>()); dataSet.groupBy("*"); } public static class CustomType implements Serializable { public static class Nest { public int myInt; } private static final long serialVersionUID = 1L; public int myInt; public long myLong; public String myString; public Nest nested; public CustomType() {} public CustomType(int i, long l, String s) { myInt = i; myLong = l; myString = s; } @Override public String toString() { return myInt+","+myLong+","+myString; } } public static class CustomType2 implements Serializable { public int myInt; public Integer[] myIntArray; } }