/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.test.javaApiOperators;

import org.apache.flink.api.common.distributions.DataDistribution;
import org.apache.flink.api.common.functions.RichMapPartitionFunction;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.utils.DataSetUtils;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster;
import org.apache.flink.test.javaApiOperators.util.CollectionDataSets;
import org.apache.flink.test.util.TestBaseUtils;
import org.apache.flink.test.util.TestEnvironment;
import org.apache.flink.util.Collector;
import org.apache.flink.util.TestLogger;

import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.IOException;

import static org.junit.Assert.fail;

/**
 * Integration tests for range partitioning with a custom {@link DataDistribution}.
 */
@SuppressWarnings("serial")
public class CustomDistributionITCase extends TestLogger {

	// ------------------------------------------------------------------------
	//  The mini cluster that is shared across tests
	// ------------------------------------------------------------------------

	private static LocalFlinkMiniCluster cluster;

	@BeforeClass
	public static void setup() throws Exception {
		cluster = TestBaseUtils.startCluster(1, 8, false, false, true);
	}

	@AfterClass
	public static void teardown() throws Exception {
		TestBaseUtils.stopCluster(cluster, TestBaseUtils.DEFAULT_TIMEOUT);
	}

	@Before
	public void prepare() {
		TestEnvironment clusterEnv = new TestEnvironment(cluster, 1, false);
		clusterEnv.setAsContext();
	}

	@After
	public void cleanup() {
		TestEnvironment.unsetAsContext();
	}

	// ------------------------------------------------------------------------

	/**
	 * Tests that records are partitioned correctly on one field, according to a custom data distribution.
	 */
	@Test
	public void testPartitionWithDistribution1() throws Exception {
		final TestDataDist1 dist = new TestDataDist1();

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(dist.getParallelism());

		DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

		DataSet<Boolean> result = DataSetUtils
			.partitionByRange(input, dist, 0)
			.mapPartition(new RichMapPartitionFunction<Tuple3<Integer, Long, String>, Boolean>() {

				@Override
				public void mapPartition(Iterable<Tuple3<Integer, Long, String>> values, Collector<Boolean> out) throws Exception {
					int pIdx = getRuntimeContext().getIndexOfThisSubtask();

					for (Tuple3<Integer, Long, String> s : values) {
						boolean correctlyPartitioned = true;
						if (pIdx == 0) {
							// first partition: the key must not exceed the first boundary
							Integer[] upper = dist.boundaries[0];
							if (s.f0.compareTo(upper[0]) > 0) {
								correctlyPartitioned = false;
							}
						}
						else if (pIdx > 0 && pIdx < dist.getParallelism() - 1) {
							// inner partition: the key must lie between the two adjacent boundaries
							Integer[] lower = dist.boundaries[pIdx - 1];
							Integer[] upper = dist.boundaries[pIdx];
							if (s.f0.compareTo(upper[0]) > 0 || (s.f0.compareTo(lower[0]) <= 0)) {
								correctlyPartitioned = false;
							}
						}
						else {
							// last partition: the key must exceed the last boundary
							Integer[] lower = dist.boundaries[pIdx - 1];
							if ((s.f0.compareTo(lower[0]) <= 0)) {
								correctlyPartitioned = false;
							}
						}

						if (!correctlyPartitioned) {
							fail("Record was not correctly partitioned: " + s.toString());
						}
					}
				}
			});

		result.output(new DiscardingOutputFormat<Boolean>());
		env.execute();
	}

	/**
	 * Tests that records are partitioned correctly on two fields, according to a custom data distribution.
	 */
	@Test
	public void testRangeWithDistribution2() throws Exception {
		final TestDataDist2 dist = new TestDataDist2();

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(dist.getParallelism());

		DataSet<Tuple3<Integer, Integer, String>> input = env.fromElements(
			new Tuple3<>(1, 5, "Hi"),
			new Tuple3<>(1, 6, "Hi"),
			new Tuple3<>(1, 7, "Hi"),
			new Tuple3<>(1, 11, "Hello"),
			new Tuple3<>(2, 3, "World"),
			new Tuple3<>(2, 4, "World"),
			new Tuple3<>(2, 5, "World"),
			new Tuple3<>(2, 13, "Hello World"),
			new Tuple3<>(3, 8, "Say"),
			new Tuple3<>(4, 0, "Why"),
			new Tuple3<>(4, 2, "Java"),
			new Tuple3<>(4, 11, "Say Hello"),
			new Tuple3<>(5, 1, "Hi Java!"),
			new Tuple3<>(5, 2, "Hi Java?"),
			new Tuple3<>(5, 3, "Hi Java again"));

		DataSet<Boolean> result = DataSetUtils
			.partitionByRange(input, dist, 0, 1)
			.mapPartition(new RichMapPartitionFunction<Tuple3<Integer, Integer, String>, Boolean>() {

				@Override
				public void mapPartition(Iterable<Tuple3<Integer, Integer, String>> values, Collector<Boolean> out) throws Exception {
					int pIdx = getRuntimeContext().getIndexOfThisSubtask();
					boolean correctlyPartitioned = true;

					for (Tuple3<Integer, Integer, String> s : values) {
						if (pIdx == 0) {
							// first partition: (f0, f1) must not exceed the first boundary (lexicographic order)
							Integer[] upper = dist.boundaries[0];
							if (s.f0.compareTo(upper[0]) > 0 ||
									(s.f0.compareTo(upper[0]) == 0 && s.f1.compareTo(upper[1]) > 0)) {
								correctlyPartitioned = false;
							}
						}
						else if (pIdx > 0 && pIdx < dist.getParallelism() - 1) {
							// inner partition: (f0, f1) must lie between the two adjacent boundaries
							Integer[] lower = dist.boundaries[pIdx - 1];
							Integer[] upper = dist.boundaries[pIdx];

							if (s.f0.compareTo(upper[0]) > 0 ||
									(s.f0.compareTo(upper[0]) == 0 && s.f1.compareTo(upper[1]) > 0) ||
									(s.f0.compareTo(lower[0]) < 0) ||
									(s.f0.compareTo(lower[0]) == 0 && s.f1.compareTo(lower[1]) <= 0)) {
								correctlyPartitioned = false;
							}
						}
						else {
							// last partition: (f0, f1) must exceed the last boundary (lexicographic order)
							Integer[] lower = dist.boundaries[pIdx - 1];
							if ((s.f0.compareTo(lower[0]) < 0) ||
									(s.f0.compareTo(lower[0]) == 0 && s.f1.compareTo(lower[1]) <= 0)) {
								correctlyPartitioned = false;
							}
						}

						if (!correctlyPartitioned) {
							fail("Record was not correctly partitioned: " + s.toString());
						}
					}
				}
			});

		result.output(new DiscardingOutputFormat<Boolean>());
		env.execute();
	}

	/**
	 * Tests range partitioning with fewer partition keys than distribution fields.
	 */
	@Test
	public void testPartitionKeyLessDistribution() throws Exception {
		final TestDataDist2 dist = new TestDataDist2();

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(dist.getParallelism());

		DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

		DataSet<Boolean> result = DataSetUtils
			.partitionByRange(input, dist, 0)
			.mapPartition(new RichMapPartitionFunction<Tuple3<Integer, Long, String>, Boolean>() {

				@Override
				public void mapPartition(Iterable<Tuple3<Integer, Long, String>> values, Collector<Boolean> out) throws Exception {
					int pIdx = getRuntimeContext().getIndexOfThisSubtask();

					for (Tuple3<Integer, Long, String> s : values) {
						boolean correctlyPartitioned = true;
						if (pIdx == 0) {
							// first partition: the key must not exceed the first boundary
							Integer[] upper = dist.boundaries[0];
							if (s.f0.compareTo(upper[0]) > 0) {
								correctlyPartitioned = false;
							}
						}
						else if (pIdx > 0 && pIdx < dist.getParallelism() - 1) {
							// inner partition: the key must lie between the two adjacent boundaries
							Integer[] lower = dist.boundaries[pIdx - 1];
							Integer[] upper = dist.boundaries[pIdx];
							if (s.f0.compareTo(upper[0]) > 0 || (s.f0.compareTo(lower[0]) <= 0)) {
								correctlyPartitioned = false;
							}
						}
						else {
							// last partition: the key must exceed the last boundary
							Integer[] lower = dist.boundaries[pIdx - 1];
							if ((s.f0.compareTo(lower[0]) <= 0)) {
								correctlyPartitioned = false;
							}
						}

						if (!correctlyPartitioned) {
							fail("Record was not correctly partitioned: " + s.toString());
						}
					}
				}
			});

		result.output(new DiscardingOutputFormat<Boolean>());
		env.execute();
	}

	/**
	 * Tests that range partitioning with more partition keys than distribution fields is rejected.
	 */
	@Test(expected = IllegalArgumentException.class)
	public void testPartitionMoreThanDistribution() throws Exception {
		final TestDataDist2 dist = new TestDataDist2();

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
		DataSetUtils.partitionByRange(input, dist, 0, 1, 2);
	}

	/**
	 * Data distribution for tests of range partitioning with one key.
	 */
	public static class TestDataDist1 implements DataDistribution {

		public Integer[][] boundaries = new Integer[][]{
			new Integer[]{4},
			new Integer[]{9},
			new Integer[]{13},
			new Integer[]{18}
		};

		public TestDataDist1() {}

		public int getParallelism() {
			return boundaries.length;
		}

		@Override
		public Object[] getBucketBoundary(int bucketNum, int totalNumBuckets) {
			return boundaries[bucketNum];
		}

		@Override
		public int getNumberOfFields() {
			return 1;
		}

		@Override
		public TypeInformation[] getKeyTypes() {
			return new TypeInformation[]{BasicTypeInfo.INT_TYPE_INFO};
		}

		@Override
		public void write(DataOutputView out) throws IOException {}

		@Override
		public void read(DataInputView in) throws IOException {}
	}

	/**
	 * Data distribution for tests of range partitioning with two keys.
	 */
	public static class TestDataDist2 implements DataDistribution {

		public Integer[][] boundaries = new Integer[][]{
			new Integer[]{1, 6},
			new Integer[]{2, 4},
			new Integer[]{3, 9},
			new Integer[]{4, 1},
			new Integer[]{5, 2}
		};

		public TestDataDist2() {}

		public int getParallelism() {
			return boundaries.length;
		}

		@Override
		public Object[] getBucketBoundary(int bucketNum, int totalNumBuckets) {
			return boundaries[bucketNum];
		}

		@Override
		public int getNumberOfFields() {
			return 2;
		}

		@Override
		public TypeInformation[] getKeyTypes() {
			return new TypeInformation[]{BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO};
		}

		@Override
		public void write(DataOutputView out) throws IOException {}

		@Override
		public void read(DataInputView in) throws IOException {}
	}
}