/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.test.streaming.runtime;
import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.util.StreamingMultipleProgramsTestBase;
import org.apache.flink.util.Collector;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
 * Integration tests for the windowed {@code coGroup} and {@code join} operations on
 * {@link DataStream}s.
 *
 * <p>Every test runs in event time with parallelism 1, takes element timestamps from the last
 * tuple field, groups the elements into tumbling windows of {@link #WINDOW_SIZE_MS} milliseconds
 * and collects the emitted strings in the static {@link #testResults} list. Results are sorted
 * before they are compared, so the tests do not depend on the emission order.
 */
@SuppressWarnings("serial")
public class CoGroupJoinITCase extends StreamingMultipleProgramsTestBase {

    /** Size, in milliseconds, of the tumbling event-time windows used by all tests. */
    private static final long WINDOW_SIZE_MS = 3;

    /**
     * Collects everything written by {@link ResultSink}. It is static because the sink function
     * cannot hold a reference to the test instance; each test re-initializes it before running.
     */
    private static List<String> testResults;

    /**
     * Co-groups two keyed streams and checks that each (key, window) pair yields exactly one
     * result containing the elements of both sides, including pairs where one side is empty.
     */
    @Test
    public void testCoGroup() throws Exception {
        testResults = new ArrayList<>();

        StreamExecutionEnvironment env = createEventTimeEnvironment();

        DataStream<Tuple2<String, Integer>> source1 = env.addSource(new SourceFunction<Tuple2<String, Integer>>() {
            @Override
            public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
                ctx.collect(Tuple2.of("a", 0));
                ctx.collect(Tuple2.of("a", 1));
                ctx.collect(Tuple2.of("a", 2));

                ctx.collect(Tuple2.of("b", 3));
                ctx.collect(Tuple2.of("b", 4));
                ctx.collect(Tuple2.of("b", 5));

                ctx.collect(Tuple2.of("a", 6));
                ctx.collect(Tuple2.of("a", 7));
                ctx.collect(Tuple2.of("a", 8));

                // source is finite, so it will have an implicit MAX watermark when it finishes
            }

            @Override
            public void cancel() {
            }
        }).assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());

        DataStream<Tuple2<String, Integer>> source2 = env.addSource(new SourceFunction<Tuple2<String, Integer>>() {
            @Override
            public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
                ctx.collect(Tuple2.of("a", 0));
                ctx.collect(Tuple2.of("a", 1));

                ctx.collect(Tuple2.of("b", 3));

                ctx.collect(Tuple2.of("c", 6));
                ctx.collect(Tuple2.of("c", 7));
                ctx.collect(Tuple2.of("c", 8));

                // source is finite, so it will have an implicit MAX watermark when it finishes
            }

            @Override
            public void cancel() {
            }
        }).assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());

        source1.coGroup(source2)
                .where(new Tuple2KeyExtractor())
                .equalTo(new Tuple2KeyExtractor())
                .window(tumblingWindows())
                .apply(new CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
                    @Override
                    public void coGroup(Iterable<Tuple2<String, Integer>> first,
                            Iterable<Tuple2<String, Integer>> second,
                            Collector<String> out) throws Exception {
                        // Render both sides as "F:<first elements> S:<second elements>".
                        StringBuilder result = new StringBuilder();
                        result.append("F:");
                        for (Tuple2<String, Integer> t : first) {
                            result.append(t.toString());
                        }
                        result.append(" S:");
                        for (Tuple2<String, Integer> t : second) {
                            result.append(t.toString());
                        }
                        out.collect(result.toString());
                    }
                })
                .addSink(new ResultSink());

        env.execute("CoGroup Test");

        assertResults(
                "F:(a,0)(a,1)(a,2) S:(a,0)(a,1)",
                "F:(b,3)(b,4)(b,5) S:(b,3)",
                "F:(a,6)(a,7)(a,8) S:",
                "F: S:(c,6)(c,7)(c,8)");
    }

    /**
     * Joins two keyed streams and checks that every window produces the cross product of the
     * elements that share a key.
     */
    @Test
    public void testJoin() throws Exception {
        testResults = new ArrayList<>();

        StreamExecutionEnvironment env = createEventTimeEnvironment();

        DataStream<Tuple3<String, String, Integer>> source1 = createJoinInput(env);

        DataStream<Tuple3<String, String, Integer>> source2 = env.addSource(new SourceFunction<Tuple3<String, String, Integer>>() {
            @Override
            public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
                ctx.collect(Tuple3.of("a", "u", 0));
                ctx.collect(Tuple3.of("a", "w", 1));

                ctx.collect(Tuple3.of("b", "i", 3));
                ctx.collect(Tuple3.of("b", "k", 5));

                ctx.collect(Tuple3.of("a", "x", 6));
                ctx.collect(Tuple3.of("a", "z", 8));

                // source is finite, so it will have an implicit MAX watermark when it finishes
            }

            @Override
            public void cancel() {
            }
        }).assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());

        source1.join(source2)
                .where(new Tuple3KeyExtractor())
                .equalTo(new Tuple3KeyExtractor())
                .window(tumblingWindows())
                .apply(new PairToStringJoinFunction())
                .addSink(new ResultSink());

        env.execute("Join Test");

        assertResults(
                "(a,x,0):(a,u,0)",
                "(a,x,0):(a,w,1)",
                "(a,y,1):(a,u,0)",
                "(a,y,1):(a,w,1)",
                "(a,z,2):(a,u,0)",
                "(a,z,2):(a,w,1)",
                "(b,u,3):(b,i,3)",
                "(b,u,3):(b,k,5)",
                "(b,w,5):(b,i,3)",
                "(b,w,5):(b,k,5)",
                "(a,i,6):(a,x,6)",
                "(a,i,6):(a,z,8)",
                "(a,j,7):(a,x,6)",
                "(a,j,7):(a,z,8)",
                "(a,k,8):(a,x,6)",
                "(a,k,8):(a,z,8)");
    }

    /**
     * Joins a stream with itself and checks that every window produces the full cross product
     * of the elements that share a key, including each element paired with itself.
     */
    @Test
    public void testSelfJoin() throws Exception {
        testResults = new ArrayList<>();

        StreamExecutionEnvironment env = createEventTimeEnvironment();

        DataStream<Tuple3<String, String, Integer>> source1 = createJoinInput(env);

        // The very same stream is used as both join inputs.
        source1.join(source1)
                .where(new Tuple3KeyExtractor())
                .equalTo(new Tuple3KeyExtractor())
                .window(tumblingWindows())
                .apply(new PairToStringJoinFunction())
                .addSink(new ResultSink());

        env.execute("Self-Join Test");

        assertResults(
                "(a,x,0):(a,x,0)",
                "(a,x,0):(a,y,1)",
                "(a,x,0):(a,z,2)",
                "(a,y,1):(a,x,0)",
                "(a,y,1):(a,y,1)",
                "(a,y,1):(a,z,2)",
                "(a,z,2):(a,x,0)",
                "(a,z,2):(a,y,1)",
                "(a,z,2):(a,z,2)",
                "(b,u,3):(b,u,3)",
                "(b,u,3):(b,w,5)",
                "(b,w,5):(b,u,3)",
                "(b,w,5):(b,w,5)",
                "(a,i,6):(a,i,6)",
                "(a,i,6):(a,j,7)",
                "(a,i,6):(a,k,8)",
                "(a,j,7):(a,i,6)",
                "(a,j,7):(a,j,7)",
                "(a,j,7):(a,k,8)",
                "(a,k,8):(a,i,6)",
                "(a,k,8):(a,j,7)",
                "(a,k,8):(a,k,8)");
    }

    // ------------------------------------------------------------------------
    //  Shared test scaffolding
    // ------------------------------------------------------------------------

    /** Creates the execution environment shared by all tests: event time, parallelism 1. */
    private static StreamExecutionEnvironment createEventTimeEnvironment() {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(1);
        return env;
    }

    /** Returns the tumbling event-time window assigner used by all tests. */
    private static TumblingEventTimeWindows tumblingWindows() {
        return TumblingEventTimeWindows.of(Time.of(WINDOW_SIZE_MS, TimeUnit.MILLISECONDS));
    }

    /**
     * Creates the timestamped Tuple3 stream used as the first input of {@link #testJoin()} and
     * as both inputs of {@link #testSelfJoin()}.
     *
     * <p>The source stays an anonymous class with concrete type arguments so that its element
     * type remains visible in the class signature.
     */
    private static DataStream<Tuple3<String, String, Integer>> createJoinInput(StreamExecutionEnvironment env) {
        return env.addSource(new SourceFunction<Tuple3<String, String, Integer>>() {
            @Override
            public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
                ctx.collect(Tuple3.of("a", "x", 0));
                ctx.collect(Tuple3.of("a", "y", 1));
                ctx.collect(Tuple3.of("a", "z", 2));

                ctx.collect(Tuple3.of("b", "u", 3));
                ctx.collect(Tuple3.of("b", "w", 5));

                ctx.collect(Tuple3.of("a", "i", 6));
                ctx.collect(Tuple3.of("a", "j", 7));
                ctx.collect(Tuple3.of("a", "k", 8));

                // source is finite, so it will have an implicit MAX watermark when it finishes
            }

            @Override
            public void cancel() {
            }
        }).assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());
    }

    /**
     * Asserts that the collected results equal {@code expected}, ignoring order.
     *
     * <p>Both the expected values and {@link #testResults} are sorted in place before the
     * comparison.
     */
    private static void assertResults(String... expected) {
        List<String> expectedResult = Arrays.asList(expected);
        Collections.sort(expectedResult);
        Collections.sort(testResults);
        Assert.assertEquals(expectedResult, testResults);
    }

    /** Sink that appends every received value to {@link #testResults}. */
    private static class ResultSink implements SinkFunction<String> {
        @Override
        public void invoke(String value) throws Exception {
            testResults.add(value);
        }
    }

    /** Join function that renders a joined pair as {@code <first>:<second>}. */
    private static class PairToStringJoinFunction
            implements JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String> {
        @Override
        public String join(Tuple3<String, String, Integer> first, Tuple3<String, String, Integer> second) throws Exception {
            return first + ":" + second;
        }
    }

    /**
     * Uses field {@code f1} as the element timestamp and returns, for every element, a watermark
     * that trails the extracted timestamp by one.
     */
    private static class Tuple2TimestampExtractor implements AssignerWithPunctuatedWatermarks<Tuple2<String, Integer>> {

        @Override
        public long extractTimestamp(Tuple2<String, Integer> element, long previousTimestamp) {
            return element.f1;
        }

        @Override
        public Watermark checkAndGetNextWatermark(Tuple2<String, Integer> element, long extractedTimestamp) {
            return new Watermark(extractedTimestamp - 1);
        }
    }

    /**
     * Uses field {@code f2} as the element timestamp and returns, for every element, a watermark
     * that trails the extracted timestamp by one.
     */
    private static class Tuple3TimestampExtractor implements AssignerWithPunctuatedWatermarks<Tuple3<String, String, Integer>> {

        @Override
        public long extractTimestamp(Tuple3<String, String, Integer> element, long previousTimestamp) {
            return element.f2;
        }

        @Override
        public Watermark checkAndGetNextWatermark(Tuple3<String, String, Integer> lastElement, long extractedTimestamp) {
            // Derive the watermark from the extracted timestamp, exactly as the Tuple2
            // extractor does, instead of reading the timestamp field a second time.
            return new Watermark(extractedTimestamp - 1);
        }
    }

    /** Keys a Tuple2 element by its first field. */
    private static class Tuple2KeyExtractor implements KeySelector<Tuple2<String, Integer>, String> {

        @Override
        public String getKey(Tuple2<String, Integer> value) throws Exception {
            return value.f0;
        }
    }

    /** Keys a Tuple3 element by its first field. */
    private static class Tuple3KeyExtractor implements KeySelector<Tuple3<String, String, Integer>, String> {

        @Override
        public String getKey(Tuple3<String, String, Integer> value) throws Exception {
            return value.f0;
        }
    }
}