/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.test.checkpointing;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.TaskManagerOptions;
import org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.runtime.state.CheckpointListener;
import org.apache.flink.streaming.api.checkpoint.ListCheckpointed;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.streaming.api.functions.windowing.RichWindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.util.TestStreamEnvironment;
import org.apache.flink.test.util.SuccessException;
import org.apache.flink.util.Collector;
import org.apache.flink.util.TestLogger;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static org.apache.flink.test.util.TestUtils.tryExecute;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/**
* This test uses a custom non-serializable data type to ensure that state
* serializability is handled correctly.
*/
@SuppressWarnings("serial")
@RunWith(Parameterized.class)
public class WindowCheckpointingITCase extends TestLogger {
/** Time characteristic that every test job of this parameterized run is configured with. */
private final TimeCharacteristic timeCharacteristic;

/**
 * Creates the test instance for one parameterization.
 *
 * @param timeCharacteristic the time characteristic to apply to the stream environment
 */
public WindowCheckpointingITCase(TimeCharacteristic timeCharacteristic) {
    this.timeCharacteristic = timeCharacteristic;
}

/** Parallelism of the test jobs; the cluster is sized from this value. */
private static final int PARALLELISM = 4;

/** Local cluster shared by all test methods; started once per class. */
private static LocalFlinkMiniCluster cluster;

/** Stream environment that submits jobs to the shared cluster. */
private static TestStreamEnvironment env;
/**
 * Starts the shared local cluster and creates the stream environment used by all tests.
 *
 * <p>The cluster is configured with two task managers whose slots add up to
 * {@code PARALLELISM}.
 */
@BeforeClass
public static void startTestCluster() {
    final int numTaskManagers = 2;

    final Configuration clusterConfig = new Configuration();
    clusterConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTaskManagers);
    clusterConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, PARALLELISM / numTaskManagers);
    clusterConfig.setLong(TaskManagerOptions.MANAGED_MEMORY_SIZE, 48L);

    cluster = new LocalFlinkMiniCluster(clusterConfig, false);
    cluster.start();

    env = new TestStreamEnvironment(cluster, PARALLELISM);
}
/**
 * Stops the shared local cluster, if one has been created.
 */
@AfterClass
public static void stopTestCluster() {
    if (cluster == null) {
        // nothing was started, so there is nothing to shut down
        return;
    }
    cluster.stop();
}
// ------------------------------------------------------------------------
/**
 * Runs a keyed 100 ms tumbling time window with a rich window function over the failing
 * source and checks, via the validating sink, that every key is counted exactly once.
 */
@Test
public void testTumblingProcessingTimeWindow() {
    final int elementCount = 3000;
    FailingSource.reset();

    try {
        // NOTE(review): statement order is kept as-is; the watermark interval is set
        // after the time characteristic on purpose in case the latter resets it.
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(timeCharacteristic);
        env.getConfig().setAutoWatermarkInterval(10);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();

        env
            .addSource(new FailingSource(elementCount, elementCount / 3))
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(100, MILLISECONDS))
            .apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple2<Long, IntType>, Tuple, TimeWindow>() {

                /** Set by open(); lets apply() verify the lifecycle call happened. */
                private boolean opened = false;

                @Override
                public void open(Configuration parameters) {
                    assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                    opened = true;
                }

                @Override
                public void apply(
                        Tuple tuple,
                        TimeWindow window,
                        Iterable<Tuple2<Long, IntType>> values,
                        Collector<Tuple2<Long, IntType>> out) {

                    // open() must have run before any window is handed to this function
                    assertTrue(opened);

                    for (Tuple2<Long, IntType> element : values) {
                        // the source emits (i, IntType(i)), so key and payload must agree
                        assertEquals(element.f0.intValue(), element.f1.value);
                        out.collect(new Tuple2<>(element.f0, new IntType(1)));
                    }
                }
            })
            .addSink(new ValidatingSink(elementCount, 1)).setParallelism(1);

        tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
/**
 * Runs a keyed sliding time window (size 150 ms, slide 50 ms) with a rich window function
 * over the failing source. The validating sink expects a count of three per key, which
 * matches each element being part of three overlapping windows.
 */
@Test
public void testSlidingProcessingTimeWindow() {
    final int NUM_ELEMENTS = 3000;
    FailingSource.reset();

    try {
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(timeCharacteristic);
        env.getConfig().setAutoWatermarkInterval(10);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();

        env
            .addSource(new FailingSource(NUM_ELEMENTS, NUM_ELEMENTS / 3))
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(150, MILLISECONDS), Time.of(50, MILLISECONDS))
            .apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple2<Long, IntType>, Tuple, TimeWindow>() {

                /** Set by open(); lets apply() verify the lifecycle call happened. */
                private boolean open = false;

                @Override
                public void open(Configuration parameters) {
                    assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                    open = true;
                }

                @Override
                public void apply(
                        Tuple tuple,
                        TimeWindow window,
                        Iterable<Tuple2<Long, IntType>> values,
                        Collector<Tuple2<Long, IntType>> out) {

                    // validate that the function has been opened properly
                    assertTrue(open);

                    for (Tuple2<Long, IntType> value : values) {
                        // the source emits (i, IntType(i)), so key and payload must agree
                        assertEquals(value.f0.intValue(), value.f1.value);
                        out.collect(new Tuple2<Long, IntType>(value.f0, new IntType(1)));
                    }
                }
            })
            .addSink(new ValidatingSink(NUM_ELEMENTS, 3)).setParallelism(1);

        // job name now matches the window type under test (was a copy of the tumbling test's name)
        tryExecute(env, "Sliding Window Test");
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
/**
 * Runs a keyed 100 ms tumbling time window that pre-aggregates with a reduce function.
 * A map step sets every payload to 1 first; the validating sink expects a count of one
 * per key.
 */
@Test
public void testAggregatingTumblingProcessingTimeWindow() {
    final int NUM_ELEMENTS = 3000;
    FailingSource.reset();

    try {
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(timeCharacteristic);
        env.getConfig().setAutoWatermarkInterval(10);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();

        env
            .addSource(new FailingSource(NUM_ELEMENTS, NUM_ELEMENTS / 3))
            .map(new MapFunction<Tuple2<Long, IntType>, Tuple2<Long, IntType>>() {
                @Override
                public Tuple2<Long, IntType> map(Tuple2<Long, IntType> value) {
                    // normalize the payload so the sink can count occurrences per key
                    value.f1.value = 1;
                    return value;
                }
            })
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(100, MILLISECONDS))
            .reduce(new ReduceFunction<Tuple2<Long, IntType>>() {
                @Override
                public Tuple2<Long, IntType> reduce(
                        Tuple2<Long, IntType> a,
                        Tuple2<Long, IntType> b) {

                    // collapse to a single record with count 1 instead of summing
                    return new Tuple2<>(a.f0, new IntType(1));
                }
            })
            .addSink(new ValidatingSink(NUM_ELEMENTS, 1)).setParallelism(1);

        // distinct job name so this job can be told apart from the non-aggregating tumbling test
        tryExecute(env, "Aggregating Tumbling Window Test");
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
/**
 * Runs a keyed sliding time window (size 150 ms, slide 50 ms) that pre-aggregates with a
 * reduce function. A map step sets every payload to 1 first; the validating sink expects
 * a count of three per key.
 */
@Test
public void testAggregatingSlidingProcessingTimeWindow() {
    final int NUM_ELEMENTS = 3000;
    FailingSource.reset();

    try {
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(timeCharacteristic);
        env.getConfig().setAutoWatermarkInterval(10);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();

        env
            .addSource(new FailingSource(NUM_ELEMENTS, NUM_ELEMENTS / 3))
            .map(new MapFunction<Tuple2<Long, IntType>, Tuple2<Long, IntType>>() {
                @Override
                public Tuple2<Long, IntType> map(Tuple2<Long, IntType> value) {
                    // normalize the payload so the sink can count occurrences per key
                    value.f1.value = 1;
                    return value;
                }
            })
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(150, MILLISECONDS), Time.of(50, MILLISECONDS))
            .reduce(new ReduceFunction<Tuple2<Long, IntType>>() {
                @Override
                public Tuple2<Long, IntType> reduce(
                        Tuple2<Long, IntType> a,
                        Tuple2<Long, IntType> b) {

                    // collapse to a single record with count 1 instead of summing
                    return new Tuple2<>(a.f0, new IntType(1));
                }
            })
            .addSink(new ValidatingSink(NUM_ELEMENTS, 3)).setParallelism(1);

        // job name now matches the window type under test (was a copy of the tumbling test's name)
        tryExecute(env, "Aggregating Sliding Window Test");
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
// ------------------------------------------------------------------------
// Utilities
// ------------------------------------------------------------------------
/**
 * Source that emits the tuples {@code (i, IntType(i))} for increasing {@code i} and throws
 * one artificial exception per test.
 *
 * <p>The failure is triggered once at least two checkpoint-complete notifications have
 * been received and at least {@code failureAfterNumElements} elements have been emitted.
 * Whether the failure already happened is kept in a static flag, so it is shared by all
 * instances of this class in the JVM until {@link #reset()} is called.
 *
 * <p>The number of emitted elements is the checkpointed state.
 */
private static class FailingSource extends RichSourceFunction<Tuple2<Long, IntType>>
implements ListCheckpointed<Integer>, CheckpointListener
{
// static on purpose: the flag must outlive the individual source instance
private static volatile boolean failedBefore = false;
// total number of elements this source emits
private final int numElementsToEmit;
// number of emitted elements required before the artificial failure may be thrown
private final int failureAfterNumElements;
// next element index to emit; this is the value that is snapshotted and restored
private volatile int numElementsEmitted;
// number of checkpoint-complete notifications received by this instance
private volatile int numSuccessfulCheckpoints;
// cleared by cancel() to end the run loop
private volatile boolean running = true;
/**
 * @param numElementsToEmit total number of elements to emit
 * @param failureAfterNumElements minimum number of emitted elements before the failure
 */
private FailingSource(int numElementsToEmit, int failureAfterNumElements) {
this.numElementsToEmit = numElementsToEmit;
this.failureAfterNumElements = failureAfterNumElements;
}
/**
 * Verifies that the source runs with a parallelism of one.
 */
@Override
public void open(Configuration parameters) {
// non-parallel source
assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
}
/**
 * Emits elements until cancelled, throwing the artificial failure once its conditions hold.
 *
 * @throws Exception the artificial failure, or an interruption of one of the sleeps
 */
@Override
public void run(SourceContext<Tuple2<Long, IntType>> ctx) throws Exception {
// we loop longer than we have elements, to permit delayed checkpoints
// to still cause a failure
while (running) {
if (!failedBefore) {
// delay a bit, if we have not failed before
Thread.sleep(1);
if (numSuccessfulCheckpoints >= 2 && numElementsEmitted >= failureAfterNumElements) {
// cause a failure if we have not failed before and have reached
// enough completed checkpoints and elements
failedBefore = true;
throw new Exception("Artificial Failure");
}
}
// before the failure, emission pauses once more than failureAfterNumElements
// elements are out; the loop then idles until the failure condition is met
if (numElementsEmitted < numElementsToEmit &&
(failedBefore || numElementsEmitted <= failureAfterNumElements)) {
// the function failed before, or we are in the elements before the failure
// counter update and emission happen together under the checkpoint lock
synchronized (ctx.getCheckpointLock()) {
int next = numElementsEmitted++;
ctx.collect(new Tuple2<Long, IntType>((long) next, new IntType(next)));
}
} else {
// if our work is done, delay a bit to prevent busy waiting
Thread.sleep(10);
}
}
}
/**
 * Requests the run loop to stop.
 */
@Override
public void cancel() {
running = false;
}
/**
 * Counts the notification; the count gates the artificial failure in {@code run}.
 */
@Override
public void notifyCheckpointComplete(long checkpointId) {
numSuccessfulCheckpoints++;
}
/**
 * @return a single-element list holding the number of elements emitted so far
 */
@Override
public List<Integer> snapshotState(long checkpointId, long timestamp) throws Exception {
return Collections.singletonList(this.numElementsEmitted);
}
/**
 * Restores the emitted-element counter from a snapshot.
 *
 * @param state the restored state; must contain exactly one entry
 * @throws RuntimeException if the list does not contain exactly one entry
 */
@Override
public void restoreState(List<Integer> state) throws Exception {
if (state.isEmpty() || state.size() > 1) {
throw new RuntimeException("Test failed due to unexpected recovered state size " + state.size());
}
this.numElementsEmitted = state.get(0);
}
/**
 * Clears the shared failure flag so that the next job fails once again.
 */
public static void reset() {
failedBefore = false;
}
}
/**
 * Sink that sums the received values per key and ends the job by throwing a
 * {@link SuccessException} once the overall sum reaches the expected total.
 *
 * <p>The per-key sums are the checkpointed state; the overall sum is rebuilt from them
 * when state is restored.
 */
private static class ValidatingSink extends RichSinkFunction<Tuple2<Long, IntType>>
        implements ListCheckpointed<HashMap<Long, Integer>> {

    /** Sum of the received values for each key. */
    private final HashMap<Long, Integer> counts = new HashMap<>();

    /** Number of distinct keys expected. */
    private final int elementCountExpected;

    /** Sum expected for every single key. */
    private final int countPerElementExpected;

    /** Sum of all received values, across keys. */
    private int aggCount;

    /**
     * @param elementCountExpected number of distinct keys expected
     * @param countPerElementExpected sum expected for each key
     */
    private ValidatingSink(int elementCountExpected, int countPerElementExpected) {
        this.elementCountExpected = elementCountExpected;
        this.countPerElementExpected = countPerElementExpected;
    }

    /**
     * Verifies the sink runs with a parallelism of one and checks for completion right away.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        // the validation only works with a single sink instance
        assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());

        // presumably covers the case where the restored state is already complete
        checkSuccess();
    }

    /**
     * Adds the record's value to the sum of its key and to the overall sum.
     */
    @Override
    public void invoke(Tuple2<Long, IntType> value) throws Exception {
        final Integer previous = counts.get(value.f0);
        counts.put(value.f0, previous == null ? value.f1.value : previous + value.f1.value);

        // check if we have seen all we expect
        aggCount += value.f1.value;
        checkSuccess();
    }

    /**
     * Validates the per-key sums and signals success once the expected total is reached.
     *
     * @throws SuccessException when the expected total has been reached and validated
     */
    private void checkSuccess() throws SuccessException {
        if (aggCount < elementCountExpected * countPerElementExpected) {
            // not done yet
            return;
        }

        // we are done: every key must be present with exactly the expected sum
        assertEquals(elementCountExpected, counts.size());
        for (Integer perKey : counts.values()) {
            assertEquals(countPerElementExpected, perKey.intValue());
        }

        // exit
        throw new SuccessException();
    }

    /**
     * @return a single-element list holding the per-key sums
     */
    @Override
    public List<HashMap<Long, Integer>> snapshotState(long checkpointId, long timestamp) throws Exception {
        return Collections.singletonList(this.counts);
    }

    /**
     * Restores the per-key sums and recomputes the overall sum from them.
     *
     * @param state the restored state; must contain exactly one entry
     * @throws RuntimeException if the list does not contain exactly one entry
     */
    @Override
    public void restoreState(List<HashMap<Long, Integer>> state) throws Exception {
        if (state.size() != 1) {
            throw new RuntimeException("Test failed due to unexpected recovered state size " + state.size());
        }

        final HashMap<Long, Integer> restored = state.get(0);
        this.counts.putAll(restored);

        for (Integer perKey : restored.values()) {
            this.aggCount += perKey;
        }
    }
}
// ------------------------------------------------------------------------
// Parametrization for testing different time characteristics
// ------------------------------------------------------------------------
/**
 * Supplies the parameter sets for the {@link Parameterized} runner: one run with
 * processing time and one with ingestion time.
 *
 * <p>No {@code @SuppressWarnings} is needed here: the varargs element type
 * {@code TimeCharacteristic[]} is not generic, so the call produces no unchecked or
 * raw-type warning.
 *
 * @return one single-element array per time characteristic to test
 */
@Parameterized.Parameters(name = "TimeCharacteristic = {0}")
public static Collection<TimeCharacteristic[]> timeCharacteristic() {
    return Arrays.asList(
        new TimeCharacteristic[] {TimeCharacteristic.ProcessingTime},
        new TimeCharacteristic[] {TimeCharacteristic.IngestionTime});
}
// ------------------------------------------------------------------------
// Custom data type used as the record payload
// ------------------------------------------------------------------------
/**
 * Mutable holder for a single {@code int}. It deliberately does not implement
 * {@link java.io.Serializable}.
 */
public static class IntType {

    /** The wrapped value; public and mutable, written directly by the test functions. */
    public int value;

    /** Creates an instance whose value is the default {@code 0}. */
    public IntType() {
    }

    /**
     * Creates an instance holding the given value.
     *
     * @param value the initial value
     */
    public IntType(int value) {
        this.value = value;
    }
}
}