JobManagerTest.java example

Explorer
flink-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.jobmanager;

import akka.actor.ActorRef;
import akka.actor.ActorSystem;
import akka.actor.PoisonPill;
import akka.actor.Status;
import akka.testkit.JavaTestKit;
import akka.testkit.TestProbe;
import com.typesafe.config.Config;
import org.apache.flink.api.common.JobID;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.JobManagerOptions;
import org.apache.flink.configuration.TaskManagerOptions;
import org.apache.flink.runtime.akka.AkkaUtils;
import org.apache.flink.runtime.akka.ListeningBehaviour;
import org.apache.flink.runtime.checkpoint.CheckpointDeclineReason;
import org.apache.flink.runtime.clusterframework.messages.NotifyResourceStarted;
import org.apache.flink.runtime.clusterframework.messages.RegisterResourceManager;
import org.apache.flink.runtime.clusterframework.messages.RegisterResourceManagerSuccessful;
import org.apache.flink.runtime.clusterframework.messages.TriggerRegistrationAtJobManager;
import org.apache.flink.runtime.clusterframework.types.ResourceID;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.runtime.executiongraph.ExecutionGraph;
import org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils;
import org.apache.flink.runtime.executiongraph.ExecutionVertex;
import org.apache.flink.runtime.executiongraph.IntermediateResultPartition;
import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
import org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedHaServices;
import org.apache.flink.runtime.instance.ActorGateway;
import org.apache.flink.runtime.instance.AkkaActorGateway;
import org.apache.flink.runtime.instance.HardwareDescription;
import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobStatus;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings;
import org.apache.flink.runtime.jobgraph.tasks.ExternalizedCheckpointSettings;
import org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings;
import org.apache.flink.runtime.jobmanager.JobManagerHARecoveryTest.BlockingStatefulInvokable;
import org.apache.flink.runtime.messages.JobManagerMessages;
import org.apache.flink.runtime.messages.JobManagerMessages.CancelJob;
import org.apache.flink.runtime.messages.JobManagerMessages.CancellationFailure;
import org.apache.flink.runtime.messages.JobManagerMessages.CancellationResponse;
import org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess;
import org.apache.flink.runtime.messages.JobManagerMessages.RequestPartitionProducerState;
import org.apache.flink.runtime.messages.JobManagerMessages.StopJob;
import org.apache.flink.runtime.messages.JobManagerMessages.StoppingFailure;
import org.apache.flink.runtime.messages.JobManagerMessages.StoppingSuccess;
import org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob;
import org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint;
import org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepointSuccess;
import org.apache.flink.runtime.messages.RegistrationMessages;
import org.apache.flink.runtime.query.KvStateID;
import org.apache.flink.runtime.query.KvStateLocation;
import org.apache.flink.runtime.query.KvStateMessage.LookupKvStateLocation;
import org.apache.flink.runtime.query.KvStateMessage.NotifyKvStateRegistered;
import org.apache.flink.runtime.query.KvStateMessage.NotifyKvStateUnregistered;
import org.apache.flink.runtime.query.KvStateServerAddress;
import org.apache.flink.runtime.query.UnknownKvStateLocation;
import org.apache.flink.runtime.state.KeyGroupRange;
import org.apache.flink.runtime.taskmanager.TaskManager;
import org.apache.flink.runtime.taskmanager.TaskManagerLocation;
import org.apache.flink.runtime.testingUtils.TestingCluster;
import org.apache.flink.runtime.testingUtils.TestingJobManager;
import org.apache.flink.runtime.testingUtils.TestingJobManagerMessages;
import org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ExecutionGraphFound;
import org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.NotifyWhenJobStatus;
import org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph;
import org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning;
import org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunningOrFinished;
import org.apache.flink.runtime.testingUtils.TestingMemoryArchivist;
import org.apache.flink.runtime.testingUtils.TestingTaskManager;
import org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages;
import org.apache.flink.runtime.testingUtils.TestingUtils;
import org.apache.flink.runtime.testtasks.BlockingNoOpInvokable;
import org.apache.flink.runtime.testtasks.NoOpInvokable;
import org.apache.flink.runtime.testutils.StoppableInvokable;
import org.apache.flink.runtime.util.LeaderRetrievalUtils;
import org.apache.flink.util.TestLogger;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import scala.Option;
import scala.Some;
import scala.Tuple2;
import scala.concurrent.Await;
import scala.concurrent.Future;
import scala.concurrent.duration.Deadline;
import scala.concurrent.duration.FiniteDuration;
import scala.reflect.ClassTag$;

import java.io.File;
import java.net.InetAddress;
import java.util.Collections;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import static org.apache.flink.runtime.io.network.partition.ResultPartitionType.PIPELINED;
import static org.apache.flink.runtime.messages.JobManagerMessages.JobResultSuccess;
import static org.apache.flink.runtime.messages.JobManagerMessages.JobSubmitSuccess;
import static org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.AllVerticesRunning;
import static org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.JobStatusIs;
import static org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.NotifyWhenAtLeastNumTaskManagerAreRegistered;
import static org.apache.flink.runtime.testingUtils.TestingUtils.DEFAULT_AKKA_ASK_TIMEOUT;
import static org.apache.flink.runtime.testingUtils.TestingUtils.TESTING_TIMEOUT;
import static org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;

public class JobManagerTest extends TestLogger {

	@Rule
	public TemporaryFolder tmpFolder = new TemporaryFolder();

	private static ActorSystem system;

	private HighAvailabilityServices highAvailabilityServices;

	@BeforeClass
	public static void setup() {
		system = AkkaUtils.createLocalActorSystem(new Configuration());
	}

	@AfterClass
	public static void teardown() {
		JavaTestKit.shutdownActorSystem(system);
	}

	@Before
	public void setupTest() {
		highAvailabilityServices = new EmbeddedHaServices(TestingUtils.defaultExecutor());
	}

	@After
	public void tearDownTest() throws Exception {
		highAvailabilityServices.closeAndCleanupAllData();
		highAvailabilityServices = null;
	}

	@Test
	public void testNullHostnameGoesToLocalhost() {
		try {
			Tuple2<String, Object> address = new Tuple2<String, Object>(null, 1772);
			Config cfg = AkkaUtils.getAkkaConfig(new Configuration(),
					new Some<Tuple2<String, Object>>(address));

			String hostname = cfg.getString("akka.remote.netty.tcp.hostname");
			assertTrue(InetAddress.getByName(hostname).isLoopbackAddress());
		}
		catch (Exception e) {
			e.printStackTrace();
			fail(e.getMessage());
		}
	}

	/**
	 * Tests responses to partition state requests.
	 */
	@Test
	public void testRequestPartitionState() throws Exception {
		new JavaTestKit(system) {{
			new Within(duration("15 seconds")) {
				@Override
				protected void run() {
					// Setup
					TestingCluster cluster = null;

					try {
						cluster = startTestingCluster(2, 1, DEFAULT_AKKA_ASK_TIMEOUT());

						final IntermediateDataSetID rid = new IntermediateDataSetID();

						// Create a task
						final JobVertex sender = new JobVertex("Sender");
						sender.setParallelism(1);
						sender.setInvokableClass(BlockingNoOpInvokable.class); // just block
						sender.createAndAddResultDataSet(rid, PIPELINED);

						final JobGraph jobGraph = new JobGraph("Blocking test job", sender);
						final JobID jid = jobGraph.getJobID();

						final ActorGateway jobManagerGateway = cluster.getLeaderGateway(
							TestingUtils.TESTING_DURATION());

						// we can set the leader session ID to None because we don't use this gateway to send messages
						final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), HighAvailabilityServices.DEFAULT_LEADER_ID);

						// Submit the job and wait for all vertices to be running
						jobManagerGateway.tell(
							new SubmitJob(
								jobGraph,
								ListeningBehaviour.EXECUTION_RESULT),
							testActorGateway);
						expectMsgClass(JobSubmitSuccess.class);

						jobManagerGateway.tell(
							new WaitForAllVerticesToBeRunningOrFinished(jid),
							testActorGateway);

						expectMsgClass(AllVerticesRunning.class);

						// This is the mock execution ID of the task requesting the state of the partition
						final ExecutionAttemptID receiver = new ExecutionAttemptID();

						// Request the execution graph to get the runtime info
						jobManagerGateway.tell(new RequestExecutionGraph(jid), testActorGateway);

						final ExecutionGraph eg = (ExecutionGraph) expectMsgClass(ExecutionGraphFound.class)
							.executionGraph();

						final ExecutionVertex vertex = eg.getJobVertex(sender.getID())
							.getTaskVertices()[0];

						final IntermediateResultPartition partition = vertex.getProducedPartitions()
							.values().iterator().next();

						final ResultPartitionID partitionId = new ResultPartitionID(
							partition.getPartitionId(),
							vertex.getCurrentExecutionAttempt().getAttemptId());

						// - The test ----------------------------------------------------------------------

						// 1. All execution states
						RequestPartitionProducerState request = new RequestPartitionProducerState(
							jid, rid, partitionId);

						for (ExecutionState state : ExecutionState.values()) {
							ExecutionGraphTestUtils.setVertexState(vertex, state);

							Future<ExecutionState> futurePartitionState = jobManagerGateway
								.ask(request, getRemainingTime())
								.mapTo(ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));

							ExecutionState resp = Await.result(futurePartitionState, getRemainingTime());
							assertEquals(state, resp);
						}

						// 2. Non-existing execution
						request = new RequestPartitionProducerState(jid, rid, new ResultPartitionID());

						Future<?> futurePartitionState = jobManagerGateway.ask(request, getRemainingTime());
						try {
							Await.result(futurePartitionState, getRemainingTime());
							fail("Did not fail with expected RuntimeException");
						} catch (RuntimeException e) {
							assertEquals(IllegalArgumentException.class, e.getCause().getClass());
						}

						// 3. Non-existing job
						request = new RequestPartitionProducerState(new JobID(), rid, new ResultPartitionID());
						futurePartitionState = jobManagerGateway.ask(request, getRemainingTime());

						try {
							Await.result(futurePartitionState, getRemainingTime());
							fail("Did not fail with expected IllegalArgumentException");
						} catch (IllegalArgumentException ignored) {
						}
					} catch (Exception e) {
						e.printStackTrace();
						fail(e.getMessage());
					} finally {
						if (cluster != null) {
							cluster.shutdown();
						}
					}
				}
			};
		}};
	}

	/**
	 * Tests the JobManager response when the execution is not registered with
	 * the ExecutionGraph.
	 */
	@Test
	public void testRequestPartitionStateUnregisteredExecution() throws Exception {
		new JavaTestKit(system) {{
			new Within(duration("15 seconds")) {
				@Override
				protected void run() {
					// Setup
					TestingCluster cluster = null;

					try {
						cluster = startTestingCluster(4, 1, DEFAULT_AKKA_ASK_TIMEOUT());

						final IntermediateDataSetID rid = new IntermediateDataSetID();

						// Create a task
						final JobVertex sender = new JobVertex("Sender");
						sender.setParallelism(1);
						sender.setInvokableClass(NoOpInvokable.class); // just finish
						sender.createAndAddResultDataSet(rid, PIPELINED);

						final JobVertex sender2 = new JobVertex("Blocking Sender");
						sender2.setParallelism(1);
						sender2.setInvokableClass(BlockingNoOpInvokable.class); // just block
						sender2.createAndAddResultDataSet(new IntermediateDataSetID(), PIPELINED);

						final JobGraph jobGraph = new JobGraph("Fast finishing producer test job", sender, sender2);
						final JobID jid = jobGraph.getJobID();

						final ActorGateway jobManagerGateway = cluster.getLeaderGateway(
							TestingUtils.TESTING_DURATION());

						// we can set the leader session ID to None because we don't use this gateway to send messages
						final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), HighAvailabilityServices.DEFAULT_LEADER_ID);

						// Submit the job and wait for all vertices to be running
						jobManagerGateway.tell(
							new SubmitJob(
								jobGraph,
								ListeningBehaviour.EXECUTION_RESULT),
							testActorGateway);
						expectMsgClass(JobSubmitSuccess.class);

						jobManagerGateway.tell(
							new WaitForAllVerticesToBeRunningOrFinished(jid),
							testActorGateway);

						expectMsgClass(AllVerticesRunning.class);

						Future<Object> egFuture = jobManagerGateway.ask(
							new RequestExecutionGraph(jobGraph.getJobID()), remaining());

						ExecutionGraphFound egFound = (ExecutionGraphFound) Await.result(egFuture, remaining());
						ExecutionGraph eg = (ExecutionGraph) egFound.executionGraph();

						ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
						while (vertex.getExecutionState() != ExecutionState.FINISHED) {
							Thread.sleep(1);
						}

						IntermediateResultPartition partition = vertex.getProducedPartitions()
							.values().iterator().next();

						ResultPartitionID partitionId = new ResultPartitionID(
							partition.getPartitionId(),
							vertex.getCurrentExecutionAttempt().getAttemptId());

						// Producer finished, request state
						Object request = new RequestPartitionProducerState(jid, rid, partitionId);

						Future<ExecutionState> producerStateFuture = jobManagerGateway
							.ask(request, getRemainingTime())
							.mapTo(ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));

						assertEquals(ExecutionState.FINISHED, Await.result(producerStateFuture, getRemainingTime()));
					} catch (Exception e) {
						e.printStackTrace();
						fail(e.getMessage());
					} finally {
						if (cluster != null) {
							cluster.shutdown();
						}
					}
				}
			};
		}};
	}

	/**
	 * Tests the JobManager response when the execution is not registered with
	 * the ExecutionGraph anymore and a new execution attempt is available.
	 */
	@Test
	public void testRequestPartitionStateMoreRecentExecutionAttempt() throws Exception {
		new JavaTestKit(system) {{
			new Within(duration("15 seconds")) {
				@Override
				protected void run() {
					// Setup
					TestingCluster cluster = null;

					try {
						cluster = startTestingCluster(4, 1, DEFAULT_AKKA_ASK_TIMEOUT());

						final IntermediateDataSetID rid = new IntermediateDataSetID();

						// Create a task
						final JobVertex sender = new JobVertex("Sender");
						sender.setParallelism(1);
						sender.setInvokableClass(NoOpInvokable.class); // just finish
						sender.createAndAddResultDataSet(rid, PIPELINED);

						final JobVertex sender2 = new JobVertex("Blocking Sender");
						sender2.setParallelism(1);
						sender2.setInvokableClass(BlockingNoOpInvokable.class); // just block
						sender2.createAndAddResultDataSet(new IntermediateDataSetID(), PIPELINED);

						final JobGraph jobGraph = new JobGraph("Fast finishing producer test job", sender, sender2);
						final JobID jid = jobGraph.getJobID();

						final ActorGateway jobManagerGateway = cluster.getLeaderGateway(
							TestingUtils.TESTING_DURATION());

						// we can set the leader session ID to None because we don't use this gateway to send messages
						final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), HighAvailabilityServices.DEFAULT_LEADER_ID);

						// Submit the job and wait for all vertices to be running
						jobManagerGateway.tell(
							new SubmitJob(
								jobGraph,
								ListeningBehaviour.EXECUTION_RESULT),
							testActorGateway);
						expectMsgClass(JobManagerMessages.JobSubmitSuccess.class);

						jobManagerGateway.tell(
							new WaitForAllVerticesToBeRunningOrFinished(jid),
							testActorGateway);

						expectMsgClass(TestingJobManagerMessages.AllVerticesRunning.class);

						Future<Object> egFuture = jobManagerGateway.ask(
							new RequestExecutionGraph(jobGraph.getJobID()), remaining());

						ExecutionGraphFound egFound = (ExecutionGraphFound) Await.result(egFuture, remaining());
						ExecutionGraph eg = (ExecutionGraph) egFound.executionGraph();

						ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
						while (vertex.getExecutionState() != ExecutionState.FINISHED) {
							Thread.sleep(1);
						}

						IntermediateResultPartition partition = vertex.getProducedPartitions()
							.values().iterator().next();

						ResultPartitionID partitionId = new ResultPartitionID(
							partition.getPartitionId(),
							vertex.getCurrentExecutionAttempt().getAttemptId());

						// Reset execution => new execution attempt
						vertex.resetForNewExecution(System.currentTimeMillis(), 1L);

						// Producer finished, request state
						Object request = new JobManagerMessages.RequestPartitionProducerState(jid, rid, partitionId);

						Future<?> producerStateFuture = jobManagerGateway.ask(request, getRemainingTime());

						try {
							Await.result(producerStateFuture, getRemainingTime());
							fail("Did not fail with expected Exception");
						} catch (PartitionProducerDisposedException ignored) {
						}
					} catch (Exception e) {
						e.printStackTrace();
						fail(e.getMessage());
					} finally {
						if (cluster != null) {
							cluster.shutdown();
						}
					}
				}
			};
		}};
	}

	@Test
	public void testStopSignal() throws Exception {
		new JavaTestKit(system) {{
			new Within(duration("15 seconds")) {
				@Override
				protected void run() {
					// Setup
					TestingCluster cluster = null;

					try {
						cluster = startTestingCluster(2, 1, DEFAULT_AKKA_ASK_TIMEOUT());

						// Create a task
						final JobVertex sender = new JobVertex("Sender");
						sender.setParallelism(2);
						sender.setInvokableClass(StoppableInvokable.class);

						final JobGraph jobGraph = new JobGraph("Stoppable streaming test job", sender);
						final JobID jid = jobGraph.getJobID();

						final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());

						// we can set the leader session ID to None because we don't use this gateway to send messages
						final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), HighAvailabilityServices.DEFAULT_LEADER_ID);

						// Submit the job and wait for all vertices to be running
						jobManagerGateway.tell(
							new SubmitJob(
								jobGraph,
								ListeningBehaviour.EXECUTION_RESULT),
							testActorGateway);
						expectMsgClass(JobSubmitSuccess.class);

						jobManagerGateway.tell(new WaitForAllVerticesToBeRunning(jid), testActorGateway);
						expectMsgClass(AllVerticesRunning.class);

						jobManagerGateway.tell(new StopJob(jid), testActorGateway);

						// - The test ----------------------------------------------------------------------
						expectMsgClass(StoppingSuccess.class);

						expectMsgClass(JobResultSuccess.class);
					} finally {
						if (cluster != null) {
							cluster.shutdown();
						}
					}
				}
			};
		}};
	}

	@Test
	public void testStopSignalFail() throws Exception {
		new JavaTestKit(system) {{
			new Within(duration("15 seconds")) {
				@Override
				protected void run() {
					// Setup
					TestingCluster cluster = null;

					try {
						cluster = startTestingCluster(2, 1, DEFAULT_AKKA_ASK_TIMEOUT());

						// Create a task
						final JobVertex sender = new JobVertex("Sender");
						sender.setParallelism(1);
						sender.setInvokableClass(BlockingNoOpInvokable.class); // just block

						final JobGraph jobGraph = new JobGraph("Non-Stoppable batching test job", sender);
						final JobID jid = jobGraph.getJobID();

						final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());

						// we can set the leader session ID to None because we don't use this gateway to send messages
						final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), HighAvailabilityServices.DEFAULT_LEADER_ID);

						// Submit the job and wait for all vertices to be running
						jobManagerGateway.tell(
							new SubmitJob(
								jobGraph,
								ListeningBehaviour.EXECUTION_RESULT),
							testActorGateway);
						expectMsgClass(JobSubmitSuccess.class);

						jobManagerGateway.tell(new WaitForAllVerticesToBeRunning(jid), testActorGateway);
						expectMsgClass(AllVerticesRunning.class);

						jobManagerGateway.tell(new StopJob(jid), testActorGateway);

						// - The test ----------------------------------------------------------------------
						expectMsgClass(StoppingFailure.class);

						jobManagerGateway.tell(new RequestExecutionGraph(jid), testActorGateway);

						expectMsgClass(ExecutionGraphFound.class);
					} finally {
						if (cluster != null) {
							cluster.shutdown();
						}
					}
				}
			};
		}};
	}

	/**
	 * Tests that the JobManager handles {@link org.apache.flink.runtime.query.KvStateMessage}
	 * instances as expected.
	 */
	@Test
	public void testKvStateMessages() throws Exception {
		Deadline deadline = new FiniteDuration(100, TimeUnit.SECONDS).fromNow();

		Configuration config = new Configuration();
		config.setString(ConfigConstants.AKKA_ASK_TIMEOUT, "100ms");

		ActorRef jobManagerActor = JobManager.startJobManagerActors(
			config,
			system,
			TestingUtils.defaultExecutor(),
			TestingUtils.defaultExecutor(),
			highAvailabilityServices,
			TestingJobManager.class,
			MemoryArchivist.class)._1();

		UUID leaderId = LeaderRetrievalUtils.retrieveLeaderSessionId(
			highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID),
			TestingUtils.TESTING_TIMEOUT());

		ActorGateway jobManager = new AkkaActorGateway(
				jobManagerActor,
				leaderId);

		Configuration tmConfig = new Configuration();
		tmConfig.setLong(TaskManagerOptions.MANAGED_MEMORY_SIZE, 4L);
		tmConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 8);

		ActorRef taskManager = TaskManager.startTaskManagerComponentsAndActor(
			tmConfig,
			ResourceID.generate(),
			system,
			highAvailabilityServices,
			"localhost",
			scala.Option.<String>empty(),
			true,
			TestingTaskManager.class);

		Future<Object> registrationFuture = jobManager
				.ask(new NotifyWhenAtLeastNumTaskManagerAreRegistered(1), deadline.timeLeft());

		Await.ready(registrationFuture, deadline.timeLeft());

		//
		// Location lookup
		//
		LookupKvStateLocation lookupNonExistingJob = new LookupKvStateLocation(
				new JobID(),
				"any-name");

		Future<KvStateLocation> lookupFuture = jobManager
				.ask(lookupNonExistingJob, deadline.timeLeft())
				.mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));

		try {
			Await.result(lookupFuture, deadline.timeLeft());
			fail("Did not throw expected Exception");
		} catch (IllegalStateException ignored) {
			// Expected
		}

		JobGraph jobGraph = new JobGraph("croissant");
		JobVertex jobVertex1 = new JobVertex("cappuccino");
		jobVertex1.setParallelism(4);
		jobVertex1.setMaxParallelism(16);
		jobVertex1.setInvokableClass(BlockingNoOpInvokable.class);

		JobVertex jobVertex2 = new JobVertex("americano");
		jobVertex2.setParallelism(4);
		jobVertex2.setMaxParallelism(16);
		jobVertex2.setInvokableClass(BlockingNoOpInvokable.class);

		jobGraph.addVertex(jobVertex1);
		jobGraph.addVertex(jobVertex2);

		Future<JobSubmitSuccess> submitFuture = jobManager
				.ask(new SubmitJob(jobGraph, ListeningBehaviour.DETACHED), deadline.timeLeft())
				.mapTo(ClassTag$.MODULE$.<JobSubmitSuccess>apply(JobSubmitSuccess.class));

		Await.result(submitFuture, deadline.timeLeft());

		Object lookupUnknownRegistrationName = new LookupKvStateLocation(
				jobGraph.getJobID(),
				"unknown");

		lookupFuture = jobManager
				.ask(lookupUnknownRegistrationName, deadline.timeLeft())
				.mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));

		try {
			Await.result(lookupFuture, deadline.timeLeft());
			fail("Did not throw expected Exception");
		} catch (UnknownKvStateLocation ignored) {
			// Expected
		}

		//
		// Registration
		//
		NotifyKvStateRegistered registerNonExistingJob = new NotifyKvStateRegistered(
				new JobID(),
				new JobVertexID(),
				new KeyGroupRange(0, 0),
				"any-name",
				new KvStateID(),
				new KvStateServerAddress(InetAddress.getLocalHost(), 1233));

		jobManager.tell(registerNonExistingJob);

		LookupKvStateLocation lookupAfterRegistration = new LookupKvStateLocation(
				registerNonExistingJob.getJobId(),
				registerNonExistingJob.getRegistrationName());

		lookupFuture = jobManager
				.ask(lookupAfterRegistration, deadline.timeLeft())
				.mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));

		try {
			Await.result(lookupFuture, deadline.timeLeft());
			fail("Did not throw expected Exception");
		} catch (IllegalStateException ignored) {
			// Expected
		}

		NotifyKvStateRegistered registerForExistingJob = new NotifyKvStateRegistered(
				jobGraph.getJobID(),
				jobVertex1.getID(),
				new KeyGroupRange(0, 0),
				"register-me",
				new KvStateID(),
				new KvStateServerAddress(InetAddress.getLocalHost(), 1293));

		jobManager.tell(registerForExistingJob);

		lookupAfterRegistration = new LookupKvStateLocation(
				registerForExistingJob.getJobId(),
				registerForExistingJob.getRegistrationName());

		lookupFuture = jobManager
				.ask(lookupAfterRegistration, deadline.timeLeft())
				.mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));

		KvStateLocation location = Await.result(lookupFuture, deadline.timeLeft());
		assertNotNull(location);

		assertEquals(jobGraph.getJobID(), location.getJobId());
		assertEquals(jobVertex1.getID(), location.getJobVertexId());
		assertEquals(jobVertex1.getMaxParallelism(), location.getNumKeyGroups());
		assertEquals(1, location.getNumRegisteredKeyGroups());
		KeyGroupRange keyGroupRange = registerForExistingJob.getKeyGroupRange();
		assertEquals(1, keyGroupRange.getNumberOfKeyGroups());
		assertEquals(registerForExistingJob.getKvStateId(), location.getKvStateID(keyGroupRange.getStartKeyGroup()));
		assertEquals(registerForExistingJob.getKvStateServerAddress(), location.getKvStateServerAddress(keyGroupRange.getStartKeyGroup()));

		//
		// Unregistration
		//
		NotifyKvStateUnregistered unregister = new NotifyKvStateUnregistered(
				registerForExistingJob.getJobId(),
				registerForExistingJob.getJobVertexId(),
				registerForExistingJob.getKeyGroupRange(),
				registerForExistingJob.getRegistrationName());

		jobManager.tell(unregister);

		lookupFuture = jobManager
				.ask(lookupAfterRegistration, deadline.timeLeft())
				.mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));

		try {
			Await.result(lookupFuture, deadline.timeLeft());
			fail("Did not throw expected Exception");
		} catch (UnknownKvStateLocation ignored) {
			// Expected
		}

		//
		// Duplicate registration fails task
		//
		NotifyKvStateRegistered register = new NotifyKvStateRegistered(
				jobGraph.getJobID(),
				jobVertex1.getID(),
				new KeyGroupRange(0, 0),
				"duplicate-me",
				new KvStateID(),
				new KvStateServerAddress(InetAddress.getLocalHost(), 1293));

		NotifyKvStateRegistered duplicate = new NotifyKvStateRegistered(
				jobGraph.getJobID(),
				jobVertex2.getID(), // <--- different operator, but...
				new KeyGroupRange(0, 0),
				"duplicate-me", // ...same name
				new KvStateID(),
				new KvStateServerAddress(InetAddress.getLocalHost(), 1293));

		Future<TestingJobManagerMessages.JobStatusIs> failedFuture = jobManager
				.ask(new NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.FAILED), deadline.timeLeft())
				.mapTo(ClassTag$.MODULE$.<JobStatusIs>apply(JobStatusIs.class));

		jobManager.tell(register);
		jobManager.tell(duplicate);

		// Wait for failure
		JobStatusIs jobStatus = Await.result(failedFuture, deadline.timeLeft());
		assertEquals(JobStatus.FAILED, jobStatus.state());

	}

	@Test
	public void testCancelWithSavepoint() throws Exception {
		File defaultSavepointDir = tmpFolder.newFolder();

		FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);
		Configuration config = new Configuration();
		config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, defaultSavepointDir.getAbsolutePath());

		ActorSystem actorSystem = null;
		ActorGateway jobManager = null;
		ActorGateway archiver = null;
		ActorGateway taskManager = null;
		try {
			actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());

			Tuple2<ActorRef, ActorRef> master = JobManager.startJobManagerActors(
				config,
				actorSystem,
				TestingUtils.defaultExecutor(),
				TestingUtils.defaultExecutor(),
				highAvailabilityServices,
				Option.apply("jm"),
				Option.apply("arch"),
				TestingJobManager.class,
				TestingMemoryArchivist.class);

			UUID leaderId = LeaderRetrievalUtils.retrieveLeaderSessionId(
				highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID),
				TestingUtils.TESTING_TIMEOUT());

			jobManager = new AkkaActorGateway(master._1(), leaderId);
			archiver = new AkkaActorGateway(master._2(), leaderId);

			ActorRef taskManagerRef = TaskManager.startTaskManagerComponentsAndActor(
				config,
				ResourceID.generate(),
				actorSystem,
				highAvailabilityServices,
				"localhost",
				Option.apply("tm"),
				true,
				TestingTaskManager.class);

			taskManager = new AkkaActorGateway(taskManagerRef, leaderId);

			// Wait until connected
			Object msg = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor());
			Await.ready(taskManager.ask(msg, timeout), timeout);

			// Create job graph
			JobVertex sourceVertex = new JobVertex("Source");
			sourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
			sourceVertex.setParallelism(1);

			JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);

			JobCheckpointingSettings snapshottingSettings = new JobCheckpointingSettings(
					Collections.singletonList(sourceVertex.getID()),
					Collections.singletonList(sourceVertex.getID()),
					Collections.singletonList(sourceVertex.getID()),
					3600000,
					3600000,
					0,
					Integer.MAX_VALUE,
					ExternalizedCheckpointSettings.none(),
					null,
					true);

			jobGraph.setSnapshotSettings(snapshottingSettings);

			// Submit job graph
			msg = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);
			Await.result(jobManager.ask(msg, timeout), timeout);

			// Wait for all tasks to be running
			msg = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID());
			Await.result(jobManager.ask(msg, timeout), timeout);

			// Notify when canelled
			msg = new NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.CANCELED);
			Future<Object> cancelled = jobManager.ask(msg, timeout);

			// Cancel with savepoint
			String savepointPath = null;

			for (int i = 0; i < 10; i++) {
				msg = new JobManagerMessages.CancelJobWithSavepoint(jobGraph.getJobID(), null);
				CancellationResponse cancelResp = (CancellationResponse) Await.result(jobManager.ask(msg, timeout), timeout);

				if (cancelResp instanceof CancellationFailure) {
					CancellationFailure failure = (CancellationFailure) cancelResp;
					if (failure.cause().getMessage().contains(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING.message())) {
						Thread.sleep(200); // wait and retry
					} else {
						failure.cause().printStackTrace();
						fail("Failed to cancel job: " + failure.cause().getMessage());
					}
				} else {
					savepointPath = ((CancellationSuccess) cancelResp).savepointPath();
					break;
				}
			}

			// Verify savepoint path
			assertNotEquals("Savepoint not triggered", null, savepointPath);

			// Wait for job status change
			Await.ready(cancelled, timeout);

			File savepointFile = new File(savepointPath);
			assertEquals(true, savepointFile.exists());
		} finally {
			if (actorSystem != null) {
				actorSystem.shutdown();
			}

			if (archiver != null) {
				archiver.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
			}

			if (jobManager != null) {
				jobManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
			}

			if (taskManager != null) {
				taskManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
			}

			if (actorSystem != null) {
				actorSystem.awaitTermination(TESTING_TIMEOUT());
			}
		}
	}

	/**
	 * Tests that a meaningful exception is returned if no savepoint directory is
	 * configured.
	 */
	@Test
	public void testCancelWithSavepointNoDirectoriesConfigured() throws Exception {
		FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);
		Configuration config = new Configuration();

		ActorSystem actorSystem = null;
		ActorGateway jobManager = null;
		ActorGateway archiver = null;
		ActorGateway taskManager = null;
		try {
			actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());

			Tuple2<ActorRef, ActorRef> master = JobManager.startJobManagerActors(
				config,
				actorSystem,
				TestingUtils.defaultExecutor(),
				TestingUtils.defaultExecutor(),
				highAvailabilityServices,
				Option.apply("jm"),
				Option.apply("arch"),
				TestingJobManager.class,
				TestingMemoryArchivist.class);

			UUID leaderId = LeaderRetrievalUtils.retrieveLeaderSessionId(
				highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID),
				TestingUtils.TESTING_TIMEOUT());

			jobManager = new AkkaActorGateway(master._1(), leaderId);
			archiver = new AkkaActorGateway(master._2(), leaderId);

			ActorRef taskManagerRef = TaskManager.startTaskManagerComponentsAndActor(
				config,
				ResourceID.generate(),
				actorSystem,
				highAvailabilityServices,
				"localhost",
				Option.apply("tm"),
				true,
				TestingTaskManager.class);

			taskManager = new AkkaActorGateway(taskManagerRef, leaderId);

			// Wait until connected
			Object msg = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor());
			Await.ready(taskManager.ask(msg, timeout), timeout);

			// Create job graph
			JobVertex sourceVertex = new JobVertex("Source");
			sourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
			sourceVertex.setParallelism(1);

			JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);

			JobCheckpointingSettings snapshottingSettings = new JobCheckpointingSettings(
				Collections.singletonList(sourceVertex.getID()),
				Collections.singletonList(sourceVertex.getID()),
				Collections.singletonList(sourceVertex.getID()),
				3600000,
				3600000,
				0,
				Integer.MAX_VALUE,
				ExternalizedCheckpointSettings.none(),
				null,
				true);

			jobGraph.setSnapshotSettings(snapshottingSettings);

			// Submit job graph
			msg = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);
			Await.result(jobManager.ask(msg, timeout), timeout);

			// Wait for all tasks to be running
			msg = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID());
			Await.result(jobManager.ask(msg, timeout), timeout);

			// Cancel with savepoint
			msg = new JobManagerMessages.CancelJobWithSavepoint(jobGraph.getJobID(), null);
			CancellationResponse cancelResp = (CancellationResponse) Await.result(jobManager.ask(msg, timeout), timeout);

			if (cancelResp instanceof CancellationFailure) {
				CancellationFailure failure = (CancellationFailure) cancelResp;
				assertTrue(failure.cause() instanceof IllegalStateException);
				assertTrue(failure.cause().getMessage().contains("savepoint directory"));
			} else {
				fail("Unexpected cancellation response from JobManager: " + cancelResp);
			}
		} finally {
			if (actorSystem != null) {
				actorSystem.shutdown();
			}

			if (archiver != null) {
				archiver.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
			}

			if (jobManager != null) {
				jobManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
			}

			if (taskManager != null) {
				taskManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
			}
		}
	}

	/**
	 * Tests that we can trigger a savepoint when periodic checkpoints are disabled.
	 */
	@Test
	public void testSavepointWithDeactivatedPeriodicCheckpointing() throws Exception {
		File defaultSavepointDir = tmpFolder.newFolder();

		FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);
		Configuration config = new Configuration();
		config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, defaultSavepointDir.getAbsolutePath());

		ActorSystem actorSystem = null;
		ActorGateway jobManager = null;
		ActorGateway archiver = null;
		ActorGateway taskManager = null;
		try {
			actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());

			Tuple2<ActorRef, ActorRef> master = JobManager.startJobManagerActors(
				config,
				actorSystem,
				TestingUtils.defaultExecutor(),
				TestingUtils.defaultExecutor(),
				highAvailabilityServices,
				Option.apply("jm"),
				Option.apply("arch"),
				TestingJobManager.class,
				TestingMemoryArchivist.class);

			UUID leaderId = LeaderRetrievalUtils.retrieveLeaderSessionId(
				highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID),
				TestingUtils.TESTING_TIMEOUT());

			jobManager = new AkkaActorGateway(master._1(), leaderId);
			archiver = new AkkaActorGateway(master._2(), leaderId);

			ActorRef taskManagerRef = TaskManager.startTaskManagerComponentsAndActor(
				config,
				ResourceID.generate(),
				actorSystem,
				highAvailabilityServices,
				"localhost",
				Option.apply("tm"),
				true,
				TestingTaskManager.class);

			taskManager = new AkkaActorGateway(taskManagerRef, leaderId);

			// Wait until connected
			Object msg = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor());
			Await.ready(taskManager.ask(msg, timeout), timeout);

			// Create job graph
			JobVertex sourceVertex = new JobVertex("Source");
			sourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
			sourceVertex.setParallelism(1);

			JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);

			JobCheckpointingSettings snapshottingSettings = new JobCheckpointingSettings(
					Collections.singletonList(sourceVertex.getID()),
					Collections.singletonList(sourceVertex.getID()),
					Collections.singletonList(sourceVertex.getID()),
					Long.MAX_VALUE, // deactivated checkpointing
					360000,
					0,
					Integer.MAX_VALUE,
					ExternalizedCheckpointSettings.none(),
					null,
					true);

			jobGraph.setSnapshotSettings(snapshottingSettings);

			// Submit job graph
			msg = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);
			Await.result(jobManager.ask(msg, timeout), timeout);

			// Wait for all tasks to be running
			msg = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID());
			Await.result(jobManager.ask(msg, timeout), timeout);

			// Cancel with savepoint
			File targetDirectory = tmpFolder.newFolder();

			msg = new TriggerSavepoint(jobGraph.getJobID(), Option.apply(targetDirectory.getAbsolutePath()));
			Future<Object> future = jobManager.ask(msg, timeout);
			Object result = Await.result(future, timeout);

			assertTrue("Did not trigger savepoint", result instanceof TriggerSavepointSuccess);
			assertEquals(1, targetDirectory.listFiles().length);
		} finally {
			if (actorSystem != null) {
				actorSystem.shutdown();
			}

			if (archiver != null) {
				archiver.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
			}

			if (jobManager != null) {
				jobManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
			}

			if (taskManager != null) {
				taskManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
			}

			if (actorSystem != null) {
				actorSystem.awaitTermination(TestingUtils.TESTING_TIMEOUT());
			}
		}
	}

	/**
	 * Tests that configured {@link SavepointRestoreSettings} are respected.
	 */
	@Test
	public void testSavepointRestoreSettings() throws Exception {
		FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);

		ActorSystem actorSystem = null;
		ActorGateway jobManager = null;
		ActorGateway archiver = null;
		ActorGateway taskManager = null;
		try {
			actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());

			Tuple2<ActorRef, ActorRef> master = JobManager.startJobManagerActors(
				new Configuration(),
				actorSystem,
				TestingUtils.defaultExecutor(),
				TestingUtils.defaultExecutor(),
				highAvailabilityServices,
				Option.apply("jm"),
				Option.apply("arch"),
				TestingJobManager.class,
				TestingMemoryArchivist.class);

			UUID leaderId = LeaderRetrievalUtils.retrieveLeaderSessionId(
				highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID),
				TestingUtils.TESTING_TIMEOUT());

			jobManager = new AkkaActorGateway(master._1(), leaderId);
			archiver = new AkkaActorGateway(master._2(), leaderId);

			Configuration tmConfig = new Configuration();
			tmConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 4);

			ActorRef taskManagerRef = TaskManager.startTaskManagerComponentsAndActor(
				tmConfig,
				ResourceID.generate(),
				actorSystem,
				highAvailabilityServices,
				"localhost",
				Option.apply("tm"),
				true,
				TestingTaskManager.class);

			taskManager = new AkkaActorGateway(taskManagerRef, leaderId);

			// Wait until connected
			Object msg = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor());
			Await.ready(taskManager.ask(msg, timeout), timeout);

			// Create job graph
			JobVertex sourceVertex = new JobVertex("Source");
			sourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
			sourceVertex.setParallelism(1);

			JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);

			JobCheckpointingSettings snapshottingSettings = new JobCheckpointingSettings(
					Collections.singletonList(sourceVertex.getID()),
					Collections.singletonList(sourceVertex.getID()),
					Collections.singletonList(sourceVertex.getID()),
					Long.MAX_VALUE, // deactivated checkpointing
					360000,
					0,
					Integer.MAX_VALUE,
					ExternalizedCheckpointSettings.none(),
					null,
					true);

			jobGraph.setSnapshotSettings(snapshottingSettings);

			// Submit job graph
			msg = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);
			Await.result(jobManager.ask(msg, timeout), timeout);

			// Wait for all tasks to be running
			msg = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID());
			Await.result(jobManager.ask(msg, timeout), timeout);

			// Trigger savepoint
			File targetDirectory = tmpFolder.newFolder();
			msg = new TriggerSavepoint(jobGraph.getJobID(), Option.apply(targetDirectory.getAbsolutePath()));
			Future<Object> future = jobManager.ask(msg, timeout);
			Object result = Await.result(future, timeout);

			String savepointPath = ((TriggerSavepointSuccess) result).savepointPath();

			// Cancel because of restarts
			msg = new TestingJobManagerMessages.NotifyWhenJobRemoved(jobGraph.getJobID());
			Future<?> removedFuture = jobManager.ask(msg, timeout);

			Future<?> cancelFuture = jobManager.ask(new CancelJob(jobGraph.getJobID()), timeout);
			Object response = Await.result(cancelFuture, timeout);
			assertTrue("Unexpected response: " + response, response instanceof CancellationSuccess);

			Await.ready(removedFuture, timeout);

			// Adjust the job (we need a new operator ID)
			JobVertex newSourceVertex = new JobVertex("NewSource");
			newSourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
			newSourceVertex.setParallelism(1);

			JobGraph newJobGraph = new JobGraph("NewTestingJob", newSourceVertex);

			JobCheckpointingSettings newSnapshottingSettings = new JobCheckpointingSettings(
					Collections.singletonList(newSourceVertex.getID()),
					Collections.singletonList(newSourceVertex.getID()),
					Collections.singletonList(newSourceVertex.getID()),
					Long.MAX_VALUE, // deactivated checkpointing
					360000,
					0,
					Integer.MAX_VALUE,
					ExternalizedCheckpointSettings.none(),
					null,
					true);

			newJobGraph.setSnapshotSettings(newSnapshottingSettings);

			SavepointRestoreSettings restoreSettings = SavepointRestoreSettings.forPath(savepointPath, false);
			newJobGraph.setSavepointRestoreSettings(restoreSettings);

			msg = new JobManagerMessages.SubmitJob(newJobGraph, ListeningBehaviour.DETACHED);
			response = Await.result(jobManager.ask(msg, timeout), timeout);

			assertTrue("Unexpected response: " + response, response instanceof JobManagerMessages.JobResultFailure);

			JobManagerMessages.JobResultFailure failure = (JobManagerMessages.JobResultFailure) response;
			Throwable cause = failure.cause().deserializeError(ClassLoader.getSystemClassLoader());

			assertTrue(cause instanceof IllegalStateException);
			assertTrue(cause.getMessage().contains("allowNonRestoredState"));

			// Wait until removed
			msg = new TestingJobManagerMessages.NotifyWhenJobRemoved(newJobGraph.getJobID());
			Await.ready(jobManager.ask(msg, timeout), timeout);

			// Resubmit, but allow non restored state now
			restoreSettings = SavepointRestoreSettings.forPath(savepointPath, true);
			newJobGraph.setSavepointRestoreSettings(restoreSettings);

			msg = new JobManagerMessages.SubmitJob(newJobGraph, ListeningBehaviour.DETACHED);
			response = Await.result(jobManager.ask(msg, timeout), timeout);

			assertTrue("Unexpected response: " + response, response instanceof JobManagerMessages.JobSubmitSuccess);
		} finally {
			if (actorSystem != null) {
				actorSystem.shutdown();
			}

			if (archiver != null) {
				archiver.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
			}

			if (jobManager != null) {
				jobManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
			}

			if (taskManager != null) {
				taskManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
			}

			if (actorSystem != null) {
				actorSystem.awaitTermination(TestingUtils.TESTING_TIMEOUT());
			}
		}
	}

	/**
	 * This tests makes sure that triggering a reconnection from the ResourceManager will stop after a new
	 * ResourceManager has connected. Furthermore it makes sure that there is not endless loop of reconnection
	 * commands (see FLINK-6341).
	 */
	@Test
	public void testResourceManagerConnection() throws TimeoutException, InterruptedException {
		FiniteDuration testTimeout = new FiniteDuration(30L, TimeUnit.SECONDS);
		final long reconnectionInterval = 200L;

		final Configuration configuration = new Configuration();
		configuration.setLong(JobManagerOptions.RESOURCE_MANAGER_RECONNECT_INTERVAL, reconnectionInterval);


		final ActorSystem actorSystem = AkkaUtils.createLocalActorSystem(configuration);

		try {
			final ActorGateway jmGateway = TestingUtils.createJobManager(
				actorSystem,
				TestingUtils.defaultExecutor(),
				TestingUtils.defaultExecutor(),
				configuration,
				highAvailabilityServices);

			final TestProbe probe = TestProbe.apply(actorSystem);
			final AkkaActorGateway rmGateway = new AkkaActorGateway(probe.ref(), HighAvailabilityServices.DEFAULT_LEADER_ID);

			// wait for the JobManager to become the leader
			Future<?> leaderFuture = jmGateway.ask(TestingJobManagerMessages.getNotifyWhenLeader(), testTimeout);
			Await.ready(leaderFuture, testTimeout);

			jmGateway.tell(new RegisterResourceManager(probe.ref()), rmGateway);

			JobManagerMessages.LeaderSessionMessage leaderSessionMessage = probe.expectMsgClass(JobManagerMessages.LeaderSessionMessage.class);

			assertEquals(jmGateway.leaderSessionID(), leaderSessionMessage.leaderSessionID());
			assertTrue(leaderSessionMessage.message() instanceof RegisterResourceManagerSuccessful);

			jmGateway.tell(
				new RegistrationMessages.RegisterTaskManager(
					ResourceID.generate(),
					mock(TaskManagerLocation.class),
					new HardwareDescription(1, 1L, 1L, 1L),
					1));
			leaderSessionMessage = probe.expectMsgClass(JobManagerMessages.LeaderSessionMessage.class);

			assertTrue(leaderSessionMessage.message() instanceof NotifyResourceStarted);

			// fail the NotifyResourceStarted so that we trigger the reconnection process on the JobManager's side
			probe.lastSender().tell(new Status.Failure(new Exception("Test exception")), ActorRef.noSender());

			Deadline reconnectionDeadline = new FiniteDuration(5L * reconnectionInterval, TimeUnit.MILLISECONDS).fromNow();
			boolean registered = false;

			while (reconnectionDeadline.hasTimeLeft()) {
				try {
					leaderSessionMessage = probe.expectMsgClass(reconnectionDeadline.timeLeft(), JobManagerMessages.LeaderSessionMessage.class);
				} catch (AssertionError ignored) {
					// expected timeout after the reconnectionDeadline has been exceeded
					continue;
				}

				if (leaderSessionMessage.message() instanceof TriggerRegistrationAtJobManager) {
					if (registered) {
						fail("A successful registration should not be followed by another TriggerRegistrationAtJobManager message.");
					}

					jmGateway.tell(new RegisterResourceManager(probe.ref()), rmGateway);
				} else if (leaderSessionMessage.message() instanceof RegisterResourceManagerSuccessful) {
					// now we should no longer receive TriggerRegistrationAtJobManager messages
					registered = true;
				} else {
					fail("Received unknown message: " + leaderSessionMessage.message() + '.');
				}
			}

			assertTrue(registered);

		} finally {
			// cleanup the actor system and with it all of the started actors if not already terminated
			actorSystem.shutdown();
			actorSystem.awaitTermination();
		}
	}
}