/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.test.runtime.leaderelection;
import akka.actor.ActorSystem;
import akka.actor.Kill;
import akka.actor.PoisonPill;
import org.apache.curator.test.TestingServer;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.akka.AkkaUtils;
import org.apache.flink.runtime.client.JobClient;
import org.apache.flink.runtime.instance.ActorGateway;
import org.apache.flink.runtime.io.network.partition.ResultPartitionType;
import org.apache.flink.runtime.jobgraph.DistributionPattern;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.jobmanager.Tasks;
import org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup;
import org.apache.flink.runtime.messages.JobManagerMessages;
import org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster;
import org.apache.flink.runtime.testingUtils.TestingCluster;
import org.apache.flink.runtime.testingUtils.TestingJobManagerMessages;
import org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunningOrFinished;
import org.apache.flink.runtime.testingUtils.TestingUtils;
import org.apache.flink.runtime.testutils.ZooKeeperTestUtils;
import org.apache.flink.util.TestLogger;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.concurrent.Await;
import scala.concurrent.Future;
import scala.concurrent.duration.Deadline;
import scala.concurrent.duration.FiniteDuration;
import scala.concurrent.impl.Promise;
import java.io.File;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
public class ZooKeeperLeaderElectionITCase extends TestLogger {
private static final FiniteDuration timeout = TestingUtils.TESTING_DURATION();
private static TestingServer zkServer;
@Rule
public TemporaryFolder tempFolder = new TemporaryFolder();
@BeforeClass
public static void setup() throws Exception {
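		// Start an in-process ZooKeeper server (Curator TestingServer) that is shared by both test cases.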
zkServer = new TestingServer(true);
}
@AfterClass
public static void tearDown() throws Exception {
if (zkServer != null) {
zkServer.close();
zkServer = null;
}
}
/**
	 * Tests that the TaskManagers successfully register with the new leader once the old leader
	 * has been terminated.
*/
@Test
public void testTaskManagerRegistrationAtReelectedLeader() throws Exception {
File rootFolder = tempFolder.getRoot();
Configuration configuration = ZooKeeperTestUtils.createZooKeeperHAConfig(
zkServer.getConnectString(),
rootFolder.getPath());
int numJMs = 10;
int numTMs = 3;
configuration.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, numJMs);
configuration.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTMs);
TestingCluster cluster = new TestingCluster(configuration);
try {
cluster.start();
for (int i = 0; i < numJMs; i++) {
ActorGateway leadingJM = cluster.getLeaderGateway(timeout);
cluster.waitForTaskManagersToBeRegisteredAtJobManager(leadingJM.actor());
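				// Ask the current leader how many TaskManagers have registered with it.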
Future<Object> registeredTMs = leadingJM.ask(
JobManagerMessages.getRequestNumberRegisteredTaskManager(),
timeout);
int numRegisteredTMs = (Integer) Await.result(registeredTMs, timeout);
assertEquals(numTMs, numRegisteredTMs);
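				// Clear the recorded leader and terminate the leading JobManager so that a new leader election is triggered.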
cluster.clearLeader();
leadingJM.tell(PoisonPill.getInstance());
}
} finally {
cluster.stop();
}
}
/**
	 * Tests that a job can be executed after a new leader has been elected. For all leaders except
	 * the last one, the job is blocking. The leading JobManager is terminated while the blocking
	 * job is executing. Once only one JobManager is left, it is checked that a non-blocking job
	 * can be successfully executed.
*/
@Test
public void testJobExecutionOnClusterWithLeaderReelection() throws Exception {
int numJMs = 10;
int numTMs = 2;
int numSlotsPerTM = 3;
int parallelism = numTMs * numSlotsPerTM;
File rootFolder = tempFolder.getRoot();
Configuration configuration = ZooKeeperTestUtils.createZooKeeperHAConfig(
zkServer.getConnectString(),
rootFolder.getPath());
configuration.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, numJMs);
configuration.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTMs);
configuration.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTM);
		// we "effectively" disable the automatic RecoverAllJobs message and send it manually to make
		// sure that all TMs have registered with the JM before the RecoverAllJobs message is issued
configuration.setString(ConfigConstants.AKKA_ASK_TIMEOUT, AkkaUtils.INF_TIMEOUT().toString());
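		// Make the receiver tasks block (until the flag is cleared further below) so that the job
		// keeps running while the leading JobManagers are killed.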
Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(true);
JobVertex sender = new JobVertex("sender");
JobVertex receiver = new JobVertex("receiver");
sender.setInvokableClass(Tasks.Sender.class);
receiver.setInvokableClass(Tasks.BlockingOnceReceiver.class);
sender.setParallelism(parallelism);
receiver.setParallelism(parallelism);
receiver.connectNewDataSetAsInput(sender, DistributionPattern.POINTWISE,
ResultPartitionType.PIPELINED);
SlotSharingGroup slotSharingGroup = new SlotSharingGroup();
sender.setSlotSharingGroup(slotSharingGroup);
receiver.setSlotSharingGroup(slotSharingGroup);
final JobGraph graph = new JobGraph("Blocking test job", sender, receiver);
final TestingCluster cluster = new TestingCluster(configuration);
ActorSystem clientActorSystem = null;
Thread thread = null;
JobSubmitterRunnable jobSubmission = null;
try {
cluster.start();
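			// Start a separate actor system for the job client so that it keeps running while JobManagers are killed.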
clientActorSystem = cluster.startJobClientActorSystem(graph.getJobID());
final ActorSystem clientAS = clientActorSystem;
jobSubmission = new JobSubmitterRunnable(clientAS, cluster, graph);
thread = new Thread(jobSubmission);
thread.start();
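			// Overall deadline for the test: three times the regular testing timeout.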
Deadline deadline = timeout.$times(3).fromNow();
			// Kill all JobManagers except for the last one
for (int i = 0; i < numJMs; i++) {
ActorGateway jm = cluster.getLeaderGateway(deadline.timeLeft());
cluster.waitForTaskManagersToBeRegisteredAtJobManager(jm.actor());
				// trigger job recovery manually, since the automatic RecoverAllJobs message is disabled
				log.info("Sending RecoverAllJobs manually to job manager {}.", jm.path());
jm.tell(JobManagerMessages.getRecoverAllJobs());
if (i < numJMs - 1) {
					Future<Object> future = jm.ask(
						new WaitForAllVerticesToBeRunningOrFinished(graph.getJobID()),
						deadline.timeLeft());
Await.ready(future, deadline.timeLeft());
cluster.clearLeader();
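					// Before the last JobManager becomes leader, unblock the receivers so that the
					// final job execution can complete.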
if (i == numJMs - 2) {
Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(false);
}
log.info("Kill job manager {}.", jm.path());
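					// Disable the JobManager's postStop cleanup before killing its actor, so that the
					// termination looks like a hard failure rather than a graceful shutdown.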
jm.tell(TestingJobManagerMessages.getDisablePostStop());
jm.tell(Kill.getInstance());
}
}
log.info("Waiting for submitter thread to terminate.");
thread.join(deadline.timeLeft().toMillis());
log.info("Submitter thread has terminated.");
if (thread.isAlive()) {
				fail("The job submission thread did not stop (meaning it did not succeed in " +
					"executing the test job).");
}
Await.result(jobSubmission.resultPromise.future(), deadline.timeLeft());
}
finally {
if (clientActorSystem != null) {
cluster.shutdownJobClientActorSystem(clientActorSystem);
}
if (thread != null && thread.isAlive()) {
jobSubmission.finished = true;
}
cluster.stop();
}
}
public static class JobSubmitterRunnable implements Runnable {
private static final Logger LOG = LoggerFactory.getLogger(JobSubmitterRunnable.class);
boolean finished = false;
final ActorSystem clientActorSystem;
final LocalFlinkMiniCluster cluster;
final JobGraph graph;
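		// Scala promise via which the job execution result (or failure) is handed back to the test thread.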
final Promise<JobExecutionResult> resultPromise = new Promise.DefaultPromise<>();
public JobSubmitterRunnable(
ActorSystem actorSystem,
LocalFlinkMiniCluster cluster,
JobGraph graph) {
this.clientActorSystem = actorSystem;
this.cluster = cluster;
this.graph = graph;
}
@Override
public void run() {
try {
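				// Submit the job and block until it finishes; the outcome is forwarded via resultPromise.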
JobExecutionResult result = JobClient.submitJobAndWait(
clientActorSystem,
cluster.configuration(),
cluster.highAvailabilityServices(),
graph,
timeout,
false,
getClass().getClassLoader());
resultPromise.success(result);
} catch (Exception e) {
// This was not expected... fail the test case
resultPromise.failure(e);
}
}
}
}