/*
* Copyright 2014-2015 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.xd.dirt.server.admin.deployment.zk;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.recipes.cache.PathChildrenCache;
import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener;
import org.apache.curator.framework.recipes.leader.LeaderSelector;
import org.apache.curator.framework.recipes.leader.LeaderSelectorListener;
import org.apache.curator.framework.recipes.leader.LeaderSelectorListenerAdapter;
import org.apache.curator.utils.ThreadUtils;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.DisposableBean;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.context.embedded.EmbeddedServletContainerInitializedEvent;
import org.springframework.boot.context.embedded.EmbeddedWebApplicationContext;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationEvent;
import org.springframework.context.ApplicationListener;
import org.springframework.context.event.ContextRefreshedEvent;
import org.springframework.context.event.ContextStoppedEvent;
import org.springframework.util.StringUtils;
import org.springframework.xd.dirt.cluster.Admin;
import org.springframework.xd.dirt.cluster.AdminAttributes;
import org.springframework.xd.dirt.container.store.AdminRepository;
import org.springframework.xd.dirt.container.store.ContainerRepository;
import org.springframework.xd.dirt.job.JobFactory;
import org.springframework.xd.dirt.server.admin.deployment.ContainerMatcher;
import org.springframework.xd.dirt.server.admin.deployment.DeploymentUnitStateCalculator;
import org.springframework.xd.dirt.stream.StreamFactory;
import org.springframework.xd.dirt.zookeeper.Paths;
import org.springframework.xd.dirt.zookeeper.ZooKeeperConnection;
import org.springframework.xd.dirt.zookeeper.ZooKeeperConnectionListener;
import org.springframework.xd.dirt.zookeeper.ZooKeeperUtils;
/**
* Process that watches ZooKeeper for Container arrivals and departures from
* the XD cluster. Each {@code DeploymentSupervisor} instance will attempt
* to request leadership, but at any given time only one {@code DeploymentSupervisor}
* instance in the cluster will have leadership status.
*
* @author Patrick Peralta
* @author Mark Fisher
* @author Ilayaperumal Gopinathan
*
* @see org.apache.curator.framework.recipes.leader.LeaderSelector
*/
public class DeploymentSupervisor implements ApplicationListener<ApplicationEvent>, DisposableBean {
/**
* Logger for this class.
*/
private static final Logger logger = LoggerFactory.getLogger(DeploymentSupervisor.class);
/**
* ZooKeeper connection. Used to check connectivity, obtain the Curator
* client, and register the {@link #connectionListener}.
*/
@Autowired
private ZooKeeperConnection zkConnection;
/**
* Repository in which the attributes of this admin server are saved
* (upon registration) and updated (once the server ports are known).
*/
@Autowired
private AdminRepository adminRepository;
/**
* Consumer for {@link org.springframework.xd.dirt.server.admin.deployment.DeploymentMessage}s.
* Handed to the {@link DeploymentQueue} that is created when this admin
* server takes leadership.
*/
@Autowired
private DeploymentMessageConsumer deploymentMessageConsumer;
/**
* Factory to construct {@link org.springframework.xd.dirt.core.Stream} instances.
* Passed to the {@link ContainerListener} created upon leadership election.
*/
@Autowired
private StreamFactory streamFactory;
/**
* Factory to construct {@link org.springframework.xd.dirt.core.Job} instances.
* Passed to the {@link ContainerListener} created upon leadership election.
*/
@Autowired
private JobFactory jobFactory;
/**
* Matcher that applies container matching criteria.
* Passed to the {@link ContainerListener} created upon leadership election.
*/
@Autowired
private ContainerMatcher containerMatcher;
/**
* Repository for the containers.
* Passed to the {@link ContainerListener} created upon leadership election.
*/
@Autowired
private ContainerRepository containerRepository;
/**
* Utility that writes module deployment requests to ZK path.
* Passed to the {@link ContainerListener} created upon leadership election.
*/
@Autowired
private ModuleDeploymentWriter moduleDeploymentWriter;
/**
* Deployment unit state calculator.
* Passed to the {@link ContainerListener} created upon leadership election.
*/
@Autowired
private DeploymentUnitStateCalculator stateCalculator;
/**
* Attributes for admin stored in admin repository. This object also serves
* as the monitor that guards reading the attributes and writing them to
* the repository as a single step.
*/
private final AdminAttributes adminAttributes;
/**
* ZK distributed queue for the {@link DeploymentMessageConsumer}
* to use. Created and started when this admin server takes leadership and
* destroyed when leadership ends.
*/
private volatile DeploymentQueue deploymentQueueForConsumer = null;
/**
* {@link ApplicationContext} for this admin server. This reference is updated
* via an application context event and read via {@link #getId()}.
*/
private volatile ApplicationContext applicationContext;
/**
* Leader selector to elect admin server that will handle stream deployment requests. Marked volatile because this
* reference is written and read by more than one thread (the thread delivering application context events and the
* threads delivering ZooKeeper connection events) - there is no guarantee that the same thread will do the reading
* and writing.
*/
private volatile LeaderSelector leaderSelector;
/**
* Listener that is invoked when this admin server is elected leader.
*/
private final LeaderSelectorListener leaderListener = new LeaderListener();
/**
* ZooKeeper connection listener that registers this admin server and
* attempts to obtain leadership when the ZooKeeper connection is
* established or resumed, and closes the leader selector when the
* connection is suspended or terminated.
*/
private final ConnectionListener connectionListener = new ConnectionListener();
/**
* Single-threaded scheduled executor service used to execute Curator path
* cache events. The same executor is also handed to the
* {@link ContainerListener} and the {@link DeploymentQueue} created upon
* leadership election.
*
* @see #instantiatePathChildrenCache
*/
private final ScheduledExecutorService executorService =
Executors.newSingleThreadScheduledExecutor(ThreadUtils.newThreadFactory("DeploymentSupervisor"));
/**
* Namespace of the child application context that is created for
* management endpoints when a custom management port is selected.
* Events originating from a context with this namespace must not trigger
* ZooKeeper initialization.
*/
private final static String MGMT_CONTEXT_NAMESPACE = "management";
/**
* The amount of time (in milliseconds, 15000 by default) that must elapse
* after the newest container arrives before deployments to new containers
* are initiated. Overridden by {@link #QUIET_PERIOD_PROPERTY} when set.
*/
private final AtomicLong quietPeriod = new AtomicLong(15000);
/**
* Property for specifying the {@link #quietPeriod quiet period}
* for deployments to new containers.
*/
public static final String QUIET_PERIOD_PROPERTY = "xd.admin.quietPeriod";
/**
* Construct a {@code DeploymentSupervisor} for the admin server described
* by the given attributes.
*
* @param adminAttributes the admin attributes; the reference is stored as-is
* (not copied) and is later written to the admin repository
*/
public DeploymentSupervisor(AdminAttributes adminAttributes) {
this.adminAttributes = adminAttributes;
}
/**
 * React to application context lifecycle events.
 * <ul>
 * <li>{@link ContextRefreshedEvent}: unless the event originates from the
 * management child context, remember the application context, read the
 * optional {@link #QUIET_PERIOD_PROPERTY}, register this admin server and
 * request leadership if ZooKeeper is already connected, and subscribe to
 * ZooKeeper connection events.</li>
 * <li>{@link ContextStoppedEvent}: close the leader selector.</li>
 * <li>{@link EmbeddedServletContainerInitializedEvent}: record the server
 * (or management) port in the admin attributes and, if this admin server
 * is already registered, update its entry in the admin repository.</li>
 * </ul>
 *
 * @param event the application event
 */
@Override
public void onApplicationEvent(ApplicationEvent event) {
    if (event instanceof ContextRefreshedEvent) {
        String namespace = ((EmbeddedWebApplicationContext) event.getSource()).getNamespace();
        // If a custom management port is selected, a child application context
        // for management will be created (see EndpointWebMvcAutoConfiguration
        // in Spring Boot). Since the management context does not contain ZooKeeper
        // beans, ZooKeeper related initialization should not take place.
        // See https://jira.spring.io/browse/XD-2861.
        if (!MGMT_CONTEXT_NAMESPACE.equals(namespace)) {
            this.applicationContext = ((ContextRefreshedEvent) event).getApplicationContext();
            String delay = this.applicationContext.getEnvironment().getProperty(QUIET_PERIOD_PROPERTY);
            if (StringUtils.hasText(delay)) {
                // tolerate surrounding whitespace in the configured value
                quietPeriod.set(Long.parseLong(delay.trim()));
                logger.info("Set container quiet period to {} ms", delay);
            }
            if (this.zkConnection.isConnected()) {
                // initial registration, we don't yet have a port info
                registerWithZooKeeper(zkConnection.getClient());
                requestLeadership(this.zkConnection.getClient());
            }
            this.zkConnection.addListener(connectionListener);
        }
    }
    else if (event instanceof ContextStoppedEvent) {
        // Delegate to destroy() so that the reference to the closed selector
        // is cleared as well. Closing the selector while leaving the field
        // set would make any later destroy() call close it a second time
        // (LeaderSelector rejects that with an IllegalStateException) and
        // would prevent requestLeadership from creating a fresh selector.
        destroy();
    }
    else if (event instanceof EmbeddedServletContainerInitializedEvent) {
        EmbeddedServletContainerInitializedEvent containerEvent =
                (EmbeddedServletContainerInitializedEvent) event;
        String namespace = containerEvent.getApplicationContext().getNamespace();
        int port = containerEvent.getEmbeddedServletContainer().getPort();
        // updating the attributes and writing them to the repository must
        // not interleave with the registration in registerWithZooKeeper
        synchronized (adminAttributes) {
            if (MGMT_CONTEXT_NAMESPACE.equals(namespace)) {
                adminAttributes.setManagementPort(port);
            }
            else {
                adminAttributes.setPort(port);
            }
            if (zkConnection.isConnected() && adminRepository.exists(adminAttributes.getId())) {
                adminRepository.update(new Admin(adminAttributes.getId(), adminAttributes));
            }
        }
    }
}
/**
 * Return the identifier of this admin server, which is the id of the
 * {@link ApplicationContext} captured in {@link #onApplicationEvent}.
 *
 * @return id for this admin server
 */
private String getId() {
    final ApplicationContext context = this.applicationContext;
    return context.getId();
}
/**
 * Ensure that the ZooKeeper paths used by the supervisor exist and register
 * the {@link LeaderListener} if not already registered. This method is
 * {@code synchronized} because it may be invoked either by the thread
 * starting the {@link ApplicationContext} or the thread that raises the
 * ZooKeeper connection event.
 * <p/>
 * The {@link #leaderSelector} field is assigned only after the selector has
 * been started successfully. This way other threads never observe a selector
 * that has not been started, and a failed start does not leave behind a
 * selector that would block later leadership requests.
 *
 * @param client the {@link CuratorFramework} client
 * @throws RuntimeException as produced by {@link ZooKeeperUtils#wrapThrowable}
 * if a path cannot be created or the selector cannot be started
 */
private synchronized void requestLeadership(CuratorFramework client) {
    try {
        Paths.ensurePath(client, Paths.MODULE_DEPLOYMENTS);
        Paths.ensurePath(client, Paths.STREAM_DEPLOYMENTS);
        Paths.ensurePath(client, Paths.JOB_DEPLOYMENTS);
        Paths.ensurePath(client, Paths.ADMINS);
        Paths.ensurePath(client, Paths.CONTAINERS);
        Paths.ensurePath(client, Paths.STREAMS);
        Paths.ensurePath(client, Paths.JOBS);
        if (leaderSelector == null) {
            LeaderSelector selector =
                    new LeaderSelector(client, Paths.build(Paths.ADMINELECTION), leaderListener);
            selector.setId(getId());
            selector.start();
            // publish only a fully started selector
            leaderSelector = selector;
        }
    }
    catch (Exception e) {
        throw ZooKeeperUtils.wrapThrowable(e);
    }
}
/**
 * Close the leader selector, if any, thereby withdrawing this admin server
 * from the leader election. Invoked on bean disposal and whenever the
 * ZooKeeper connection is suspended or terminated.
 * <p/>
 * This method is {@code synchronized}, like {@link #requestLeadership},
 * because both may be invoked from different threads. The field is cleared
 * before the selector is closed so that a failure in
 * {@link LeaderSelector#close()} cannot leave a dead selector behind; a
 * subsequent {@link #requestLeadership} will then create a new one.
 */
@Override
public synchronized void destroy() {
    LeaderSelector selector = leaderSelector;
    if (selector != null) {
        leaderSelector = null;
        selector.close();
    }
}
/**
 * Create (but do not start) a Curator {@link PathChildrenCache} for the
 * given path, configured as follows:
 * <ul>
 * <li>node data is cached</li>
 * <li>node data is not compressed</li>
 * <li>event handlers are invoked on {@link #executorService}</li>
 * </ul>
 *
 * @param client the {@link CuratorFramework} client
 * @param path the path for the cache
 * @return Curator path children cache
 */
private PathChildrenCache instantiatePathChildrenCache(CuratorFramework client, String path) {
    final boolean cacheData = true;
    final boolean dataIsCompressed = false;
    return new PathChildrenCache(client, path, cacheData, dataIsCompressed, executorService);
}
/**
 * Write the runtime attributes of this admin server to ZooKeeper under
 * {@link Paths#ADMINS} via the {@link AdminRepository}.
 * <p/>
 * If a node for this admin already exists and is owned by the current
 * ZooKeeper session, nothing is done. If it is owned by a different
 * session, the node is deleted first (a missing node is tolerated) and the
 * attributes are then saved again.
 *
 * @param client the {@link CuratorFramework} client
 */
private void registerWithZooKeeper(CuratorFramework client) {
    try {
        final String adminId = adminAttributes.getId();
        final String adminPath = Paths.build(Paths.ADMINS, adminId);
        final Stat existingNode = client.checkExists().forPath(adminPath);

        if (existingNode != null) {
            final long ownerSession = existingNode.getEphemeralOwner();
            final long activeSession = client.getZookeeperClient().getZooKeeper().getSessionId();

            if (ownerSession == activeSession) {
                // the node belongs to the session we are currently using,
                // so the registration is still valid and nothing is left to do
                String alreadyRegistered = String.format(
                        "Existing registration for admin runtime %s with session 0x%x detected",
                        adminId, activeSession);
                logger.info(alreadyRegistered);
                return;
            }

            String staleRegistration = String.format(
                    "Trying to delete previous registration for admin runtime %s with " +
                    "session %x detected; current session: 0x%x; path: %s",
                    adminId, ownerSession, activeSession, adminPath);
            logger.info(staleRegistration);
            try {
                client.delete().forPath(adminPath);
            }
            catch (Exception deleteFailure) {
                // a NoNodeException only means there is nothing left to delete
                ZooKeeperUtils.wrapAndThrowIgnoring(deleteFailure, NoNodeException.class);
            }
        }

        // Reading the admin attributes and writing them to the admin node
        // have to happen as one step; the handler for
        // EmbeddedServletContainerInitializedEvent in onApplicationEvent
        // takes the same lock while it modifies the attributes.
        synchronized (adminAttributes) {
            adminRepository.save(new Admin(adminId, adminAttributes));
        }
    }
    catch (InterruptedException interrupted) {
        Thread.currentThread().interrupt();
        throw ZooKeeperUtils.wrapThrowable(interrupted);
    }
    catch (Exception failure) {
        throw ZooKeeperUtils.wrapThrowable(failure);
    }
}
/**
 * {@link ZooKeeperConnectionListener} implementation that registers this
 * admin server and requests leadership whenever the ZooKeeper connection is
 * established or resumed, and closes the leader selector whenever the
 * connection is suspended or terminated.
 */
private class ConnectionListener implements ZooKeeperConnectionListener {

    /**
     * {@inheritDoc}
     */
    @Override
    public void onConnect(CuratorFramework client) {
        logger.info("Admin {} connection established", getId());
        registerAndRequestLeadership(client);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void onResume(CuratorFramework client) {
        logger.info("Admin {} connection resumed, client state: {}", getId(), client.getState());
        registerAndRequestLeadership(client);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void onDisconnect(CuratorFramework client) {
        logger.info("Admin {} connection terminated", getId());
        closeLeaderSelectorQuietly();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void onSuspend(CuratorFramework client) {
        logger.info("Admin {} connection suspended", getId());
        closeLeaderSelectorQuietly();
    }

    /**
     * Register this admin server in ZooKeeper, then request leadership.
     *
     * @param client the {@link CuratorFramework} client
     */
    private void registerAndRequestLeadership(CuratorFramework client) {
        registerWithZooKeeper(client);
        requestLeadership(client);
    }

    /**
     * Close the leader selector; a failure is logged and not propagated.
     */
    private void closeLeaderSelectorQuietly() {
        try {
            destroy();
        }
        catch (Exception failure) {
            logger.warn("exception occurred while closing leader selector", failure);
        }
    }
}
/**
 * Listener implementation that is invoked when this server becomes the leader.
 */
class LeaderListener extends LeaderSelectorListenerAdapter {

    /**
     * {@inheritDoc}
     * <p/>
     * Upon leadership election, this Admin server will:
     * <ol>
     * <li>ensure that the requested and allocated module deployment paths exist;</li>
     * <li>create and prime {@link PathChildrenCache}s for the requested module
     * deployments, {@link Paths#STREAM_DEPLOYMENTS} and {@link Paths#JOB_DEPLOYMENTS};</li>
     * <li>notify every {@link SupervisorElectionListener} bean in the application
     * context with a {@link SupervisorElectedEvent} carrying those caches;</li>
     * <li>attach a {@link ContainerListener} to a cache for {@link Paths#CONTAINERS};</li>
     * <li>start the {@link DeploymentQueue} that feeds the deployment message consumer;</li>
     * <li>block until the thread is interrupted.</li>
     * </ol>
     * Upon leadership relinquishment (or failure of any step above) the caches
     * are closed and the queue is destroyed. Each resource is released
     * independently, so a failure to release one does not keep the others open.
     *
     * @param client the {@link CuratorFramework} client
     * @throws Exception if setting up the caches, listeners or queue fails
     */
    @Override
    public void takeLeadership(CuratorFramework client) throws Exception {
        logger.info("Leader Admin {} is watching for stream/job deployment requests.", getId());
        PathChildrenCache containers = null;
        PathChildrenCache streamDeployments = null;
        PathChildrenCache jobDeployments = null;
        PathChildrenCache moduleDeploymentRequests = null;
        try {
            String requestedModulesPath = Paths.build(Paths.MODULE_DEPLOYMENTS, Paths.REQUESTED);
            Paths.ensurePath(client, requestedModulesPath);
            String allocatedModulesPath = Paths.build(Paths.MODULE_DEPLOYMENTS, Paths.ALLOCATED);
            Paths.ensurePath(client, allocatedModulesPath);

            // BUILD_INITIAL_CACHE is used for the three caches below so that
            // their content is populated before they are handed to the
            // election listeners and the container listener
            moduleDeploymentRequests = instantiatePathChildrenCache(client, requestedModulesPath);
            moduleDeploymentRequests.start(PathChildrenCache.StartMode.BUILD_INITIAL_CACHE);
            streamDeployments = instantiatePathChildrenCache(client, Paths.STREAM_DEPLOYMENTS);
            streamDeployments.start(PathChildrenCache.StartMode.BUILD_INITIAL_CACHE);
            jobDeployments = instantiatePathChildrenCache(client, Paths.JOB_DEPLOYMENTS);
            jobDeployments.start(PathChildrenCache.StartMode.BUILD_INITIAL_CACHE);

            SupervisorElectedEvent supervisorElectedEvent = new SupervisorElectedEvent(moduleDeploymentRequests,
                    streamDeployments, jobDeployments);
            Map<String, SupervisorElectionListener> listenersMap =
                    applicationContext.getBeansOfType(SupervisorElectionListener.class);
            for (SupervisorElectionListener electionListener : listenersMap.values()) {
                electionListener.onSupervisorElected(supervisorElectedEvent);
            }

            ContainerListener containerListener = new ContainerListener(zkConnection,
                    containerRepository,
                    streamFactory,
                    jobFactory,
                    streamDeployments,
                    jobDeployments,
                    moduleDeploymentRequests,
                    containerMatcher,
                    moduleDeploymentWriter,
                    stateCalculator,
                    executorService,
                    quietPeriod);
            containers = instantiatePathChildrenCache(client, Paths.CONTAINERS);
            containers.getListenable().addListener(containerListener);
            containers.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT);
            containerListener.scheduleDepartedContainerDeployer();

            deploymentQueueForConsumer = new DeploymentQueue(client, deploymentMessageConsumer,
                    Paths.DEPLOYMENT_QUEUE, executorService);
            deploymentQueueForConsumer.start();

            // hold leadership until this thread is interrupted; returning
            // from this method relinquishes leadership
            Thread.sleep(Long.MAX_VALUE);
        }
        catch (InterruptedException e) {
            logger.info("Leadership canceled due to thread interrupt");
            Thread.currentThread().interrupt();
        }
        finally {
            closeCache(containers, Paths.CONTAINERS);
            closeCache(streamDeployments, Paths.STREAM_DEPLOYMENTS);
            closeCache(jobDeployments, Paths.JOB_DEPLOYMENTS);
            closeCache(moduleDeploymentRequests, Paths.MODULE_DEPLOYMENTS);

            DeploymentQueue queue = deploymentQueueForConsumer;
            if (queue != null) {
                // clear the reference so that a later leadership term that
                // fails before creating its own queue does not destroy this
                // (already destroyed) queue a second time
                deploymentQueueForConsumer = null;
                try {
                    queue.destroy();
                }
                catch (IOException e) {
                    logger.warn("Exception closing the distributed queue producer " + e, e);
                }
            }
        }
    }

    /**
     * Close the given cache, logging rather than propagating any failure so
     * that the remaining resources can still be released.
     *
     * @param cache the cache to close; may be {@code null} if it was never created
     * @param path the path the cache was watching, used for logging only
     */
    private void closeCache(PathChildrenCache cache, String path) {
        if (cache == null) {
            return;
        }
        try {
            cache.close();
        }
        catch (Exception e) {
            // teardown boundary: keep going so the other resources get closed
            logger.warn("Exception closing path cache for " + path, e);
        }
    }
}
}