/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nifi.controller.leader.election;

import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.curator.RetryPolicy;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.recipes.leader.LeaderSelector;
import org.apache.curator.framework.recipes.leader.LeaderSelectorListener;
import org.apache.curator.framework.recipes.leader.LeaderSelectorListenerAdapter;
import org.apache.curator.framework.recipes.leader.Participant;
import org.apache.curator.framework.state.ConnectionState;
import org.apache.curator.retry.RetryNTimes;
import org.apache.nifi.controller.cluster.ZooKeeperClientConfig;
import org.apache.nifi.engine.FlowEngine;
import org.apache.nifi.util.NiFiProperties;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.common.PathUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link LeaderElectionManager} implementation backed by Curator's {@link LeaderSelector} recipe.
 *
 * <p>
 * Roles are tracked in two maps: {@code registeredRoles} remembers every role that has been registered
 * (so that it can be re-registered by {@link #start()}), while {@code leaderRoles} holds the selector and
 * listener for each role while the manager is running. A role registered with a non-blank participant id
 * actively takes part in the election; otherwise a selector is created but never started and the node only
 * observes.
 * </p>
 *
 * <p>
 * {@link #start()}, {@link #stop()}, both {@code register} variants that mutate state, and
 * {@link #unregister(String)} are {@code synchronized} on this instance; the {@code stopped} flag is volatile.
 * </p>
 */
public class CuratorLeaderElectionManager implements LeaderElectionManager {

    private static final Logger logger = LoggerFactory.getLogger(CuratorLeaderElectionManager.class);

    private final FlowEngine leaderElectionMonitorEngine;
    private final ZooKeeperClientConfig zkConfig;

    private CuratorFramework curatorClient;

    private volatile boolean stopped = true;

    // Roles that currently have a LeaderSelector bound to curatorClient. Cleared by stop().
    private final Map<String, LeaderRole> leaderRoles = new HashMap<>();
    // Every role ever registered and not yet unregistered. Survives stop() so start() can restore it.
    private final Map<String, RegisteredRole> registeredRoles = new HashMap<>();

    /**
     * Creates a manager that is initially stopped.
     *
     * @param threadPoolSize number of threads given to the "Leader Election Notification" engine
     * @param properties the NiFi properties from which the ZooKeeper client configuration is created
     */
    public CuratorLeaderElectionManager(final int threadPoolSize, final NiFiProperties properties) {
        leaderElectionMonitorEngine = new FlowEngine(threadPoolSize, "Leader Election Notification", true);
        zkConfig = ZooKeeperClientConfig.createConfig(properties);
    }

    /**
     * Starts the manager if it is stopped: creates a new Curator client and re-registers every role
     * that was previously registered. Does nothing if the manager is already running.
     */
    @Override
    public synchronized void start() {
        if (!stopped) {
            return;
        }

        stopped = false;
        curatorClient = createClient();

        // Call #register for each already-registered role. This will cause us to start listening for
        // leader elections for that role again. We iterate over a snapshot because #register writes
        // back into registeredRoles, and we must not depend on that write being non-structural.
        final Map<String, RegisteredRole> rolesToRestore = new HashMap<>(registeredRoles);
        for (final Map.Entry<String, RegisteredRole> entry : rolesToRestore.entrySet()) {
            final RegisteredRole role = entry.getValue();
            register(entry.getKey(), role.getListener(), role.getParticipantId());
        }

        logger.info("{} started", this);
    }

    /**
     * Registers the given role without a participant id, i.e. as an observer of the election.
     *
     * @param roleName the name of the role
     * @param listener the listener to associate with the role
     */
    @Override
    public void register(String roleName, LeaderElectionStateChangeListener listener) {
        register(roleName, listener, null);
    }

    /**
     * Builds the ZooKeeper path used for the election of the given role:
     * {@code <rootPath>/leaders/<roleName>}, inserting a separator only if the root path does not
     * already end with one.
     *
     * @param roleName the name of the role
     * @return the election path for the role
     */
    private String getElectionPath(final String roleName) {
        final String rootPath = zkConfig.getRootPath();
        final String leaderPath = rootPath + (rootPath.endsWith("/") ? "" : "/") + "leaders/" + roleName;
        return leaderPath;
    }

    /**
     * Registers the given role. If {@code participantId} is non-null and not blank, this node joins the
     * election for the role under that id; otherwise a selector is created but not started.
     *
     * <p>
     * The registration is always remembered so that {@link #start()} can restore it. A selector is only
     * created when the manager is not stopped.
     * </p>
     *
     * @param roleName the name of the role
     * @param listener the listener to notify of leadership changes; may be null
     * @param participantId the id to participate under, or null to only observe
     * @throws IllegalStateException if the election path derived from {@code roleName} is not a valid ZooKeeper path
     */
    @Override
    public synchronized void register(final String roleName, final LeaderElectionStateChangeListener listener, final String participantId) {
        logger.debug("{} Registering new Leader Selector for role {}", this, roleName);

        // If we already have a Leader Role registered and either the Leader Role is participating in election,
        // or the given participant id == null (don't want to participant in election) then we're done.
        final LeaderRole currentRole = leaderRoles.get(roleName);
        if (currentRole != null && (currentRole.isParticipant() || participantId == null)) {
            logger.info("{} Attempted to register Leader Election for role '{}' but this role is already registered", this, roleName);
            return;
        }

        final String leaderPath = getElectionPath(roleName);

        try {
            PathUtils.validatePath(leaderPath);
        } catch (final IllegalArgumentException e) {
            // Keep the validation failure as the cause so the reason the path was rejected is not lost.
            throw new IllegalStateException("Cannot register leader election for role '" + roleName + "' because this is not a valid role name", e);
        }

        registeredRoles.put(roleName, new RegisteredRole(participantId, listener));

        final boolean isParticipant = participantId != null && !participantId.trim().isEmpty();

        if (!isStopped()) {
            final ElectionListener electionListener = new ElectionListener(roleName, listener);
            final LeaderSelector leaderSelector = new LeaderSelector(curatorClient, leaderPath, leaderElectionMonitorEngine, electionListener);

            // Only participants start the selector; an observer's selector is used solely for #getLeader.
            if (isParticipant) {
                leaderSelector.autoRequeue();
                leaderSelector.setId(participantId);
                leaderSelector.start();
            }

            final LeaderRole leaderRole = new LeaderRole(leaderSelector, electionListener, isParticipant);
            leaderRoles.put(roleName, leaderRole);
        }

        if (isParticipant) {
            logger.info("{} Registered new Leader Selector for role {}; this node is an active participant in the election.", this, roleName);
        } else {
            logger.info("{} Registered new Leader Selector for role {}; this node is a silent observer in the election.", this, roleName);
        }
    }

    /**
     * Removes the given role from this manager and, if this node was participating in its election,
     * closes the associated selector.
     *
     * @param roleName the name of the role to unregister
     */
    @Override
    public synchronized void unregister(final String roleName) {
        registeredRoles.remove(roleName);

        final LeaderRole leaderRole = leaderRoles.remove(roleName);
        if (leaderRole == null) {
            logger.info("Cannot unregister Leader Election Role '{}' becuase that role is not registered", roleName);
            return;
        }

        final LeaderSelector leaderSelector = leaderRole.getLeaderSelector();
        if (leaderSelector == null) {
            logger.info("Cannot unregister Leader Election Role '{}' becuase that role is not registered", roleName);
            return;
        }

        // #register only starts the selector for participants, and LeaderSelector#close fails with an
        // IllegalStateException on a selector that was never started. An observer has nothing to close.
        if (leaderRole.isParticipant()) {
            leaderSelector.close();
        }

        logger.info("This node is no longer registered to be elected as the Leader for Role '{}'", roleName);
    }

    /**
     * Stops the manager: closes the selector of every role this node participates in, forgets all
     * active selectors and closes the Curator client. Registrations are retained so that a later
     * {@link #start()} can restore them.
     */
    @Override
    public synchronized void stop() {
        stopped = true;

        for (final Map.Entry<String, LeaderRole> entry : leaderRoles.entrySet()) {
            final LeaderRole role = entry.getValue();

            // Observer selectors were never started (see #register), so closing them would only
            // raise an IllegalStateException and produce a misleading warning.
            if (!role.isParticipant()) {
                continue;
            }

            final LeaderSelector selector = role.getLeaderSelector();
            try {
                selector.close();
            } catch (final Exception e) {
                logger.warn("Failed to close Leader Selector for {}", entry.getKey(), e);
            }
        }

        leaderRoles.clear();

        if (curatorClient != null) {
            curatorClient.close();
            curatorClient = null;
        }

        logger.info("{} stopped and closed", this);
    }

    /**
     * @return {@code true} if this manager is currently stopped
     */
    @Override
    public boolean isStopped() {
        return stopped;
    }

    @Override
    public String toString() {
        return "CuratorLeaderElectionManager[stopped=" + isStopped() + "]";
    }

    /**
     * Looks up the active role with the given name while holding this instance's monitor.
     *
     * @param roleName the name of the role
     * @return the active role, or null if no selector exists for that role
     */
    private synchronized LeaderRole getLeaderRole(final String roleName) {
        return leaderRoles.get(roleName);
    }

    /**
     * @param roleName the name of the role
     * @return {@code true} if this node's election listener for the role currently reports leadership;
     *         {@code false} otherwise, including when the role has no active selector
     */
    @Override
    public boolean isLeader(final String roleName) {
        final LeaderRole role = getLeaderRole(roleName);
        if (role == null) {
            return false;
        }

        return role.isLeader();
    }

    /**
     * Returns the id of the current leader for the given role. If this manager is stopped or has no
     * active selector for the role, a temporary client is used to query ZooKeeper instead.
     *
     * @param roleName the name of the role
     * @return the leader's participant id, or null if it is empty, unknown or could not be determined
     */
    @Override
    public String getLeader(final String roleName) {
        if (isStopped()) {
            return determineLeaderExternal(roleName);
        }

        final LeaderRole role = getLeaderRole(roleName);
        if (role == null) {
            return determineLeaderExternal(roleName);
        }

        Participant participant;
        try {
            participant = role.getLeaderSelector().getLeader();
        } catch (Exception e) {
            // Best effort: report "no leader" but keep the failure visible at debug level.
            logger.debug("Unable to determine leader for role '{}'; returning null", roleName, e);
            return null;
        }

        if (participant == null) {
            return null;
        }

        final String participantId = participant.getId();
        if (StringUtils.isEmpty(participantId)) {
            return null;
        }

        return participantId;
    }

    /**
     * Determines whether or not leader election has already begun for the role with the given name
     *
     * @param roleName the role of interest
     * @return {@code true} if leader election has already begun, {@code false} if it has not or if unable to determine this.
     */
    @Override
    public boolean isLeaderElected(final String roleName) {
        final String leaderAddress = determineLeaderExternal(roleName);
        return !StringUtils.isEmpty(leaderAddress);
    }

    /**
     * Use a new Curator client to determine which node is the elected leader for the given role.
     *
     * @param roleName the name of the role
     * @return the id of the elected leader, or {@code null} if no leader has been selected or if unable to determine
     *         the leader from ZooKeeper
     */
    private String determineLeaderExternal(final String roleName) {
        final CuratorFramework client = createClient();
        try {
            // A no-op listener: the selector below is never started, so these callbacks are not expected to fire.
            final LeaderSelectorListener electionListener = new LeaderSelectorListener() {
                @Override
                public void stateChanged(CuratorFramework client, ConnectionState newState) {
                }

                @Override
                public void takeLeadership(CuratorFramework client) throws Exception {
                }
            };

            final String electionPath = getElectionPath(roleName);

            // Note that we intentionally do not auto-requeue here, and we do not start the selector. We do not
            // want to join the leader election. We simply want to observe.
            final LeaderSelector selector = new LeaderSelector(client, electionPath, electionListener);

            try {
                final Participant leader = selector.getLeader();
                return leader == null ? null : leader.getId();
            } catch (final KeeperException.NoNodeException nne) {
                // If there is no ZNode, then there is no elected leader.
                return null;
            } catch (final Exception e) {
                logger.warn("Unable to determine the Elected Leader for role '{}' due to {}; assuming no leader has been elected", roleName, e.toString());
                if (logger.isDebugEnabled()) {
                    logger.warn("", e);
                }

                return null;
            }
        } finally {
            client.close();
        }
    }

    /**
     * Builds and starts a Curator client from the ZooKeeper configuration, using a retry policy of a
     * single retry after 100 milliseconds.
     *
     * @return a started client; the caller is responsible for closing it
     */
    private CuratorFramework createClient() {
        // Create a new client because we don't want to try indefinitely for this to occur.
        final RetryPolicy retryPolicy = new RetryNTimes(1, 100);

        final CuratorFramework client = CuratorFrameworkFactory.builder()
            .connectString(zkConfig.getConnectString())
            .sessionTimeoutMs(zkConfig.getSessionTimeoutMillis())
            .connectionTimeoutMs(zkConfig.getConnectionTimeoutMillis())
            .retryPolicy(retryPolicy)
            .defaultData(new byte[0])
            .build();

        client.start();
        return client;
    }

    /**
     * The selector and listener for a role while the manager is running, plus whether this node
     * participates in (rather than merely observes) the election.
     */
    private static class LeaderRole {
        private final LeaderSelector leaderSelector;
        private final ElectionListener electionListener;
        private final boolean participant;

        public LeaderRole(final LeaderSelector leaderSelector, final ElectionListener electionListener, final boolean participant) {
            this.leaderSelector = leaderSelector;
            this.electionListener = electionListener;
            this.participant = participant;
        }

        public LeaderSelector getLeaderSelector() {
            return leaderSelector;
        }

        public boolean isLeader() {
            return electionListener.isLeader();
        }

        public boolean isParticipant() {
            return participant;
        }
    }

    /**
     * The arguments a role was registered with, retained so the registration can be replayed by
     * {@link CuratorLeaderElectionManager#start()}.
     */
    private static class RegisteredRole {
        private final LeaderElectionStateChangeListener listener;
        private final String participantId;

        public RegisteredRole(final String participantId, final LeaderElectionStateChangeListener listener) {
            this.participantId = participantId;
            this.listener = listener;
        }

        public LeaderElectionStateChangeListener getListener() {
            return listener;
        }

        public String getParticipantId() {
            return participantId;
        }
    }

    /**
     * Curator listener for a single role. Tracks whether this node currently holds leadership and
     * forwards election / relinquish events to the optional {@link LeaderElectionStateChangeListener}.
     */
    private class ElectionListener extends LeaderSelectorListenerAdapter implements LeaderSelectorListener {
        private final String roleName;
        private final LeaderElectionStateChangeListener listener;

        private volatile boolean leader;

        public ElectionListener(final String roleName, final LeaderElectionStateChangeListener listener) {
            this.roleName = roleName;
            this.listener = listener;
        }

        public boolean isLeader() {
            return leader;
        }

        @Override
        public void stateChanged(final CuratorFramework client, final ConnectionState newState) {
            logger.info("{} Connection State changed to {}", this, newState.name());
            super.stateChanged(client, newState);
        }

        /**
         * Invoked by Curator when this node wins the election. Notifies the listener, then blocks until
         * the manager is stopped or the thread is interrupted, and finally notifies the listener that
         * leadership has been relinquished.
         *
         * @param client the Curator client
         * @throws Exception if interrupted while pausing after a failed {@code onLeaderElection} call
         */
        @Override
        public void takeLeadership(final CuratorFramework client) throws Exception {
            leader = true;
            logger.info("{} This node has been elected Leader for Role '{}'", this, roleName);

            if (listener != null) {
                try {
                    listener.onLeaderElection();
                } catch (final Exception e) {
                    // Format the exception explicitly: whether SLF4J fills a trailing placeholder with a
                    // Throwable argument differs between versions. The stack trace is logged separately below.
                    logger.error("This node was elected Leader for Role '{}' but failed to take leadership. Will relinquish leadership role. Failure was due to: {}", roleName, e.toString());
                    logger.error("", e);
                    leader = false;
                    // Pause before returning (which gives up leadership) so a persistently failing
                    // listener is not re-invoked in a tight loop.
                    Thread.sleep(1000L);
                    return;
                }
            }

            // Curator API states that we lose the leadership election when we return from this method,
            // so we will block as long as we are not interrupted or closed. Then, we will set leader to false.
            try {
                while (!isStopped()) {
                    try {
                        Thread.sleep(100L);
                    } catch (final InterruptedException ie) {
                        logger.info("{} has been interrupted; no longer leader for role '{}'", this, roleName);
                        Thread.currentThread().interrupt();
                        return;
                    }
                }
            } finally {
                leader = false;
                logger.info("{} This node is no longer leader for role '{}'", this, roleName);

                if (listener != null) {
                    try {
                        listener.onLeaderRelinquish();
                    } catch (final Exception e) {
                        logger.error("This node is no longer leader for role '{}' but failed to shutdown leadership responsibilities properly due to: {}", roleName, e.toString());
                        if (logger.isDebugEnabled()) {
                            logger.error("", e);
                        }
                    }
                }
            }
        }
    }
}