/* * Copyright (C) 2012 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.zookeeper.app; import com.facebook.concurrency.ErrorLoggingRunnable; import com.facebook.concurrency.NamedThreadFactory; import com.facebook.zookeeper.connection.ZkConnectionManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.apache.zookeeper.ZooKeeper; import java.util.EnumMap; import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; /** * ZkApplication is an abstract base class that provides a template-and-hook * style ZooKeeper state management framework. * * Features: * - Automatic connection and reconnection to ZooKeeper on disconnects and * expirations. * - Issues initialize(), repair(), and expire() callbacks to the application * as various connection events occur. * - Models application state with a finite state machine. States are queryable * from subclasses. * * =============================== STATES =============================== * * PRESTART: * This is the application's initial state before the user issues a start() * command to begin the application * * DISCONNECTED: * Application is not connected to ZooKeeper and does not have any * application state set in ZooKeeper (e.g. 
watches or ephemeral nodes). * Entry to this state automatically triggers a connection loop that retries * until successful. * * CONNECTED: * Application is connected to ZooKeeper, but application state in ZooKeeper * has not been fully initialized. Entry to this state automatically triggers * callbacks to the initialize() abstract method, looping until initialize() * returns true. * * FUNCTIONAL: * Application is connected to ZooKeeper and has full application state * initialized in ZooKeeper. At this point the application should be fully * functioning. * * SAFEMODE: * Application was successfully initialized, but became disconnected. Since * many applications will cache ZooKeeper state, this state signifies a * cache read-only mode where ZooKeeper is unavailable. Entry to this * state will automatically trigger a connection loop that retries until * reconnected, or until the session expires. * * SAFEMODE_REPAIR: * Application in safemode was successfully reconnected to ZooKeeper without * expiration. Entry to this state will automatically trigger callbacks to the * repair() abstract method, looping until repair() returns true. While * initialize() and repair() may do the same things, we make this distinction * as it is often possible to optimize the repair method to only repeat failed * commands. * * SHUTDOWN: * Application has been shut down by the user via the shutdown() method. * * ======================= STATE TRANSITION DIAGRAM ===================== * * The state transition diagram appears as follows: * * PRESTART (expire*) * | (start) | * v | * DISCONNECTED <---- --> SAFEMODE * | (connect) / | (connect) * v / v * CONNECTED (dc'ed) / SAFEMODE_REPAIR * \ / / * \ / / * (init) \ / / (repair) * \ / / * v / / * FUNCTIONAL <----- * * * (shutdown*) -----> SHUTDOWN * * NOTE: '*' denotes an event that unconditionally leads to a specific state, * regardless of the pre-existing state.
*/ public abstract class ZkApplication { private static final Logger APP_LOG = LoggerFactory.getLogger(ZkApplication.class); protected final ZkConnectionManager zkConnectionManager; private final ExecutorService watchExecutor; private final ScheduledExecutorService retryExecutor; private final long retryIntervalMillis; private final CountDownLatch initLatch = new CountDownLatch(1); private final StateContext context = new StateContext(); private volatile boolean isStarted = false; public enum State { PRESTART, DISCONNECTED, CONNECTED, FUNCTIONAL, SAFEMODE, SAFEMODE_REPAIR, SHUTDOWN, } // This constructor only exposes the executors for unit testing purposes protected ZkApplication( ZkConnectionManager zkConnectionManager, long retryIntervalMillis, ExecutorService watchExecutor, ScheduledExecutorService retryExecutor ) { this.zkConnectionManager = zkConnectionManager; this.retryIntervalMillis = retryIntervalMillis; this.watchExecutor = watchExecutor; this.retryExecutor = retryExecutor; } protected ZkApplication( ZkConnectionManager zkConnectionManager, long retryIntervalMillis ) { this( zkConnectionManager, retryIntervalMillis, Executors.newSingleThreadExecutor( new NamedThreadFactory("ZkApplication-watch") ), Executors.newSingleThreadScheduledExecutor( new NamedThreadFactory("ZkApplication-retry") ) ); } protected ZkApplication(ZkConnectionManager zkConnectionManager) { this(zkConnectionManager, 2000); } // Instance must be started before it becomes valid for use public synchronized void start() { if (isStarted) { throw new IllegalStateException("Should only be started once"); } ZooKeeper.States zkState = zkConnectionManager.registerWatcher(new ConnectionWatcher()); // Synchronize our application state with ZooKeeper state context.start((zkState == ZooKeeper.States.CONNECTED) ? 
State.CONNECTED : State.DISCONNECTED ); isStarted = true; // Allow watch signals to pass only after initialization initLatch.countDown(); } public boolean isFunctional() { return context.getState() == State.FUNCTIONAL; } public boolean isSafeMode() { return context.getState() == State.SAFEMODE || context.getState() == State.SAFEMODE_REPAIR; } public boolean isShutdown() { return context.getState() == State.SHUTDOWN; } public synchronized void shutdown() { if (!isStarted) { throw new IllegalStateException("Application not yet started"); } context.shutdown(); watchExecutor.shutdown(); retryExecutor.shutdown(); } // Derived classes need to provide implementations for the following methods: /** * Initializes the application such that it is fully functioning. * Implementations should be idempotent, and will be retried repeatedly * until it succeeds. * @return true if successfully initialized, false otherwise */ protected abstract boolean initialize(); /** * Repair the application state to a fully functioning state following * a ZooKeeper disconnect. Implementations should be idempotent, and will * be retried repeatedly until it succeeds. * @return true if successfully repaired, false otherwise */ protected abstract boolean repair(); /** * Cleans up application state as necessary following a ZooKeeper session * expiration. Implementations should be idempotent. 
*/ protected abstract void expire(); // Internal helper classes private class ConnectionWatcher implements Watcher { @Override public void process(final WatchedEvent event) { watchExecutor.execute(new ErrorLoggingRunnable(new Runnable() { @Override public void run() { try { initLatch.await(); // Wait until we have been fully initialized } catch (InterruptedException e) { APP_LOG.error("Init latch interrupted, continuing..."); Thread.currentThread().interrupt(); } context.handleEvent(event.getState()); } })); } } private interface StateHandler { void handleEvent(Watcher.Event.KeeperState event); void inboundHook(); void outboundHook(); } private class StateContext { private volatile State state = State.PRESTART; private final Object transitionLock = new Object(); private final Map<State, StateHandler> handlerCache = new EnumMap<State, StateHandler>(State.class); private StateContext() { // Should be one entry per possible state handlerCache.put(State.PRESTART, new PreStartStateHandler()); handlerCache.put(State.DISCONNECTED, new DisconnectedStateHandler()); handlerCache.put(State.CONNECTED, new ConnectedStateHandler()); handlerCache.put(State.FUNCTIONAL, new FunctionalStateHandler()); handlerCache.put(State.SAFEMODE, new SafeModeStateHandler()); handlerCache.put(State.SAFEMODE_REPAIR, new SafeModeRepairStateHandler()); handlerCache.put(State.SHUTDOWN, new ShutdownStateHandler()); } // Instance must be started before it becomes valid for use public void start(State initialState) { synchronized (transitionLock) { transition(initialState); } } public void handleEvent(Watcher.Event.KeeperState event) { synchronized (transitionLock) { getHandler().handleEvent(event); } } public void shutdown() { synchronized (transitionLock) { if (state == State.SHUTDOWN) { APP_LOG.warn("Multiple shutdown calls"); return; } internalTransition(State.SHUTDOWN); } } public State getState() { return state; } private StateHandler getHandler() { return handlerCache.get(state); } private void 
transition(State newState) { if (newState == State.SHUTDOWN) { throw new IllegalArgumentException( "Set SHUTDOWN state by calling the shutdown method" ); } // Shutdown is a terminal state if (state != State.SHUTDOWN) { internalTransition(newState); } } private void internalTransition(State newState) { getHandler().outboundHook(); state = newState; getHandler().inboundHook(); } // StateHandler implementations private class PreStartStateHandler implements StateHandler { @Override public void handleEvent(Watcher.Event.KeeperState event) { } @Override public void inboundHook() { } @Override public void outboundHook() { } } private class DisconnectedStateHandler implements StateHandler { @Override public void handleEvent(Watcher.Event.KeeperState event) { switch (event) { case SyncConnected: context.transition(State.CONNECTED); break; case Disconnected: break; case Expired: break; } } @Override public void inboundHook() { } @Override public void outboundHook() { } } private class ConnectedStateHandler implements StateHandler { private volatile boolean isActive = false; private volatile boolean isScheduled = false; private final Object scheduleCheckLock = new Object(); @Override public void handleEvent(Watcher.Event.KeeperState event) { switch (event) { case SyncConnected: break; case Disconnected: context.transition(State.DISCONNECTED); break; case Expired: context.transition(State.DISCONNECTED); expire(); break; } } @Override public void inboundHook() { startInitLoop(); } private void startInitLoop() { isActive = true; if (scheduleCompareAndSet()) { retryExecutor.execute(new ErrorLoggingRunnable(new Runnable() { @Override public void run() { try { synchronized (transitionLock) { // Only initialize if we are in the same state if (context.getState() == State.CONNECTED) { // Attempt initialization if (initialize()) { context.transition(State.FUNCTIONAL); return; // Success, don't reschedule } } } } finally { isScheduled = false; } if (scheduleCompareAndSet()) { 
retryExecutor.schedule( this, retryIntervalMillis, TimeUnit.MILLISECONDS ); } } })); } } private boolean scheduleCompareAndSet() { synchronized (scheduleCheckLock) { if (isActive && !isScheduled) { isScheduled = true; return true; } return false; } } @Override public void outboundHook() { stopRepairLoop(); } private void stopRepairLoop() { isActive = false; } } private class FunctionalStateHandler implements StateHandler { @Override public void handleEvent(Watcher.Event.KeeperState event) { switch (event) { case SyncConnected: break; case Disconnected: context.transition(State.SAFEMODE); break; case Expired: context.transition(State.DISCONNECTED); expire(); break; } } @Override public void inboundHook() { } @Override public void outboundHook() { } } private class SafeModeStateHandler implements StateHandler { public void handleEvent(Watcher.Event.KeeperState event) { switch (event) { case SyncConnected: context.transition(State.SAFEMODE_REPAIR); break; case Disconnected: break; case Expired: context.transition(State.DISCONNECTED); expire(); break; } } @Override public void inboundHook() { } @Override public void outboundHook() { } } private class SafeModeRepairStateHandler implements StateHandler { private volatile boolean isActive = false; private volatile boolean isScheduled = false; private final Object scheduleCheckLock = new Object(); public void handleEvent(Watcher.Event.KeeperState event) { switch (event) { case SyncConnected: break; case Disconnected: context.transition(State.SAFEMODE); break; case Expired: context.transition(State.DISCONNECTED); expire(); break; } } @Override public void inboundHook() { startRepairLoop(); } private void startRepairLoop() { isActive = true; if (scheduleCompareAndSet()) { retryExecutor.execute(new ErrorLoggingRunnable(new Runnable() { @Override public void run() { try { synchronized (transitionLock) { // Only repair if we are still in the same state if (context.getState() == State.SAFEMODE_REPAIR) { // Attempt repair if 
(repair()) { context.transition(State.FUNCTIONAL); return; // Success, don't reschedule } } } } finally { isScheduled = false; } if (scheduleCompareAndSet()) { retryExecutor.schedule( this, retryIntervalMillis, TimeUnit.MILLISECONDS ); } } })); } } private boolean scheduleCompareAndSet() { synchronized (scheduleCheckLock) { if (isActive && !isScheduled) { isScheduled = true; return true; } return false; } } @Override public void outboundHook() { stopRepairLoop(); } private void stopRepairLoop() { isActive = false; } } private class ShutdownStateHandler implements StateHandler { @Override public void handleEvent(Watcher.Event.KeeperState event) { // Terminal state } @Override public void inboundHook() { expire(); } @Override public void outboundHook() { } } } }