/**
* Copyright 2014 Comcast Cable Communications Management, LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.comcast.viper.flume2storm.zookeeper;
import java.io.IOException;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A wrapper around ZooKeeper that simplifies its usage, especially connection
* management. The configuration can be changed at any time; a change does not
* necessarily take effect right away, but on the next request. The point of
* the ZkClient is to keep the connection to ZooKeeper alive and well.
*/
public class ZkClient {
// Class-wide logger (also used by the inner ConnectionManager)
protected static final Logger LOG = LoggerFactory.getLogger(ZkClient.class);
// Current configuration; swapped as a whole by configure() and re-read on
// each use
protected final AtomicReference<ZkClientConfiguration> config;
// Guards reads and writes of the "state" field below
protected final ReadWriteLock stateSync;
// Current client state; accessed under stateSync by setState()/getState()
protected ZkClientState state;
// This state is related to the connection manager: it is set by start(),
// cleared by stop(), and polled by the connection manager thread
protected final AtomicBoolean started;
// Optional application listener (may be null)
protected final ZkClientListener zkClientListener;
// Listeners notified by setState() on every actual state change
protected final Queue<ZkClientStateListener> stateListeners;
// Connection manager thread created by start(); null until the first start
protected ConnectionManager connectionManager;
// protected final Object zkSync;
// ZooKeeper handle created by ConnectionManager.connect(); empty until then
protected final AtomicReference<ZooKeeper> zookeeper;
/**
 * Creates a ZkClient that has no {@link ZkClientListener} attached.
 */
public ZkClient() {
  this((ZkClientListener) null);
}
/**
 * Creates a ZkClient in the {@link ZkClientState#STOPPED} state, holding a
 * freshly constructed {@link ZkClientConfiguration}, no state listeners and
 * no ZooKeeper handle.
 *
 * @param listener
 *          The single {@link ZkClientListener} to notify; may be null when
 *          no notification is wanted
 */
public ZkClient(final ZkClientListener listener) {
  this.zkClientListener = listener;
  this.config = new AtomicReference<ZkClientConfiguration>(new ZkClientConfiguration());
  this.started = new AtomicBoolean(false);
  this.stateSync = new ReentrantReadWriteLock();
  this.state = ZkClientState.STOPPED;
  this.stateListeners = new ConcurrentLinkedQueue<ZkClientStateListener>();
  this.zookeeper = new AtomicReference<ZooKeeper>();
}
/**
 * Replaces the configuration used by this client. The new value is picked up
 * the next time the configuration is read (e.g. on the next connection
 * attempt or wait), not necessarily immediately.
 *
 * @param configuration
 *          The new configuration to set; must not be null
 * @throws IllegalArgumentException
 *           If the configuration is null
 */
public void configure(final ZkClientConfiguration configuration) {
  // The configuration is dereferenced without a null check wherever it is
  // read, so a null value must be rejected here rather than failing later in
  // the connection manager thread
  if (configuration == null) {
    throw new IllegalArgumentException("configuration must not be null");
  }
  config.set(configuration);
}
/**
 * Starts the ZkClient: marks it as started, creates a new connection manager
 * thread, registers it as a state listener and starts it.
 *
 * @return True if the start was requested (the connection itself is
 *         established asynchronously by the connection manager). False if
 *         the ZkClient was already started
 */
public synchronized boolean start() {
  final boolean alreadyStarted = started.get();
  if (!alreadyStarted) {
    LOG.debug("Starting...");
    started.set(true);
    final ConnectionManager manager = new ConnectionManager();
    connectionManager = manager;
    addStateListener(manager);
    manager.start();
    LOG.info("Started");
    return true;
  }
  LOG.warn("Already started");
  return false;
}
/**
 * Stops the ZkClient: clears the started flag, wakes up the connection
 * manager thread and waits for it to terminate, for at most the configured
 * termination timeout. The connection manager is then unregistered from the
 * state listeners.
 * <p>
 * If the calling thread is interrupted while waiting, the wait is abandoned
 * and the interrupt status of the calling thread is restored.
 *
 * @return True if the ZkClient has been stopped. False if it was actually
 *         already stopped
 */
public synchronized boolean stop() {
  if (!started.get()) {
    LOG.warn("Already stopped");
    return false;
  }
  LOG.debug("Stopping...");
  started.set(false);
  final ConnectionManager manager = connectionManager;
  if (manager != null) {
    // The interrupt only cuts short the manager's sleeps; the cleared
    // "started" flag is what makes its main loop exit
    manager.interrupt();
    try {
      manager.join(config.get().getTerminationTimeout());
      if (manager.isAlive()) {
        LOG.warn("Connection manager thread still running after the termination timeout");
      }
    } catch (final InterruptedException e) {
      LOG.error("Failed to stop the connection manager thread", e);
      // Preserve the interruption for the caller instead of swallowing it
      Thread.currentThread().interrupt();
    }
    removeStateListener(manager);
  }
  LOG.info("Stopped");
  return true;
}
/**
 * @return The configuration currently held by this client (the last one
 *         passed to {@link #configure(ZkClientConfiguration)}, or the one
 *         created by the constructor)
 */
public ZkClientConfiguration getConfiguration() {
  final ZkClientConfiguration current = config.get();
  return current;
}
//
// State management
//
/**
 * Records a new state and, if it differs from the current one, notifies
 * every registered {@link ZkClientStateListener}. The state field is updated
 * under the write lock; the listeners are invoked after the lock has been
 * released, on the calling thread.
 *
 * @param newState
 *          The state to switch to
 */
protected void setState(final ZkClientState newState) {
  final ZkClientState oldState;
  // Acquire the lock before entering the try block, so that the finally
  // clause never attempts to release a lock that was not acquired
  stateSync.writeLock().lock();
  try {
    assert state != null : "The state is never null";
    // No change?
    if (state.equals(newState)) {
      LOG.debug("State unchanged to: {}", newState);
      return;
    }
    // Recording the state change
    oldState = state;
    state = newState;
  } finally {
    stateSync.writeLock().unlock();
  }
  LOG.debug("State changed: {} -> {}", oldState, newState);
  // Calling the listeners
  assert oldState != null : "The old state is never null either";
  for (final ZkClientStateListener listener : stateListeners) {
    listener.onStateChange(oldState, newState);
  }
}
/**
 * Reads the current state under the read lock.
 *
 * @return The current state of the {@link ZkClient}
 */
public ZkClientState getState() {
  // Acquire the lock before entering the try block, so that the finally
  // clause never attempts to release a lock that was not acquired
  stateSync.readLock().lock();
  try {
    return state;
  } finally {
    stateSync.readLock().unlock();
  }
}
/**
 * Looks up the timeout of the session currently recorded by the connection
 * manager.
 *
 * @return The timeout of the recorded session, or null when the current
 *         state does not report itself as started or when there is no
 *         session object
 */
public Integer getNegotiatedSessionTimeout() {
  if (getState().isStarted()) {
    final ZkSession session = connectionManager.currentSession.get();
    if (session != null) {
      return session.getTimeout();
    }
  }
  return null;
}
/**
 * Gives direct access to the underlying handle. Be careful with this please!
 *
 * @return The {@link ZooKeeper} handle currently stored, or null if none has
 *         been created yet
 */
protected ZooKeeper getZooKeeper() {
  final ZooKeeper handle = zookeeper.get();
  return handle;
}
/**
 * Registers a listener that is called on every state change of this client.
 *
 * @param listener
 *          The state listener to add
 */
public void addStateListener(final ZkClientStateListener listener) {
  this.stateListeners.offer(listener);
}
/**
 * Unregisters a previously added state listener. Nothing happens if the
 * listener is not registered.
 *
 * @param listener
 *          The state listener to remove
 */
public void removeStateListener(final ZkClientStateListener listener) {
  this.stateListeners.remove(listener);
}
//
// Connection management (where the fun begins...)
//
/**
* The connection manager thread. <br />
* The way a ZK connection seems to work is that 2 sockets are established,
* but the session only gets fully created on the second one. This means that
* sometimes the getSessionId call returns 0 for up to a couple of seconds
* (the time for the second socket to be established). So in the main loop
* thread (i.e. ConnectionManager), we wait for the session to be fully
* established before continuing.
*/
protected class ConnectionManager extends Thread implements ZkClientStateListener {
// Programming note: the reference held here is NEVER null; it ALWAYS points
// to an immutable (as in "thread-safe") session object, which is
// ZkSession.NO_SESSION when no session is recorded
protected final AtomicReference<ZkSession> currentSession;

/**
 * Creates the (not yet started) connection manager as a daemon thread named
 * "ConnectionManager", with no session recorded.
 */
public ConnectionManager() {
  super();
  this.currentSession = new AtomicReference<ZkSession>(ZkSession.NO_SESSION);
  setDaemon(true);
  setName("ConnectionManager");
}
/**
 * Records the session (id, password and timeout) of the current ZooKeeper
 * handle as the current session. Does nothing when there is no handle.
 */
private final void setZkSession() {
  final ZooKeeper handle = zookeeper.get();
  if (handle == null) {
    return;
  }
  final ZkSession recorded = ZkSession.build(handle.getSessionId(), handle.getSessionPasswd(),
      handle.getSessionTimeout());
  currentSession.set(recorded);
  LOG.debug("Stored Zk Session: {}", currentSession);
}
/**
 * Forgets the recorded session by resetting it to ZkSession.NO_SESSION. Does
 * nothing (and logs nothing) when no session is currently set.
 */
protected final void clearZkSession() {
  if (!currentSession.get().isSet()) {
    return;
  }
  LOG.debug("Clearing Zk Session: {}", currentSession);
  currentSession.set(ZkSession.NO_SESSION);
}
/**
* This is the connection manager thread, where we mostly wait (so that we
* don't do it in the event thread - i.e. in the connection watcher process
* method).
* <p>
* The loop reads the current ZkClient state on every iteration and acts on
* it. Some transitions are made here; others are made by the connection
* watcher (CONNECTING -> CONNECTED, DISCONNECTED -> RECOVERING or CONNECTING,
* any state -> DISCONNECTED). The loop only exits from the CONNECTING,
* CONNECTED and DISCONNECTED states, and only once the "started" flag has
* been cleared; the state is then set to STOPPED.
*
* @see java.lang.Thread#run()
*/
@Override
public void run() {
LOG.info("Thread started");
boolean exitMainLoop = false;
while (!exitMainLoop) {
try {
switch (ZkClient.this.getState()) {
case STOPPED:
// The CONNECTING notification is what triggers the actual connection
// attempt (see onStateChange)
setState(ZkClientState.CONNECTING);
break;
//
case CONNECTING:
// Waiting for the connection watcher to move the state to CONNECTED
if (started.get()) {
try {
Thread.sleep(100);
} catch (final InterruptedException e) {
// Nothing to do: the "started" flag is checked on the next iteration
}
} else {
/*
* Programming note: we didn't initialize, so we shouldn't call
* the clean up
*/
exitMainLoop = true;
}
break;
//
case CONNECTED:
if (started.get()) {
// Wait until the session id is available, the connection timeout has
// elapsed, or a stop has been requested. An interruption of this sleep is
// not handled locally: it reaches the catch at the bottom of the loop
final long t0 = System.currentTimeMillis();
while (zookeeper.get().getSessionId() == 0
&& System.currentTimeMillis() - t0 < config.get().getConnectionTimeout() && started.get()) {
Thread.sleep(100);
}
if (zookeeper.get().getSessionId() == 0) {
LOG.error("Failed to retrieve Zookeeper session id");
setState(ZkClientState.DISCONNECTING);
} else {
// Same session id as the one previously recorded: this is a recovery, not
// a fresh connection, so the listener is not initialized again
final boolean isRecovery = currentSession.get().sameAs(zookeeper.get().getSessionId());
setZkSession();
if (isRecovery) {
setState(ZkClientState.RECOVERING);
} else {
setState(ZkClientState.INITIALIZING);
}
}
} else {
/*
* Programming note: we didn't initialize, so we shouldn't call
* the clean up
*/
exitMainLoop = true;
}
break;
//
case INITIALIZING:
// Let the application listener (if any) initialize; a failure tears the
// connection down
try {
if (zkClientListener != null) {
zkClientListener.initialize();
}
setState(ZkClientState.SETUP);
} catch (final Exception e) {
LOG.error("Failed to initialize ZkClient", e);
setState(ZkClientState.DISCONNECTING);
}
break;
//
case RECOVERING:
LOG.debug("Using Zk session: {}", currentSession);
setState(ZkClientState.SETUP);
break;
//
case SETUP:
// Nothing to do but wait here until a stop is requested (or the watcher
// changes the state)
if (started.get()) {
try {
Thread.sleep(100);
} catch (final InterruptedException e) {
// Nothing to do: the "started" flag is checked on the next iteration
}
} else {
setState(ZkClientState.CLEANING_UP);
}
break;
//
case CLEANING_UP:
// Let the application listener (if any) terminate; disconnection follows
// whether or not it succeeded
try {
if (zkClientListener != null) {
zkClientListener.terminate();
}
} catch (final Exception e) {
LOG.error("Failed to clean up ZkClient", e);
}
setState(ZkClientState.DISCONNECTING);
break;
//
case DISCONNECTING:
// Close the handle and forget the session; DISCONNECTED is reached even if
// that fails
try {
disconnect();
clearZkSession();
} catch (final Exception e) {
LOG.error("Failed to disconnect properly ZkClient from ZK quorum", e);
}
setState(ZkClientState.DISCONNECTED);
break;
//
case DISCONNECTED:
// NOTE(review): while started, this loop never leaves DISCONNECTED by
// itself; only the connection watcher does (SyncConnected or Expired).
// After a failed connect() or a disconnect() there may be no live handle
// to deliver such an event - confirm whether a retry is intended here
if (started.get()) {
try {
Thread.sleep(config.get().getReconnectionDelay());
} catch (final InterruptedException e) {
// Nothing to do: the "started" flag is checked on the next iteration
}
} else {
exitMainLoop = true;
}
break;
//
default:
// NOTE(review): an AssertionError is not an Exception, so it is not caught
// below and ends this thread without the state being set to STOPPED
throw new AssertionError("Forgetting states?");
}
} catch (final Exception e) {
// Any failure is logged and the loop carries on from the current state
LOG.error("Unexpected failure: " + e.getLocalizedMessage(), e);
}
}
setState(ZkClientState.STOPPED);
LOG.info("Thread terminated");
}
/**
 * Reacts to the state changes of the enclosing ZkClient: entering CONNECTING
 * triggers the connection attempt, and the connection-related states are
 * forwarded to the {@link ZkClientListener} (if any).
 *
 * @see com.comcast.viper.flume2storm.zookeeper.ZkClientStateListener#onStateChange(com.comcast.viper.flume2storm.zookeeper.ZkClientState,
 *      com.comcast.viper.flume2storm.zookeeper.ZkClientState)
 */
@Override
public void onStateChange(final ZkClientState previousState, final ZkClientState newState) {
  switch (newState) {
    case CONNECTING: {
      // Programming note: a successful attempt does not change the state
      // here; the connected state is reached using a ZK event
      final boolean attemptStarted = connect();
      if (!attemptStarted) {
        setState(ZkClientState.DISCONNECTED);
      }
      break;
    }
    case CONNECTED:
    case RECOVERING:
      // Both states mean that the connection is (back) up
      if (zkClientListener != null) {
        zkClientListener.onConnection();
      }
      break;
    case DISCONNECTED:
      if (zkClientListener != null) {
        zkClientListener.onDisconnection();
      }
      break;
    default:
      // The other states require no reaction
      break;
  }
}
/**
 * Creates a new ZooKeeper handle from the current configuration and stores
 * it. When a session is recorded, the handle is created with that session's
 * id and password; otherwise a new session is requested. The handle is
 * created with the connection watcher, through which the outcome of the
 * connection is reported.
 *
 * @return True if the handle was created, false if its creation failed with
 *         an {@link IOException}
 */
protected boolean connect() {
  final ZkClientConfiguration configuration = config.get();
  final ZkSession session = ZkSession.copyOf(currentSession.get());
  ZooKeeper handle;
  try {
    if (!session.isSet()) {
      LOG.debug("Connecting to ZK server using new session...");
      handle = new ZooKeeper(configuration.getConnectionStr(), configuration.getSessionTimeout(),
          connectionWatcher);
    } else {
      LOG.debug("Reconnecting to ZK server using old session: {}...", session);
      handle = new ZooKeeper(configuration.getConnectionStr(), configuration.getSessionTimeout(),
          connectionWatcher, session.getSessionId(), session.getSessionPwd());
    }
  } catch (final IOException e) {
    LOG.error("Failed to connection ZK quorum (to: " + configuration.getConnectionStr() + ")", e);
    return false;
  }
  zookeeper.set(handle);
  return true;
}
// TODO verify that the connection watcher keeps triggering after
// connection
/**
 * The watcher given to every ZooKeeper handle created by connect(). It
 * translates the connection events of ZooKeeper into ZkClient state changes.
 * Connection states that are not handled explicitly are logged and ignored.
 */
protected final Watcher connectionWatcher = new Watcher() {
  /**
   * @see org.apache.zookeeper.Watcher#process(org.apache.zookeeper.WatchedEvent)
   */
  @Override
  public void process(final WatchedEvent event) {
    LOG.debug("Got ZK event: {} from thread id: {}", event, Thread.currentThread().getId());
    switch (event.getState()) {
      case SyncConnected:
        switch (ZkClient.this.getState()) {
          case CONNECTING:
            /*
             * Regular connection: our (ZkClient) connection attempt has
             * been successful
             */
            setState(ZkClientState.CONNECTED);
            break;
          case DISCONNECTED:
            /*
             * Regular zookeeper reconnection: The zookeeper client
             * re-established the connection by itself
             */
            setState(ZkClientState.RECOVERING);
            break;
          default:
            // Ignoring all other cases
            break;
        }
        break;
      case Disconnected:
        setState(ZkClientState.DISCONNECTED);
        break;
      case Expired:
        /*
         * The session expired - this may happen when something went wrong
         * in the comm between the client and the server (but both were
         * still running). The recorded session is dropped and, if we are
         * disconnected, moving to CONNECTING starts a new connection attempt
         * with a new session
         */
        LOG.warn("*** Detected session expiration, reconnecting...");
        clearZkSession();
        if (ZkClient.this.getState() == ZkClientState.DISCONNECTED) {
          setState(ZkClientState.CONNECTING);
        }
        break;
      case AuthFailed:
        LOG.error("Authorization failure: {}", event);
        break;
      default:
        /*
         * ZooKeeper defines more connection states than the ones handled
         * above. They must not throw an Error in the ZooKeeper event
         * thread, so they are reported and otherwise ignored
         */
        LOG.warn("Ignoring unhandled ZK connection state in event: {}", event);
        break;
    }
  }
};
/**
 * Closes the current ZooKeeper handle, if there is one. An interruption
 * while closing is logged and not propagated.
 */
protected void disconnect() {
  final ZooKeeper handle = zookeeper.get();
  if (handle == null) {
    return;
  }
  try {
    handle.close();
  } catch (final InterruptedException e) {
    LOG.error("Failed to disconnection ZK quorum", e);
  }
}
}
}