/*
* Copyright 2010 Outerthought bvba
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.util.zookeeper;
import static org.apache.zookeeper.Watcher.Event.KeeperState.Disconnected;
import static org.apache.zookeeper.Watcher.Event.KeeperState.Expired;
import static org.apache.zookeeper.Watcher.Event.KeeperState.SyncConnected;
import static org.apache.zookeeper.ZooKeeper.States.CONNECTED;
import java.io.IOException;
import javax.annotation.PreDestroy;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;
/**
* This implementation of {@link ZooKeeperItf} is meant for use as a global ZooKeeper handle
* within a ZK-dependent application.
*
* <p>It will:
*
* <ul>
* <li>on startup (= constructor) wait for the ZK connection to come up; if it does not
* come up within the startup timeout (which defaults to the session timeout) an exception
* will be thrown. This avoids the remainder of the application starting up in the absence
* of a valid ZK connection.
* <li>when the session expires or the ZK connection is lost for longer than twice the session
* timeout, it will shut down the application.
* </ul>
*
* <p>So this is a good solution for applications which cannot function in the absence of ZooKeeper.
*/
public class StateWatchingZooKeeper extends ZooKeeperImpl {
    /** Interval, in milliseconds, between two polls of the connection state. */
    private static final long POLL_INTERVAL_MS = 100;

    /** Number of startup polls between two "still waiting" log messages (30 x 100 ms = 3 s). */
    private static final int POLLS_PER_WAIT_MESSAGE = 30;

    private final Log log = LogFactory.getLog(getClass());

    /** The session timeout that was passed to the constructor. */
    private final int requestedSessionTimeout;

    /**
     * The session timeout currently in effect: the negotiated one once connected, otherwise
     * the requested one. Never set to 0, since it drives the shutdown decision in {@link StateWatcher}.
     */
    private int sessionTimeout;

    /**
     * Ready becomes true once the ZooKeeper delegate has been set.
     */
    private volatile boolean ready;

    /** Set once a shutdown has been requested (or startup failed); suppresses event handling and process exit. */
    private volatile boolean stopping;

    /** Tracks the last connection state reported to {@link MyWatcher}. */
    private volatile boolean connected;

    /** Only touched from within {@link MyWatcher#process(WatchedEvent)}. */
    private boolean firstConnect = true;

    /**
     * Thread watching how long we have been disconnected, or null when there is none.
     * Volatile because it is written from the watcher callback and read from {@link #shutdown()}.
     */
    private volatile Thread stateWatcherThread;

    /** Optional callback that is run right before the process is ended, may be null. */
    private Runnable endProcessHook;

    /**
     * Connects to ZooKeeper, using the session timeout also as startup timeout.
     *
     * @param connectString  the ZooKeeper connection string
     * @param sessionTimeout the requested session timeout in milliseconds
     * @throws IOException if no connection could be established within the timeout
     */
    public StateWatchingZooKeeper(String connectString, int sessionTimeout) throws IOException {
        this(connectString, sessionTimeout, sessionTimeout);
    }

    /**
     * Connects to ZooKeeper and blocks until the connection is established or the startup
     * timeout has elapsed.
     *
     * @param connectString  the ZooKeeper connection string
     * @param sessionTimeout the requested session timeout in milliseconds
     * @param startupTimeOut the maximum time in milliseconds to wait for the initial connection
     * @throws IOException if no connection could be established within {@code startupTimeOut},
     *                     or if the waiting thread got interrupted before the connection came up
     */
    public StateWatchingZooKeeper(String connectString, int sessionTimeout, int startupTimeOut) throws IOException {
        super(connectString, sessionTimeout);
        this.requestedSessionTimeout = sessionTimeout;
        this.sessionTimeout = sessionTimeout;

        ZooKeeper zk = new ZooKeeper(connectString, sessionTimeout, new MyWatcher());
        setDelegate(zk);
        ready = true;

        // Wait for connection to come up: if we fail to connect to ZK now, we do not want to continue
        // starting up the Lily node.
        long waitUntil = System.currentTimeMillis() + startupTimeOut;
        int count = 0;
        boolean interrupted = false;
        while (zk.getState() != CONNECTED && waitUntil > System.currentTimeMillis()) {
            try {
                Thread.sleep(POLL_INTERVAL_MS);
            } catch (InterruptedException e) {
                // Stop waiting, but remember the interrupt so it can be handed back to the caller.
                interrupted = true;
                break;
            }
            count++;
            if (count == POLLS_PER_WAIT_MESSAGE) {
                // Output a message every 3s
                log.info("Waiting for ZooKeeper connection to be established");
                count = 0;
            }
        }

        if (zk.getState() != CONNECTED) {
            stopping = true;
            try {
                zk.close();
            } catch (Throwable ignored) {
                // Best effort: we are failing the startup anyway.
            }
            // Restore the interrupt status only after close(), so that close() is not cut short by it.
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
            throw new IOException("Failed to connect with Zookeeper within timeout " + startupTimeOut +
                    ", connection string: " + connectString);
        }

        if (interrupted) {
            Thread.currentThread().interrupt();
        }
        log.info("ZooKeeper session ID is 0x" + Long.toHexString(zk.getSessionId()));
    }

    /**
     * Stops watching the connection state and closes the ZooKeeper handle. After this call,
     * connection loss no longer ends the process.
     */
    @Override
    @PreDestroy
    public void shutdown() {
        super.shutdown();
        stopping = true;
        // Read the field once: the watcher callback may reset it to null concurrently.
        Thread watcherThread = stateWatcherThread;
        if (watcherThread != null) {
            watcherThread.interrupt();
        }
        close();
    }

    /**
     * Registers a callback that is run just before the process is ended because the ZooKeeper
     * session was lost.
     *
     * @param endProcessHook the callback, or null for none
     */
    public void setEndProcessHook(Runnable endProcessHook) {
        this.endProcessHook = endProcessHook;
    }

    /**
     * Ends the JVM with exit code 1, unless a shutdown is already in progress.
     *
     * @param message the reason, written to the log and to stderr
     */
    private void endProcess(String message) {
        if (stopping) {
            return;
        }

        Runnable hook = endProcessHook;
        if (hook != null) {
            try {
                hook.run();
            } catch (RuntimeException e) {
                // A failing hook must not prevent the process from being ended.
                log.error("Error running the end-process hook, continuing with process exit.", e);
            }
        }

        super.shutdown();
        log.error(message);
        System.err.println(message);
        System.exit(1);
    }

    /**
     * Default watcher registered on the ZooKeeper handle: reacts to connection state changes and
     * then forwards the event to the additional default watchers.
     */
    private class MyWatcher implements Watcher {
        @Override
        public void process(WatchedEvent event) {
            if (stopping) {
                return;
            }

            zkEventThread = Thread.currentThread();

            try {
                if (event.getState() == Expired) {
                    endProcess("ZooKeeper session expired, shutting down.");
                } else if (event.getState() == Disconnected) {
                    handleDisconnected();
                } else if (event.getState() == SyncConnected) {
                    handleConnected();
                }
            } catch (InterruptedException e) {
                // someone wants us to stop
                return;
            }

            setConnectedState(event);

            for (Watcher watcher : additionalDefaultWatchers) {
                watcher.process(event);
            }
        }

        /** Marks the connection as lost and (re)starts the thread that guards the disconnect duration. */
        private void handleDisconnected() throws InterruptedException {
            log.warn("Disconnected from ZooKeeper");
            connected = false;
            waitForZk();

            Thread previous = stateWatcherThread;
            if (previous != null) {
                previous.interrupt();
            }
            Thread watcherThread = new Thread(new StateWatcher(), "LilyZkStateWatcher");
            stateWatcherThread = watcherThread;
            watcherThread.start();
        }

        /** Marks the connection as established, stops the guard thread and records the negotiated timeout. */
        private void handleConnected() throws InterruptedException {
            if (firstConnect) {
                firstConnect = false;
                // For the initial connection, it is not interesting to log that we are connected.
            } else {
                log.warn("Connected to ZooKeeper");
            }
            connected = true;
            waitForZk();

            Thread previous = stateWatcherThread;
            if (previous != null) {
                previous.interrupt();
                stateWatcherThread = null;
            }

            int negotiatedSessionTimeout = getSessionTimeout();
            // It could be that we again lost the ZK connection by now, in which case getSessionTimeout() will
            // return 0, and sessionTimeout should not be set to 0 since it is used to decide to shut down (see
            // StateWatcher thread).
            sessionTimeout = negotiatedSessionTimeout > 0 ? negotiatedSessionTimeout : requestedSessionTimeout;
            if (negotiatedSessionTimeout == 0) {
                // We could consider not even distributing this event further, but not sure about that, so
                // just logging it for now.
                log.info("The negotiated ZooKeeper session timeout is " + negotiatedSessionTimeout + ", which" +
                        " indicates that the connection has been lost again.");
            } else if (sessionTimeout != requestedSessionTimeout) {
                log.info("The negotiated ZooKeeper session timeout is different from the requested one." +
                        " Requested: " + requestedSessionTimeout + ", negotiated: " + sessionTimeout);
            }
        }

        /**
         * Blocks until the constructor has handed the ZooKeeper instance to the delegate: events
         * can arrive before that point since the watcher is passed to the ZooKeeper constructor.
         */
        private void waitForZk() throws InterruptedException {
            while (!ready) {
                log.debug("Still waiting for reference to ZooKeeper.");
                Thread.sleep(5);
            }
        }
    }

    /**
     * Runs while disconnected: ends the process when the connection stays lost for longer than
     * twice the session timeout, and stops as soon as we are connected again or get interrupted.
     */
    private class StateWatcher implements Runnable {
        private long startNotConnected;

        @Override
        public void run() {
            startNotConnected = System.currentTimeMillis();

            while (true) {
                // We do not use ZooKeeper.getState() here, because I noticed that when we get a DisConnected
                // event in the watcher, the state still takes some time to move to CONNECTING.
                if (connected) {
                    // We are connected again, so we should no longer bother watching the state
                    return;
                }

                // Using a margin of twice the session timeout per
                // http://markmail.org/thread/uvefxjnuliuqwwph
                int margin = sessionTimeout * 2;
                if (startNotConnected + margin < System.currentTimeMillis()) {
                    endProcess("ZooKeeper connection lost for longer than " + margin +
                            " ms. Session will already be expired by server so shutting down.");
                    return;
                }

                try {
                    Thread.sleep(POLL_INTERVAL_MS);
                } catch (InterruptedException e) {
                    // Someone requested us to stop
                    return;
                }
            }
        }
    }
}