package com.xiaomi.infra.chronos.zookeeper;
import java.io.IOException;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;
import com.xiaomi.infra.chronos.ChronosServerWatcher;
/**
* Like ActiveMasterManager in HBase, FailoverWatcher implements master/backup servers switching
* with ZooKeeper. It will store the information of servers in znode and deal with the detail of
* blocking and notification for leader election.
*/
public class FailoverWatcher implements Watcher {

  private static final Log LOG = LogFactory.getLog(FailoverWatcher.class);

  /** The raw settings this watcher was constructed with. */
  protected final Properties properties;
  /** Root znode, read from {@code FailoverServer.BASE_ZNODE}. */
  protected final String baseZnode;
  /** {@code baseZnode + "/master"}; created by the server that wins the election. */
  protected final String masterZnode;
  /** {@code baseZnode + "/backup-servers"}; parent of the per-backup-server znodes. */
  protected final String backupServersZnode;
  /** ZooKeeper connect string, read from {@code FailoverServer.ZK_QUORUM}. */
  protected final String zkQuorum;
  /** Session timeout handed to the ZooKeeper constructor. */
  protected final int sessionTimeout;
  /** Number of additional connection attempts after the first one fails. */
  protected final int connectRetryTimes;
  /** Whether the JAAS/krb5 system properties are set before connecting. */
  protected final boolean isZkSecure;
  protected final String zkAdmin;
  protected final String jaasFile;
  protected final String krb5File;
  /** Address of this server; its string form is stored as the data of the election znodes. */
  protected final HostPort hostPort;

  /**
   * The ZooKeeper client. Declared volatile because it is assigned by the thread running
   * {@link #connectZooKeeper()} while {@link #waitToInitZooKeeper(long)} polls it, possibly from
   * another thread (it is invoked from {@link #processConnection(WatchedEvent)}).
   */
  protected volatile ZooKeeper zooKeeper;

  /**
   * Election state flag. It is also used as the monitor that {@link #blockUntilActive()} waits on
   * and that {@link #handleMasterNodeChange()} notifies.
   */
  private final AtomicBoolean hasActiveServer = new AtomicBoolean(false);

  /**
   * Initialize FailoverWatcher with properties.
   *
   * @param properties the basic settings for FailoverWatcher
   * @param canInitZnode should create the base znode or not
   * @throws IOException throw when can't connect with ZooKeeper
   */
  public FailoverWatcher(Properties properties, boolean canInitZnode) throws IOException {
    this.properties = properties;

    baseZnode = properties.getProperty(FailoverServer.BASE_ZNODE, "/failover-server");
    masterZnode = baseZnode + "/master";
    backupServersZnode = baseZnode + "/backup-servers";

    zkQuorum = properties.getProperty(FailoverServer.ZK_QUORUM, "127.0.0.1:2181");
    sessionTimeout = Integer.parseInt(properties
        .getProperty(FailoverServer.SESSION_TIMEOUT, "5000"));
    connectRetryTimes = Integer.parseInt(properties.getProperty(FailoverServer.CONNECT_RETRY_TIMES,
        "10"));

    isZkSecure = Boolean.parseBoolean(properties.getProperty(FailoverServer.ZK_SECURE, "false"));
    zkAdmin = properties.getProperty(FailoverServer.ZK_ADMIN, "h_chronos_admin");
    jaasFile = properties.getProperty(FailoverServer.JAAS_FILE, "../conf/jaas.conf");
    krb5File = properties.getProperty(FailoverServer.KRB5_FILE, "/etc/krb5.conf");

    String serverHost = properties.getProperty(FailoverServer.SERVER_HOST, "127.0.0.1");
    int serverPort = Integer.parseInt(properties.getProperty(FailoverServer.SERVER_PORT, "10086"));
    hostPort = new HostPort(serverHost, serverPort);

    if (isZkSecure) {
      // These JVM-wide properties must be set before the ZooKeeper client is created.
      LOG.info("Connect with secure ZooKeeper cluster, use " + jaasFile + " and " + krb5File);
      System.setProperty("java.security.auth.login.config", jaasFile);
      System.setProperty("java.security.krb5.conf", krb5File);
    }

    connectZooKeeper();

    if (canInitZnode) {
      initZnode();
    }
  }

  /**
   * Construct FailoverWatcher with properties, create znode by default.
   *
   * @param properties the properties of FailoverWatcher
   * @throws IOException when error to construct FailoverWatcher
   */
  public FailoverWatcher(Properties properties) throws IOException {
    this(properties, true);
  }

  /**
   * Connect with ZooKeeper with retries. One attempt is always made; up to
   * {@code connectRetryTimes} further attempts follow when constructing the client fails.
   *
   * @throws IOException when error to construct ZooKeeper object after retrying
   */
  protected void connectZooKeeper() throws IOException {
    LOG.info("Connecting ZooKeeper " + zkQuorum);

    // A negative retry setting must not skip the connection entirely and leave zooKeeper null.
    final int maxRetries = Math.max(0, connectRetryTimes);

    for (int i = 0; i <= maxRetries; i++) {
      try {
        zooKeeper = new ZooKeeper(zkQuorum, sessionTimeout, this);
        break;
      } catch (IOException e) {
        if (i == maxRetries) {
          throw new IOException("Can't connect ZooKeeper after retrying", e);
        }
        LOG.error("Exception to connect ZooKeeper, retry " + (i + 1) + " times", e);
      }
    }
  }

  /**
   * Initialize the base znodes of chronos: {@code baseZnode} and {@code backupServersZnode}.
   * Any failure is logged as fatal and terminates the JVM.
   */
  protected void initZnode() {
    try {
      ZooKeeperUtil.createAndFailSilent(this, baseZnode);
      ZooKeeperUtil.createAndFailSilent(this, backupServersZnode);
    } catch (Exception e) {
      LOG.fatal("Error to create znode " + baseZnode + " and " + backupServersZnode
          + ", exit immediately", e);
      // NOTE(review): exit status 0 is reported even though this is a failure path - confirm
      // whether a non-zero status is wanted by whatever supervises this process.
      System.exit(0);
    }
  }

  /**
   * Dispatch a ZooKeeper event to the matching {@code process*} handler. Override this method to
   * deal with events for leader election.
   *
   * @param event the ZooKeeper event
   */
  @Override
  public void process(WatchedEvent event) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Received ZooKeeper Event, " + "type=" + event.getType() + ", " + "state="
          + event.getState() + ", " + "path=" + event.getPath());
    }

    switch (event.getType()) {
    case None: {
      // Events of type None carry connection state changes rather than znode changes.
      processConnection(event);
      break;
    }
    case NodeCreated: {
      processNodeCreated(event.getPath());
      break;
    }
    case NodeDeleted: {
      processNodeDeleted(event.getPath());
      break;
    }
    case NodeDataChanged: {
      processDataChanged(event.getPath());
      break;
    }
    case NodeChildrenChanged: {
      processNodeChildrenChanged(event.getPath());
      break;
    }
    default:
      break;
    }
  }

  /**
   * Deal with connection event, exit current process if auth fails or session expires.
   *
   * @param event the ZooKeeper event
   */
  protected void processConnection(WatchedEvent event) {
    switch (event.getState()) {
    case SyncConnected:
      LOG.info(hostPort.getHostPort() + " sync connect from ZooKeeper");
      try {
        waitToInitZooKeeper(2000); // init zookeeper in another thread, wait for a while
      } catch (Exception e) {
        LOG.fatal("Error to init ZooKeeper object after sleeping 2000 ms, exit immediately", e);
        System.exit(0);
      }
      break;
    case Disconnected: // be triggered when kill the server or the leader of zk cluster
      LOG.warn(hostPort.getHostPort() + " received disconnected from ZooKeeper");
      break;
    case AuthFailed:
      LOG.fatal(hostPort.getHostPort() + " auth fail, exit immediately");
      System.exit(0);
      break; // explicit, so this case does not rely on falling through into Expired
    case Expired:
      LOG.fatal(hostPort.getHostPort() + " received expired from ZooKeeper, exit immediately");
      System.exit(0);
      break;
    default:
      break;
    }
  }

  /**
   * Deal with create node event, just call the leader election. Only the master znode is handled.
   *
   * @param path which znode is created
   */
  protected void processNodeCreated(String path) {
    // Constant-first comparison tolerates a null path.
    if (masterZnode.equals(path)) {
      LOG.info(masterZnode + " created and try to become active master");
      handleMasterNodeChange();
    }
  }

  /**
   * Deal with delete node event, just call the leader election. Only the master znode is handled.
   *
   * @param path which znode is deleted
   */
  protected void processNodeDeleted(String path) {
    // Constant-first comparison tolerates a null path.
    if (masterZnode.equals(path)) {
      LOG.info(masterZnode + " deleted and try to become active master");
      handleMasterNodeChange();
    }
  }

  /**
   * Do nothing when data changes, should be overridden.
   *
   * @param path which znode's data is changed
   */
  protected void processDataChanged(String path) {
  }

  /**
   * Do nothing when children znode changes, should be overridden.
   *
   * @param path which znode's children is changed.
   */
  protected void processNodeChildrenChanged(String path) {
  }

  /**
   * Implement the logic of leader election: re-check the master znode, record whether an active
   * master exists, and wake the threads blocked in {@link #blockUntilActive()} when it is gone.
   */
  private void handleMasterNodeChange() {
    try {
      // The flag object doubles as the monitor that blockUntilActive() waits on.
      synchronized (hasActiveServer) {
        if (ZooKeeperUtil.watchAndCheckExists(this, masterZnode)) {
          // A master node exists, there is an active master
          if (LOG.isDebugEnabled()) {
            LOG.debug("A master is now available");
          }
          hasActiveServer.set(true);
        } else {
          // Node is no longer there, cluster does not have an active master
          if (LOG.isDebugEnabled()) {
            LOG.debug("No master available. Notifying waiting threads");
          }
          hasActiveServer.set(false);
          // Notify any thread waiting to become the active master
          hasActiveServer.notifyAll();
        }
      }
    } catch (KeeperException ke) {
      // Only logged; the flag keeps its previous value and nobody is notified.
      LOG.error("Received an unexpected KeeperException, aborting", ke);
    }
  }

  /**
   * Implement the logic of server to wait to become active master. Repeatedly tries to create the
   * master znode; while another server holds it, registers this server under the backup znode and
   * blocks until {@link #handleMasterNodeChange()} reports that the master znode is gone.
   *
   * @return true once this server has created the master znode; false if a KeeperException
   *         occurs or the waiting thread is interrupted
   */
  public boolean blockUntilActive() {
    while (true) {
      final String address = hostPort.getHostPort();
      final String backupZnode = backupServersZnode + "/" + address;

      try {
        if (ZooKeeperUtil.createEphemeralNodeAndWatch(this, masterZnode, address.getBytes())) {
          // If we were a backup master before, delete our ZNode from the backup
          // master directory since we are the active now
          LOG.info("Deleting ZNode for " + backupZnode + " from backup master directory");
          ZooKeeperUtil.deleteNodeFailSilent(this, backupZnode);

          // We are the master, return
          hasActiveServer.set(true);
          LOG.info("Become active master in " + address);
          return true;
        }

        // NOTE(review): this write is not made under the hasActiveServer monitor, so it may race
        // with a notification from handleMasterNodeChange() - confirm this cannot lose a wakeup.
        hasActiveServer.set(true);

        /*
         * Add a ZNode for ourselves in the backup master directory since we are not the active
         * master. If we become the active master later, the branch above deletes this node
         * explicitly. The node is created through createEphemeralNodeAndWatch, so it is presumably
         * removed by ZooKeeper when our session ends.
         */
        LOG.info("Adding ZNode for " + backupZnode + " in backup master directory");
        ZooKeeperUtil.createEphemeralNodeAndWatch(this, backupZnode, address.getBytes());

        // we start the server with the same ip_port stored in master znode, that means we want to
        // restart the server?
        String msg;
        byte[] bytes = ZooKeeperUtil.getDataAndWatch(this, masterZnode);
        if (bytes == null) {
          msg = ("A master was detected, but went down before its address "
              + "could be read. Attempting to become the next active master");
        } else {
          if (address.equals(new String(bytes))) {
            msg = ("Current master has this master's address, " + address
                + "; master was restarted? Deleting node.");
            // Hurry along the expiration of the znode.
            ZooKeeperUtil.deleteNode(this, masterZnode);
          } else {
            msg = "Another master " + new String(bytes) + " is the active master, "
                + address + "; waiting to become the next active master";
          }
        }
        LOG.info(msg);
      } catch (KeeperException ke) {
        LOG.error("Received an unexpected KeeperException when block to become active, aborting",
            ke);
        return false;
      }

      // Sleep until the watcher reports that the master znode is gone, then retry the election.
      synchronized (hasActiveServer) {
        while (hasActiveServer.get()) {
          try {
            hasActiveServer.wait();
          } catch (InterruptedException e) {
            // We expect to be interrupted when a master dies, will fall out if so.
            // NOTE(review): the interrupt is consumed here and only reported through the false
            // return value; the thread's interrupt status is intentionally left cleared so that
            // follow-up calls such as close() are not interrupted - confirm with callers.
            if (LOG.isDebugEnabled()) {
              LOG.debug("Interrupted while waiting to be master");
            }
            return false;
          }
        }
      }
    }
  }

  /**
   * Close the ZooKeeper object. Does nothing when the client was never created.
   */
  public void close() {
    if (zooKeeper != null) {
      try {
        zooKeeper.close();
      } catch (InterruptedException e) {
        LOG.error("Interrupt when closing zookeeper connection", e);
        // Keep the interrupt visible to the caller instead of silently swallowing it.
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Wait to init ZooKeeper object, only sleep when it's null. Returns immediately when the object
   * is already set, regardless of {@code maxWaitMillis}.
   *
   * @param maxWaitMillis the max sleep time
   * @throws Exception if ZooKeeper object is still null after the wait, or if the waiting thread
   *         is interrupted (the interrupt status is restored before throwing)
   */
  public void waitToInitZooKeeper(long maxWaitMillis) throws Exception {
    final long deadline = System.currentTimeMillis() + maxWaitMillis;

    while (true) {
      // Check the field first so that a zero or negative wait still succeeds when initialized.
      if (this.zooKeeper != null) {
        return;
      }
      if (System.currentTimeMillis() >= deadline) {
        break;
      }
      try {
        Thread.sleep(1);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new Exception("Interrupted while waiting for the ZooKeeper object to be initialized",
            e);
      }
    }

    throw new Exception("ZooKeeper object is still null after waiting " + maxWaitMillis + " ms");
  }

  public String getBaseZnode() {
    return baseZnode;
  }

  public String getMasterZnode() {
    return masterZnode;
  }

  public String getBackupServersZnode() {
    return backupServersZnode;
  }

  public ZooKeeper getZooKeeper() {
    return zooKeeper;
  }

  /**
   * @return the last election state recorded by this watcher
   */
  public boolean hasActiveServer() {
    return hasActiveServer.get();
  }

  public HostPort getHostPort() {
    return hostPort;
  }

  public Properties getProperties() {
    return properties;
  }

  public boolean isZkSecure() {
    return isZkSecure;
  }

  public String getZkAdmin() {
    return zkAdmin;
  }
}