package com.xiaomi.infra.chronos.client;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Properties;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;
import com.xiaomi.infra.chronos.client.ChronosClientWatcher;
import com.xiaomi.infra.chronos.generated.ChronosService;
/**
* ChronosClientWatcher reads the master znode to find the active chronos server and keeps a
* connection with it, reconnecting lazily when a request fails.
*/
public class ChronosClientWatcher implements Watcher {
private static final Log LOG = LogFactory.getLog(ChronosClientWatcher.class);
private final String zkQuorum;
private final String baseZnode;
private final String masterZnode;
private final int sessionTimeout;
private final int connectRetryTimes;
private ZooKeeper zooKeeper;
private TTransport transport;
private TProtocol protocol;
private ChronosService.Client client;
/**
* Construct ChronosClientWatcher with properties.
*
* @param properties the properties of ChronosClientWatcher
* @throws IOException when error to connect ZooKeeper or ChronosServer
*/
public ChronosClientWatcher(Properties properties) throws IOException {
zkQuorum = properties.getProperty(ChronosClient.ZK_QUORUM, "127.0.0.1:2181");
baseZnode = "/chronos/" + properties.getProperty(ChronosClient.CLUSTER_NAME, "default-cluster");
masterZnode = baseZnode + "/master";
sessionTimeout = Integer.parseInt(properties.getProperty(ChronosClient.SESSION_TIMEOUT, "5000"));
connectRetryTimes = Integer.parseInt(properties.getProperty(ChronosClient.CONNECT_RETRY_TIMES, "10"));
connectZooKeeper();
connectChronosServer();
}
/**
* Initialize ZooKeeper object and connect with ZooKeeper with retries.
*
* @throws IOException when error to connect with ZooKeeper after retrying
*/
private void connectZooKeeper() throws IOException {
for (int i = 0; i <= connectRetryTimes; i++) {
try {
zooKeeper = new ZooKeeper(zkQuorum, sessionTimeout, this);
LOG.info("Connected ZooKeeper " + zkQuorum);
break;
} catch (IOException e) {
if (i == connectRetryTimes) {
throw new IOException("Can't connect ZooKeeper after retrying", e);
}
LOG.info("Exception to connect ZooKeeper, retry " + (i + 1) + " times");
}
}
}
/**
* Reconnect with ZooKeeper.
*
* @throws InterruptedException when interrupt close ZooKeeper object
* @throws IOException when error to connect with ZooKeeper
*/
private void reconnectZooKeeper() throws InterruptedException, IOException {
LOG.info("Try to reconnect ZooKeeper " + zkQuorum);
if (zooKeeper != null) {
zooKeeper.close();
}
connectZooKeeper();
}
/**
* Access ZooKeeper to get current master ChronosServer and connect with it.
*
* @throws IOException when error to access ZooKeeper or connect with ChronosServer
*/
private void connectChronosServer() throws IOException {
LOG.info("Try to connect chronos server");
byte[] hostPortBytes = getData(this, masterZnode);
if (hostPortBytes != null) {
String hostPort = new String(hostPortBytes); // e.g. 127.0.0.0_2181
LOG.info("Find the active chronos server in " + hostPort);
try {
transport = new TSocket(hostPort.split("_")[0], Integer.parseInt(hostPort.split("_")[1]));
transport.open();
protocol = new TBinaryProtocol(transport);
client = new ChronosService.Client(protocol);
} catch (TException e) {
new IOException("Exception to connect chronos server in " + hostPort);
}
} else {
throw new IOException("The data of " + masterZnode + " is null");
}
}
/**
* Reconnect with ChronosServer.
*
* @throws IOException when error to connect ChronosServer
*/
private void reconnectChronosServer() throws IOException {
LOG.info("Try to reconnect chronos server");
if (transport != null) {
transport.close();
}
connectChronosServer();
}
/**
* Send RPC request to get timestamp from ChronosServer. Use lazy strategy to detect failure.
* If request fails, reconnect ChronosServer. If request fails again, reconnect ZooKeeper.
*
* @param range the number of timestamps
* @return the first timestamp to use
* @throws IOException when error to connect ChronosServer or ZooKeeper
*/
public long getTimestamps(int range) throws IOException {
long timestamp;
try {
timestamp = client.getTimestamps(range);
} catch (TException e) {
LOG.info("Can't get timestamp, try to connect the active chronos server");
try {
reconnectChronosServer();
return client.getTimestamps(range);
} catch (Exception e1) {
LOG.info("Can't connect chronos server, try to connect ZooKeeper firstly");
try {
reconnectZooKeeper();
reconnectChronosServer();
return client.getTimestamps(range);
} catch (Exception e2) {
throw new IOException("Error to get timestamp after reconnecting ZooKeeper and chronos server", e2);
}
}
}
return timestamp;
}
/**
* Provider the convenient method to get single timestamp.
*
* @return the allocated timestamp
* @throws IOException when error to get timestamp from ChronosServer
*/
public long getTimestamp() throws IOException {
return getTimestamps(1);
}
/**
* Deal with connection event, just wait for a while when connected.
*
* @param event ZooKeeper events
*/
@Override
public void process(WatchedEvent event) {
if (LOG.isDebugEnabled()) {
LOG.info("Received ZooKeeper Event, " + "type=" + event.getType() + ", " + "state="
+ event.getState() + ", " + "path=" + event.getPath());
}
switch (event.getType()) {
case None: {
switch (event.getState()) {
case SyncConnected: {
try {
waitToInitZooKeeper(2000); // init zookeeper in another thread, wait for a while
} catch (Exception e) {
LOG.error("Error to init ZooKeeper object after sleeping 2000 ms, reconnect ZooKeeper");
try {
reconnectZooKeeper();
} catch (Exception e1) {
LOG.error("Error to reconnect with ZooKeeper", e1);
}
}
break;
}
default:
break;
}
break;
}
default:
break;
}
}
/**
* Wait to init ZooKeeper object, only sleep when it's null.
*
* @param maxWaitMillis the max sleep time
* @throws Exception if ZooKeeper object is still null
*/
public void waitToInitZooKeeper(long maxWaitMillis) throws Exception {
long finished = System.currentTimeMillis() + maxWaitMillis;
while (System.currentTimeMillis() < finished) {
if (this.zooKeeper != null) {
return;
}
try {
Thread.sleep(1);
} catch (InterruptedException e) {
throw new Exception(e);
}
}
throw new Exception();
}
/**
* Get the data from znode.
*
* @param chronosClientWatcher the ZooKeeper watcher
* @param znode the znode you want to access
* @return the byte array of value in znode
* @throws IOException when error to access ZooKeeper
*/
public byte[] getData(ChronosClientWatcher chronosClientWatcher, String znode)
throws IOException {
byte[] data = null;
for (int i = 0; i <= connectRetryTimes; i++) {
try {
data = chronosClientWatcher.getZooKeeper().getData(znode, null, null);
break;
} catch (Exception e) {
LOG.info("Exceptioin to get data from ZooKeeper, retry " + i +" times");
if (i == connectRetryTimes) {
throw new IOException("Error when getting data from " + znode + " after retrying");
}
}
}
return data;
}
/**
* Close the ZooKeeper object.
*/
public void close() {
if (zooKeeper != null) {
try {
zooKeeper.close();
} catch (InterruptedException e) {
LOG.error("Interrupt to close zookeeper connection", e);
}
}
}
public ZooKeeper getZooKeeper() {
return zooKeeper;
}
}