/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.alibaba.jstorm.schedule;
import java.util.*;
import com.alibaba.jstorm.blobstore.BlobStore;
import com.alibaba.jstorm.blobstore.BlobStoreUtils;
import com.alibaba.jstorm.blobstore.BlobSynchronizer;
import com.alibaba.jstorm.blobstore.LocalFsBlobStore;
import com.alibaba.jstorm.callback.Callback;
import com.google.common.collect.Sets;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.jstorm.callback.RunnableCallback;
import com.alibaba.jstorm.client.ConfigExtension;
import com.alibaba.jstorm.cluster.Cluster;
import com.alibaba.jstorm.cluster.StormClusterState;
import com.alibaba.jstorm.daemon.nimbus.NimbusData;
import com.alibaba.jstorm.utils.JStormUtils;
import com.alibaba.jstorm.utils.NetWorkUtils;
import backtype.storm.Config;
import backtype.storm.utils.Utils;
/**
 * Periodic task executed by a nimbus process to take part in leader election.
 *
 * <p>The constructor registers this nimbus as a slave in the cluster state and, when no leader
 * exists yet, makes one attempt to become leader. {@link #run()} then loops until
 * {@link #clean()} is called: it retries the election when there is no leader, promotes this
 * process when the cluster state names it as leader, halts the process when a former leader
 * has been replaced, and otherwise refreshes the slave registration (synchronizing blobs first
 * when the blob store is a {@link LocalFsBlobStore}).
 */
public class FollowerRunnable implements Runnable {
    private static final Logger LOG = LoggerFactory.getLogger(FollowerRunnable.class);

    /** Key of the nimbus detail map under which {@link #update_nimbus_detail()} stores its count. */
    public static final String NIMBUS_DIFFER_COUNT_ZK = "nimbus.differ.count.zk";

    /** Seconds {@link #check_nimbus_priority()} waits before comparing this nimbus with the others. */
    public static final Integer SLAVE_NIMBUS_WAIT_TIME = 60;

    private final NimbusData data;
    /** Pause between two iterations of {@link #run()}, in milliseconds (passed to Thread.sleep). */
    private final int sleepTime;
    /** Loop flag of {@link #run()}; cleared by {@link #clean()}. */
    private volatile boolean state = true;
    private final RunnableCallback blobSyncCallback;
    /** Executed once when this nimbus switches from follower to leader. */
    private final Callback leaderCallback;
    /** Address under which this nimbus registers itself: {@code host:thriftPort}. */
    private final String hostPort;

    /**
     * Registers this nimbus as a slave and makes a first attempt to become leader.
     *
     * @param data           shared nimbus state (configuration, cluster state, blob store)
     * @param sleepTime      pause between two iterations of {@link #run()}, in milliseconds
     * @param leaderCallback executed when this nimbus becomes leader
     * @throws RuntimeException if the local address resolves to localhost/127.0.0.1, if the
     *                          registration fails, or if the leader election attempt fails
     */
    @SuppressWarnings("unchecked")
    public FollowerRunnable(final NimbusData data, int sleepTime, Callback leaderCallback) {
        this.data = data;
        this.sleepTime = sleepTime;
        this.leaderCallback = leaderCallback;

        // The configuration decides whether this nimbus is addressed by ip or by hostname.
        final boolean useIp = ConfigExtension.isNimbusUseIp(data.getConf());
        final String host = useIp ? NetWorkUtils.ip() : NetWorkUtils.hostname();
        this.hostPort = host + ":" + Utils.getInt(data.getConf().get(Config.NIMBUS_THRIFT_PORT));

        final boolean isLocalIp = useIp ? host.equals("127.0.0.1") : host.equals("localhost");
        if (isLocalIp) {
            // A loopback address is rejected before anything is registered.
            IllegalStateException e = new IllegalStateException("the hostname which nimbus get is localhost");
            LOG.error("failed to get nimbus host!", e);
            throw e;
        }

        final StormClusterState zkClusterState = data.getStormClusterState();
        try {
            zkClusterState.update_nimbus_slave(hostPort, data.uptime());
            zkClusterState.update_nimbus_detail(hostPort, null);
        } catch (Exception e) {
            LOG.error("failed to register nimbus host!", e);
            // Keep the cause so the caller can see why the registration failed.
            throw new RuntimeException(e);
        }

        try {
            if (!zkClusterState.leader_existed()) {
                this.tryToBeLeader(data.getConf());
            }
        } catch (Exception e1) {
            LOG.error("try to be leader error.", e1);
            // Undo the registration made above; a failure here must not hide the original error.
            try {
                LOG.info("remove registered nimbus information due to task errors");
                zkClusterState.unregister_nimbus_host(hostPort);
                zkClusterState.unregister_nimbus_detail(hostPort);
            } catch (Exception e2) {
                LOG.warn("failed to remove registered nimbus information", e2);
            }
            throw new RuntimeException(e1);
        }

        blobSyncCallback = new RunnableCallback() {
            @Override
            public void run() {
                blobSync();
            }
        };
        if (data.getBlobStore() instanceof LocalFsBlobStore) {
            try {
                // register call back for blob-store
                zkClusterState.blobstore(blobSyncCallback);
                setupBlobstore();
            } catch (Exception e) {
                // Best effort: a failed blob store setup is logged and construction continues.
                LOG.error("setup blob store error", e);
            }
        }
    }

    /**
     * Sets up blobstore state for all current keys: deletes local blobs whose key is not active
     * in the cluster state and registers the local version of every key that is.
     *
     * @throws Exception if the blob store or the cluster state cannot be accessed
     */
    private void setupBlobstore() throws Exception {
        BlobStore blobStore = data.getBlobStore();
        StormClusterState clusterState = data.getStormClusterState();
        Set<String> localSetOfKeys = Sets.newHashSet(blobStore.listKeys());
        Set<String> allKeys = Sets.newHashSet(clusterState.active_keys());
        Set<String> localAvailableActiveKeys = Sets.intersection(localSetOfKeys, allKeys);
        // keys on local but not on zk, we will delete it
        Set<String> keysToDelete = Sets.difference(localSetOfKeys, allKeys);
        LOG.debug("deleting keys not on zookeeper {}", keysToDelete);
        for (String key : keysToDelete) {
            blobStore.deleteBlob(key);
        }
        LOG.debug("Creating list of key entries for blobstore inside zookeeper {} local {}",
                allKeys, localAvailableActiveKeys);
        for (String key : localAvailableActiveKeys) {
            int versionForKey = BlobStoreUtils.getVersionForKey(key, data.getNimbusHostPortInfo(), data.getConf());
            clusterState.setup_blobstore(key, data.getNimbusHostPortInfo(), versionForKey);
        }
    }

    /**
     * Tells whether the given leader address designates this nimbus.
     *
     * @param zkMaster leader address as {@code host:port}; may be blank
     * @return {@code true} if {@code zkMaster} equals this nimbus' {@code host:port}
     *         (ignoring case) or if its host part matches the local ip; {@code false} for a
     *         blank address or one without a host part
     */
    public boolean isLeader(String zkMaster) {
        if (StringUtils.isBlank(zkMaster)) {
            return false;
        }
        if (hostPort.equalsIgnoreCase(zkMaster)) {
            return true;
        }
        // Two nimbus running on the same node isn't allowed
        // so just checks ip is enough here
        String[] part = zkMaster.split(":");
        if (part.length == 0) {
            // split() drops trailing empty strings, so an address like ":" yields no parts at all.
            return false;
        }
        return NetWorkUtils.equals(part[0], NetWorkUtils.ip());
    }

    /**
     * Follower loop; runs until {@link #clean()} is called or the thread is interrupted.
     *
     * <p>Each iteration sleeps for {@code sleepTime} and then, depending on the cluster state:
     * retries the election when no leader exists; promotes this process (unregisters the slave
     * entries, marks {@code data} as leader and runs the leader callback) when the registered
     * leader is this nimbus; halts the process when this process was leader but another host
     * is now registered; otherwise synchronizes blobs (local blob store only) and refreshes the
     * slave registration.
     */
    @Override
    public void run() {
        LOG.info("Follower thread starts!");
        while (state) {
            StormClusterState zkClusterState = data.getStormClusterState();
            try {
                Thread.sleep(sleepTime);
                if (!zkClusterState.leader_existed()) {
                    this.tryToBeLeader(data.getConf());
                    continue;
                }

                String master = zkClusterState.get_leader_host();
                if (isLeader(master)) {
                    if (!data.isLeader()) {
                        // First iteration that sees this nimbus as leader: stop being a slave.
                        zkClusterState.unregister_nimbus_host(hostPort);
                        zkClusterState.unregister_nimbus_detail(hostPort);
                        data.setLeader(true);
                        leaderCallback.execute();
                    }
                    continue;
                }
                if (data.isLeader()) {
                    // This process believed it was leader but another host is registered.
                    LOG.info("New zk master is {}", master);
                    JStormUtils.halt_process(1, "Lost zk master node, halt process");
                    return;
                }

                // here the nimbus is not leader
                if (data.getBlobStore() instanceof LocalFsBlobStore) {
                    blobSync();
                }
                zkClusterState.update_nimbus_slave(hostPort, data.uptime());
                update_nimbus_detail();
            } catch (InterruptedException e) {
                // Treat interruption as a stop request. The flag is restored for the owner of the
                // thread; continuing the loop would make every following sleep fail immediately.
                Thread.currentThread().interrupt();
                break;
            } catch (Exception e) {
                if (state) {
                    LOG.error("Unknown exception ", e);
                }
            }
        }
        LOG.info("Follower thread has been closed!");
    }

    /** Requests {@link #run()} to stop; the loop ends when it next evaluates its condition. */
    public void clean() {
        state = false;
    }

    /**
     * Synchronizes the blob store with the keys listed in the cluster state, unless this
     * process is leader. Errors are logged and not propagated.
     */
    private synchronized void blobSync() {
        if (data.isLeader()) {
            return;
        }
        try {
            BlobStore blobStore = data.getBlobStore();
            StormClusterState clusterState = data.getStormClusterState();
            Set<String> localKeys = Sets.newHashSet(blobStore.listKeys());
            // Passing the callback again also hands it back to the cluster state on each sync.
            Set<String> zkKeys = Sets.newHashSet(clusterState.blobstore(blobSyncCallback));
            BlobSynchronizer blobSynchronizer = new BlobSynchronizer(blobStore, data.getConf());
            blobSynchronizer.setNimbusInfo(data.getNimbusHostPortInfo());
            blobSynchronizer.setBlobStoreKeySet(localKeys);
            blobSynchronizer.setZookeeperKeySet(zkKeys);
            blobSynchronizer.syncBlobs();
        } catch (Exception e) {
            LOG.error("blob sync error", e);
        }
    }

    /**
     * Competes for leadership if {@link #check_nimbus_priority()} allows it.
     *
     * @param conf nimbus configuration; only forwarded to the retry performed by the callback
     * @throws Exception if the priority check or the cluster state call fails
     */
    private void tryToBeLeader(final Map conf) throws Exception {
        if (!check_nimbus_priority()) {
            LOG.info("This nimbus can't be leader");
            return;
        }

        // Handed to try_to_be_leader; presumably invoked when the master node changes so the
        // election is retried -- confirm against StormClusterState.
        RunnableCallback masterCallback = new RunnableCallback() {
            @Override
            public void run() {
                try {
                    tryToBeLeader(conf);
                } catch (Exception e) {
                    LOG.error("tryToBeLeader error", e);
                    // NOTE(review): the meaning of exit status 30 is not explained in this file.
                    JStormUtils.halt_process(30, "Cant't be master" + e.getMessage());
                }
            }
        };
        LOG.info("This nimbus can be leader");
        data.getStormClusterState().try_to_be_leader(Cluster.MASTER_SUBTREE, hostPort, masterCallback);
    }

    /**
     * Compared with other nimbus to get priority of this nimbus.
     *
     * <p>A nimbus whose differ count is 0 may become leader at once. Otherwise this method
     * blocks for {@link #SLAVE_NIMBUS_WAIT_TIME} seconds and then refuses leadership if no
     * follower detail is listed or if any other follower reports a smaller differ count.
     *
     * @return {@code true} if this nimbus may try to become leader
     * @throws Exception if the cluster state cannot be accessed or the wait is interrupted
     */
    private boolean check_nimbus_priority() throws Exception {
        int gap = update_nimbus_detail();
        if (gap == 0) {
            return true;
        }

        // Wait in 10 second steps, logging the remaining time before each step.
        int left = SLAVE_NIMBUS_WAIT_TIME;
        while (left > 0) {
            LOG.info("nimbus.differ.count.zk is {}, so after {} seconds, nimbus will try to be leader!", gap, left);
            Thread.sleep(10 * 1000);
            left -= 10;
        }

        StormClusterState zkClusterState = data.getStormClusterState();
        List<String> followers = zkClusterState.list_dirs(Cluster.NIMBUS_SLAVE_DETAIL_SUBTREE, false);
        if (followers == null || followers.isEmpty()) {
            return false;
        }
        for (String follower : followers) {
            if (follower == null || follower.equals(hostPort)) {
                continue;
            }
            Map bMap = zkClusterState.get_nimbus_detail(follower, false);
            if (bMap == null) {
                continue;
            }
            Object object = bMap.get(NIMBUS_DIFFER_COUNT_ZK);
            if (object != null && (JStormUtils.parseInt(object)) < gap) {
                LOG.info("Current node can't be leader, due to {} has higher priority", follower);
                return false;
            }
        }
        return true;
    }

    /**
     * Publishes this nimbus' differ count under {@link #NIMBUS_DIFFER_COUNT_ZK} in its detail map.
     *
     * @return number of keys active in the cluster state but missing from the local blob store;
     *         always 0 when the blob store is not a {@link LocalFsBlobStore}
     * @throws Exception if the blob store or the cluster state cannot be accessed
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private int update_nimbus_detail() throws Exception {
        //update count = count of zk's binary files - count of nimbus's binary files
        StormClusterState zkClusterState = data.getStormClusterState();
        // if we use other blobstore, such as HDFS, all nimbus slave can be leader
        // but if we use local blobstore, we should count topologies files
        int diffCount = 0;
        if (data.getBlobStore() instanceof LocalFsBlobStore) {
            Set<String> keysOnZk = Sets.newHashSet(zkClusterState.active_keys());
            Set<String> keysOnLocal = Sets.newHashSet(data.getBlobStore().listKeys());
            // we count number of keys which is on zk but not on local
            diffCount = Sets.difference(keysOnZk, keysOnLocal).size();
        }

        // Reuse the existing detail map so that entries other than the differ count are kept.
        Map mtmp = zkClusterState.get_nimbus_detail(hostPort, false);
        if (mtmp == null) {
            mtmp = new HashMap();
        }
        mtmp.put(NIMBUS_DIFFER_COUNT_ZK, diffCount);
        zkClusterState.update_nimbus_detail(hostPort, mtmp);
        LOG.debug("update nimbus details {}", mtmp);
        return diffCount;
    }

    /**
     * Check whether current node is master.
     *
     * <p>Polls the cluster state up to 10 times, sleeping {@code sleepTime} between polls, and
     * returns as soon as the registered leader equals this nimbus' {@code host:port}. If that
     * never happens the process is halted. This method is not called from within this class.
     *
     * @throws Exception if the cluster state cannot be accessed
     */
    private void checkOwnMaster() throws Exception {
        final int retryTimes = 10;
        StormClusterState zkClient = data.getStormClusterState();
        for (int i = 0; i < retryTimes; i++, JStormUtils.sleepMs(sleepTime)) {
            if (!zkClient.leader_existed()) {
                continue;
            }
            String zkHost = zkClient.get_leader_host();
            if (hostPort.equals(zkHost)) {
                // current process own master
                return;
            }
            LOG.warn("Current nimbus has started thrift, but fail to set as leader in zk:" + zkHost);
        }
        String err = "Current nimbus failed to set as leader in zk, halting process";
        LOG.error(err);
        JStormUtils.halt_process(0, err);
    }
}