package resa.evaluation.scheduler;

import backtype.storm.Config;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.Nimbus;
import backtype.storm.generated.NotAliveException;
import backtype.storm.generated.RebalanceOptions;
import backtype.storm.scheduler.Topologies;
import backtype.storm.scheduler.TopologyDetails;
import backtype.storm.utils.NimbusClient;
import backtype.storm.utils.Utils;
import org.apache.curator.framework.CuratorFramework;
import org.apache.thrift7.TException;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import resa.util.ConfigUtil;
import resa.util.ResaConfig;
import resa.util.TopologyHelper;

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;

/**
 * Watches each topology's allocation and decides whether a change of allocation
 * should take effect.
 *
 * @author Troy Ding
 */
public class TopologyListener {

    private static final Logger LOG = LoggerFactory.getLogger(TopologyListener.class);

    public static class AllocationContext {
        private long lastRequest;
        private long lastRebalance;
        private Map<String, Integer> compExecutors;
        private TopologyDetails topologyDetails;

        public AllocationContext setCompExecutors(Map<String, Integer> compExecutors) {
            this.compExecutors = compExecutors;
            return this;
        }

        public AllocationContext setTopologyDetails(TopologyDetails topologyDetails) {
            this.topologyDetails = topologyDetails;
            return this;
        }

        public AllocationContext(TopologyDetails topologyDetails, Map<String, Integer> compExecutors) {
            this.lastRequest = System.currentTimeMillis();
            this.compExecutors = compExecutors;
            this.topologyDetails = topologyDetails;
        }

        public AllocationContext updateLastRebalance() {
            lastRequest = (lastRebalance = System.currentTimeMillis());
            return this;
        }

        public AllocationContext updateLastRequest() {
            lastRequest = System.currentTimeMillis();
            return this;
        }
    }

    private CuratorFramework zk;
    private String rootPath;
    private Map<String, AllocationContext> watchingTopologies = new ConcurrentHashMap<>();
    private Nimbus.Client nimbus;
    private final int maxExecutorsPerWorker;
    private final ExecutorService threadPool = Executors.newCachedThreadPool();
    private final int rebalanceWaitingSecs;

    public TopologyListener(Map<String, Object> conf) {
        zk = Utils.newCuratorStarted(conf, (List<String>) conf.get(Config.STORM_ZOOKEEPER_SERVERS),
                conf.get(Config.STORM_ZOOKEEPER_PORT));
        rootPath = (String) conf.getOrDefault(ResaConfig.ZK_ROOT_PATH, "/resa");
        checkZKNode();
        nimbus = NimbusClient.getConfiguredClient(conf).getClient();
        maxExecutorsPerWorker = ConfigUtil.getInt(conf, ResaConfig.MAX_EXECUTORS_PER_WORKER, 10);
        rebalanceWaitingSecs = ConfigUtil.getInt(conf, ResaConfig.REBALANCE_WAITING_SECS, -1);
    }

    /* Ensure the root path exists on zookeeper; create it if missing. */
    private void checkZKNode() {
        try {
            if (zk.checkExists().forPath(rootPath) == null) {
                zk.create().creatingParentsIfNeeded().withMode(CreateMode.PERSISTENT).forPath(rootPath);
            }
        } catch (Exception e) {
            throw new RuntimeException("Check root path failed: " + rootPath, e);
        }
    }

    /**
     * Synchronize the watching topologies with the alive topologies in the cluster. Dead topologies
     * are removed from the watching list.
     *
     * @param topologies alive topologies
     */
    public void synTopologies(Topologies topologies) {
        Set<String> aliveTopoIds = topologies.getTopologies().stream().map(TopologyDetails::getId)
                .collect(Collectors.toSet());
        // remove topologies that are dead
        watchingTopologies.keySet().retainAll(aliveTopoIds);
        topologies.getTopologies().stream().forEach(this::addOrUpdateTopology);
    }

    /**
     * Add a topology to the watching list, or update the corresponding TopologyDetails object
     * if this topology is already under watching.
     *
     * @param topoDetails running details of the topology
     */
    public void addOrUpdateTopology(TopologyDetails topoDetails) {
        String topoId = topoDetails.getId();
        // For a newly joined topology, set a new watcher on it and add it to the watching list.
        // For a topology already being watched, update its running details.
        watchingTopologies.compute(topoId, (topologyId, context) ->
                (context == null ? watchTopology(topoDetails) : context.setTopologyDetails(topoDetails)));
    }

    /* add a watcher on zk to get a notification when a new assignment is set */
    private AllocationContext watchTopology(TopologyDetails topoDetails) {
        // get the current assignment and set a watcher on the zk node
        Map<String, Integer> compExecutors = getCompExecutorsAndWatch(topoDetails.getId());
        if (compExecutors == null) {
            return null;
        }
        LOG.info("Begin to watch topology " + topoDetails.getId());
        return new AllocationContext(topoDetails, compExecutors);
    }

    /* Read the component-to-executor mapping from zk and register a watcher on the node. */
    private Map<String, Integer> getCompExecutorsAndWatch(String topoId) {
        String path = rootPath + '/' + topoId;
        try {
            byte[] data = zk.getData().usingWatcher(new TopologyWatcher(topoId)).forPath(path);
            if (data != null) {
                return (Map<String, Integer>) Utils.deserialize(data);
            }
        } catch (Exception e) {
            // the node may not exist yet, or zk is unreachable; the caller treats null as "not watchable"
        }
        return null;
    }

    /* zk watcher */
    private class TopologyWatcher implements Watcher {
        private String topoId;

        private TopologyWatcher(String topoId) {
            this.topoId = topoId;
        }

        @Override
        public void process(WatchedEvent event) {
            if (event.getType() == Event.EventType.NodeDataChanged) {
                Map<String, Integer> newCompExecutors = getCompExecutorsAndWatch(topoId);
                if (newCompExecutors != null) {
                    watchingTopologies.computeIfPresent(topoId, (id, context) -> {
                        context.setCompExecutors(newCompExecutors);
                        // run the rebalance check in another thread so that watchingTopologies is not blocked
                        threadPool.submit(() -> tryRebalance(id, context));
                        return context;
                    });
                }
                LOG.info("A new assignment is set for topology " + topoId);
            } else {
                // maybe something is wrong with the zk connection, force a re-watch
                watchingTopologies.remove(topoId);
                LOG.warn("Received an exception event for topology " + topoId + ", event type is "
                        + event.getType() + ", state is " + event.getState());
            }
        }
    }

    /* Request a rebalance if the new assignment differs from the running one. */
    private void tryRebalance(String topoId, AllocationContext context) {
        if (needRebalance(context)) {
            LOG.info("Trying rebalance for topology " + topoId);
            requestRebalance(topoId, context);
        } else {
            LOG.info("Rebalance request denied for topology " + topoId);
            context.updateLastRequest();
        }
    }

    /**
     * Check whether a rebalance operation on the specified context is permitted.
     */
    protected boolean needRebalance(AllocationContext context) {
        if (context.compExecutors.isEmpty()) {
            return false;
        }
        return !context.compExecutors.equals(TopologyHelper.getComponentExecutorCount(context.topologyDetails));
    }

    /* Send a rebalance request to nimbus */
    private void requestRebalance(String topoId, AllocationContext context) {
        int totalNumExecutors = context.compExecutors.values().stream().mapToInt(i -> i).sum();
        int numWorkers = totalNumExecutors / maxExecutorsPerWorker;
        if (totalNumExecutors % maxExecutorsPerWorker > maxExecutorsPerWorker / 2) {
            numWorkers++;
        }
        RebalanceOptions options = new RebalanceOptions();
        // set rebalance options
        options.set_num_workers(numWorkers);
        options.set_num_executors(context.compExecutors);
        if (rebalanceWaitingSecs >= 0) {
            options.set_wait_secs(rebalanceWaitingSecs);
        }
        try {
            nimbus.rebalance(TopologyHelper.topologyId2Name(topoId), options);
            LOG.info("Rebalance succeeded for topology " + topoId);
            context.updateLastRebalance();
        } catch (NotAliveException | InvalidTopologyException e) {
            // the topology no longer exists, maybe killed; stop watching it
            watchingTopologies.remove(topoId);
            LOG.warn("Topology does not exist, maybe killed, removed from watching list: " + topoId);
        } catch (TException e) {
            LOG.warn("Rebalance failed for topology " + topoId, e);
        }
    }
}
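
/*
 * Minimal usage sketch (not part of the original source): the listener is intended to be driven by
 * a scheduler that sees the alive topologies on every scheduling round. The class below is
 * hypothetical and only illustrates how synTopologies is expected to be called; the class name and
 * wiring are assumptions, not RESA code.
 */
class TopologyListenerUsageSketch implements backtype.storm.scheduler.IScheduler {

    private TopologyListener topologyListener;

    @Override
    public void prepare(Map conf) {
        // build the listener from the storm/resa configuration handed to the scheduler
        topologyListener = new TopologyListener((Map<String, Object>) conf);
    }

    @Override
    public void schedule(Topologies topologies, backtype.storm.scheduler.Cluster cluster) {
        // keep the watching list in sync with the topologies that are currently alive
        topologyListener.synTopologies(topologies);
    }
}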