package com.intrbiz.bergamot.cluster;

import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import org.apache.log4j.Logger;

import com.hazelcast.config.Config;
import com.hazelcast.config.XmlConfigBuilder;
import com.hazelcast.core.Cluster;
import com.hazelcast.core.Hazelcast;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.core.ILock;
import com.hazelcast.core.IMap;
import com.hazelcast.core.IQueue;
import com.hazelcast.core.InitialMembershipEvent;
import com.hazelcast.core.InitialMembershipListener;
import com.hazelcast.core.Member;
import com.hazelcast.core.MemberAttributeEvent;
import com.hazelcast.core.MembershipEvent;
import com.hazelcast.query.Predicates;
import com.intrbiz.Util;
import com.intrbiz.bergamot.cluster.controller.BergamotController;
import com.intrbiz.bergamot.cluster.migration.ClusterMigration;
import com.intrbiz.bergamot.cluster.migration.DeregisterPoolTask;
import com.intrbiz.bergamot.cluster.migration.RegisterPoolTask;
import com.intrbiz.bergamot.cluster.model.ProcessingPool;
import com.intrbiz.bergamot.cluster.util.OwnerPredicate;
import com.intrbiz.bergamot.cluster.util.SitePredicate;
import com.intrbiz.bergamot.command.CommandProcessor;
import com.intrbiz.bergamot.command.DefaultCommandProcessor;
import com.intrbiz.bergamot.data.BergamotDB;
import com.intrbiz.bergamot.model.Site;
import com.intrbiz.bergamot.model.message.cluster.manager.ClusterManagerRequest;
import com.intrbiz.bergamot.model.message.cluster.manager.ClusterManagerResponse;
import com.intrbiz.bergamot.model.message.cluster.manager.request.DeinitSite;
import com.intrbiz.bergamot.model.message.cluster.manager.request.FlushGlobalCaches;
import com.intrbiz.bergamot.model.message.cluster.manager.request.InitSite;
import com.intrbiz.bergamot.model.message.cluster.manager.response.ClusterManagerError;
import com.intrbiz.bergamot.model.message.cluster.manager.response.DeinitedSite;
import com.intrbiz.bergamot.model.message.cluster.manager.response.InitedSite;
import com.intrbiz.bergamot.queue.BergamotClusterManagerQueue;
import com.intrbiz.bergamot.result.DefaultResultProcessor;
import com.intrbiz.bergamot.result.ResultProcessor;
import com.intrbiz.bergamot.scheduler.Scheduler;
import com.intrbiz.bergamot.scheduler.WheelScheduler;
import com.intrbiz.data.DataException;
import com.intrbiz.lamplighter.reading.DefaultReadingProcessor;
import com.intrbiz.lamplighter.reading.ReadingProcessor;
import com.intrbiz.queue.RPCHandler;
import com.intrbiz.queue.RPCServer;

/**
 * Manage scheduling and result processing services across the cluster.
 *
 * Every site has a (configurable) number of processing pools, checks
 * are split across these pools.  The processing pools are then split
 * across the members within the cluster.  As such work should be
 * balanced across the scheduling and result processing resources
 * of the cluster.
 *
 * This cluster manager handles migrating resources around the
 * cluster.  The general idea is that when the cluster state changes,
 * the cluster members will race to acquire the management lock.
 * Each member applies its decision while holding the lock, so the
 * decisions themselves must be safe to repeat: the first member to
 * get the lock does the work and later members find nothing left to do.
 *
 * Migrations are placed into a queue named with the UUID of the
 * cluster member which should execute them.  A cluster member will
 * consume and apply these migration tasks on a dedicated thread.
 */
public class ClusterManager implements RPCHandler<ClusterManagerRequest, ClusterManagerResponse> {

    private Config hazelcastConfig;

    private HazelcastInstance hazelcastInstance;

    private final Logger logger = Logger.getLogger(ClusterManager.class);

    /**
     * The lock used when making decisions about managing the cluster.
     * The general principle is that the first node to acquire the lock
     * makes the decision about where resources run.
     */
    private ILock clusterManagerLock;

    /**
     * The distributed map of processing pools
     */
    private IMap<String, ProcessingPool> pools;

    /**
     * A queue of migration events we must apply
     */
    private IQueue<ClusterMigration> migrations;

    /**
     * Loop flag for the migration consumer thread, cleared by {@link #shutdown()}
     */
    private volatile boolean runMigrations = false;

    /**
     * The thread which consumes our migration queue, null until started
     */
    private volatile Thread migrationsConsumer;

    private volatile boolean started = false;

    private Cluster cluster;

    /**
     * Our scheduler
     */
    private Scheduler scheduler;

    /**
     * Our result processor
     */
    private ResultProcessor resultProcessor;

    /**
     * Our reading processor
     */
    private ReadingProcessor readingProcessor;

    /**
     * Our command processor
     */
    private CommandProcessor commandProcessor;

    /**
     * Our controller
     */
    private BergamotController controller;

    /**
     * Cluster manager queue
     */
    private BergamotClusterManagerQueue queue;

    /**
     * Cluster manager server
     */
    private RPCServer<ClusterManagerRequest, ClusterManagerResponse> server;

    /**
     * Create the local processors, open the cluster manager queue and
     * create the RPC server which delivers requests to this manager.
     * Hazelcast itself is not touched until {@code start} is called.
     */
    public ClusterManager() {
        super();
        this.scheduler = new WheelScheduler();
        this.resultProcessor = new DefaultResultProcessor();
        this.readingProcessor = new DefaultReadingProcessor();
        this.commandProcessor = new DefaultCommandProcessor();
        this.controller = new BergamotController();
        this.queue = BergamotClusterManagerQueue.open();
        this.server = this.queue.createBergamotClusterManagerRPCServer(this);
    }

    public Scheduler getScheduler() {
        return this.scheduler;
    }

    public ResultProcessor getResultProcessor() {
        return this.resultProcessor;
    }

    public ReadingProcessor getReadingProcessor() {
        return this.readingProcessor;
    }

    public CommandProcessor getCommandProcessor() {
        return this.commandProcessor;
    }

    /**
     * Get the UUID of the local cluster member.
     * The cluster reference is only set by {@code start}, so this must
     * not be called before the manager has been started.
     */
    public String getLocalMemberUUID() {
        return this.cluster.getLocalMember().getUuid();
    }

    /**
     * Start the cluster manager using the default configuration lookup.
     *
     * @param instanceName the Hazelcast instance name, may be null
     */
    public void start(String instanceName) {
        this.start(null, instanceName);
    }

    /**
     * Stop the migration consumer thread and close the RPC server and queue.
     *
     * The Hazelcast instance and the processors are intentionally left
     * alone: the instance may have been obtained by name and could be
     * shared with other components.
     */
    public void shutdown() {
        // stop the consumer first so it is stopped even if closing the server fails
        this.runMigrations = false;
        Thread consumer = this.migrationsConsumer;
        if (consumer != null) {
            // wake the consumer from its blocking poll rather than waiting out the timeout
            consumer.interrupt();
        }
        this.server.close();
        this.queue.close();
    }

    /**
     * Start Hazelcast, the local processors and the migration consumer.
     * Calling this again once started is a no-op.
     *
     * @param config the Hazelcast configuration, when null the file named by the
     *        {@code hazelcast.config} system property or {@code hazelcast_config}
     *        environment variable is used, falling back to a default configuration
     * @param instanceName the Hazelcast instance name, when null an anonymous
     *        instance is created, otherwise the named instance is fetched or created
     * @throws DataException should any part of the startup fail
     */
    public synchronized void start(Config config, String instanceName) {
        try {
            if (!this.started) {
                // NOTE(review): the flag is set before startup completes, so a failed start cannot be retried
                this.started = true;
                // start hazelcast
                if (config == null) {
                    // setup config
                    String hazelcastConfigFile = Util.coalesceEmpty(System.getProperty("hazelcast.config"), System.getenv("hazelcast_config"));
                    if (hazelcastConfigFile != null) {
                        // when using a config file, you must configure the balsa.sessions map
                        this.hazelcastConfig = new XmlConfigBuilder(hazelcastConfigFile).build();
                    } else {
                        // setup the default configuration
                        this.hazelcastConfig = new Config();
                    }
                } else {
                    this.hazelcastConfig = config;
                }
                // create the hazelcast instance
                if (instanceName == null) {
                    this.hazelcastInstance = Hazelcast.newHazelcastInstance(this.hazelcastConfig);
                } else {
                    // set the instance name
                    this.hazelcastConfig.setInstanceName(instanceName);
                    // create the instance
                    this.hazelcastInstance = Hazelcast.getOrCreateHazelcastInstance(this.hazelcastConfig);
                }
                // setup data structures
                this.clusterManagerLock = this.hazelcastInstance.getLock("bergamot.cluster.manager");
                this.pools = this.hazelcastInstance.getMap("bergamot.pools");
                this.cluster = this.hazelcastInstance.getCluster();
                this.migrations = this.getMigrationQueue(this.cluster.getLocalMember().getUuid());
                // listen to cluster state changes
                this.cluster.addMembershipListener(new InitialMembershipListener() {
                    @Override
                    public void memberAdded(MembershipEvent membershipEvent) {
                        // when a member is added we will release some processing pools to it
                        logger.info("Member added: " + membershipEvent.getMember() + " members: " + membershipEvent.getMembers());
                        giveUpPools(membershipEvent.getCluster().getLocalMember(), membershipEvent.getMember(), membershipEvent.getMembers());
                    }

                    @Override
                    public void memberRemoved(MembershipEvent membershipEvent) {
                        // when a member is removed from the cluster we will redistribute its pools over the cluster
                        logger.info("Member removed: " + membershipEvent.getMember() + " members: " + membershipEvent.getMembers());
                        logger.info("Taking over pools from node");
                        takeOverPools(membershipEvent.getCluster().getLocalMember(), membershipEvent.getMember(), membershipEvent.getMembers());
                    }

                    @Override
                    public void memberAttributeChanged(MemberAttributeEvent memberAttributeEvent) {
                    }

                    @Override
                    public void init(InitialMembershipEvent event) {
                    }
                });
                // start our scheduler and result processor
                logger.info("Starting command processor");
                this.commandProcessor.start();
                logger.info("Starting result processor");
                this.resultProcessor.start();
                logger.info("Starting reading processor");
                this.readingProcessor.start();
                logger.info("Starting scheduler");
                this.scheduler.start();
                logger.info("Starting controller");
                this.controller.start();
                // setup our migration task consumer
                this.runMigrations = true;
                this.migrationsConsumer = new Thread(new Runnable() {
                    @Override
                    public void run() {
                        consumeMigrations();
                    }
                }, "BergamotClusterMigrator");
                this.migrationsConsumer.start();
            }
        } catch (Exception e) {
            throw new DataException("Failed to start Hazelcast Cluster Manager", e);
        }
    }

    /**
     * Body of the migration consumer thread: poll our migration queue and
     * apply whatever arrives until asked to stop or interrupted.
     */
    private void consumeMigrations() {
        logger.info("Starting cluster migrations thread");
        while (this.runMigrations) {
            try {
                // poll with a timeout so that the run flag is rechecked periodically
                this.runMigration(this.migrations.poll(10, TimeUnit.SECONDS));
            } catch (InterruptedException e) {
                // only shutdown() holds a reference to this thread, so an interrupt means stop;
                // restore the flag for anything further up the stack and leave the loop
                Thread.currentThread().interrupt();
                break;
            }
        }
        logger.info("Terminating cluster migrations thread");
    }

    /**
     * Apply a single migration on this member, logging rather than
     * propagating any failure.
     *
     * @param migration the migration to apply, null (a poll timeout) is ignored
     */
    private void runMigration(ClusterMigration migration) {
        try {
            if (migration != null) {
                logger.info("Executing cluster migration: " + migration);
                boolean result = migration.applyMigration(this);
                logger.debug("Migration completed: " + result);
            }
        } catch (Exception e) {
            logger.error("Error applying migration", e);
        }
    }

    /**
     * Create the processing pools for the given site across the current cluster members.
     */
    public void registerSite(Site site) {
        this.logger.info("Registering site " + site.getId() + " " + site.getName());
        this.initPoolsForSite(this.cluster.getMembers(), site);
    }

    /**
     * Ask every current cluster member to deregister the processing pools of the given site.
     */
    public void deregisterSite(Site site) {
        this.logger.info("Deregistering site " + site.getId() + " " + site.getName());
        this.deinitPoolsForSite(this.cluster.getMembers(), site);
    }

    /**
     * Remove all pools for a site
     */
    private void deinitPoolsForSite(Set<Member> memberSet, Site site) {
        this.clusterManagerLock.lock();
        try {
            // remove the pools for the given site
            this.logger.info("Uninitialising cluster state, members: " + memberSet);
            // NOTE(review): the entries are not removed from the pools map here, so a later
            // initPoolsForSite for the same site may find them still present and skip them,
            // confirm whether DeregisterPoolTask is expected to remove them
            for (ProcessingPool pool : this.pools.values(new SitePredicate(site.getId()))) {
                // send the deregister to every member to ensure the processing pool is removed
                for (Member member : memberSet) {
                    this.sendMigration(member, new DeregisterPoolTask(pool.getSite(), pool.getPool()));
                }
            }
        } finally {
            this.clusterManagerLock.unlock();
        }
    }

    /**
     * Initially register the pools for the given site with this cluster.
     * Pools which already exist in the pools map are left untouched.
     */
    private void initPoolsForSite(Set<Member> memberSet, Site site) {
        this.clusterManagerLock.lock();
        try {
            // add the pools for the given site
            this.logger.info("Initialising cluster state, members: " + memberSet);
            Set<ProcessingPool> altered = new HashSet<ProcessingPool>();
            Member[] members = memberSet.stream().toArray(Member[]::new);
            for (int i = 0; i < site.getPoolCount(); i++) {
                ProcessingPool pool = new ProcessingPool(site.getId(), i);
                Member owner = pickOwner(pool, members);
                pool.setOwner(owner.getUuid());
                ProcessingPool previous = this.pools.putIfAbsent(pool.getKey(), pool);
                this.logger.trace("Pool " + pool.getKey() + " owner " + owner.getUuid());
                // only pools we actually inserted need registering
                if (previous == null) {
                    altered.add(pool);
                }
            }
            this.logger.info("Assigned " + altered.size() + " pools");
            // setup our pools
            this.registerPools(altered, memberSet);
        } finally {
            this.clusterManagerLock.unlock();
        }
    }

    /**
     * Take over pools from a failed member.
     * Every remaining member runs this, the first to get the lock moves
     * the pools and the rest find no pools left owned by the removed member.
     */
    private void takeOverPools(Member local, Member removed, Set<Member> memberSet) {
        this.clusterManagerLock.lock();
        try {
            // reassign pools
            this.logger.info("Taking over pools from member: " + removed);
            Member[] members = memberSet.stream().toArray(Member[]::new);
            Set<ProcessingPool> altered = new HashSet<ProcessingPool>();
            for (ProcessingPool pool : this.pools.values(new OwnerPredicate(removed.getUuid()))) {
                Member newOwner = pickOwner(pool, members);
                this.logger.trace("Redistributing pool " + pool.getKey() + " from " + pool.getOwner() + " to " + newOwner.getUuid());
                pool.migrate(newOwner.getUuid());
                this.pools.put(pool.getKey(), pool);
                altered.add(pool);
            }
            this.logger.info("Taken over " + altered.size() + " pools");
            // setup pools
            this.registerPools(altered, memberSet);
            // clear the queue of the removed member
            this.clearQueue(removed.getUuid());
        } finally {
            this.clusterManagerLock.unlock();
        }
    }

    /**
     * Release some pools for the new member to handle.
     *
     * Every existing member runs this when a member joins, one after the
     * other under the cluster lock.  To stop each of them handing over a
     * further share, only the shortfall between the new member's fair share
     * and what it already owns is migrated.
     */
    private void giveUpPools(Member local, Member added, Set<Member> memberSet) {
        this.clusterManagerLock.lock();
        try {
            String addedUuid = added.getUuid();
            int poolsPerNode = Math.max(this.pools.size() / memberSet.size(), 1);
            // how many pools the new member still needs to reach its share
            int alreadyOwned = this.pools.values(new OwnerPredicate(addedUuid)).size();
            int shortfall = poolsPerNode - alreadyOwned;
            // reassign pools
            this.logger.info("Giving up pools to member: " + added);
            Set<ProcessingPool> altered = new HashSet<ProcessingPool>();
            if (shortfall > 0) {
                for (ProcessingPool pool : this.pools.values(Predicates.not(new OwnerPredicate(addedUuid)))) {
                    this.logger.trace("Redistributing pool " + pool.getKey() + " from " + pool.getOwner() + " to " + addedUuid);
                    pool.migrate(addedUuid);
                    this.pools.put(pool.getKey(), pool);
                    altered.add(pool);
                    // have we reassigned enough pools
                    if (altered.size() >= shortfall) {
                        break;
                    }
                }
            }
            this.logger.info("Released " + altered.size() + " pools");
            // setup pools on the new owner, then tear them down on the previous owners
            this.registerPools(altered, memberSet);
            this.deregisterPools(altered, memberSet);
        } finally {
            this.clusterManagerLock.unlock();
        }
    }

    /**
     * Choose the member which should own the given pool, based on the hash of the pool key.
     */
    private static Member pickOwner(ProcessingPool pool, Member[] members) {
        return members[Math.abs(pool.getKey().hashCode() % members.length)];
    }

    /**
     * Index the given members by their UUID.
     */
    private static Map<String, Member> membersByUuid(Set<Member> memberSet) {
        return memberSet.stream().collect(Collectors.toMap((m) -> m.getUuid(), (m) -> m));
    }

    /**
     * Execute the register pool tasks on the various cluster members
     */
    private void registerPools(Collection<ProcessingPool> pools, Set<Member> memberSet) {
        Map<String, Member> members = membersByUuid(memberSet);
        for (ProcessingPool pool : pools) {
            this.sendMigration(members.get(pool.getOwner()), new RegisterPoolTask(pool.getSite(), pool.getPool()));
        }
    }

    /**
     * Execute the deregister pool tasks of the various cluster members
     */
    private void deregisterPools(Collection<ProcessingPool> pools, Set<Member> memberSet) {
        Map<String, Member> members = membersByUuid(memberSet);
        for (ProcessingPool pool : pools) {
            this.sendMigration(members.get(pool.getPreviousOwner()), new DeregisterPoolTask(pool.getSite(), pool.getPool()));
        }
    }

    /**
     * Deliver a migration to the member which should run it: applied
     * directly when that is the local member, otherwise placed on the
     * target member's migration queue.
     *
     * @param runOn the member to run the migration on, when null the migration is dropped
     * @param migration the migration to run
     */
    private void sendMigration(Member runOn, ClusterMigration migration) {
        if (runOn == null) {
            // the target is not in the member set we were given, nothing can run this
            logger.warn("Dropping cluster migration with no target member: " + migration);
            return;
        }
        if (runOn.localMember()) {
            // run it directly
            this.runMigration(migration);
        } else {
            // enqueue it to the target node
            try {
                this.getMigrationQueue(runOn.getUuid()).put(migration);
            } catch (Exception e) {
                logger.fatal("Failed to queue cluster migration task, cluster could be inconsistent!", e);
            }
        }
    }

    /**
     * Get the migration queue for the member with the given UUID.
     */
    private IQueue<ClusterMigration> getMigrationQueue(String memberUUID) {
        return this.hazelcastInstance.getQueue("bergamot.cluster.migrations." + memberUUID);
    }

    /**
     * Discard any pending migrations queued for the member with the given UUID.
     */
    private void clearQueue(String memberUUID) {
        this.getMigrationQueue(memberUUID).clear();
    }

    /**
     * Handle a cluster manager request: initialise a site, deinitialise
     * a site or flush the global caches.
     *
     * @param event the request to process
     * @return the response for the request, a {@code ClusterManagerError}
     *         when the site is unknown, the request type is not recognised
     *         or processing failed
     */
    @Override
    public ClusterManagerResponse handleDevliery(ClusterManagerRequest event) throws IOException {
        try {
            if (event instanceof InitSite) {
                InitSite request = (InitSite) event;
                UUID siteId = request.getSiteId();
                String siteName = request.getSiteName();
                logger.info("Got request to init site: " + siteId + " - " + siteName);
                try (BergamotDB db = BergamotDB.connect()) {
                    Site site = db.getSite(siteId);
                    if (site == null) {
                        return new ClusterManagerError("Unknown site");
                    }
                    this.registerSite(site);
                }
                return new InitedSite();
            } else if (event instanceof DeinitSite) {
                DeinitSite request = (DeinitSite) event;
                UUID siteId = request.getSiteId();
                String siteName = request.getSiteName();
                logger.info("Got request to deinit site: " + siteId + " - " + siteName);
                try (BergamotDB db = BergamotDB.connect()) {
                    Site site = db.getSite(siteId);
                    if (site == null) {
                        return new ClusterManagerError("Unknown site");
                    }
                    this.deregisterSite(site);
                }
                return new DeinitedSite();
            } else if (event instanceof FlushGlobalCaches) {
                try (BergamotDB db = BergamotDB.connect()) {
                    db.flushGlobalCaches();
                }
                // NOTE(review): a successful flush falls through to the "Unknown command" error
                // below, this looks unintended but no success response type is visible here
            }
            return new ClusterManagerError("Unknown command");
        } catch (Exception e) {
            logger.error("Failed to execute cluster manager command", e);
            return new ClusterManagerError(e.getMessage());
        }
    }

    /**
     * Get the number of members in the cluster, 0 if the manager has not been started.
     */
    public int getMemberCount() {
        return this.hazelcastInstance == null ? 0 : this.hazelcastInstance.getCluster().getMembers().size();
    }

    /**
     * Get the number of processing pools in the cluster, 0 if the manager has not been started.
     */
    public int getProcessPoolCount() {
        return this.pools == null ? 0 : this.pools.size();
    }
}