package dmg.cells.nucleus; import com.google.common.io.BaseEncoding; import com.google.common.primitives.Longs; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.curator.framework.CuratorFramework; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nullable; import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.SynchronousQueue; import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import dmg.util.TimebasedCounter; import static com.google.common.base.Preconditions.checkState; class CellGlue { private static final Logger LOGGER = LoggerFactory.getLogger(CellGlue.class); private final String _cellDomainName; private final ConcurrentMap<String, CellNucleus> _cellList = new ConcurrentHashMap<>(); private final ConcurrentMap<String, CellNucleus> _publicCellList = new ConcurrentHashMap<>(); private final ConcurrentMap<CellNucleus, ListenableFuture<?>> _killedCells = new ConcurrentHashMap<>(); private final Map<String, Object> _cellContext = new ConcurrentHashMap<>(); private final TimebasedCounter _uniqueCounter = new TimebasedCounter(); private final BaseEncoding COUNTER_ENCODING = BaseEncoding.base64Url().omitPadding(); private CellNucleus _systemNucleus; private final CellRoutingTable _routingTable = new CellRoutingTable(); private final ThreadGroup _masterThreadGroup; private final ThreadGroup _killerThreadGroup; private final ListeningExecutorService _killerExecutor; private final ListeningExecutorService _emergencyKillerExecutor; private final CellAddressCore _domainAddress; private final CuratorFramework _curatorFramework; CellGlue(String cellDomainName, @Nullable CuratorFramework curatorFramework) { String cellDomainNameLocal = cellDomainName; if (cellDomainName == null || cellDomainName.isEmpty()) { cellDomainNameLocal = "*"; } if (cellDomainNameLocal.charAt(cellDomainNameLocal.length() - 1) == '*') { cellDomainNameLocal = cellDomainNameLocal.substring(0, cellDomainNameLocal.length()) + System.currentTimeMillis(); } _cellDomainName = cellDomainNameLocal; _curatorFramework = curatorFramework; _domainAddress = new CellAddressCore("*", _cellDomainName); _masterThreadGroup = new ThreadGroup("Master-Thread-Group"); _killerThreadGroup = new ThreadGroup("Killer-Thread-Group"); ThreadFactory killerThreadFactory = new ThreadFactoryBuilder().setNameFormat("killer-%d").setThreadFactory(r -> newThread(_killerThreadGroup, r)).build(); _killerExecutor = MoreExecutors.listeningDecorator (new ThreadPoolExecutor(1, Integer.MAX_VALUE, 3L, TimeUnit.SECONDS, new SynchronousQueue<>(), killerThreadFactory)); ThreadPoolExecutor emergencyKillerExecutor = new ThreadPoolExecutor(1, 1, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<>(), killerThreadFactory); emergencyKillerExecutor.prestartCoreThread(); _emergencyKillerExecutor = MoreExecutors.listeningDecorator(emergencyKillerExecutor); } static Thread newThread(ThreadGroup threadGroup, Runnable r) { Thread thread = new Thread(threadGroup, r); /* By default threads inherit the daemon status and priority from the creating * thread. Thus we reset them. */ if (thread.isDaemon()) { thread.setDaemon(false); } if (thread.getPriority() != Thread.NORM_PRIORITY) { thread.setPriority(Thread.NORM_PRIORITY); } return thread; } static Thread newThread(ThreadGroup threadGroup, Runnable r, String name) { Thread thread = new Thread(threadGroup, r, name); /* By default threads inherit the daemon status and priority from the creating * thread. Thus we reset them. */ if (thread.isDaemon()) { thread.setDaemon(false); } if (thread.getPriority() != Thread.NORM_PRIORITY) { thread.setPriority(Thread.NORM_PRIORITY); } return thread; } ThreadGroup getMasterThreadGroup() { return _masterThreadGroup; } synchronized void registerCell(CellNucleus cell) throws IllegalStateException { if (cell.getThisCell() instanceof SystemCell) { checkState(_systemNucleus == null); _systemNucleus = cell; } String name = cell.getCellName(); if (_cellList.putIfAbsent(name, cell) != null) { throw new IllegalStateException("Cell " + name + " already exists."); } sendToAll(new CellEvent(name, CellEvent.CELL_CREATED_EVENT)); } synchronized void publishCell(CellNucleus cell) throws IllegalArgumentException { String name = cell.getCellName(); if (_cellList.get(name) != cell) { throw new IllegalStateException("Cell " + name + " does not exist."); } if (!_killedCells.containsKey(cell) && _publicCellList.putIfAbsent(name, cell) != null) { throw new IllegalStateException("Cell " + name + " is already published."); } } CellNucleus getSystemNucleus() { return _systemNucleus; } void consume(CellNucleus cell, String queue) { routeAdd(new CellRoute(queue, cell.getThisAddress(), CellRoute.QUEUE)); } void subscribe(CellNucleus cell, String topic) { routeAdd(new CellRoute(topic, cell.getThisAddress(), CellRoute.TOPIC)); } Map<String, Object> getCellContext() { return _cellContext; } Object getCellContext(String str) { return _cellContext.get(str); } public synchronized void routeAdd(CellRoute route) { CellAddressCore target = route.getTarget(); if (target.getCellDomainName().equals(getCellDomainName()) && !_publicCellList.containsKey(target.getCellName())) { throw new IllegalArgumentException("No such cell: " + target); } _routingTable.add(route); sendToAll(new CellEvent(route, CellEvent.CELL_ROUTE_ADDED_EVENT)); } public synchronized void routeDelete(CellRoute route) { _routingTable.delete(route); sendToAll(new CellEvent(route, CellEvent.CELL_ROUTE_DELETED_EVENT)); } CellRoutingTable getRoutingTable() { return _routingTable; } CellRoute[] getRoutingList() { return _routingTable.getRoutingList(); } List<CellTunnelInfo> getCellTunnelInfos() { List<CellTunnelInfo> v = new ArrayList<>(); for (CellNucleus cellNucleus : _publicCellList.values()) { Cell c = cellNucleus.getThisCell(); if (c instanceof CellTunnel) { v.add(((CellTunnel) c).getCellTunnelInfo()); } } return v; } List<String> getCellNames() { return new ArrayList<>(_cellList.keySet()); } String getUnique() { return COUNTER_ENCODING.encode(Longs.toByteArray(_uniqueCounter.next())); } CellInfo getCellInfo(String name) { CellNucleus nucleus = getCell(name); return (nucleus == null) ? null : nucleus._getCellInfo(); } Thread[] getThreads(String name) { CellNucleus nucleus = getCell(name); return (nucleus == null) ? null : nucleus.getThreads(); } private void sendToAll(CellEvent event) { for (CellNucleus nucleus : _publicCellList.values()) { nucleus.addToEventQueue(event); } } String getCellDomainName() { return _cellDomainName; } ListenableFuture<?> kill(CellNucleus nucleus) { return kill(nucleus, nucleus); } ListenableFuture<?> kill(CellNucleus sender, String cellName) { CellNucleus nucleus = _cellList.get(cellName); if (nucleus == null) { return Futures.immediateFailedFuture(new NoSuchElementException("No such cell: " + cellName)); } return kill(sender, nucleus); } /** * Log diagnostic information about a cell's ThreadGroup. */ void listThreadGroupOf(String cellName) { CellNucleus nucleus = _cellList.get(cellName); if (nucleus != null) { nucleus.threadGroupList(); } } void listKillerThreadGroup() { listThreadGroup(_killerThreadGroup); } /** * Log diagnostic information about a ThreadGroup. */ static void listThreadGroup(ThreadGroup threadGroup) { Thread[] threads = new Thread[threadGroup.activeCount()]; int n = threadGroup.enumerate(threads); for (int i = 0; i < n; i++) { Thread thread = threads[i]; if (thread.isAlive() && !thread.isDaemon() || LOGGER.isDebugEnabled()) { LOGGER.warn("Thread: {} [{}{}{}] ({}) {}", thread.getName(), (thread.isAlive() ? "A" : "-"), (thread.isDaemon() ? "D" : "-"), (thread.isInterrupted() ? "I" : "-"), thread.getPriority(), thread.getState()); for (StackTraceElement s : thread.getStackTrace()) { LOGGER.warn(" {}", s); } } } } /** * Returns a named cell. This method also returns cells that have * been killed, but which are not dead yet. * * @param cellName the name of the cell * @return The cell with the given name or null if there is no such * cell. */ CellNucleus getCell(String cellName) { return _cellList.get(cellName); } /** * Blocks until the given cell is dead. * * @param cellName the name of the cell * @param timeout the time to wait in milliseconds. A timeout * of 0 means to wait forever. * @return True if the cell died, false in case of a timeout. * @throws InterruptedException if another thread interrupted the * current thread before or while the current thread was * waiting for a notification. The interrupted status of * the current thread is cleared when this exception is * thrown. */ synchronized boolean join(String cellName, long timeout) throws InterruptedException { if (timeout == 0) { while (getCell(cellName) != null) { wait(); } return true; } else { while (getCell(cellName) != null && timeout > 0) { long time = System.currentTimeMillis(); wait(timeout); timeout = timeout - (System.currentTimeMillis() - time); } return (timeout > 0); } } synchronized void destroy(CellNucleus nucleus) { String cellName = nucleus.getCellName(); if (_publicCellList.remove(cellName, nucleus)) { LOGGER.warn("Apparently cell {} wasn't unpublished before being destroyed. Please contact support@dcache.org.", cellName); } if (!_cellList.remove(cellName, nucleus)) { LOGGER.warn("Apparently cell {} wasn't registered before being destroyed. Please contact support@dcache.org.", cellName); } if (_killedCells.remove(nucleus) == null) { LOGGER.warn("Apparently cell {} wasn't killed before being destroyed. Please contact support@dcache.org.", cellName); } notifyAll(); } private synchronized ListenableFuture<?> kill(CellNucleus source, CellNucleus destination) { String cellToKill = destination.getCellName(); if (_cellList.get(cellToKill) != destination) { return Futures.immediateFailedFuture(new NoSuchElementException("No such cell: " + cellToKill)); } /* Mark the cell as being killed to prevent it from being killed more * than once and to block certain operations while it is being killed. */ return _killedCells.computeIfAbsent(destination, d -> doKill(source, d)); } private synchronized ListenableFuture<?> doKill(CellNucleus source, CellNucleus destination) { String cellToKill = destination.getCellName(); /* Remove routes to this cell first to reduce the chance that * we try to route to a no longer existing cell... */ Collection<CellRoute> routes = _routingTable.delete(destination.getThisAddress()); /* ... then remove the cell to prevent further messages to be sent to it. */ _publicCellList.remove(cellToKill, destination); /* Notify others about the route removal. */ for (CellRoute route : routes) { sendToAll(new CellEvent(route, CellEvent.CELL_ROUTE_DELETED_EVENT)); } /* Post the obituary. */ CellPath sourceAddr = new CellPath(source.getCellName(), source.getCellDomainName()); KillEvent killEvent = new KillEvent(cellToKill, sourceAddr); sendToAll(new CellEvent(cellToKill, CellEvent.CELL_DIED_EVENT)); /* Put out a contract. */ Runnable command = () -> destination.shutdown(killEvent); try { return _killerExecutor.submit(command); } catch (OutOfMemoryError e) { /* This can signal that we cannot create any more threads. The emergency * pool has one thread preallocated for this situation. */ return _emergencyKillerExecutor.submit(command); } } private static final int MAX_ROUTE_LEVELS = 16; /** * Send a message to another cell. * * @param msg The cell envelope * @param resolveLocally Whether to deliver messages for @local addresses to local cells * @param resolveRemotely Whether to deliver messages for @local addresses through routes with * a domain address as a target * @throws SerializationException */ void sendMessage(CellMessage msg, boolean resolveLocally, boolean resolveRemotely) throws SerializationException { if (!msg.isStreamMode()) { msg = msg.encode(); } CellPath destination = msg.getDestinationPath(); LOGGER.trace("sendMessage : {} send to {}", msg.getUOID(), destination); sendMessage(msg, destination.getCurrent(), resolveLocally, resolveRemotely, MAX_ROUTE_LEVELS); } private void sendMessage(CellMessage msg, CellAddressCore address, boolean resolveLocally, boolean resolveRemotely, int steps) { CellPath destination = msg.getDestinationPath(); /* We track whether we advanced the current position in the destination path. If not, we refuse * to send the message back to the domain we got it from. */ boolean hasDestinationChanged = false; /* We track whether the message has been delivered with any topic routes. If so, failure to find * any other routes will not generate an error. */ boolean hasTopicRoutes = false; while (steps > 0) { /* Skip our own domain in the address as we are already here. */ while (address.equals(_domainAddress)) { if (!destination.next()) { sendException(msg, "*"); return; } address = destination.getCurrent(); hasDestinationChanged = true; } LOGGER.trace("sendMessage : next hop at {}: {}", steps, address); /* If explicitly addressed to a cell in our domain we have to deliver * it now. */ if (address.getCellDomainName().equals(_cellDomainName)) { if (!deliverLocally(msg, address)) { sendException(msg, address.toString()); } return; } /* If the address if not fully qualified we have the choice of resolving * it locally or through the routing table. */ if (address.isLocalAddress()) { if (resolveLocally && deliverLocally(msg, address)) { return; } /* Topic routes are special because they cause messages to be * duplicated. */ for (CellRoute route : _routingTable.findTopicRoutes(address)) { CellAddressCore target = route.getTarget(); boolean isLocalSubscriber = !target.isDomainAddress(); if (isLocalSubscriber || resolveRemotely) { CellMessage m = msg.clone(); if (isLocalSubscriber) { m.getDestinationPath().replaceCurrent(target); } sendMessage(m, target, true, resolveRemotely, steps - 1); } hasTopicRoutes = true; } } /* Unless we updated the destination path, there is no reason to send the message back to where * we got it from. Note that we cannot detect non-trivial loops, i.e. loops involving three * or more domains: Such loops may have legitimate alias-routes rewriting the destination * and sending the message to where it has been before may be perfectly reasonable. */ if (!hasDestinationChanged && msg.getSourcePath().getDestinationAddress().equals(address)) { if (!hasTopicRoutes) { sendException(msg, address.toString()); } return; } /* Lookup a route. */ CellRoute route = _routingTable.find(address, resolveRemotely); if (route == null) { LOGGER.trace("sendMessage : no route destination for : {}", address); if (!hasTopicRoutes) { sendException(msg, address.toString()); } return; } LOGGER.trace("sendMessage : using route : {}", route); address = route.getTarget(); /* Alias routes rewrite the address. */ if (route.getRouteType() == CellRoute.ALIAS || route.getRouteType() == CellRoute.QUEUE && !address.isDomainAddress()) { destination.replaceCurrent(address); hasDestinationChanged = true; } /* The delivery restrictions do not apply to routes. */ resolveLocally = true; resolveRemotely = true; steps--; } // end of big iteration loop LOGGER.trace("sendMessage : max route iteration reached: {}", destination); sendException(msg, address.toString()); } private boolean deliverLocally(CellMessage msg, CellAddressCore address) { CellNucleus destNucleus = _publicCellList.get(address.getCellName()); if (destNucleus != null) { /* Is the message addressed to the cell or is the cell merely a router. */ CellPath destinationPath = msg.getDestinationPath(); if (address.equals(destinationPath.getCurrent())) { try { destNucleus.addToEventQueue(new MessageEvent(msg.decode())); } catch (SerializationException e) { LOGGER.error("Received malformed message from {} with UOID {} and session [{}]: {}", msg.getSourcePath(), msg.getUOID(), msg.getSession(), e.getMessage()); sendException(msg, address.toString()); } } else if (msg.getSourcePath().hops() > 30) { LOGGER.error("Hop count exceeds 30: {}", msg); sendException(msg, address.toString()); } else { msg.addSourceAddress(_domainAddress); destNucleus.addToEventQueue(new RoutedMessageEvent(msg)); } return true; } return false; } private void sendException(CellMessage msg, String routeTarget) throws SerializationException { if (msg.getSourceAddress().getCellName().equals("*")) { Serializable messageObject = msg.decode().getMessageObject(); if (messageObject instanceof NoRouteToCellException) { LOGGER.info( "Unable to notify {} about delivery failure of message sent to {}: No route for {} in {}.", msg.getDestinationPath(), ((NoRouteToCellException) messageObject).getDestinationPath(), routeTarget, _cellDomainName); } else { LOGGER.warn( "Message from {} could not be delivered because no route to {} is known.", msg.getSourcePath(), routeTarget); } } else { LOGGER.debug( "Message from {} could not be delivered because no route to {} is known; the sender will be notified.", msg.getSourcePath(), routeTarget); CellMessage envelope = new CellMessage(msg.getSourcePath().revert(), new NoRouteToCellException(msg, "Route for >" + routeTarget + "< not found at >" + _cellDomainName + '<')); envelope.setLastUOID(msg.getUOID()); envelope.addSourceAddress(_domainAddress); sendMessage(envelope, true, true); } } @Override public String toString() { return _cellDomainName; } @Nullable public CuratorFramework getCuratorFramework() { return _curatorFramework; } public void shutdown() { if (_curatorFramework != null) { _curatorFramework.close(); } _killerExecutor.shutdown(); } }