/* * dCache - http://www.dcache.org/ * * Copyright (C) 2016 Deutsches Elektronen-Synchrotron * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package dmg.cells.services; import com.google.common.collect.HashMultimap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.MoreExecutors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.concurrent.GuardedBy; import java.io.PrintWriter; import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.stream.Stream; import dmg.cells.nucleus.CellAdapter; import dmg.cells.nucleus.CellAddressCore; import dmg.cells.nucleus.CellDomainInfo; import dmg.cells.nucleus.CellDomainRole; import dmg.cells.nucleus.CellEvent; import dmg.cells.nucleus.CellEventListener; import dmg.cells.nucleus.CellMessage; import dmg.cells.nucleus.CellNucleus; import dmg.cells.nucleus.CellPath; import dmg.cells.nucleus.CellRoute; import dmg.cells.nucleus.CellTunnelInfo; import dmg.cells.nucleus.FutureCellMessageAnswerable; import dmg.cells.nucleus.NoRouteToCellException; import org.dcache.util.Args; import static java.util.stream.Collectors.toMap; /** * Routing manager to publish exported cells and topics to peers. The routing manager receives * its own updates from its peers and manages QUEUE and TOPIC routes to other domains. * * Local exports are published to connected satellite domains, while both local exports and * local subscriptions are published to connected core domains. */ public class CoreRoutingManager extends CellAdapter implements CellEventListener { private static final Logger LOG = LoggerFactory.getLogger(CoreRoutingManager.class); private final CellNucleus nucleus; /** Local cells that consume from named queues. */ private final Multimap<String, String> localConsumers = HashMultimap.create(); /** Local cells that subscribed to topics. */ private final Multimap<String, String> localSubscriptions = HashMultimap.create(); /** Role of this domain. */ private final CellDomainRole role; /** * Routing updates from downstream domains are processed sequentially on a dedicated * thread. In the past we have observed that processing updates with many routes can * overwhelm a routing manager and the separate thread allows us to drop repeated * updates from the same domain. */ private final ExecutorService executor = Executors.newSingleThreadExecutor(getNucleus()); /** * Map to collapse update messages from connected domains. */ private final ConcurrentMap<String, CoreRouteUpdate> updates = new ConcurrentHashMap<>(); /** * Topic routes installed by routing manager. */ @GuardedBy("this") private final Multimap<String, String> topicRoutes = HashMultimap.create(); /** * Well known routes installed by routing manager. */ @GuardedBy("this") private final Multimap<String, String> queueRoutes = HashMultimap.create(); /** * Tunnels to core domains. */ @GuardedBy("this") private final Map<CellAddressCore,CellTunnelInfo> coreTunnels = new HashMap<>(); /** * Tunnels to satellite domains. */ @GuardedBy("this") private final Map<CellAddressCore,CellTunnelInfo> satelliteTunnels = new HashMap<>(); /** * Default routes that still have to be added. * * Routing manager when running in a satellite domain adds default routes whenever * it sees a new domain route to a core domain. To give other domains time to connect * to a newly created core domain, the installation of all but the first default route * is delayed for a moment. */ @GuardedBy("this") private final List<CellRoute> delayedDefaultRoutes = new ArrayList<>(); /** * A non-system cell that acts as an early warning if the domain is shutting down. Upon shutdown * we inform all downstream routing managers to remove their default routes pointing to this * domain. That way we can achieve a clean shutdown. */ private volatile CellAdapter canary; public CoreRoutingManager(String name, String arguments) { super(name, "System", arguments); nucleus = getNucleus(); role = getArgs().hasOption("role") ? CellDomainRole.valueOf(getArgs().getOption("role").toUpperCase()) : CellDomainRole.SATELLITE; } @Override protected void starting() throws ExecutionException, InterruptedException { if (role == CellDomainRole.CORE) { canary = new CellAdapter(getCellName() + "-canary", "Generic", "") { @Override protected void stopped() { notifyDownstreamOfDomainDeath(); } @Override public void getInfo(PrintWriter pw) { pw.println("If I die, downstream domains will drop their default routes to my domain."); } }; canary.start().get(); } } private void notifyDownstreamOfDomainDeath() { canary = null; ListenableFuture<List<CellMessage>> future; synchronized (this) { future = sendToPeers( new PeerShutdownNotification(getCellDomainName()), satelliteTunnels.values(), 7000); } try { future.get(); } catch (ExecutionException e) { LOG.info("Failed to notify downstream of shutdown: {}", e.toString()); } catch (InterruptedException ignored) { } } @Override public void stopped() { CellAdapter canary = this.canary; if (canary != null) { getNucleus().kill(canary.getCellName()); } executor.shutdown(); } @Override public synchronized void getInfo(PrintWriter pw) { pw.println("Local consumers: "); println(pw, localConsumers); pw.println(); pw.println("Local subscriptions:"); println(pw, localSubscriptions); pw.println(); pw.println("Managed topic routes:"); println(pw, topicRoutes); pw.println(); pw.println("Managed well-known routes:"); println(pw, queueRoutes); } private void println(PrintWriter pw, Multimap<String, String> map) { for (Map.Entry<String, Collection<String>> e : map.asMap().entrySet()) { pw.append(" ").append(e.getKey()).append(" : ").println(e.getValue()); } } private synchronized void sendToCoreDomains() { sendToPeers(new CoreRouteUpdate(localConsumers.values(), localSubscriptions.values()), coreTunnels.values()); } private synchronized void sendToSatelliteDomains() { sendToPeers(new CoreRouteUpdate(localConsumers.values()), satelliteTunnels.values()); } private void sendToPeers(Serializable msg, Collection<CellTunnelInfo> tunnels) { CellAddressCore peer = new CellAddressCore(nucleus.getCellName()); for (CellTunnelInfo tunnel : tunnels) { CellAddressCore domain = new CellAddressCore("*", tunnel.getRemoteCellDomainInfo().getCellDomainName()); nucleus.sendMessage(new CellMessage(new CellPath(domain, peer), msg), false, true, true); } } private ListenableFuture<List<CellMessage>> sendToPeers(Serializable msg, Collection<CellTunnelInfo> tunnels, long timeout) { List<FutureCellMessageAnswerable> futures = new ArrayList<>(); CellAddressCore peer = new CellAddressCore(nucleus.getCellName()); for (CellTunnelInfo tunnel : tunnels) { CellAddressCore domain = new CellAddressCore("*", tunnel.getRemoteCellDomainInfo().getCellDomainName()); FutureCellMessageAnswerable future = new FutureCellMessageAnswerable(); futures.add(future); nucleus.sendMessage(new CellMessage(new CellPath(domain, peer), msg), false, true, true, future, MoreExecutors.directExecutor(), timeout); } return Futures.allAsList(futures); } private void updateRoutes(String domain, Collection<String> destinations, Multimap<String, String> routes, int type) { Set<String> newDestinations = new HashSet<>(destinations); Iterator<String> iterator = routes.get(domain).iterator(); while (iterator.hasNext()) { String destination = iterator.next(); if (!newDestinations.remove(destination)) { try { nucleus.routeDelete(new CellRoute(destination, "*@" + domain, type)); iterator.remove(); } catch (IllegalArgumentException ignored) { // Route didn't exist } } } for (String destination : newDestinations) { try { nucleus.routeAdd(new CellRoute(destination, "*@" + domain, type)); routes.put(domain, destination); } catch (IllegalArgumentException ignored) { // Already exists } } } private synchronized void updateTopicRoutes(String domain, Collection<String> topics) { updateRoutes(domain, topics, topicRoutes, CellRoute.TOPIC); } private synchronized void updateQueueRoutes(String domain, Collection<String> cells) { updateRoutes(domain, cells, queueRoutes, CellRoute.QUEUE); } @Override public void messageArrived(CellMessage msg) { Serializable obj = msg.getMessageObject(); if (obj instanceof CoreRouteUpdate) { String domain = msg.getSourceAddress().getCellDomainName(); if (updates.put(domain, (CoreRouteUpdate) obj) == null) { executor.execute(() -> { CoreRouteUpdate update = updates.remove(domain); updateTopicRoutes(domain, update.getTopics()); updateQueueRoutes(domain, update.getExports()); }); } } else if (obj instanceof GetAllDomainsRequest) { CellTunnelInfo tunnel; synchronized (this) { tunnel = Iterables.getFirst(coreTunnels.values(), null); } if (role == CellDomainRole.SATELLITE && tunnel != null) { msg.getDestinationPath().insert(new CellPath(nucleus.getCellName(), tunnel.getRemoteCellDomainInfo().getCellDomainName())); nucleus.sendMessage(msg, false, true, false); } else { Map<String, Collection<String>> domains = new HashMap<>(); synchronized (this) { domains.put(getCellDomainName(), new ArrayList<>(localConsumers.values())); Stream.of(coreTunnels, satelliteTunnels) .flatMap(map -> map.values().stream()) .map(CellTunnelInfo::getRemoteCellDomainInfo) .map(CellDomainInfo::getCellDomainName) .forEach(domain -> domains.put(domain, new ArrayList<>())); queueRoutes.asMap().forEach( (domain, cells) -> domains.put(domain, Lists.newArrayList(cells))); } msg.revertDirection(); msg.setMessageObject(new GetAllDomainsReply(domains)); sendMessage(msg); } } else if (obj instanceof NoRouteToCellException) { LOG.info(((NoRouteToCellException) obj).getMessage()); } else if (obj instanceof PeerShutdownNotification) { PeerShutdownNotification notification = (PeerShutdownNotification) obj; String remoteDomain = notification.getDomainName(); synchronized (this) { coreTunnels.values().stream() .filter(i -> i.getRemoteCellDomainInfo().getCellDomainName().equals(remoteDomain)) .forEach(i -> { CellRoute route = new CellRoute(null, i.getTunnel(), CellRoute.DEFAULT); delayedDefaultRoutes.remove(route); if (!hasAlternativeDefaultRoute(route)) { installDefaultRoute(); } try { nucleus.routeDelete(route); } catch (IllegalArgumentException ignored) { } }); } msg.revertDirection(); sendMessage(msg); } else { LOG.warn("Unidentified message ignored: {}", obj); } } @Override public void cellCreated(CellEvent ce) { String name = (String)ce.getSource(); LOG.info("Cell created: {}", name); } @Override public synchronized void cellDied(CellEvent ce) { String name = (String) ce.getSource(); LOG.info("Cell died: {}", name); } @Override public synchronized void routeAdded(CellEvent ce) { super.routeAdded(ce); CellRoute cr = (CellRoute) ce.getSource(); CellAddressCore target = cr.getTarget(); LOG.info("Got 'route added' event: {}", cr); switch (cr.getRouteType()) { case CellRoute.DOMAIN: Optional<CellTunnelInfo> tunnelInfo = getTunnelInfo(target); tunnelInfo .filter(i -> i.getRemoteCellDomainInfo().getRole() == CellDomainRole.CORE) .ifPresent(i -> { coreTunnels.put(i.getTunnel(), i); sendToCoreDomains(); }); tunnelInfo .filter(i -> i.getRemoteCellDomainInfo().getRole() == CellDomainRole.SATELLITE) .ifPresent(i -> { satelliteTunnels.put(i.getTunnel(), i); sendToSatelliteDomains(); }); tunnelInfo .filter(i -> i.getLocalCellDomainInfo().getRole() == CellDomainRole.SATELLITE && i.getRemoteCellDomainInfo().getRole() == CellDomainRole.CORE) .ifPresent(i -> { delayedDefaultRoutes.add(new CellRoute(null, i.getTunnel(), CellRoute.DEFAULT)); if (nucleus.getRoutingTable().hasDefaultRoute()) { invokeLater(this::installDefaultRoute); } else { installDefaultRoute(); } }); break; case CellRoute.TOPIC: String topic = cr.getCellName(); if (target.getCellDomainName().equals(nucleus.getCellDomainName())) { localSubscriptions.put(target.getCellName(), topic); sendToCoreDomains(); } break; case CellRoute.QUEUE: String queue = cr.getCellName(); if (target.getCellDomainName().equals(nucleus.getCellDomainName())) { localConsumers.put(target.getCellName(), queue); sendToCoreDomains(); sendToSatelliteDomains(); } break; case CellRoute.DEFAULT: break; } } @Override public synchronized void routeDeleted(CellEvent ce) { CellRoute cr = (CellRoute) ce.getSource(); CellAddressCore target = cr.getTarget(); LOG.info("Got 'route deleted' event: {}", cr); switch (cr.getRouteType()) { case CellRoute.DOMAIN: updateTopicRoutes(cr.getDomainName(), Collections.emptyList()); updateQueueRoutes(cr.getDomainName(), Collections.emptyList()); coreTunnels.remove(target); satelliteTunnels.remove(target); delayedDefaultRoutes.remove(new CellRoute(null, target, CellRoute.DEFAULT)); break; case CellRoute.TOPIC: String topic = cr.getCellName(); if (target.getCellDomainName().equals(nucleus.getCellDomainName())) { if (localSubscriptions.remove(target.getCellName(), topic)) { sendToCoreDomains(); } } break; case CellRoute.QUEUE: String queue = cr.getCellName(); if (target.getCellDomainName().equals(nucleus.getCellDomainName())) { if (localSubscriptions.remove(target.getCellName(), queue)) { sendToCoreDomains(); sendToSatelliteDomains(); } } break; } } private synchronized void installDefaultRoute() { for (CellRoute route : delayedDefaultRoutes) { try { nucleus.routeAdd(route); } catch (IllegalArgumentException e) { LOG.info("Failed to add route: {}", e.getMessage()); } } delayedDefaultRoutes.clear(); } private boolean hasAlternativeDefaultRoute(CellRoute route) { return Stream.of(nucleus.getRoutingList()) .filter(r -> r.getRouteType() == CellRoute.DEFAULT) .anyMatch(r -> !r.equals(route)); } private Optional<CellTunnelInfo> getTunnelInfo(CellAddressCore tunnel) { return nucleus.getCellTunnelInfos().stream() .filter(i -> i.getTunnel().equals(tunnel)) .findAny(); } /** * This method returns the current state of the RoutingMgr cell as a (binary) Object. * <p> * NB. <b>This is a hack</b>. The correct method of receiving information from a * Cell is via a Vehicle. However, as the RoutingMgr is within the cells module (which * does not have the concept of Vehicles) this cannot be (easily) done. Instead, we * use the existing mechanism of obtaining a binary object via the admin interface and * flag this functionality as something that should be improved later. * * @return a representation of the RoutingManager's little brain. */ @Deprecated public synchronized Object ac_ls_$_0(Args args) { return new Object[] { getCellDomainName(), Sets.newHashSet(localConsumers.values()), queueRoutes.asMap().entrySet().stream().collect( toMap(Map.Entry::getKey, e -> Sets.newHashSet(e.getValue()))) }; } }