package dmg.cells.nucleus; import com.google.common.base.Throwables; import com.google.common.collect.Iterables; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.Monitor; import com.google.common.util.concurrent.MoreExecutors; import com.google.common.util.concurrent.SettableFuture; import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.common.util.concurrent.Uninterruptibles; import org.apache.curator.framework.CuratorFramework; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.MDC; import javax.annotation.Nonnull; import java.io.FileNotFoundException; import java.io.Reader; import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Date; import java.util.List; import java.util.Map; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; import java.util.concurrent.CancellationException; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executor; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; import dmg.cells.zookeeper.CellCuratorFramework; import dmg.util.Pinboard; import dmg.util.logback.FilterThresholdSet; import dmg.util.logback.RootFilterThresholds; import org.dcache.util.BoundedCachedExecutor; import org.dcache.util.BoundedExecutor; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.Iterables.consumingIterable; import static com.google.common.util.concurrent.MoreExecutors.directExecutor; import static org.dcache.util.MathUtils.addWithInfinity; import static org.dcache.util.MathUtils.subWithInfinity; /** * * * @author Patrick Fuhrmann * @version 0.1, 15 Feb 1998 */ public class CellNucleus implements ThreadFactory { private static final Logger LOGGER = LoggerFactory.getLogger(CellNucleus.class); private enum State { NEW(CellInfo.INITIAL, false, false, true), PRE_STARTUP(CellInfo.ACTIVE, true, false, true), POST_STARTUP(CellInfo.ACTIVE, true, true, true), RUNNING(CellInfo.ACTIVE, false, true, true), FAILED(CellInfo.REMOVING, false, true, true), STOPPING(CellInfo.REMOVING, false, true, false), TERMINATED(CellInfo.DEAD, false, false, false); /** State included in CellInfo. */ int externalState; /** * Whether the cell is currently processing startup callbacks. */ boolean isStarting; /** * Whether it is legal for a cell to call {@link #sendMessage(CellMessage, * boolean, boolean, boolean, CellMessageAnswerable, Executor, long)}. */ boolean isSendWithCallbackAllowed; /** * Whether callbacks are guaranteed to be called. At some point * during shutdown, the timeout mechanism is stopped and callbacks * are no longer called automatically. */ boolean areCallbacksGuaranteed; State(int externalState, boolean isStarting, boolean isSendWithCallbackAllowed, boolean areCallbacksGuaranteed) { this.externalState = externalState; this.isStarting = isStarting; this.isSendWithCallbackAllowed = isSendWithCallbackAllowed; this.areCallbacksGuaranteed = areCallbacksGuaranteed; } } private static final int PINBOARD_DEFAULT_SIZE = 200; private static CellGlue __cellGlue; private final String _cellName; private final String _cellType; private final ThreadGroup _threads; private final AtomicInteger _threadCounter = new AtomicInteger(); private final Cell _cell; private final Date _creationTime = new Date(); private volatile State _state = State.NEW; private final ConcurrentMap<UOID, CellLock> _waitHash = new ConcurrentHashMap<>(); private String _cellClass; private final BoundedExecutor _messageExecutor; private final AtomicInteger _eventQueueSize = new AtomicInteger(); /** * Timer for periodic low-priority maintenance tasks. Shared among * all cell instances. Since a Timer is single-threaded, * it is important that the timer is not used for long-running or * blocking tasks, nor for time critical tasks. */ private static final ScheduledExecutorService _timer = Executors.newSingleThreadScheduledExecutor( new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Cell maintenance task timer").build()); /** * Task for calling the Cell nucleus message timeout mechanism. */ private Future<?> _timeoutTask; /** * Task starting the cell. */ private ListenableFuture<Void> _startup; private Pinboard _pinboard; private FilterThresholdSet _loggingThresholds; private final BlockingQueue<Runnable> _deferredTasks = new LinkedBlockingQueue<>(); private volatile long _lastQueueTime; private final CellCuratorFramework _curatorFramework; private final Monitor _lifeCycleMonitor = new Monitor(); private final List<CellEventListener> _cellEventListeners = new CopyOnWriteArrayList<>(); private final Monitor.Guard isNotStarting = new Monitor.Guard(_lifeCycleMonitor) { @Override public boolean isSatisfied() { return !_state.isStarting; } }; public CellNucleus(Cell cell, String name, String type, Executor executor) { String cellName = name.replace('@', '+'); if (cellName.isEmpty()) { cellName = "*"; } if (cellName.charAt(cellName.length() - 1) == '*') { if (cellName.length() == 1) { cellName = "$-" + getUnique(); } else { cellName = cellName.substring(0, cellName.length() - 1) + '-' + getUnique(); } } _cellName = cellName; _cellType = type; _cell = cell; _cellClass = _cell.getClass().getName(); setPinboard(new Pinboard(PINBOARD_DEFAULT_SIZE)); __cellGlue.registerCell(this); /* Instantiate management component for log filtering. */ CellNucleus parentNucleus = CellNucleus.getLogTargetForCell(MDC.get(CDC.MDC_CELL)); FilterThresholdSet parentThresholds = (parentNucleus.isSystemNucleus() || parentNucleus == this) ? RootFilterThresholds.getInstance() : parentNucleus.getLoggingThresholds(); setLoggingThresholds(new FilterThresholdSet(parentThresholds)); _threads = new ThreadGroup(__cellGlue.getMasterThreadGroup(), _cellName + "-threads"); _messageExecutor = (executor == null) ? new BoundedCachedExecutor(this, 1) : new BoundedExecutor(executor, 1); CuratorFramework curatorFramework = __cellGlue.getCuratorFramework(); if (curatorFramework != null) { _curatorFramework = new CellCuratorFramework(curatorFramework, _messageExecutor); _curatorFramework.start(); } else { _curatorFramework = null; } LOGGER.info("Created {}", cellName); } /** * Returns the CellNucleus to which log messages tagged with a * given cell are associated. */ public static CellNucleus getLogTargetForCell(String cell) { CellNucleus nucleus = null; if (__cellGlue != null) { if (cell != null) { nucleus = __cellGlue.getCell(cell); } if (nucleus == null) { nucleus = __cellGlue.getSystemNucleus(); } } return nucleus; } public static void initCellGlue(String cellDomainName, CuratorFramework curatorFramework) { checkState(__cellGlue == null); __cellGlue = new CellGlue(cellDomainName, curatorFramework); } public static void startCurator() { CuratorFramework curatorFramework = __cellGlue.getCuratorFramework(); if (curatorFramework != null) { curatorFramework.start(); } } public static void shutdownCellGlue() { if (__cellGlue != null) { __cellGlue.shutdown(); } } boolean isSystemNucleus() { return this == __cellGlue.getSystemNucleus(); } public String getCellName() { return _cellName; } public String getCellType() { return _cellType; } public String getCellClass() { return _cellClass; } public void setCellClass(String cellClass) { _cellClass = cellClass; } public CellAddressCore getThisAddress() { return new CellAddressCore(_cellName, __cellGlue.getCellDomainName()); } public String getCellDomainName() { return __cellGlue.getCellDomainName(); } public List<String> getCellNames() { return __cellGlue.getCellNames(); } public CellInfo getCellInfo(String name) { return __cellGlue.getCellInfo(name); } public CellInfo getCellInfo() { return _getCellInfo(); } public Map<String, Object> getDomainContext() { return __cellGlue.getCellContext(); } public Reader getDomainContextReader(String contextName) throws FileNotFoundException { Object o = __cellGlue.getCellContext(contextName); if (o == null) { throw new FileNotFoundException("Context not found : " + contextName); } return new StringReader(o.toString()); } public void setDomainContext(String contextName, Object context) { __cellGlue.getCellContext().put(contextName, context); } public Object getDomainContext(String str) { return __cellGlue.getCellContext(str); } Cell getThisCell() { return _cell; } CellInfo _getCellInfo() { CellInfo info = new CellInfo(); info.setCellName(getCellName()); info.setDomainName(getCellDomainName()); info.setCellType(getCellType()); info.setCreationTime(_creationTime); try { info.setCellVersion(_cell.getCellVersion()); } catch(Exception e) {} try { info.setPrivateInfo(_cell.getInfo()); } catch(Exception e) { info.setPrivateInfo("Not yet/No more available\n"); } try { info.setShortInfo(_cell.toString()); } catch(Exception e) { info.setShortInfo("Not yet/No more available"); } info.setCellClass(_cellClass); try { int eventQueueSize = getEventQueueSize(); info.setEventQueueSize(eventQueueSize); info.setExpectedQueueTime((eventQueueSize == 0) ? 0 : _lastQueueTime); info.setState(_state.externalState); info.setThreadCount(_threads.activeCount()); } catch(Exception e) { info.setEventQueueSize(0); info.setState(0); info.setThreadCount(0); } return info; } public void setLoggingThresholds(FilterThresholdSet thresholds) { _loggingThresholds = thresholds; } public FilterThresholdSet getLoggingThresholds() { return _loggingThresholds; } public synchronized void setPinboard(Pinboard pinboard) { _pinboard = pinboard; } public synchronized Pinboard getPinboard() { return _pinboard; } public void setMaximumPoolSize(int size) { _messageExecutor.setMaximumPoolSize(size); } public int getMaximumPoolSize() { return _messageExecutor.getMaximumPoolSize(); } public void setMaximumQueueSize(int size) { _messageExecutor.setMaximumQueueSize(size); } public int getMaximumQueueSize() { return _messageExecutor.getMaximumQueueSize(); } public void sendMessage(CellMessage msg, boolean locally, boolean remotely, boolean shouldAddSource) throws SerializationException { checkArgument(!msg.isFinalDestination(), "Message has no next destination: %s", msg.getDestinationPath()); if (shouldAddSource) { msg.addSourceAddress(getThisAddress()); } EventLogger.sendBegin(msg, "async"); try { __cellGlue.sendMessage(msg, locally, remotely); } finally { EventLogger.sendEnd(msg); } } /** * Sends <code>envelope</code> and waits <code>timeout</code> * milliseconds for an answer to arrive. The answer will bypass * the ordinary queuing mechanism and will be delivered before any * other asynchronous message. The answer need to have the * getLastUOID set to the UOID of the message send with * sendAndWait. If the answer does not arrive withing the specified * time interval, the method returns <code>null</code> and the * answer will be handled as if it was an ordinary asynchronous * message. * * This method mostly exists for backwards compatibility. dCache code * should use CellStub or CellEndpoint. * * @param envelope the cell message to be sent. * @param timeout milliseconds to wait for an answer. * @return the answer or null if the timeout was reached. * @throws SerializationException if the payload object of this * message is not serializable. * @throws NoRouteToCellException if the destination * could not be reached. * @throws ExecutionException if an exception was returned. */ public CellMessage sendAndWait(CellMessage envelope, long timeout) throws SerializationException, NoRouteToCellException, InterruptedException, ExecutionException { final SettableFuture<CellMessage> future = SettableFuture.create(); sendMessage(envelope, true, true, true, new CellMessageAnswerable() { @Override public void answerArrived(CellMessage request, CellMessage answer) { future.set(answer); } @Override public void exceptionArrived(CellMessage request, Exception exception) { future.setException(exception); } @Override public void answerTimedOut(CellMessage request) { future.set(null); } }, directExecutor(), timeout); try { return future.get(timeout, TimeUnit.MILLISECONDS); } catch (TimeoutException e) { return null; } catch (ExecutionException e) { Throwables.throwIfInstanceOf(e.getCause(), NoRouteToCellException.class); Throwables.throwIfInstanceOf(e.getCause(), SerializationException.class); throw e; } } public Map<UOID,CellLock > getWaitQueue() { return Collections.unmodifiableMap(_waitHash); } private void executeMaintenanceTasks() { long now = System.currentTimeMillis(); _waitHash.entrySet().stream() .filter(e -> e.getValue().getTimeout() < now) .forEach(e -> timeOutMessage(e.getKey(), e.getValue(), this::reregisterCallback)); // Execute delayed tasks; since those tasks may themselves add new deferred // tasks we limit the operation to the number of tasks we started out with // to avoid an infinite loop. Iterables.limit(consumingIterable(_deferredTasks), _deferredTasks.size()).forEach(Runnable::run); } /** * Sends <code>msg</code>. * * The <code>callback</code> argument specifies an object which is informed * as soon as an has answer arrived or if the timeout has expired. * * The callback is run in the supplied executor. The executor may * execute the callback inline, but such an executor must only be * used if the callback is non-blocking, and the callback should * refrain from CPU heavy operations. Care should be taken that * the executor isn't blocked by tasks waiting for the callback; * such tasks could lead to a deadlock. * * @param msg the cell message to be sent. * @param local whether to attempt delivery to cells in the same domain * @param remote whether to attempt delivery to cells in other domains * @param shouldAddSource whether to add this cell to the source path * @param callback specifies an object class which will be informed * as soon as the message arrives. * @param executor the executor to run the callback in * @param timeout is the timeout in msec. * @exception SerializationException if the payload object of this * message is not serializable. */ public void sendMessage(CellMessage msg, boolean local, boolean remote, boolean shouldAddSource, CellMessageAnswerable callback, Executor executor, long timeout) throws SerializationException { checkState(_state.isSendWithCallbackAllowed); checkArgument(!msg.isFinalDestination(), "Message has no next destination: %s", msg.getDestinationPath()); if (shouldAddSource) { msg.addSourceAddress(getThisAddress()); } else { checkArgument(msg.getSourcePath().hops() > 0, "Message has no source address."); } msg.setTtl(timeout); UOID uoid = msg.getUOID(); CellLock lock = new CellLock(msg, callback, executor, timeout); EventLogger.sendBegin(msg, "callback"); /* Ordering here is important - need to insert into waitHash before checking the state * to avoid a race with shutdown. */ _waitHash.put(uoid, lock); if (!_state.areCallbacksGuaranteed) { /* Cell is shutting down so timeout the message. */ timeOutMessage(uoid, lock, (u, l) -> {}); return; } try { __cellGlue.sendMessage(msg, local, remote); } catch (SerializationException e) { if (_waitHash.remove(uoid, lock)) { EventLogger.sendEnd(msg); } throw e; } catch (RuntimeException e) { if (_waitHash.remove(uoid, lock)) { try { executor.execute(() -> { try { callback.exceptionArrived(msg, e); EventLogger.sendEnd(msg); } catch (RejectedExecutionException e1) { /* May happen when the callback itself tries to schedule the call * on an executor. Put the request back and let it time out. */ LOGGER.error("Failed to invoke callback: {}", e1.toString()); reregisterCallback(uoid, lock); } }); } catch (RejectedExecutionException e1) { /* Put it back and let it time out. */ LOGGER.error("Failed to invoke callback: {}", e1.toString()); reregisterCallback(uoid, lock); } } else { LOGGER.error("Failed to send message: {}", e.toString()); } } } public void addCellEventListener(CellEventListener listener) { _cellEventListeners.add(listener); } void addToEventQueue(CellEvent ce) { try { _eventQueueSize.incrementAndGet(); _messageExecutor.execute(new CellEventTask(ce)); } catch (RejectedExecutionException e) { _eventQueueSize.decrementAndGet(); LOGGER.error("Dropping event: {}", e.getMessage()); } } public void consume(String queue) { __cellGlue.consume(this, queue); } public void subscribe(String topic) { __cellGlue.subscribe(this, topic); } /** * * The kill method schedules the specified cell for deletion. * The actual remove operation will run in a different * thread. So on return of this method the cell may * or may not be alive. */ public ListenableFuture<?> kill() { return __cellGlue.kill(this); } /** * * The kill method schedules this Cell for deletion. * The actual remove operation will run in a different * thread. So on return of this method the cell may * or may not be alive. */ public ListenableFuture<?> kill(String cellName) { return __cellGlue.kill(this, cellName); } /** * Log the threads of some cell. This is * intended for diagnostic information. */ public static void listThreadGroupOf(String cellName) { __cellGlue.listThreadGroupOf(cellName); } /** * Log the killer threads. This is * intended for diagnostic information. */ public static void listKillerThreadGroup() { __cellGlue.listKillerThreadGroup(); } /** * Print diagnostic information about currently running * threads at warn level. */ public void threadGroupList() { CellGlue.listThreadGroup(_threads); } /** * Blocks until the given cell is dead. * * @throws InterruptedException if another thread interrupted the * current thread before or while the current thread was waiting * for a notification. The interrupted status of the current * thread is cleared when this exception is thrown. * @return True if the cell died, false in case of a timeout. */ public boolean join(String cellName) throws InterruptedException { return __cellGlue.join(cellName, 0); } /** * Blocks until the given cell is dead. * * @param timeout the maximum time to wait in milliseconds. * @throws InterruptedException if another thread interrupted the * current thread before or while the current thread was waiting * for a notification. The interrupted status of the current * thread is cleared when this exception is thrown. * @return True if the cell died, false in case of a timeout. */ public boolean join(String cellName, long timeout) throws InterruptedException { return __cellGlue.join(cellName, timeout); } /** * Returns the non-daemon threads of a thread group. */ private Collection<Thread> getNonDaemonThreads(ThreadGroup group) { Thread[] threads = new Thread[group.activeCount()]; int count = group.enumerate(threads); Collection<Thread> nonDaemonThreads = new ArrayList<>(count); for (int i = 0; i < count; i++) { Thread thread = threads[i]; if (!thread.isDaemon()) { nonDaemonThreads.add(thread); } } return nonDaemonThreads; } /** * Waits for at most timeout milliseconds for the termination of a * set of threads. * * @return true if all threads terminated, false otherwise */ private boolean joinThreads(Collection<Thread> threads, long timeout) throws InterruptedException { long deadline = addWithInfinity(System.currentTimeMillis(), timeout); for (Thread thread: threads) { if (thread.isAlive()) { long wait = subWithInfinity(deadline, System.currentTimeMillis()); if (wait <= 0) { return false; } thread.join(wait); if (thread.isAlive()) { return false; } } } return true; } /** * Interrupts a set of threads. */ private void killThreads(Collection<Thread> threads) { for (Thread thread: threads) { if (thread.isAlive()) { LOGGER.warn("Forcefully interrupting thread {} during shutdown.", thread.getName()); thread.interrupt(); } } } private Runnable wrapLoggingContext(final Runnable runnable) { return () -> { try (CDC ignored = CDC.reset(CellNucleus.this)) { runnable.run(); } }; } private <T> Callable<T> wrapLoggingContext(final Callable<T> callable) { return () -> { try (CDC ignored = CDC.reset(CellNucleus.this)) { return callable.call(); } }; } /** * Submits a task for execution on the message thread. */ <T> Future<T> invokeOnMessageThread(Callable<T> task) { return _messageExecutor.submit(wrapLoggingContext(task)); } /** * Submits a task for execution on the message thread. */ Future<?> invokeOnMessageThread(Runnable task) { return _messageExecutor.submit(wrapLoggingContext(task)); } void invokeLater(Runnable runnable) { _deferredTasks.add(wrapLoggingContext(runnable)); } void runDeferredTasksNow() { _timer.execute(() -> consumingIterable(_deferredTasks).forEach(Runnable::run)); } @Override @Nonnull public Thread newThread(@Nonnull Runnable target) { return newThread(target, getCellName() + '-' + _threadCounter.getAndIncrement()); } @Nonnull public Thread newThread(@Nonnull Runnable target, @Nonnull String name) { return CellGlue.newThread(_threads, wrapLoggingContext(target), name); } // // package // Thread [] getThreads(String cellName) { return __cellGlue.getThreads(cellName); } public ThreadGroup getThreadGroup() { return _threads; } Thread [] getThreads() { if (_threads == null) { return new Thread[0]; } int threadCount = _threads.activeCount(); Thread [] list = new Thread[threadCount]; int rc = _threads.enumerate(list); if (rc == list.length) { return list; } Thread [] ret = new Thread[rc]; System.arraycopy(list, 0, ret, 0, rc); return ret; } private String getUnique() { return __cellGlue.getUnique(); } int getEventQueueSize() { return _eventQueueSize.get(); } void addToEventQueue(MessageEvent ce) { CellMessage msg = ce.getMessage(); LOGGER.trace("addToEventQueue : message arrived : {}", msg); CellLock lock = _waitHash.remove(msg.getLastUOID()); if (lock != null) { // // we were waiting for you (sync or async) // LOGGER.trace("addToEventQueue : lock found for : {}", msg); try { _eventQueueSize.incrementAndGet(); lock.getExecutor().execute(new CallbackTask(lock, msg)); } catch (RejectedExecutionException e) { _eventQueueSize.decrementAndGet(); /* Put it back; the timeout handler will eventually take care of it. */ LOGGER.error("Dropping reply: {}", e.getMessage()); reregisterCallback(msg.getLastUOID(), lock); } } else { /* Fail fast for requests if the cell is busy. We consider the cell busy * if the last queue time exceeds the TTL of the request. */ if (_eventQueueSize.get() == 0) { _lastQueueTime = 0; } else if (!msg.isReply()) { long queueTime = _lastQueueTime; if (msg.getTtl() < queueTime) { CellMessage envelope = new CellMessage(msg.getSourcePath().revert(), new NoRouteToCellException(msg, getCellName() + '@' + getCellDomainName() + " is busy (its estimated response time of " + queueTime + " ms is longer than the message TTL of " + msg.getTtl() + " ms).")); envelope.setLastUOID(msg.getUOID()); sendMessage(envelope, true, true, true); } } try { EventLogger.queueBegin(ce); _eventQueueSize.incrementAndGet(); _messageExecutor.execute(new DeliverMessageTask(ce)); } catch (RejectedExecutionException e) { EventLogger.queueEnd(ce); _eventQueueSize.decrementAndGet(); LOGGER.error("Dropping message: {}", e.getMessage()); } } } private void setState(State newState) { _lifeCycleMonitor.enter(); try { _state = newState; } finally { _lifeCycleMonitor.leave(); } } /** * Starts the cell asynchronously. * * Calls the startup callbacks of the cell, registers the cell with the cell glue and * initiates cell message delivery. If startup fails, the cell is torn down. * * Must only be called once. */ public ListenableFuture<Void> start() { _lifeCycleMonitor.enter(); try { checkState(_state == State.NEW); _state = State.PRE_STARTUP; _startup = _messageExecutor.submit(wrapLoggingContext(this::doStart)); } finally { _lifeCycleMonitor.leave(); } return Futures.nonCancellationPropagating(_startup); } private Void doStart() throws Exception { try { checkState(_state == State.PRE_STARTUP); _timeoutTask = _timer.scheduleWithFixedDelay(wrapLoggingContext(this::executeMaintenanceTasks), 20, 20, TimeUnit.SECONDS); StartEvent event = new StartEvent(new CellPath(_cellName), 0); try { EventLogger.prepareSetupBegin(_cell, event); _cell.prepareStartup(event); } finally { EventLogger.prepareSetupEnd(_cell, event); } setState(State.POST_STARTUP); __cellGlue.publishCell(this); try { EventLogger.postStartupBegin(_cell, event); _cell.postStartup(event); } finally { EventLogger.postStartupEnd(_cell, event); } setState(State.RUNNING); } catch (Throwable e) { setState(State.FAILED); __cellGlue.kill(CellNucleus.this); throw e; } return null; } void shutdown(KillEvent event) { try (CDC ignored = CDC.reset(CellNucleus.this)) { LOGGER.trace("Received {}", event); /* Wait for cell initialization to complete to ensure sequential execution of callbacks. */ boolean wasRunning = false; _lifeCycleMonitor.enter(); try { if (!_lifeCycleMonitor.waitForUninterruptibly(isNotStarting, 2, TimeUnit.SECONDS)) { _startup.cancel(true); _lifeCycleMonitor.waitForUninterruptibly(isNotStarting); } State state = _state; checkState(state == State.NEW || state == State.RUNNING || state == State.FAILED); wasRunning = (state == State.RUNNING); _state = State.STOPPING; } finally { _lifeCycleMonitor.leave(); } /* Stop executing deferred tasks. */ if (_timeoutTask != null) { _timeoutTask.cancel(false); try { Uninterruptibles.getUninterruptibly(_timeoutTask); } catch (CancellationException | ExecutionException ignore) { } } /* Only call prepareRemoval if prepareStartup completed successfully. */ if (wasRunning) { try { Uninterruptibles.getUninterruptibly(_messageExecutor.submit(() -> { try { EventLogger.prepareRemovalBegin(_cell, event); _cell.prepareRemoval(event); } finally { EventLogger.prepareRemovalEnd(_cell, event); }})); } catch (Throwable e) { Thread t = Thread.currentThread(); t.getUncaughtExceptionHandler().uncaughtException(t, e); } } /* Cancel callbacks. */ _waitHash.forEach((uoid, lock) -> timeOutMessage(uoid, lock, (u, l) -> {})); /* Shut down the curator decorator; this just kills the internal executor of the decorator * while still allowing it to be used for operations without callbacks. */ if (_curatorFramework != null) { _curatorFramework.close(); } /* Shut down message executor. */ if (!MoreExecutors.shutdownAndAwaitTermination(_messageExecutor, 2, TimeUnit.SECONDS)) { LOGGER.warn("Failed to flush message queue during shutdown."); } /* Shut down cell. */ try { EventLogger.postRemovalBegin(_cell, event); _cell.postRemoval(event); } catch (Throwable e) { Thread t = Thread.currentThread(); t.getUncaughtExceptionHandler().uncaughtException(t, e); } finally { EventLogger.postRemovalEnd(_cell, event); } /* Shut down remaining threads. */ LOGGER.debug("Waiting for all threads in {} to finish", _threads); try { Collection<Thread> threads = getNonDaemonThreads(_threads); /* Some threads shut down asynchronously. Give them * one second before we start to kill them. */ while (!joinThreads(threads, 1000)) { threadGroupList(); killThreads(threads); } _threads.destroy(); } catch (IllegalThreadStateException e) { _threads.setDaemon(true); } catch (InterruptedException e) { LOGGER.warn("Interrupted while waiting for threads"); } /* Declare the cell as dead. */ __cellGlue.destroy(CellNucleus.this); setState(State.TERMINATED); } } /** * Registers a callback, considering that the cell may have already shut down. */ private void reregisterCallback(UOID uoid, CellLock lock) { /* Ordering here is important - need to insert into waitHash before checking the state * to avoid a race with shutdown. */ _waitHash.put(uoid, lock); if (!_state.areCallbacksGuaranteed) { /* The cell is shutting down so we time out the message right away. */ timeOutMessage(uoid, lock, (u, l) -> {}); } } /** * Unregisters the callback and calls its timeout method. If scheduling * the callback fails that task is reregistered for later processing. */ private void timeOutMessage(UOID uoid, CellLock lock, BiConsumer<UOID, CellLock> reregister) { if (_waitHash.remove(uoid, lock)) { try (CDC ignored = lock.getCdc().restore()) { try { lock.getExecutor().execute(() -> { try (CDC ignored2 = lock.getCdc().restore()) { CellMessage envelope = lock.getMessage(); try { lock.getCallback().answerTimedOut(envelope); EventLogger.sendEnd(envelope); } catch (RejectedExecutionException e) { LOGGER.warn("Failed to invoke callback: {}", e.toString()); reregister.accept(uoid, lock); } catch (RuntimeException e) { Thread t = Thread.currentThread(); t.getUncaughtExceptionHandler().uncaughtException(t, e); } } }); } catch (RejectedExecutionException e) { /* Put it back and deal with it later. */ reregister.accept(uoid, lock); LOGGER.warn("Failed to invoke callback: {}", e.toString()); } } } } //////////////////////////////////////////////////////////// // // // the routing stuff // /** * Installs a new route in the routing table. * * @param route The route to add * @throws IllegalArgumentException If the route is a duplicate or if it routes through * a non-existing local cell. */ public void routeAdd(CellRoute route) throws IllegalArgumentException { __cellGlue.routeAdd(route); } public void routeDelete(CellRoute route) throws IllegalArgumentException { __cellGlue.routeDelete(route); } CellRoute routeFind(CellAddressCore addr) { return __cellGlue.getRoutingTable().find(addr, true); } public CellRoutingTable getRoutingTable() { return __cellGlue.getRoutingTable(); } public CellRoute [] getRoutingList() { return __cellGlue.getRoutingList(); } // public List<CellTunnelInfo> getCellTunnelInfos() { return __cellGlue.getCellTunnelInfos(); } public CuratorFramework getCuratorFramework() { return _curatorFramework; } // private class CallbackTask implements Runnable { private final CellLock _lock; private final CellMessage _message; public CallbackTask(CellLock lock, CellMessage message) { _lock = lock; _message = message; } @Override public void run() { _eventQueueSize.decrementAndGet(); try (CDC ignored = _lock.getCdc().restore()) { try { _message.getDestinationPath().next(); CellMessageAnswerable callback = _lock.getCallback(); CellMessage request = _lock.getMessage(); try { Object obj = _message.getMessageObject(); if (obj instanceof Exception) { callback.exceptionArrived(request, (Exception) obj); } else { callback.answerArrived(request, _message); } EventLogger.sendEnd(request); } catch (RejectedExecutionException e) { /* May happen when the callback itself tries to schedule the call * on an executor. Put the request back and let it time out. */ LOGGER.error("Failed to invoke callback: {}", e.toString()); reregisterCallback(request.getUOID(), _lock); } LOGGER.trace("addToEventQueue : callback done for : {}", _message); } catch (Throwable e) { Thread t = Thread.currentThread(); t.getUncaughtExceptionHandler().uncaughtException(t, e); } } } @Override public String toString() { return "Delivery-of-" + _message; } } private class DeliverMessageTask implements Runnable { private final MessageEvent _event; public DeliverMessageTask(MessageEvent event) { _event = event; } @Override public void run() { try (CDC ignored = CDC.reset(CellNucleus.this)) { try { EventLogger.queueEnd(_event); _lastQueueTime = _event.getMessage().getLocalAge(); _eventQueueSize.decrementAndGet(); if (_event instanceof RoutedMessageEvent) { _cell.messageArrived(_event); } else { CellMessage msg = _event.getMessage(); CDC.setMessageContext(msg); msg.getDestinationPath().next(); try { _cell.messageArrived(_event); } catch (RuntimeException e) { if (!msg.isReply()) { msg.revertDirection(); msg.setMessageObject(e); sendMessage(msg, true, true, true); } throw e; } } } catch (Throwable e) { Thread t = Thread.currentThread(); t.getUncaughtExceptionHandler().uncaughtException(t, e); } } } @Override public String toString() { return "Delivery-of-" + _event; } } private class CellEventTask implements Runnable { private final CellEvent _event; public CellEventTask(CellEvent event) { this._event = event; } @Override public void run() { _eventQueueSize.decrementAndGet(); try (CDC ignored = CDC.reset(CellNucleus.this)) { for (CellEventListener listener : _cellEventListeners) { try { switch (_event.getEventType()) { case CellEvent.CELL_CREATED_EVENT: listener.cellCreated(_event); break; case CellEvent.CELL_DIED_EVENT: listener.cellDied(_event); break; case CellEvent.CELL_ROUTE_ADDED_EVENT: listener.routeAdded(_event); break; case CellEvent.CELL_ROUTE_DELETED_EVENT: listener.routeDeleted(_event); break; } } catch (Throwable e) { Thread t = Thread.currentThread(); t.getUncaughtExceptionHandler().uncaughtException(t, e); } } } } } }