/* * Licensed to the Apache Software Foundation (ASF) under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional information regarding * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. You may obtain a * copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package org.apache.geode.distributed.internal; import org.apache.geode.*; import org.apache.geode.admin.GemFireHealthConfig; import org.apache.geode.distributed.DistributedMember; import org.apache.geode.distributed.DistributedSystemDisconnectedException; import org.apache.geode.distributed.Locator; import org.apache.geode.distributed.Role; import org.apache.geode.distributed.internal.locks.ElderState; import org.apache.geode.distributed.internal.membership.*; import org.apache.geode.i18n.StringId; import org.apache.geode.internal.*; import org.apache.geode.internal.admin.remote.AdminConsoleDisconnectMessage; import org.apache.geode.internal.admin.remote.RemoteGfManagerAgent; import org.apache.geode.internal.admin.remote.RemoteTransportConfig; import org.apache.geode.internal.cache.InitialImageOperation; import org.apache.geode.internal.i18n.LocalizedStrings; import org.apache.geode.internal.logging.LogService; import org.apache.geode.internal.logging.LoggingThreadGroup; import org.apache.geode.internal.logging.log4j.AlertAppender; import org.apache.geode.internal.logging.log4j.LocalizedMessage; import org.apache.geode.internal.logging.log4j.LogMarker; import org.apache.geode.internal.net.SocketCreator; 
import org.apache.geode.internal.sequencelog.MembershipLogger; import org.apache.geode.internal.tcp.Connection; import org.apache.geode.internal.tcp.ConnectionTable; import org.apache.geode.internal.tcp.ReenteredConnectException; import org.apache.geode.internal.util.concurrent.StoppableReentrantLock; import org.apache.logging.log4j.Logger; import java.io.*; import java.net.InetAddress; import java.net.UnknownHostException; import java.util.*; import java.util.concurrent.*; /** * The <code>DistributionManager</code> uses a {@link MembershipManager} to distribute * {@link DistributionMessage messages} queued in {@link MQueue}s. * * <P> * * Code that wishes to send a {@link DistributionMessage} must get the * <code>DistributionManager</code> and invoke {@link #putOutgoing}. * * <P> * * Prior to GemFire 4.0, <code>DistributionManager</code> was an abstract class with two concrete * subclasses, <code>LocalDistributionManager</code> and <code>ConsoleDistributionManager</code>. We * decided that <code>ConsoleDistributionManager</code> (which was used for the GUI console and * admin APIs) did not offer enough interesting functionality to warrant a separate class. More * importantly, it prevented the Cache and admin APIs from being used in the same VM. So, we * refactored the code of those two subclasses into <code>DistributionManager</code>. * * @since GemFire 2.0 * * @see DistributionMessage#process * @see IgnoredByManager */ public class DistributionManager implements DM { private static final Logger logger = LogService.getLogger(); private static final boolean SYNC_EVENTS = Boolean.getBoolean("DistributionManager.syncEvents"); /** * WARNING: setting this to true may break dunit tests. 
* <p> * see org.apache.geode.cache30.ClearMultiVmCallBkDUnitTest */ public static final boolean INLINE_PROCESS = !Boolean.getBoolean("DistributionManager.enqueueOrderedMessages"); /** * Flag indicating whether to use single Serial-Executor thread or Multiple Serial-executor * thread, */ public static final boolean MULTI_SERIAL_EXECUTORS = !Boolean.getBoolean("DistributionManager.singleSerialExecutor"); /** The name of the distribution manager (identifies it in GemFire) */ public static final String NAME = "GemFire"; /** * The number of milliseconds to wait for distribution-related things to happen */ public static final long TIMEOUT = Long.getLong("DistributionManager.TIMEOUT", -1).longValue(); public static final int PUSHER_THREADS = Integer.getInteger("DistributionManager.PUSHER_THREADS", 50).intValue(); public static final int PUSHER_QUEUE_SIZE = Integer.getInteger("DistributionManager.PUSHER_QUEUE_SIZE", 4096).intValue(); public static final int MAX_WAITING_THREADS = Integer.getInteger("DistributionManager.MAX_WAITING_THREADS", Integer.MAX_VALUE).intValue(); public static final int MAX_PR_META_DATA_CLEANUP_THREADS = Integer.getInteger("DistributionManager.MAX_PR_META_DATA_CLEANUP_THREADS", 1).intValue(); public static final int MAX_THREADS = Integer.getInteger("DistributionManager.MAX_THREADS", 100).intValue(); public static final int MAX_PR_THREADS = Integer.getInteger("DistributionManager.MAX_PR_THREADS", Math.max(Runtime.getRuntime().availableProcessors() * 4, 16)).intValue(); public static final int MAX_FE_THREADS = Integer.getInteger("DistributionManager.MAX_FE_THREADS", Math.max(Runtime.getRuntime().availableProcessors() * 4, 16)).intValue(); // Integer.getInteger("DistributionManager.MAX_THREADS", // max(Runtime.getRuntime().availableProcessors()*2, 2)).intValue(); public static final int INCOMING_QUEUE_LIMIT = Integer.getInteger("DistributionManager.INCOMING_QUEUE_LIMIT", 80000).intValue(); public static final int INCOMING_QUEUE_THROTTLE = 
Integer.getInteger("DistributionManager.INCOMING_QUEUE_THROTTLE", (int) (INCOMING_QUEUE_LIMIT * 0.75)).intValue(); /** Throttling based on the Queue byte size */ public static final double THROTTLE_PERCENT = (double) (Integer .getInteger("DistributionManager.SERIAL_QUEUE_THROTTLE_PERCENT", 75).intValue()) / 100; public static final int SERIAL_QUEUE_BYTE_LIMIT = Integer .getInteger("DistributionManager.SERIAL_QUEUE_BYTE_LIMIT", (40 * (1024 * 1024))).intValue(); public static final int SERIAL_QUEUE_THROTTLE = Integer.getInteger("DistributionManager.SERIAL_QUEUE_THROTTLE", (int) (SERIAL_QUEUE_BYTE_LIMIT * THROTTLE_PERCENT)).intValue(); public static final int TOTAL_SERIAL_QUEUE_BYTE_LIMIT = Integer.getInteger("DistributionManager.TOTAL_SERIAL_QUEUE_BYTE_LIMIT", (80 * (1024 * 1024))) .intValue(); public static final int TOTAL_SERIAL_QUEUE_THROTTLE = Integer.getInteger("DistributionManager.TOTAL_SERIAL_QUEUE_THROTTLE", (int) (SERIAL_QUEUE_BYTE_LIMIT * THROTTLE_PERCENT)).intValue(); /** Throttling based on the Queue item size */ public static final int SERIAL_QUEUE_SIZE_LIMIT = Integer.getInteger("DistributionManager.SERIAL_QUEUE_SIZE_LIMIT", 20000).intValue(); public static final int SERIAL_QUEUE_SIZE_THROTTLE = Integer.getInteger("DistributionManager.SERIAL_QUEUE_SIZE_THROTTLE", (int) (SERIAL_QUEUE_SIZE_LIMIT * THROTTLE_PERCENT)).intValue(); /** Max number of serial Queue executors, in case of multi-serial-queue executor */ public static final int MAX_SERIAL_QUEUE_THREAD = Integer.getInteger("DistributionManager.MAX_SERIAL_QUEUE_THREAD", 20).intValue(); /** * Whether or not to include link local addresses in the list of addresses we use to determine if * two members are no the same host. 
* * Added for normura issue 7033 - they have duplicate link local addresses on different boxes */ public static volatile boolean INCLUDE_LINK_LOCAL_ADDRESSES = Boolean.getBoolean(DistributionConfig.GEMFIRE_PREFIX + "IncludeLinkLocalAddresses"); /** The DM type for regular distribution managers */ public static final int NORMAL_DM_TYPE = 10; /** * The DM type for locator distribution managers * * @since GemFire 7.0 */ public static final int LOCATOR_DM_TYPE = 11; /** The DM type for Console (admin-only) distribution managers */ public static final int ADMIN_ONLY_DM_TYPE = 12; public static final int LONER_DM_TYPE = 13; /** * an NIO priority type * * @see org.apache.geode.distributed.internal.PooledDistributionMessage * @see #SERIAL_EXECUTOR * @see #HIGH_PRIORITY_EXECUTOR * @see #WAITING_POOL_EXECUTOR */ public static final int STANDARD_EXECUTOR = 73; /** * an NIO priority type * * @see org.apache.geode.distributed.internal.SerialDistributionMessage * @see #STANDARD_EXECUTOR */ public static final int SERIAL_EXECUTOR = 74; /** * an NIO priority type * * @see org.apache.geode.distributed.internal.HighPriorityDistributionMessage * @see #STANDARD_EXECUTOR */ public static final int HIGH_PRIORITY_EXECUTOR = 75; // 76 not in use /** * an NIO priority type * * @see org.apache.geode.internal.cache.InitialImageOperation * @see #STANDARD_EXECUTOR */ public static final int WAITING_POOL_EXECUTOR = 77; /** * an NIO priority type * * @see org.apache.geode.internal.cache.InitialImageOperation * @see #STANDARD_EXECUTOR */ public static final int PARTITIONED_REGION_EXECUTOR = 78; /** * Executor for view related messages * * @see org.apache.geode.distributed.internal.membership.gms.messages.ViewAckMessage * @see #STANDARD_EXECUTOR */ public static final int VIEW_EXECUTOR = 79; public static final int REGION_FUNCTION_EXECUTION_EXECUTOR = 80; /** The number of open distribution managers in this VM */ private static int openDMs = 0; // /** The stack trace of the last time a console DM 
was opened */ // private static Exception openStackTrace; /** * Is this VM dedicated to administration (like a GUI console or a JMX agent)? If so, then it * creates {@link #ADMIN_ONLY_DM_TYPE} type distribution managers. * * @since GemFire 4.0 */ public static volatile boolean isDedicatedAdminVM = false; /** * Is this admin agent used for a command line console. This flag controls whether connect will * throw an exception or just wait for a DS if one is not available. If true, we will throw an * exception. * */ public static volatile boolean isCommandLineAdminVM = false; ///////////////////// Instance Fields ////////////////////// /** The id of this distribution manager */ final protected InternalDistributedMember myid; /** The distribution manager type of this dm; set in its constructor. */ private final int dmType; /** * The <code>MembershipListener</code>s that are registered on this manager. */ private final ConcurrentMap membershipListeners; /** A lock to hold while adding and removing membership listeners */ protected final Object membershipListenersLock = new MembershipListenersLock(); /** * The <code>MembershipListener</code>s that are registered on this manager for ALL members. * * @since GemFire 5.7 */ protected volatile Set allMembershipListeners = Collections.EMPTY_SET; /** * A lock to hold while adding and removing all membership listeners. * * @since GemFire 5.7 */ protected final Object allMembershipListenersLock = new MembershipListenersLock(); /** A queue of MemberEvent instances */ protected final BlockingQueue membershipEventQueue = new LinkedBlockingQueue(); /** Used to invoke registered membership listeners in the background. */ private Thread memberEventThread; /** A brief description of this DistributionManager */ protected final String description; /** Statistics about distribution */ protected /* final */ DistributionStats stats; /** Did an exception occur in one of the DM threads? 
*/ protected boolean exceptionInThreads; static ThreadLocal isStartupThread = new ThreadLocal(); protected volatile boolean shutdownMsgSent = false; /** Set to true when this manager is being shutdown */ protected volatile boolean closeInProgress = false; private volatile boolean receivedStartupResponse = false; private volatile String rejectionMessage = null; protected MembershipManager membershipManager; /** The channel through which distributed communication occurs. */ protected DistributionChannel channel; /** * The (non-admin-only) members of the distributed system. This is a map of memberid->memberid for * fast access to canonical ID references. All accesses to this field must be synchronized on * {@link #membersLock}. */ private Map<InternalDistributedMember, InternalDistributedMember> members = Collections.emptyMap(); /** * All (admin and non-admin) members of the distributed system. All accesses to this field must be * synchronized on {@link #membersLock}. */ private Set membersAndAdmin = Collections.emptySet(); /** * Map of all locator members of the distributed system. The value is a collection of locator * strings that are hosted in that member. All accesses to this field must be synchronized on * {@link #membersLock}. */ private Map<InternalDistributedMember, Collection<String>> hostedLocatorsAll = Collections.emptyMap(); /** * Map of all locator members of the distributed system which have the shared configuration. The * value is a collection of locator strings that are hosted in that member. All accesses to this * field must be synchronized on {@link #membersLock}. */ private Map<InternalDistributedMember, Collection<String>> hostedLocatorsWithSharedConfiguration = Collections.emptyMap(); /** * Since 6.6.2 and hereafter we will save the versions here. But pre-6.6.2's * StartupResponseMessage does not contain version. We will assign a default version for them. 
*/ public static final String DEFAULT_VERSION_PRE_6_6_2 = "6.6.0.0"; /** * The lock held while accessing the field references to the following:<br> * 1) {@link #members}<br> * 2) {@link #membersAndAdmin}<br> * 3) {@link #hostedLocatorsAll}<br> * 4) {@link #hostedLocatorsWithSharedConfiguration}<br> */ private final Object membersLock = new MembersLock(); /** * The lock held while writing {@link #adminConsoles}. */ private final Object adminConsolesLock = new Object(); /** * The ids of all known admin consoles Uses Copy on Write. Writers must sync on adminConsolesLock. * Readers don't need to sync. */ private volatile Set<InternalDistributedMember> adminConsoles = Collections.emptySet(); /** The pusher thread */ // private Thread pusher; /** The group of distribution manager threads */ protected LoggingThreadGroup threadGroup; /** Message processing thread pool */ private ThreadPoolExecutor threadPool; /** * High Priority processing thread pool, used for initializing messages such as UpdateAttributes * and CreateRegion messages */ private ThreadPoolExecutor highPriorityPool; /** * Waiting Pool, used for messages that may have to wait on something. Use this separate pool with * an unbounded queue so that waiting runnables don't get in the way of other processing threads. * Used for threads that will most likely have to wait for a region to be finished initializing * before it can proceed */ private ThreadPoolExecutor waitingPool; private ThreadPoolExecutor prMetaDataCleanupThreadPool; /** * Thread used to decouple {@link org.apache.geode.internal.cache.partitioned.PartitionMessage}s * from {@link org.apache.geode.internal.cache.DistributedCacheOperation}s </b> * * @see #SERIAL_EXECUTOR */ private ThreadPoolExecutor partitionedRegionThread; private ThreadPoolExecutor partitionedRegionPool; private ThreadPoolExecutor functionExecutionThread; private ThreadPoolExecutor functionExecutionPool; /** Message processing executor for serial, ordered, messages. 
*/ private ThreadPoolExecutor serialThread; /** * Message processing executor for view messages * * @see org.apache.geode.distributed.internal.membership.gms.messages.ViewAckMessage */ private ThreadPoolExecutor viewThread; /** * If using a throttling queue for the serialThread, we cache the queue here so we can see if * delivery would block */ private ThrottlingMemLinkedQueueWithDMStats serialQueue; /** a map keyed on InternalDistributedMember, to direct channels to other systems */ // protected final Map channelMap = CFactory.createCM(); protected volatile boolean readyForMessages = false; /** * Set to true once this DM is ready to send messages. Note that it is always ready to send the * startup message. */ private volatile boolean readyToSendMsgs = false; private final Object readyToSendMsgsLock = new Object(); /** Is this distribution manager closed? */ protected volatile boolean closed = false; /** * The distributed system to which this distribution manager is connected. */ private InternalDistributedSystem system; /** The remote transport configuration for this dm */ private RemoteTransportConfig transport; /** * The administration agent associated with this distribution manager. */ private volatile RemoteGfManagerAgent agent; private SerialQueuedExecutorPool serialQueuedExecutorPool; private final Semaphore parallelGIIs = new Semaphore(InitialImageOperation.MAX_PARALLEL_GIIS); /** * Map of InetAddress to HashSets of InetAddress, to define equivalences between network interface * cards and hosts. 
*/ private final HashMap<InetAddress, Set<InetAddress>> equivalentHosts = new HashMap<InetAddress, Set<InetAddress>>(); private int distributedSystemId = DistributionConfig.DEFAULT_DISTRIBUTED_SYSTEM_ID; private final Map<InternalDistributedMember, String> redundancyZones = Collections.synchronizedMap(new HashMap<InternalDistributedMember, String>()); private boolean enforceUniqueZone = false; private volatile boolean isSharedConfigEnabledForDS = false; @Override public boolean isSharedConfigurationServiceEnabledForDS() { return isSharedConfigEnabledForDS; } /** * Identifier for function execution threads and any of their children */ public static final InheritableThreadLocal<Boolean> isFunctionExecutionThread = new InheritableThreadLocal<Boolean>() { @Override protected Boolean initialValue() { return Boolean.FALSE; } }; ////////////////////// Static Methods ////////////////////// /** * Given two DistributionManager ids, check to see if they are from the same host address. * * @param id1 a DistributionManager id * @param id2 a DistributionManager id * @return true if id1 and id2 are from the same host, false otherwise */ public static boolean isSameHost(InternalDistributedMember id1, InternalDistributedMember id2) { return (id1.getInetAddress().equals(id2.getInetAddress())); } /** * Creates a new distribution manager and discovers the other members of the distributed system. * Note that it does not check to see whether or not this VM already has a distribution manager. * * @param system The distributed system to which this distribution manager will send messages. 
*/ public static DistributionManager create(InternalDistributedSystem system) { DistributionManager distributionManager = null; try { int vmKind; if (Boolean.getBoolean(InternalLocator.FORCE_LOCATOR_DM_TYPE)) { // if this DM is starting for a locator, set it to be a locator DM vmKind = LOCATOR_DM_TYPE; } else if (isDedicatedAdminVM) { vmKind = ADMIN_ONLY_DM_TYPE; } else { vmKind = NORMAL_DM_TYPE; } RemoteTransportConfig transport = new RemoteTransportConfig(system.getConfig(), vmKind); transport.setIsReconnectingDS(system.isReconnectingDS()); transport.setOldDSMembershipInfo(system.oldDSMembershipInfo()); long start = System.currentTimeMillis(); distributionManager = new DistributionManager(system, transport); distributionManager.assertDistributionManagerType(); { InternalDistributedMember id = distributionManager.getDistributionManagerId(); if (!"".equals(id.getName())) { for (InternalDistributedMember m : (List<InternalDistributedMember>) distributionManager .getViewMembers()) { if (m.equals(id)) { // I'm counting on the members returned by getViewMembers being ordered such that // members that joined before us will precede us AND members that join after us // will succeed us. // SO once we find ourself break out of this loop. break; } if (id.getName().equals(m.getName())) { if (distributionManager.getMembershipManager().verifyMember(m, "member is using the name of " + id)) { throw new IncompatibleSystemException("Member " + id + " could not join this distributed system because the existing member " + m + " used the same name. Set the \"name\" gemfire property to a unique value."); } } } } distributionManager.addNewMember(id); // add ourselves distributionManager.selectElder(); // ShutdownException could be thrown here } // Send out a StartupMessage to the other members. StartupOperation op = new StartupOperation(distributionManager, transport); try { if (!distributionManager.sendStartupMessage(op, true)) { // We'll we didn't hear back from anyone else. 
We assume that // we're the first one. if (distributionManager.getOtherDistributionManagerIds().size() == 0) { logger.info(LocalizedMessage.create( LocalizedStrings.DistributionManager_DIDNT_HEAR_BACK_FROM_ANY_OTHER_SYSTEM_I_AM_THE_FIRST_ONE)); } else if (transport.isMcastEnabled()) { // perform a multicast ping test if (!distributionManager.testMulticast()) { logger.warn(LocalizedMessage.create( LocalizedStrings.DistributionManager_RECEIVED_NO_STARTUP_RESPONSES_BUT_OTHER_MEMBERS_EXIST_MULTICAST_IS_NOT_RESPONSIVE)); } } } } catch (InterruptedException ex) { Thread.currentThread().interrupt(); // This is ALWAYS bad; don't consult a CancelCriterion. throw new InternalGemFireException( LocalizedStrings.DistributionManager_INTERRUPTED_WHILE_WAITING_FOR_FIRST_STARTUPRESPONSEMESSAGE .toLocalizedString(), ex); } catch (IncompatibleSystemException ex) { logger.fatal(ex.getMessage(), ex); throw ex; } finally { distributionManager.readyToSendMsgs(); } if (logger.isInfoEnabled()) { long delta = System.currentTimeMillis() - start; Object[] logArgs = new Object[] {distributionManager.getDistributionManagerId(), transport, Integer.valueOf(distributionManager.getOtherDistributionManagerIds().size()), distributionManager.getOtherDistributionManagerIds(), (logger.isInfoEnabled(LogMarker.DM) ? " (VERBOSE, took " + delta + " ms)" : ""), ((distributionManager.getDMType() == ADMIN_ONLY_DM_TYPE) ? " (admin only)" : (distributionManager.getDMType() == LOCATOR_DM_TYPE) ? 
" (locator)" : "")}; logger.info(LogMarker.DM, LocalizedMessage.create( LocalizedStrings.DistributionManager_DISTRIBUTIONMANAGER_0_STARTED_ON_1_THERE_WERE_2_OTHER_DMS_3_4_5, logArgs)); MembershipLogger.logStartup(distributionManager.getDistributionManagerId()); } return distributionManager; } catch (RuntimeException r) { if (distributionManager != null) { if (logger.isDebugEnabled()) { logger.debug("cleaning up incompletely started DistributionManager due to exception", r); } distributionManager.uncleanShutdown(true); } throw r; } } void runUntilShutdown(Runnable r) { try { r.run(); } catch (CancelException e) { if (logger.isTraceEnabled()) { logger.trace("Caught shutdown exception", e); } } catch (VirtualMachineError err) { SystemFailure.initiateFailure(err); // If this ever returns, rethrow the error. We're poisoned // now, so don't let this thread continue. throw err; } catch (Throwable t) { // Whenever you catch Error or Throwable, you must also // catch VirtualMachineError (see above). However, there is // _still_ a possibility that you are dealing with a cascading // error condition, so you also need to check to see if the JVM // is still usable: SystemFailure.checkFailure(); if (isCloseInProgress()) { logger.debug("Caught unusual exception during shutdown: {}", t.getMessage(), t); } else { logger.warn(LocalizedMessage .create(LocalizedStrings.DistributionManager_TASK_FAILED_WITH_EXCEPTION), t); } } } volatile Throwable rootCause = null; private static class Stopper extends CancelCriterion { private DistributionManager dm; // validateDM is commented out because expiry threads hit it with // an ugly failure... 
use only for debugging lingering DM bugs // private String validateDM() { // GemFireCache cache = GemFireCache.getInstance(); // if (cache == null) { // return null; // Distributed system with no cache // } // Object obj = cache.getDistributedSystem(); // if (obj == null) { // return null; // Cache is very dead // } // InternalDistributedSystem ids = (InternalDistributedSystem)obj; // DM current = ids.getDistributionManager(); // if (current != dm) { // String response = // LocalizedStrings.DistributionManager_CURRENT_CACHE_DISTRIBUTIONMANAGER_0_IS_NOT_THE_SAME_AS_1 // .toLocalizedString(new Object[] { current, dm}); // return response; // } // return null; // } Stopper(DistributionManager dm) { this.dm = dm; } @Override public String cancelInProgress() { checkFailure(); // remove call to validateDM() to fix bug 38356 if (dm.shutdownMsgSent) { return LocalizedStrings.DistributionManager__0_MESSAGE_DISTRIBUTION_HAS_TERMINATED .toLocalizedString(dm.toString()); } if (dm.rootCause != null) { return dm.toString() + ": " + dm.rootCause.getMessage(); } // Nope. return null; } @Override public RuntimeException generateCancelledException(Throwable e) { String reason = cancelInProgress(); if (reason == null) { return null; } Throwable rc = dm.rootCause; // volatile read if (rc == null) { // No root cause, specify the one given and be done with it. return new DistributedSystemDisconnectedException(reason, e); } if (e == null) { // Caller did not specify any root cause, so just use our own. return new DistributedSystemDisconnectedException(reason, rc); } // Attempt to stick rootCause at tail end of the exception chain. Throwable nt = e; while (nt.getCause() != null) { nt = nt.getCause(); } if (nt == rc) { // Root cause already in place; we're done return new DistributedSystemDisconnectedException(reason, e); } try { nt.initCause(rc); return new DistributedSystemDisconnectedException(reason, e); } catch (IllegalStateException e2) { // Bug 39496 (Jrockit related) Give up. 
The following // error is not entirely sane but gives the correct general picture. return new DistributedSystemDisconnectedException(reason, rc); } } } private final Stopper stopper = new Stopper(this); public CancelCriterion getCancelCriterion() { return stopper; } /////////////////////// Constructors /////////////////////// /** * Creates a new <code>DistributionManager</code> by initializing itself, creating the membership * manager and executors * * @param transport The configuration for the communications transport * */ private DistributionManager(RemoteTransportConfig transport, InternalDistributedSystem system) { this.dmType = transport.getVmKind(); this.system = system; this.elderLock = new StoppableReentrantLock(stopper); this.transport = transport; this.membershipListeners = new ConcurrentHashMap(); this.distributedSystemId = system.getConfig().getDistributedSystemId(); { long statId = OSProcess.getId(); /* * deadcoded since we don't know the channel id yet. if (statId == 0 || statId == -1) { statId * = getChannelId(); } */ this.stats = new DistributionStats(system, statId); DistributionStats.enableClockStats = system.getConfig().getEnableTimeStatistics(); } this.exceptionInThreads = false; // Start the processing threads final LoggingThreadGroup group = LoggingThreadGroup.createThreadGroup("DistributionManager Threads", logger); this.threadGroup = group; boolean finishedConstructor = false; try { if (MULTI_SERIAL_EXECUTORS) { if (logger.isInfoEnabled(LogMarker.DM)) { logger.info(LogMarker.DM, "Serial Queue info :" + " THROTTLE_PERCENT: " + THROTTLE_PERCENT + " SERIAL_QUEUE_BYTE_LIMIT :" + SERIAL_QUEUE_BYTE_LIMIT + " SERIAL_QUEUE_THROTTLE :" + SERIAL_QUEUE_THROTTLE + " TOTAL_SERIAL_QUEUE_BYTE_LIMIT :" + TOTAL_SERIAL_QUEUE_BYTE_LIMIT + " TOTAL_SERIAL_QUEUE_THROTTLE :" + TOTAL_SERIAL_QUEUE_THROTTLE + " SERIAL_QUEUE_SIZE_LIMIT :" + SERIAL_QUEUE_SIZE_LIMIT + " SERIAL_QUEUE_SIZE_THROTTLE :" + SERIAL_QUEUE_SIZE_THROTTLE); } // when TCP/IP is disabled we can't 
throttle the serial queue or we run the risk of // distributed deadlock when we block the UDP reader thread boolean throttlingDisabled = system.getConfig().getDisableTcp(); this.serialQueuedExecutorPool = new SerialQueuedExecutorPool(this.threadGroup, this.stats, throttlingDisabled); } { BlockingQueue poolQueue; if (SERIAL_QUEUE_BYTE_LIMIT == 0) { poolQueue = new OverflowQueueWithDMStats(this.stats.getSerialQueueHelper()); } else { this.serialQueue = new ThrottlingMemLinkedQueueWithDMStats(TOTAL_SERIAL_QUEUE_BYTE_LIMIT, TOTAL_SERIAL_QUEUE_THROTTLE, SERIAL_QUEUE_SIZE_LIMIT, SERIAL_QUEUE_SIZE_THROTTLE, this.stats.getSerialQueueHelper()); poolQueue = this.serialQueue; } ThreadFactory tf = new ThreadFactory() { public Thread newThread(final Runnable command) { DistributionManager.this.stats.incSerialThreadStarts(); final Runnable r = new Runnable() { public void run() { DistributionManager.this.stats.incNumSerialThreads(1); try { ConnectionTable.threadWantsSharedResources(); Connection.makeReaderThread(); runUntilShutdown(command); // command.run(); } finally { ConnectionTable.releaseThreadsSockets(); DistributionManager.this.stats.incNumSerialThreads(-1); } } }; Thread thread = new Thread(group, r, LocalizedStrings.DistributionManager_SERIAL_MESSAGE_PROCESSOR.toLocalizedString()); thread.setDaemon(true); return thread; } }; SerialQueuedExecutorWithDMStats executor = new SerialQueuedExecutorWithDMStats(poolQueue, this.stats.getSerialProcessorHelper(), tf); this.serialThread = executor; } { BlockingQueue q = new LinkedBlockingQueue(); ThreadFactory tf = new ThreadFactory() { public Thread newThread(final Runnable command) { DistributionManager.this.stats.incViewThreadStarts(); final Runnable r = new Runnable() { public void run() { DistributionManager.this.stats.incNumViewThreads(1); try { ConnectionTable.threadWantsSharedResources(); Connection.makeReaderThread(); runUntilShutdown(command); } finally { ConnectionTable.releaseThreadsSockets(); 
DistributionManager.this.stats.incNumViewThreads(-1); } } }; Thread thread = new Thread(group, r, LocalizedStrings.DistributionManager_VIEW_MESSAGE_PROCESSOR.toLocalizedString()); thread.setDaemon(true); return thread; } }; this.viewThread = new SerialQueuedExecutorWithDMStats(q, this.stats.getViewProcessorHelper(), tf); } { BlockingQueue poolQueue; if (INCOMING_QUEUE_LIMIT == 0) { poolQueue = new OverflowQueueWithDMStats(this.stats.getOverflowQueueHelper()); } else { poolQueue = new OverflowQueueWithDMStats(INCOMING_QUEUE_LIMIT, this.stats.getOverflowQueueHelper()); } ThreadFactory tf = new ThreadFactory() { private int next = 0; public Thread newThread(final Runnable command) { DistributionManager.this.stats.incProcessingThreadStarts(); final Runnable r = new Runnable() { public void run() { DistributionManager.this.stats.incNumProcessingThreads(1); try { ConnectionTable.threadWantsSharedResources(); Connection.makeReaderThread(); runUntilShutdown(command); } finally { ConnectionTable.releaseThreadsSockets(); DistributionManager.this.stats.incNumProcessingThreads(-1); } } }; Thread thread = new Thread(group, r, LocalizedStrings.DistributionManager_POOLED_MESSAGE_PROCESSOR.toLocalizedString() + (next++)); thread.setDaemon(true); return thread; } }; ThreadPoolExecutor pool = new PooledExecutorWithDMStats(poolQueue, MAX_THREADS, this.stats.getNormalPoolHelper(), tf); this.threadPool = pool; } { BlockingQueue poolQueue; if (INCOMING_QUEUE_LIMIT == 0) { poolQueue = new OverflowQueueWithDMStats(this.stats.getHighPriorityQueueHelper()); } else { poolQueue = new OverflowQueueWithDMStats(INCOMING_QUEUE_LIMIT, this.stats.getHighPriorityQueueHelper()); } ThreadFactory tf = new ThreadFactory() { private int next = 0; public Thread newThread(final Runnable command) { DistributionManager.this.stats.incHighPriorityThreadStarts(); final Runnable r = new Runnable() { public void run() { DistributionManager.this.stats.incHighPriorityThreads(1); try { 
ConnectionTable.threadWantsSharedResources(); Connection.makeReaderThread(); runUntilShutdown(command); } finally { ConnectionTable.releaseThreadsSockets(); DistributionManager.this.stats.incHighPriorityThreads(-1); } } }; Thread thread = new Thread(group, r, LocalizedStrings.DistributionManager_POOLED_HIGH_PRIORITY_MESSAGE_PROCESSOR .toLocalizedString() + (next++)); thread.setDaemon(true); return thread; } }; this.highPriorityPool = new PooledExecutorWithDMStats(poolQueue, MAX_THREADS, this.stats.getHighPriorityPoolHelper(), tf); } { ThreadFactory tf = new ThreadFactory() { private int next = 0; public Thread newThread(final Runnable command) { DistributionManager.this.stats.incWaitingThreadStarts(); final Runnable r = new Runnable() { public void run() { DistributionManager.this.stats.incWaitingThreads(1); try { ConnectionTable.threadWantsSharedResources(); Connection.makeReaderThread(); runUntilShutdown(command); } finally { ConnectionTable.releaseThreadsSockets(); DistributionManager.this.stats.incWaitingThreads(-1); } } }; Thread thread = new Thread(group, r, LocalizedStrings.DistributionManager_POOLED_WAITING_MESSAGE_PROCESSOR .toLocalizedString() + (next++)); thread.setDaemon(true); return thread; } }; BlockingQueue poolQueue; if (MAX_WAITING_THREADS == Integer.MAX_VALUE) { // no need for a queue since we have infinite threads poolQueue = new SynchronousQueue(); } else { poolQueue = new OverflowQueueWithDMStats(this.stats.getWaitingQueueHelper()); } this.waitingPool = new PooledExecutorWithDMStats(poolQueue, MAX_WAITING_THREADS, this.stats.getWaitingPoolHelper(), tf); } { ThreadFactory tf = new ThreadFactory() { private int next = 0; public Thread newThread(final Runnable command) { DistributionManager.this.stats.incWaitingThreadStarts();// will it be ok? 
final Runnable r = new Runnable() { public void run() { DistributionManager.this.stats.incWaitingThreads(1);// will it be ok try { ConnectionTable.threadWantsSharedResources(); Connection.makeReaderThread(); runUntilShutdown(command); } finally { ConnectionTable.releaseThreadsSockets(); DistributionManager.this.stats.incWaitingThreads(-1); } } }; Thread thread = new Thread(group, r, LocalizedStrings.DistributionManager_PR_META_DATA_CLEANUP_MESSAGE_PROCESSOR .toLocalizedString() + (next++)); thread.setDaemon(true); return thread; } }; BlockingQueue poolQueue; poolQueue = new OverflowQueueWithDMStats(this.stats.getWaitingQueueHelper()); this.prMetaDataCleanupThreadPool = new PooledExecutorWithDMStats(poolQueue, MAX_PR_META_DATA_CLEANUP_THREADS, this.stats.getWaitingPoolHelper(), tf); } { BlockingQueue poolQueue; if (INCOMING_QUEUE_LIMIT == 0) { poolQueue = new OverflowQueueWithDMStats(this.stats.getPartitionedRegionQueueHelper()); } else { poolQueue = new OverflowQueueWithDMStats(INCOMING_QUEUE_LIMIT, this.stats.getPartitionedRegionQueueHelper()); } ThreadFactory tf = new ThreadFactory() { private int next = 0; public Thread newThread(final Runnable command) { DistributionManager.this.stats.incPartitionedRegionThreadStarts(); final Runnable r = new Runnable() { public void run() { stats.incPartitionedRegionThreads(1); try { ConnectionTable.threadWantsSharedResources(); Connection.makeReaderThread(); runUntilShutdown(command); } finally { ConnectionTable.releaseThreadsSockets(); stats.incPartitionedRegionThreads(-1); } } }; Thread thread = new Thread(group, r, "PartitionedRegion Message Processor" + (next++)); thread.setDaemon(true); return thread; } }; if (MAX_PR_THREADS > 1) { this.partitionedRegionPool = new PooledExecutorWithDMStats(poolQueue, MAX_PR_THREADS, this.stats.getPartitionedRegionPoolHelper(), tf); } else { SerialQueuedExecutorWithDMStats executor = new SerialQueuedExecutorWithDMStats(poolQueue, this.stats.getPartitionedRegionPoolHelper(), tf); 
this.partitionedRegionThread = executor; } } { BlockingQueue poolQueue; if (INCOMING_QUEUE_LIMIT == 0) { poolQueue = new OverflowQueueWithDMStats(this.stats.getFunctionExecutionQueueHelper()); } else { poolQueue = new OverflowQueueWithDMStats(INCOMING_QUEUE_LIMIT, this.stats.getFunctionExecutionQueueHelper()); } ThreadFactory tf = new ThreadFactory() { private int next = 0; public Thread newThread(final Runnable command) { DistributionManager.this.stats.incFunctionExecutionThreadStarts(); final Runnable r = new Runnable() { public void run() { stats.incFunctionExecutionThreads(1); isFunctionExecutionThread.set(Boolean.TRUE); try { ConnectionTable.threadWantsSharedResources(); Connection.makeReaderThread(); runUntilShutdown(command); } finally { ConnectionTable.releaseThreadsSockets(); stats.incFunctionExecutionThreads(-1); } } }; Thread thread = new Thread(group, r, "Function Execution Processor" + (next++)); thread.setDaemon(true); return thread; } }; if (MAX_FE_THREADS > 1) { this.functionExecutionPool = new FunctionExecutionPooledExecutor(poolQueue, MAX_FE_THREADS, this.stats.getFunctionExecutionPoolHelper(), tf, true /* for fn exec */); } else { SerialQueuedExecutorWithDMStats executor = new SerialQueuedExecutorWithDMStats(poolQueue, this.stats.getFunctionExecutionPoolHelper(), tf); this.functionExecutionThread = executor; } } if (!SYNC_EVENTS) { this.memberEventThread = new Thread(group, new MemberEventInvoker(), "DM-MemberEventInvoker"); this.memberEventThread.setDaemon(true); } StringBuffer sb = new StringBuffer(" (took "); long start = System.currentTimeMillis(); // Create direct channel first // DirectChannel dc = new DirectChannel(new MyListener(this), system.getConfig(), logger, // null); // setDirectChannelPort(dc.getPort()); // store in a thread local // connect to JGroups start = System.currentTimeMillis(); MyListener l = new MyListener(this); membershipManager = MemberFactory.newMembershipManager(l, system.getConfig(), transport, stats); 
sb.append(System.currentTimeMillis() - start); this.myid = membershipManager.getLocalMember(); // dc.patchUpAddress(this.myid); // id.setDirectChannelPort(dc.getPort()); // create the distribution channel this.channel = new DistributionChannel(membershipManager); membershipManager.postConnect(); // Assert.assertTrue(this.getChannelMap().size() >= 1); // System.out.println("Channel Map:"); // for (Iterator iter = this.getChannelMap().entrySet().iterator(); // iter.hasNext(); ) { // Map.Entry entry = (Map.Entry) iter.next(); // Object key = entry.getKey(); // System.out.println(" " + key + " a " + // key.getClass().getName() + " -> " + // entry.getValue()); // } sb.append(" ms)"); logger.info(LocalizedMessage.create( LocalizedStrings.DistributionManager_STARTING_DISTRIBUTIONMANAGER_0_1, new Object[] {this.myid, (logger.isInfoEnabled(LogMarker.DM) ? sb.toString() : "")})); this.description = NAME + " on " + this.myid + " started at " + (new Date(System.currentTimeMillis())).toString(); finishedConstructor = true; } finally { if (!finishedConstructor) { askThreadsToStop(); // fix for bug 42039 } } } /** * Creates a new distribution manager * * @param system The distributed system to which this distribution manager will send messages. */ private DistributionManager(InternalDistributedSystem system, RemoteTransportConfig transport) { this(transport, system); boolean finishedConstructor = false; try { isStartupThread.set(Boolean.TRUE); startThreads(); // Since we need a StartupResponseMessage to make sure licenses // are compatible the following has been deadcoded. // // For the time being, invoke processStartupResponse() // String rejectionMessage = null; // if (GemFireVersion.getGemFireVersion(). 
// equals(state.getGemFireVersion())) { // rejectionMessage = "Rejected new system node " + // this.getDistributionManagerId() + " with version \"" + // GemFireVersion.getGemFireVersion() + // "\" because the distributed system's version is \"" + // state.getGemFireVersion() + "\"."; // } // this.processStartupResponse(state.getCacheTime(), // rejectionMessage); // Allow events to start being processed. membershipManager.startEventProcessing(); for (;;) { this.getCancelCriterion().checkCancelInProgress(null); boolean interrupted = Thread.interrupted(); try { membershipManager.waitForEventProcessing(); break; } catch (InterruptedException e) { interrupted = true; } finally { if (interrupted) { Thread.currentThread().interrupt(); } } } synchronized (DistributionManager.class) { openDMs++; } finishedConstructor = true; } finally { if (!finishedConstructor) { askThreadsToStop(); // fix for bug 42039 } } } //////////////////// Instance Methods ///////////////////// /** * Returns true if the two members are on the same equivalent host based on overlapping IP * addresses collected for all NICs during exchange of startup messages. * * @param member1 First member * @param member2 Second member */ public boolean areOnEquivalentHost(InternalDistributedMember member1, InternalDistributedMember member2) { Set<InetAddress> equivalents1 = getEquivalents(member1.getInetAddress()); return equivalents1.contains(member2.getInetAddress()); } /** * Set the host equivalencies for a given host. This overrides any previous information in the * tables. 
* * @param equivs list of InetAddress's that all point at same host */ public void setEquivalentHosts(Set<InetAddress> equivs) { Iterator<InetAddress> it = equivs.iterator(); synchronized (equivalentHosts) { while (it.hasNext()) { equivalentHosts.put(it.next(), Collections.unmodifiableSet(equivs)); } } } public HashMap<InetAddress, Set<InetAddress>> getEquivalentHostsSnapshot() { synchronized (this.equivalentHosts) { return new HashMap<InetAddress, Set<InetAddress>>(this.equivalentHosts); } } /** * Return all of the InetAddress's that are equivalent to the given one (same host) * * @param in host to match up * @return all the addresses thus equivalent */ public Set<InetAddress> getEquivalents(InetAddress in) { Set<InetAddress> result; synchronized (equivalentHosts) { result = equivalentHosts.get(in); } // DS 11/25/08 - It appears that when using VPN, the distributed member // id is the vpn address, but that doesn't show up in the equivalents. if (result == null) { result = Collections.singleton(in); } return result; } public void setRedundancyZone(InternalDistributedMember member, String redundancyZone) { if (redundancyZone != null && !redundancyZone.equals("")) { this.redundancyZones.put(member, redundancyZone); } if (member != getDistributionManagerId()) { String relationship = areInSameZone(getDistributionManagerId(), member) ? "" : "not "; Object[] logArgs = new Object[] {member, relationship}; logger.info(LocalizedMessage.create( LocalizedStrings.DistributionManager_DISTRIBUTIONMANAGER_MEMBER_0_IS_1_EQUIVALENT, logArgs)); } } /** * Set the flag indicating that we should enforce unique zones. If we are already enforcing unique * zones, keep it that way. 
*/ public void setEnforceUniqueZone(boolean enforceUniqueZone) { this.enforceUniqueZone |= enforceUniqueZone; } public boolean enforceUniqueZone() { return enforceUniqueZone; } public String getRedundancyZone(InternalDistributedMember member) { return redundancyZones.get(member); } /** * Asserts that distributionManagerType is LOCAL, GEMFIRE, or ADMIN_ONLY. Also asserts that the * distributionManagerId (jgroups DistributedMember) has a VmKind that matches. */ private void assertDistributionManagerType() { // Assert that dmType is one of the three DM types... int theDmType = getDMType(); switch (theDmType) { case NORMAL_DM_TYPE: case LONER_DM_TYPE: case ADMIN_ONLY_DM_TYPE: case LOCATOR_DM_TYPE: break; default: Assert.assertTrue(false, "unknown distribution manager type"); } // Assert InternalDistributedMember VmKind matches this DistributionManagerType... final InternalDistributedMember theId = getDistributionManagerId(); final int vmKind = theId.getVmKind(); if (theDmType != vmKind) { Assert.assertTrue(false, "InternalDistributedMember has a vmKind of " + vmKind + " instead of " + theDmType); } } public int getDMType() { return this.dmType; } public List<InternalDistributedMember> getViewMembers() { NetView result = null; DistributionChannel ch = this.channel; if (ch != null) { MembershipManager mgr = ch.getMembershipManager(); if (mgr != null) { result = mgr.getView(); } } if (result == null) { result = new NetView(); } return result.getMembers(); } /* implementation of DM.getOldestMember */ public DistributedMember getOldestMember(Collection c) throws NoSuchElementException { List<InternalDistributedMember> view = getViewMembers(); for (int i = 0; i < view.size(); i++) { Object viewMbr = view.get(i); Iterator it = c.iterator(); while (it.hasNext()) { Object nextMbr = it.next(); if (viewMbr.equals(nextMbr)) { return (DistributedMember) nextMbr; } } } throw new NoSuchElementException( 
LocalizedStrings.DistributionManager_NONE_OF_THE_GIVEN_MANAGERS_IS_IN_THE_CURRENT_MEMBERSHIP_VIEW .toLocalizedString()); } private boolean testMulticast() { return this.membershipManager.testMulticast(); } /** * Print a membership view (list of {@link InternalDistributedMember}s) * * @param v the list * @return String */ static public String printView(NetView v) { if (v == null) return "null"; return v.toString(); } /** * Need to do this outside the constructor so that the child constructor can finish. */ protected void startThreads() { this.system.setDM(this); // fix for bug 33362 if (this.memberEventThread != null) this.memberEventThread.start(); try { // And the distinguished guests today are... NetView v = membershipManager.getView(); logger.info(LocalizedMessage.create( LocalizedStrings.DistributionManager_INITIAL_MEMBERSHIPMANAGER_VIEW___0, printView(v))); // Add them all to our view Iterator<InternalDistributedMember> it = v.getMembers().iterator(); while (it.hasNext()) { addNewMember(it.next()); } // Figure out who the elder is... selectElder(); // ShutdownException could be thrown here } catch (Exception ex) { throw new InternalGemFireException( LocalizedStrings.DistributionManager_COULD_NOT_PROCESS_INITIAL_VIEW.toLocalizedString(), ex); } try { getWaitingThreadPool().execute(new Runnable() { public void run() { // call in background since it might need to send a reply // and we are not ready to send messages until startup is finished isStartupThread.set(Boolean.TRUE); readyForMessages(); } }); } catch (VirtualMachineError err) { SystemFailure.initiateFailure(err); // If this ever returns, rethrow the error. We're poisoned // now, so don't let this thread continue. throw err; } catch (Throwable t) { // Whenever you catch Error or Throwable, you must also // catch VirtualMachineError (see above). 
However, there is // _still_ a possibility that you are dealing with a cascading // error condition, so you also need to check to see if the JVM // is still usable: SystemFailure.checkFailure(); logger.fatal(LocalizedMessage.create( LocalizedStrings.DistributionManager_UNCAUGHT_EXCEPTION_CALLING_READYFORMESSAGES), t); } } protected void readyForMessages() { synchronized (this) { this.readyForMessages = true; this.notifyAll(); } membershipManager.startEventProcessing(); } protected void waitUntilReadyForMessages() { if (readyForMessages) return; // membershipManager.waitForEventProcessing(); synchronized (this) { for (;;) { if (readyForMessages) break; stopper.checkCancelInProgress(null); boolean interrupted = Thread.interrupted(); try { this.wait(); } catch (InterruptedException e) { interrupted = true; stopper.checkCancelInProgress(e); } finally { if (interrupted) { Thread.currentThread().interrupt(); } } } // for } // synchronized } /** * Call when the DM is ready to send messages. */ private void readyToSendMsgs() { synchronized (this.readyToSendMsgsLock) { this.readyToSendMsgs = true; this.readyToSendMsgsLock.notifyAll(); } } /** * Return when DM is ready to send out messages. * * @param msg the messsage that is currently being sent */ protected void waitUntilReadyToSendMsgs(DistributionMessage msg) { if (this.readyToSendMsgs) { return; } // another process may have been started in the same view, so we need // to be responsive to startup messages and be able to send responses if (msg instanceof StartupMessage || msg instanceof StartupResponseMessage || msg instanceof AdminMessageType) { return; } if (isStartupThread.get() != null) { // let the startup thread send messages // the only case I know of that does this is if we happen to log a // message during startup and an alert listener has registered. 
return; } // membershipManager.waitForEventProcessing(); synchronized (this.readyToSendMsgsLock) { for (;;) { if (this.readyToSendMsgs) break; stopper.checkCancelInProgress(null); boolean interrupted = Thread.interrupted(); try { this.readyToSendMsgsLock.wait(); } catch (InterruptedException e) { interrupted = true; stopper.checkCancelInProgress(e); } finally { if (interrupted) { Thread.currentThread().interrupt(); } } } // for } // synchronized } // DM method @Override public void forceUDPMessagingForCurrentThread() { membershipManager.forceUDPMessagingForCurrentThread(); } // DM method @Override public void releaseUDPMessagingForCurrentThread() { membershipManager.releaseUDPMessagingForCurrentThread(); } /** * Did an exception occur in one of the threads launched by this distribution manager? */ public boolean exceptionInThreads() { return this.exceptionInThreads || this.threadGroup.getUncaughtExceptionsCount() > 0; } /** * Clears the boolean that determines whether or not an exception occurred in one of the worker * threads. This method should be used for testing purposes only! */ void clearExceptionInThreads() { this.exceptionInThreads = false; this.threadGroup.clearUncaughtExceptionsCount(); } /** * Returns the current "cache time" in milliseconds since the epoch. The "cache time" takes into * account skew among the local clocks on the various machines involved in the cache. */ public long cacheTimeMillis() { return this.system.getClock().cacheTimeMillis(); } /** * Returns the id of this distribution manager. */ public InternalDistributedMember getDistributionManagerId() { return this.myid; } /** * Returns an unmodifiable set containing the identities of all of the known (non-admin-only) * distribution managers. 
*/ public Set getDistributionManagerIds() { // access to members synchronized under membersLock in order to // ensure serialization synchronized (this.membersLock) { return this.members.keySet(); } } /** * Adds the entry in {@link #hostedLocatorsAll} for a member with one or more hosted locators. The * value is a collection of host[port] strings. If a bind-address was used for a locator then the * form is bind-addr[port]. * * @since GemFire 6.6.3 */ public void addHostedLocators(InternalDistributedMember member, Collection<String> locators, boolean isSharedConfigurationEnabled) { synchronized (this.membersLock) { if (locators == null || locators.isEmpty()) { throw new IllegalArgumentException("Cannot use empty collection of locators"); } if (this.hostedLocatorsAll.isEmpty()) { this.hostedLocatorsAll = new HashMap<InternalDistributedMember, Collection<String>>(); } if (!this.isSharedConfigEnabledForDS) { this.isSharedConfigEnabledForDS = isSharedConfigurationEnabled; } Map<InternalDistributedMember, Collection<String>> tmp = new HashMap<InternalDistributedMember, Collection<String>>(this.hostedLocatorsAll); tmp.remove(member); tmp.put(member, locators); tmp = Collections.unmodifiableMap(tmp); this.hostedLocatorsAll = tmp; if (isSharedConfigurationEnabled) { if (locators == null || locators.isEmpty()) { throw new IllegalArgumentException("Cannot use empty collection of locators"); } if (this.hostedLocatorsWithSharedConfiguration.isEmpty()) { this.hostedLocatorsWithSharedConfiguration = new HashMap<InternalDistributedMember, Collection<String>>(); } tmp = new HashMap<InternalDistributedMember, Collection<String>>( this.hostedLocatorsWithSharedConfiguration); tmp.remove(member); tmp.put(member, locators); tmp = Collections.unmodifiableMap(tmp); this.hostedLocatorsWithSharedConfiguration = tmp; } } } private void removeHostedLocators(InternalDistributedMember member) { synchronized (this.membersLock) { if (this.hostedLocatorsAll.containsKey(member)) { 
Map<InternalDistributedMember, Collection<String>> tmp = new HashMap<InternalDistributedMember, Collection<String>>(this.hostedLocatorsAll); tmp.remove(member); if (tmp.isEmpty()) { tmp = Collections.emptyMap(); } else { tmp = Collections.unmodifiableMap(tmp); } this.hostedLocatorsAll = tmp; } if (this.hostedLocatorsWithSharedConfiguration.containsKey(member)) { Map<InternalDistributedMember, Collection<String>> tmp = new HashMap<InternalDistributedMember, Collection<String>>( this.hostedLocatorsWithSharedConfiguration); tmp.remove(member); if (tmp.isEmpty()) { tmp = Collections.emptyMap(); } else { tmp = Collections.unmodifiableMap(tmp); } this.hostedLocatorsWithSharedConfiguration = tmp; } } } /** * Gets the value in {@link #hostedLocatorsAll} for a member with one or more hosted locators. The * value is a collection of host[port] strings. If a bind-address was used for a locator then the * form is bind-addr[port]. * * @since GemFire 6.6.3 */ public Collection<String> getHostedLocators(InternalDistributedMember member) { synchronized (this.membersLock) { return this.hostedLocatorsAll.get(member); } } /** * Returns a copy of the map of all members hosting locators. The key is the member, and the value * is a collection of host[port] strings. If a bind-address was used for a locator then the form * is bind-addr[port]. * * The member is the vm that hosts one or more locator, if another locator starts up linking to * this locator, it will put that member in this map as well, and this member will in the map on * the other locato vm as well. * * The keyset of the map are the locator vms in this cluster. * * the value is a collection of strings in case one vm can have multiple locators ???? * * @since GemFire 6.6.3 */ public Map<InternalDistributedMember, Collection<String>> getAllHostedLocators() { synchronized (this.membersLock) { return this.hostedLocatorsAll; } } /** * Returns a copy of the map of all members hosting locators with shared configuration. 
The key is * the member, and the value is a collection of host[port] strings. If a bind-address was used for * a locator then the form is bind-addr[port]. * * @since GemFire 8.0 */ @Override public Map<InternalDistributedMember, Collection<String>> getAllHostedLocatorsWithSharedConfiguration() { synchronized (this.membersLock) { return this.hostedLocatorsWithSharedConfiguration; } } /** * Returns an unmodifiable set containing the identities of all of the known (including admin) * distribution managers. */ public Set getDistributionManagerIdsIncludingAdmin() { // access to members synchronized under membersLock in order to // ensure serialization synchronized (this.membersLock) { return this.membersAndAdmin; } } /** * Returns the low-level distribution channel for this distribution manager. (brought over from * ConsoleDistributionManager) * * @since GemFire 4.0 */ public DistributionChannel getDistributionChannel() { return this.channel; } /** * Returns a private-memory list containing the identities of all the other known distribution * managers not including me. */ public Set getOtherDistributionManagerIds() { // We return a modified copy of the list, so // collect the old list and copy under the lock. Set result = new HashSet(getDistributionManagerIds()); InternalDistributedMember me = getDistributionManagerId(); result.remove(me); // It's okay for my own id to not be in the list of all ids yet. return result; } @Override public Set getOtherNormalDistributionManagerIds() { // We return a modified copy of the list, so // collect the old list and copy under the lock. Set result = new HashSet(getNormalDistributionManagerIds()); InternalDistributedMember me = getDistributionManagerId(); result.remove(me); // It's okay for my own id to not be in the list of all ids yet. 
return result; } public InternalDistributedMember getCanonicalId(DistributedMember id) { // the members set is copy-on-write, so it is safe to iterate over it InternalDistributedMember result = this.members.get(id); if (result == null) { return (InternalDistributedMember) id; } return result; } /** * Add a membership listener and return other DistribtionManagerIds as an atomic operation */ public Set addMembershipListenerAndGetDistributionManagerIds(MembershipListener l) { // switched sync order to fix bug 30360 synchronized (this.membersLock) { // Don't let the members come and go while we are adding this // listener. This ensures that the listener (probably a // ReplyProcessor) gets a consistent view of the members. addMembershipListener(l); // Note it is ok to return the members set // because we will never modify the returned set. return members.keySet(); } } public void addNewMember(InternalDistributedMember member) { // This is the place to cleanup the zombieMembers int vmType = member.getVmKind(); switch (vmType) { case ADMIN_ONLY_DM_TYPE: handleConsoleStartup(member); break; case LOCATOR_DM_TYPE: case NORMAL_DM_TYPE: handleManagerStartup(member); break; default: throw new InternalGemFireError(LocalizedStrings.DistributionManager_UNKNOWN_MEMBER_TYPE_0 .toLocalizedString(Integer.valueOf(vmType))); } } /** * Returns the identity of this <code>DistributionManager</code> */ public InternalDistributedMember getId() { return this.myid; } /** * Returns the id of the underlying distribution channel used for communication. * * @since GemFire 3.0 */ public long getChannelId() { return this.channel.getId(); } /** * Adds a message to the outgoing queue. Note that <code>message</code> should not be modified * after it has been added to the queue. After <code>message</code> is distributed, it will be * recycled. 
*
   * @return list of recipients who did not receive the message
   * @throws NotSerializableException if the content is not serializable
   */
  public Set putOutgoingUserData(final DistributionMessage message)
      throws NotSerializableException {
    return sendMessage(message);
  }

  /**
   * Send outgoing data; message is guaranteed to be serialized. The registered
   * DistributionMessageObserver, if any, is notified before the message is sent.
   *
   * @return list of recipients who did not receive the message
   * @throws InternalGemFireException if message is not serializable
   */
  public Set putOutgoing(final DistributionMessage msg) {
    try {
      DistributionMessageObserver observer = DistributionMessageObserver.getInstance();
      if (observer != null) {
        observer.beforeSendMessage(this, msg);
      }
      return sendMessage(msg);
    } catch (NotSerializableException e) {
      throw new InternalGemFireException(e);
    } catch (ToDataException e) {
      // exception from user code
      throw e;
    }
  }

  @Override
  public String toString() {
    return this.description;
  }

  /**
   * @see #closeInProgress
   */
  private final Object shutdownMutex = new Object();

  /**
   * Informs other members that this dm is shutting down. Stops the pusher, puller, and processor
   * threads and closes the connection to the transport layer.
   *
   * <p>
   * Only the first caller proceeds; later calls return immediately. The shutdown message is sent
   * from a helper thread that is given at most MAX_STOP_TIME milliseconds, and is skipped
   * entirely after a forced disconnect.
   */
  protected void shutdown() {
    // Make sure only one thread initiates shutdown...
    synchronized (shutdownMutex) {
      if (closeInProgress) {
        return;
      }
      this.closeInProgress = true;
    } // synchronized

    // [bruce] log shutdown at info level and with ID to balance the
    // "Starting" message. recycleConn.conf is hard to debug w/o this
    final String exceptionStatus = (this.exceptionInThreads()
        ? LocalizedStrings.DistributionManager_AT_LEAST_ONE_EXCEPTION_OCCURRED.toLocalizedString()
        : "");
    logger.info(LocalizedMessage.create(
        LocalizedStrings.DistributionManager_SHUTTING_DOWN_DISTRIBUTIONMANAGER_0_1,
        new Object[] {this.myid, exceptionStatus}));

    final long start = System.currentTimeMillis();
    try {
      if (this.rootCause instanceof ForcedDisconnectException) {
        if (logger.isDebugEnabled()) {
          logger.debug(
              "inhibiting sending of shutdown message to other members due to forced-disconnect");
        }
      } else {
        // Don't block indefinitely trying to send the shutdown message, in
        // case other VMs in the system are ill-behaved. (bug 34710)
        final Runnable r = new Runnable() {
          public void run() {
            try {
              ConnectionTable.threadWantsSharedResources();
              sendShutdownMessage();
            } catch (final CancelException e) {
              // We were terminated.
              logger.debug("Cancelled during shutdown message", e);
            }
          }
        };
        final Thread t = new Thread(threadGroup, r,
            LocalizedStrings.DistributionManager_SHUTDOWN_MESSAGE_THREAD_FOR_0
                .toLocalizedString(this.myid));
        t.start();
        boolean interrupted = Thread.interrupted();
        try {
          t.join(MAX_STOP_TIME);
        } catch (final InterruptedException e) {
          interrupted = true;
          t.interrupt();
          logger.warn(
              LocalizedMessage.create(
                  LocalizedStrings.DistributionManager_INTERRUPTED_SENDING_SHUTDOWN_MESSAGE_TO_PEERS),
              e);
        } finally {
          if (interrupted) {
            Thread.currentThread().interrupt();
          }
        }

        if (t.isAlive()) {
          t.interrupt();
          logger.warn(LocalizedMessage.create(
              LocalizedStrings.DistributionManager_FAILED_SENDING_SHUTDOWN_MESSAGE_TO_PEERS_TIMEOUT));
        }
      }
    } finally {
      this.shutdownMsgSent = true; // in case sendShutdownMessage failed....
      try {
        this.uncleanShutdown(false);
      } finally {
        final Long delta = Long.valueOf(System.currentTimeMillis() - start);
        logger.info(LocalizedMessage.create(
            LocalizedStrings.DistributionManager_DISTRIBUTIONMANAGER_STOPPED_IN_0_MS, delta));
      }
    }
  }

  /**
   * Requests an orderly shutdown of every executor that has been created and interrupts the
   * member event thread. Every field is null-checked because this is also called when
   * construction fails part-way.
   */
  private void askThreadsToStop() {
    // Stop executors after they have finished
    ExecutorService es;
    es = this.serialThread;
    if (es != null) {
      es.shutdown();
    }
    es = this.viewThread;
    if (es != null) {
      // Hmmm...OK, I'll let any view events currently in the queue be
      // processed. Not sure it's very important whether they get
      // handled...
      es.shutdown();
    }
    if (this.serialQueuedExecutorPool != null) {
      this.serialQueuedExecutorPool.shutdown();
    }
    es = this.functionExecutionThread;
    if (es != null) {
      es.shutdown();
    }
    es = this.functionExecutionPool;
    if (es != null) {
      es.shutdown();
    }
    es = this.partitionedRegionThread;
    if (es != null) {
      es.shutdown();
    }
    es = this.partitionedRegionPool;
    if (es != null) {
      es.shutdown();
    }
    es = this.highPriorityPool;
    if (es != null) {
      es.shutdown();
    }
    es = this.waitingPool;
    if (es != null) {
      es.shutdown();
    }
    es = this.prMetaDataCleanupThreadPool;
    if (es != null) {
      es.shutdown();
    }
    es = this.threadPool;
    if (es != null) {
      es.shutdown();
    }

    Thread th = this.memberEventThread;
    if (th != null) {
      th.interrupt();
    }
  }

  /**
   * Waits for the executors and the member event thread to terminate, spending at most
   * {@code timeInMillis} in total. Returns as soon as the time budget is used up.
   *
   * @param timeInMillis total time to wait, in milliseconds
   * @throws InterruptedException if the calling thread is interrupted while waiting
   */
  private void waitForThreadsToStop(long timeInMillis) throws InterruptedException {
    long start = System.currentTimeMillis();
    long remaining = timeInMillis;

    ExecutorService[] allExecutors = new ExecutorService[] {this.serialThread, this.viewThread,
        this.functionExecutionThread, this.functionExecutionPool, this.partitionedRegionThread,
        this.partitionedRegionPool, this.highPriorityPool, this.waitingPool,
        this.prMetaDataCleanupThreadPool, this.threadPool};
    for (ExecutorService es : allExecutors) {
      if (es != null) {
        es.awaitTermination(remaining, TimeUnit.MILLISECONDS);
      }
      remaining = timeInMillis - (System.currentTimeMillis() - start);
      if (remaining <= 0) {
        return;
      }
    }

    // Same null guard as askThreadsToStop(): the pool may never have been created.
    if (this.serialQueuedExecutorPool != null) {
      this.serialQueuedExecutorPool.awaitTermination(remaining, TimeUnit.MILLISECONDS);
    }
    remaining = timeInMillis - (System.currentTimeMillis() - start);
    if (remaining <= 0) {
      return;
    }

    Thread th = this.memberEventThread;
    if (th != null) {
      th.interrupt(); // bug #43452 - this thread sometimes eats interrupts, so we interrupt it
                      // again here
      th.join(remaining);
    }
  }

  /**
   * maximum time, in milliseconds, to wait for all threads to exit
   */
  static private final int MAX_STOP_TIME = 20000;

  /**
   * Time to sleep, in milliseconds, while polling to see if threads have finished
   */
  static private final int STOP_PAUSE_TIME = 1000;

  /**
   * Maximum number of interrupt attempts to stop a thread
   */
  static private final int MAX_STOP_ATTEMPTS = 10;

  /**
   * Cheap tool to kill a referenced thread. The thread is interrupted and joined repeatedly, up
   * to MAX_STOP_ATTEMPTS times with STOP_PAUSE_TIME between attempts; a warning is logged if it
   * is still alive afterwards.
   *
   * @param t the thread to kill; null is ignored
   */
  private void clobberThread(Thread t) {
    if (t == null) {
      return;
    }
    if (t.isAlive()) {
      logger.warn(LocalizedMessage
          .create(LocalizedStrings.DistributionManager_FORCING_THREAD_STOP_ON__0_, t));

      // Start by being nice.
      t.interrupt();

      // we could be more violent here...
      // t.stop();
      try {
        for (int i = 0; i < MAX_STOP_ATTEMPTS && t.isAlive(); i++) {
          t.join(STOP_PAUSE_TIME);
          t.interrupt();
        }
      } catch (InterruptedException ex) {
        logger.debug("Interrupted while attempting to terminate threads.");
        Thread.currentThread().interrupt();
        // just keep going
      }

      if (t.isAlive()) {
        logger.warn(LocalizedMessage.create(
            LocalizedStrings.DistributionManager_CLOBBERTHREAD_THREAD_REFUSED_TO_DIE__0, t));
      }
    }
  }

  /**
   * Cheap tool to examine an executor to see if it is still working
   *
   * @param tpe the executor to examine; may be null
   * @param name label used in the debug message
   * @return true if executor is still active
   */
  private boolean executorAlive(ThreadPoolExecutor tpe, String name) {
    if (tpe == null) {
      return false;
    }
    int ac = tpe.getActiveCount();
    if (ac > 0) {
      if (logger.isDebugEnabled()) {
        logger.debug("Still waiting for {} threads in '{}' pool to exit", ac, name);
      }
      return true;
    }
    return false;
  }

  /**
   * Wait for the ancillary queues to exit. Kills them if they are still around.
   *
   * <p>
   * Polls the executors every STOP_PAUSE_TIME milliseconds for up to MAX_STOP_TIME milliseconds.
   * If all polled executors become idle the method returns; otherwise every executor is shut
   * down with shutdownNow() and the member event thread is clobbered.
   *
   * <p>
   * NOTE(review): functionExecutionThread and functionExecutionPool are not part of the liveness
   * poll below, so when they are the only busy executors this method returns before reaching
   * the shutdownNow() calls - confirm whether that is intended.
   */
  private void forceThreadsToStop() {
    long endTime = System.currentTimeMillis() + MAX_STOP_TIME;
    String culprits = "";
    for (;;) {
      boolean stillAlive = false;
      culprits = "";
      if (executorAlive(this.serialThread, "serial thread")) {
        stillAlive = true;
        culprits = culprits + " serial thread;";
      }
      if (executorAlive(this.viewThread, "view thread")) {
        stillAlive = true;
        culprits = culprits + " view thread;";
      }
      if (executorAlive(this.partitionedRegionThread, "partitioned region thread")) {
        stillAlive = true;
        culprits = culprits + " partitioned region thread;";
      }
      if (executorAlive(this.partitionedRegionPool, "partitioned region pool")) {
        stillAlive = true;
        culprits = culprits + " partitioned region pool;";
      }
      if (executorAlive(this.highPriorityPool, "high priority pool")) {
        stillAlive = true;
        culprits = culprits + " high priority pool;";
      }
      if (executorAlive(this.waitingPool, "waiting pool")) {
        stillAlive = true;
        culprits = culprits + " waiting pool;";
      }
      if (executorAlive(this.prMetaDataCleanupThreadPool, "prMetaDataCleanupThreadPool")) {
        stillAlive = true;
        culprits = culprits + " special waiting pool;";
      }
      if (executorAlive(this.threadPool, "thread pool")) {
        stillAlive = true;
        culprits = culprits + " thread pool;";
      }

      if (!stillAlive) {
        return;
      }

      long now = System.currentTimeMillis();
      if (now >= endTime) {
        break;
      }

      try {
        Thread.sleep(STOP_PAUSE_TIME);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        // Desperation, the shutdown thread is being killed. Don't
        // consult a CancelCriterion.
        logger.warn(LocalizedMessage
            .create(LocalizedStrings.DistributionManager_INTERRUPTED_DURING_SHUTDOWN), e);
        break;
      }
    } // for

    logger.warn(LocalizedMessage.create(
        LocalizedStrings.DistributionManager_DAEMON_THREADS_ARE_SLOW_TO_STOP_CULPRITS_INCLUDE_0,
        culprits));

    // Kill with no mercy
    if (this.serialThread != null) {
      this.serialThread.shutdownNow();
    }
    if (this.viewThread != null) {
      this.viewThread.shutdownNow();
    }
    if (this.functionExecutionThread != null) {
      this.functionExecutionThread.shutdownNow();
    }
    if (this.functionExecutionPool != null) {
      this.functionExecutionPool.shutdownNow();
    }
    if (this.partitionedRegionThread != null) {
      this.partitionedRegionThread.shutdownNow();
    }
    if (this.partitionedRegionPool != null) {
      this.partitionedRegionPool.shutdownNow();
    }
    if (this.highPriorityPool != null) {
      this.highPriorityPool.shutdownNow();
    }
    if (this.waitingPool != null) {
      this.waitingPool.shutdownNow();
    }
    if (this.prMetaDataCleanupThreadPool != null) {
      this.prMetaDataCleanupThreadPool.shutdownNow();
    }
    if (this.threadPool != null) {
      this.threadPool.shutdownNow();
    }

    Thread th = this.memberEventThread;
    if (th != null) {
      clobberThread(th);
    }
  }

  private volatile boolean shutdownInProgress = false;

  /** guard for membershipViewIdAcknowledged */
  private final Object membershipViewIdGuard = new Object();

  /** the latest view ID that has been processed by all membership listeners */
  private long membershipViewIdAcknowledged;

  public boolean
shutdownInProgress() { return this.shutdownInProgress; } /** * Stops the pusher, puller and processor threads and closes the connection to the transport * layer. This should only be used from shutdown() or from the dm initialization code */ private void uncleanShutdown(boolean duringStartup) { try { this.closeInProgress = true; // set here also to fix bug 36736 removeAllHealthMonitors(); shutdownInProgress = true; if (this.channel != null) { this.channel.setShutDown(); } askThreadsToStop(); // wait a moment before asking threads to terminate try { waitForThreadsToStop(1000); } catch (InterruptedException ie) { // No need to reset interrupt bit, we're really trying to quit... } forceThreadsToStop(); // // bug36329: desperation measure, send a second interrupt? // try { Thread.sleep(1000); } // catch (InterruptedException ie) { // // No need to reset interrupt bit, we're really trying to quit... // } // forceThreadsToStop(); } // try finally { // ABSOLUTELY ESSENTIAL that we close the distribution channel! try { // For safety, but channel close in a finally AFTER this... if (this.stats != null) { this.stats.close(); try { Thread.sleep(100); } catch (InterruptedException ie) { // No need to reset interrupt bit, we're really trying to quit... } } } finally { if (this.channel != null) { logger.info(LocalizedMessage.create( LocalizedStrings.DistributionManager_NOW_CLOSING_DISTRIBUTION_FOR__0, this.myid)); this.channel.disconnect(duringStartup); // this.channel = null; DO NOT NULL OUT INSTANCE VARIABLES AT SHUTDOWN - bug #42087 } } } } /** * Returns the distributed system to which this distribution manager is connected. */ public InternalDistributedSystem getSystem() { return this.system; } /** * Returns the transport configuration for this distribution manager * * @since GemFire 5.0 */ public RemoteTransportConfig getTransport() { return this.transport; } /** * Adds a <code>MembershipListener</code> to this distribution manager. 
*/ public void addMembershipListener(MembershipListener l) { this.membershipListeners.putIfAbsent(l, Boolean.TRUE); } /** * Removes a <code>MembershipListener</code> from this distribution manager. * * @throws IllegalArgumentException <code>l</code> was not registered on this distribution manager */ public void removeMembershipListener(MembershipListener l) { this.membershipListeners.remove(l); } /** * Adds a <code>MembershipListener</code> to this distribution manager. * * @since GemFire 5.7 */ public void addAllMembershipListener(MembershipListener l) { synchronized (this.allMembershipListenersLock) { Set newAllMembershipListeners = new HashSet(this.allMembershipListeners); newAllMembershipListeners.add(l); this.allMembershipListeners = newAllMembershipListeners; } } /** * Removes a <code>MembershipListener</code> listening for all members from this distribution * manager. * * @throws IllegalArgumentException <code>l</code> was not registered on this distribution manager * @since GemFire 5.7 */ public void removeAllMembershipListener(MembershipListener l) { synchronized (this.allMembershipListenersLock) { Set newAllMembershipListeners = new HashSet(this.allMembershipListeners); if (!newAllMembershipListeners.remove(l)) { // There seems to be a race condition in which // multiple departure events can be registered // on the same peer. We regard this as benign. // FIXME when membership events become sane again // String s = "MembershipListener was never registered"; // throw new IllegalArgumentException(s); } this.allMembershipListeners = newAllMembershipListeners; } } /** * Returns true if this DM or the DistributedSystem owned by it is closing or is closed. 
*/ private boolean isCloseInProgress() { if (closeInProgress) { return true; } InternalDistributedSystem ds = getSystem(); if (ds != null && ds.isDisconnecting()) { return true; } return false; } private void handleViewInstalledEvent(ViewInstalledEvent ev) { synchronized (this.membershipViewIdGuard) { this.membershipViewIdAcknowledged = ev.getViewId(); this.membershipViewIdGuard.notifyAll(); } } /** * This stalls waiting for the current membership view (as seen by the membership manager) to be * acknowledged by all membership listeners */ public void waitForViewInstallation(long id) throws InterruptedException { if (id <= this.membershipViewIdAcknowledged) { return; } synchronized (this.membershipViewIdGuard) { while (this.membershipViewIdAcknowledged < id && !this.stopper.isCancelInProgress()) { if (logger.isDebugEnabled()) { logger.debug("waiting for view {}. Current DM view processed by all listeners is {}", id, this.membershipViewIdAcknowledged); } this.membershipViewIdGuard.wait(); } } } protected void handleMemberEvent(MemberEvent ev) { ev.handleEvent(this); } /** * This thread processes member events as they occur. 
* * @see org.apache.geode.distributed.internal.DistributionManager.MemberCrashedEvent * @see org.apache.geode.distributed.internal.DistributionManager.MemberJoinedEvent * @see org.apache.geode.distributed.internal.DistributionManager.MemberDepartedEvent * */ protected class MemberEventInvoker implements Runnable { @SuppressWarnings("synthetic-access") public void run() { for (;;) { SystemFailure.checkFailure(); // bug 41539 - member events need to be delivered during shutdown // or reply processors may hang waiting for replies from // departed members // if (getCancelCriterion().isCancelInProgress()) { // break; // no message, just quit // } if (!DistributionManager.this.system.isConnected && DistributionManager.this.isClosed()) { break; } try { MemberEvent ev = (MemberEvent) DistributionManager.this.membershipEventQueue.take(); handleMemberEvent(ev); } catch (InterruptedException e) { if (isCloseInProgress()) { if (logger.isTraceEnabled()) { logger.trace("MemberEventInvoker: InterruptedException during shutdown"); } } else { logger.warn(LocalizedMessage .create(LocalizedStrings.DistributionManager_UNEXPECTED_INTERRUPTEDEXCEPTION), e); } break; } catch (DistributedSystemDisconnectedException e) { break; } catch (CancelException e) { if (isCloseInProgress()) { if (logger.isTraceEnabled()) { logger.trace("MemberEventInvoker: cancelled"); } } else { logger.warn(LocalizedMessage .create(LocalizedStrings.DistributionManager_UNEXPECTED_CANCELLATION), e); } break; } catch (Exception e) { logger.fatal( LocalizedMessage.create( LocalizedStrings.DistributionManager_UNCAUGHT_EXCEPTION_PROCESSING_MEMBER_EVENT), e); } } // for if (logger.isTraceEnabled()) { logger.trace("MemberEventInvoker on {} stopped", DistributionManager.this); } } } private void addMemberEvent(MemberEvent ev) { if (SYNC_EVENTS) { handleMemberEvent(ev); } else { stopper.checkCancelInProgress(null); boolean interrupted = Thread.interrupted(); try { this.membershipEventQueue.put(ev); } catch 
(InterruptedException ex) { interrupted = true; stopper.checkCancelInProgress(ex); handleMemberEvent(ev); // FIXME why??? } finally { if (interrupted) { Thread.currentThread().interrupt(); } } } } /** * Stops the threads associated with this distribution manager and closes the connection to the * transport layer. */ public void close() { if (!closed) { this.shutdown(); logger.info(LocalizedMessage.create( LocalizedStrings.DistributionManager_MARKING_DISTRIBUTIONMANAGER_0_AS_CLOSED, this.myid)); MembershipLogger.logShutdown(this.myid); closed = true; synchronized (DistributionManager.class) { openDMs--; } } } public void throwIfDistributionStopped() { if (this.shutdownMsgSent) { throw new DistributedSystemDisconnectedException( LocalizedStrings.DistributionManager_MESSAGE_DISTRIBUTION_HAS_TERMINATED .toLocalizedString(), this.getRootCause()); } } /** * Returns true if this distribution manager has been closed. */ public boolean isClosed() { return this.closed; } /** * Makes note of a new administration console (admin-only member). */ public void addAdminConsole(InternalDistributedMember theId) { logger.info(LocalizedMessage.create( LocalizedStrings.DistributionManager_NEW_ADMINISTRATION_MEMBER_DETECTED_AT_0, theId)); synchronized (this.adminConsolesLock) { HashSet tmp = new HashSet(this.adminConsoles); tmp.add(theId); this.adminConsoles = Collections.unmodifiableSet(tmp); } } public DMStats getStats() { return this.stats; } public DistributionConfig getConfig() { DistributionConfig result = null; InternalDistributedSystem sys = getSystem(); if (sys != null) { result = system.getConfig(); } return result; } // /** // * Initializes and returns a <code>DistributedSystem</code> to be // * sent to new members of the distributed system. 
// * // * @since GemFire 3.0 // */ // protected DistributedState getNewDistributedState() { // DistributedState state = new DistributedState(); // state.setGemFireVersion(GemFireVersion.getGemFireVersion()); // state.setCacheTime(this.cacheTimeMillis()); // return state; // } private static final int STARTUP_TIMEOUT = Integer.getInteger("DistributionManager.STARTUP_TIMEOUT", 15000).intValue(); public static final boolean DEBUG_NO_ACKNOWLEDGEMENTS = Boolean.getBoolean("DistributionManager.DEBUG_NO_ACKNOWLEDGEMENTS"); public Set getAllOtherMembers() { Set result = new HashSet(getDistributionManagerIdsIncludingAdmin()); result.remove(getDistributionManagerId()); return result; } @Override // DM method public void retainMembersWithSameOrNewerVersion(Collection<InternalDistributedMember> members, Version version) { for (Iterator<InternalDistributedMember> it = members.iterator(); it.hasNext();) { InternalDistributedMember id = it.next(); if (id.getVersionObject().compareTo(version) < 0) { it.remove(); } } } @Override // DM method public void removeMembersWithSameOrNewerVersion(Collection<InternalDistributedMember> members, Version version) { for (Iterator<InternalDistributedMember> it = members.iterator(); it.hasNext();) { InternalDistributedMember id = it.next(); if (id.getVersionObject().compareTo(version) >= 0) { it.remove(); } } } /** * Add a membership listener for all members and return other DistribtionManagerIds as an atomic * operation * * @since GemFire 5.7 */ public Set addAllMembershipListenerAndGetAllIds(MembershipListener l) { // TO fix this deadlock: // "View Message Processor": // waiting to lock monitor 0x080f691c (object 0xe3ba7680, a // org.apache.geode.distributed.internal.DistributionManager$MembersLock), // which is held by "RMI TCP Connection(259)-10.80.10.55" // "RMI TCP Connection(259)-10.80.10.55": // waiting to lock monitor 0x080f6598 (object 0xe3bacd90, a // 
org.apache.geode.distributed.internal.membership.jgroup.JGroupMembershipManager$ViewLock), // which is held by "View Message Processor" // NEED to prevent view changes while installing a listener. DistributionChannel ch = this.channel; if (ch != null) { MembershipManager mgr = ch.getMembershipManager(); if (mgr != null) { mgr.getViewLock().writeLock().lock(); try { synchronized (this.membersLock) { // Don't let the members come and go while we are adding this // listener. This ensures that the listener (probably a // ReplyProcessor) gets a consistent view of the members. addAllMembershipListener(l); return getDistributionManagerIdsIncludingAdmin(); } } finally { mgr.getViewLock().writeLock().unlock(); } } } // If we have no channel or MembershipManager then the view is empty synchronized (this.membersLock) { // Don't let the members come and go while we are adding this // listener. This ensures that the listener (probably a // ReplyProcessor) gets a consistent view of the members. addAllMembershipListener(l); return Collections.EMPTY_SET; } } /** * Sends a startup message and waits for a response. Returns true if response received; false if * it timed out or there are no peers. */ protected boolean sendStartupMessage(StartupOperation startupOperation, boolean cancelOnTimeout) throws InterruptedException { if (Thread.interrupted()) throw new InterruptedException(); this.receivedStartupResponse = false; boolean ok = false; // Be sure to add ourself to the equivalencies list! 
Set equivs = StartupMessage.getMyAddresses(this); if (equivs == null || equivs.size() == 0) { // no network interface equivs = new HashSet(); try { equivs.add(SocketCreator.getLocalHost()); } catch (UnknownHostException e) { // can't even get localhost if (getViewMembers().size() > 1) { throw new SystemConnectException( "Unable to examine network cards and other members exist"); } } } setEquivalentHosts(equivs); setEnforceUniqueZone(getConfig().getEnforceUniqueHost()); String redundancyZone = getConfig().getRedundancyZone(); if (redundancyZone != null && !redundancyZone.equals("")) { setEnforceUniqueZone(true); } setRedundancyZone(getDistributionManagerId(), redundancyZone); if (logger.isDebugEnabled()) { StringBuffer sb = new StringBuffer(); sb.append("Equivalent IPs for this host: "); Iterator it = equivs.iterator(); while (it.hasNext()) { InetAddress in = (InetAddress) it.next(); sb.append(in.toString()); if (it.hasNext()) { sb.append(", "); } } // while logger.debug(sb); } // we need to send this to everyone else; even admin vm Set allOthers = new HashSet(getViewMembers()); allOthers.remove(getDistributionManagerId()); if (allOthers.isEmpty()) { return false; // no peers, we are alone. 
} try { ok = startupOperation.sendStartupMessage(allOthers, STARTUP_TIMEOUT, equivs, redundancyZone, enforceUniqueZone()); } catch (Exception re) { throw new SystemConnectException( LocalizedStrings.DistributionManager_ONE_OR_MORE_PEERS_GENERATED_EXCEPTIONS_DURING_CONNECTION_ATTEMPT .toLocalizedString(), re); } if (this.rejectionMessage != null) { throw new IncompatibleSystemException(rejectionMessage); } boolean isAdminDM = getId().getVmKind() == DistributionManager.ADMIN_ONLY_DM_TYPE || getId().getVmKind() == DistributionManager.LOCATOR_DM_TYPE || DistributionManager.isDedicatedAdminVM || Boolean.getBoolean(InternalLocator.FORCE_LOCATOR_DM_TYPE); boolean receivedAny = this.receivedStartupResponse; if (!ok) { // someone didn't reply int unresponsiveCount; synchronized (unfinishedStartupsLock) { if (unfinishedStartups == null) unresponsiveCount = 0; else unresponsiveCount = unfinishedStartups.size(); if (unresponsiveCount != 0) { if (Boolean.getBoolean("DistributionManager.requireAllStartupResponses")) { throw new SystemConnectException( LocalizedStrings.DistributionManager_NO_STARTUP_REPLIES_FROM_0 .toLocalizedString(unfinishedStartups)); } } } // synchronized // Bug 35887: // If there are other members, we must receive at least _one_ response if (allOthers.size() != 0) { // there exist others if (!receivedAny) { // and none responded StringBuffer sb = new StringBuffer(); Iterator itt = allOthers.iterator(); while (itt.hasNext()) { Object m = itt.next(); sb.append(m.toString()); if (itt.hasNext()) sb.append(", "); } if (DEBUG_NO_ACKNOWLEDGEMENTS) { printStacks(allOthers, false); } throw new SystemConnectException( LocalizedStrings.DistributionManager_RECEIVED_NO_CONNECTION_ACKNOWLEDGMENTS_FROM_ANY_OF_THE_0_SENIOR_CACHE_MEMBERS_1 .toLocalizedString( new Object[] {Integer.toString(allOthers.size()), sb.toString()})); } // and none responded } // there exist others InternalDistributedMember e = getElderId(); if (e != null) { // an elder exists boolean 
unresponsiveElder; synchronized (unfinishedStartupsLock) { if (unfinishedStartups == null) unresponsiveElder = false; else unresponsiveElder = unfinishedStartups.contains(e); } if (unresponsiveElder) { logger.warn(LocalizedMessage.create( LocalizedStrings.DistributionManager_FORCING_AN_ELDER_JOIN_EVENT_SINCE_A_STARTUP_RESPONSE_WAS_NOT_RECEIVED_FROM_ELDER__0_, e)); handleManagerStartup(e); } } // an elder exists } // someone didn't reply return receivedAny; } /** * List of InternalDistributedMember's that we have not received startup replies from. If null, we * have not finished sending the startup message. * <p> * Must be synchronized using {@link #unfinishedStartupsLock} */ private Set unfinishedStartups = null; /** * Synchronization for {@link #unfinishedStartups} */ private final Object unfinishedStartupsLock = new Object(); public void setUnfinishedStartups(Collection s) { synchronized (unfinishedStartupsLock) { Assert.assertTrue(unfinishedStartups == null, "Set unfinished startups twice"); unfinishedStartups = new HashSet(s); // OK, I don't _quite_ trust the list to be current, so let's // prune it here. 
Iterator it = unfinishedStartups.iterator(); synchronized (this.membersLock) { while (it.hasNext()) { InternalDistributedMember m = (InternalDistributedMember) it.next(); if (!isCurrentMember(m)) { it.remove(); } } // while } // synchronized } } public void removeUnfinishedStartup(InternalDistributedMember m, boolean departed) { synchronized (unfinishedStartupsLock) { if (logger.isDebugEnabled()) { logger.debug("removeUnfinishedStartup for {} wtih {}", m, unfinishedStartups); } if (unfinishedStartups == null) return; // not yet done with startup if (!unfinishedStartups.remove(m)) return; StringId msg = null; if (departed) { msg = LocalizedStrings.DistributionManager_STOPPED_WAITING_FOR_STARTUP_REPLY_FROM_0_BECAUSE_THE_PEER_DEPARTED_THE_VIEW; } else { msg = LocalizedStrings.DistributionManager_STOPPED_WAITING_FOR_STARTUP_REPLY_FROM_0_BECAUSE_THE_REPLY_WAS_FINALLY_RECEIVED; } logger.info(LocalizedMessage.create(msg, m)); int numLeft = unfinishedStartups.size(); if (numLeft != 0) { logger.info(LocalizedMessage.create( LocalizedStrings.DistributionManager_STILL_AWAITING_0_RESPONSES_FROM_1, new Object[] {Integer.valueOf(numLeft), unfinishedStartups})); } } // synchronized } /** * Processes the first startup response. * * @see StartupResponseMessage#process */ void processStartupResponse(InternalDistributedMember sender, long otherCacheTime, String theRejectionMessage) { removeUnfinishedStartup(sender, false); synchronized (this) { if (!this.receivedStartupResponse) { this.receivedStartupResponse = true; } if (theRejectionMessage != null && this.rejectionMessage == null) { // remember the first non-null rejection. This fixes bug 33266 this.rejectionMessage = theRejectionMessage; } } } /** * Processes the first startup response. 
* * @see StartupResponseMessage#process */ void processStartupResponse(InternalDistributedMember sender, String theRejectionMessage) { removeUnfinishedStartup(sender, false); synchronized (this) { if (!this.receivedStartupResponse) { // only set the cacheTimeDelta once this.receivedStartupResponse = true; } if (theRejectionMessage != null && this.rejectionMessage == null) { // remember the first non-null rejection. This fixes bug 33266 this.rejectionMessage = theRejectionMessage; } } } /** * Based on a recent JGroups view, return a member that might be the next elder. * * @return the elder candidate, possibly this VM. */ private InternalDistributedMember getElderCandidate() { List<InternalDistributedMember> theMembers = getViewMembers(); // Assert.assertTrue(!closeInProgress // && theMembers.contains(this.myid)); // bug36202? int elderCandidates = 0; Iterator<InternalDistributedMember> it; // for bug #50510 we need to know if there are any members older than v8.0 it = theMembers.iterator(); boolean anyPre80Members = false; while (it.hasNext()) { InternalDistributedMember member = it.next(); if (member.getVersionObject().compareTo(Version.GFE_80) < 0) { anyPre80Members = true; } } // determine number of elder candidates (unless adam) if (!this.adam) { it = theMembers.iterator(); while (it.hasNext()) { InternalDistributedMember member = it.next(); int managerType = member.getVmKind(); if (managerType == ADMIN_ONLY_DM_TYPE) continue; if (managerType == LOCATOR_DM_TYPE) { // Fix for #50510 - pre-8.0 members will not let a locator be the elder // so we need to make the same decision here if (anyPre80Members) { continue; } } // Fix for #45566. Using a surprise member as the elder can cause a // deadlock. if (getMembershipManager().isSurpriseMember(member)) { continue; } elderCandidates++; if (elderCandidates > 1) { // If we have more than one candidate then we are not adam break; } } // while } // Second pass over members... 
it = theMembers.iterator(); while (it.hasNext()) { InternalDistributedMember member = it.next(); int managerType = member.getVmKind(); if (managerType == ADMIN_ONLY_DM_TYPE) continue; if (managerType == LOCATOR_DM_TYPE) { // Fix for #50510 - pre-8.0 members will not let a locator be the elder // so we need to make the same decision here if (anyPre80Members) { continue; } } // Fix for #45566. Using a surprise member as the elder can cause a // deadlock. if (getMembershipManager().isSurpriseMember(member)) { continue; } if (member.equals(this.myid)) { if (!this.adam && elderCandidates == 1) { this.adam = true; logger.info(LocalizedMessage.create( LocalizedStrings.DistributionManager_0_IS_THE_ELDER_AND_THE_ONLY_MEMBER, this.myid)); } else { logger.info(LocalizedMessage.create(LocalizedStrings.DistributionManager_I_0_AM_THE_ELDER, this.myid)); } } return member; } // while // If we get this far then no elder exists return null; } /** * Select a new elder * */ protected void selectElder() { getSystem().getCancelCriterion().checkCancelInProgress(null); // bug 37884, if DS is // disconnecting, throw exception // Once we are the elder, we're stuck until we leave the view. if (this.myid.equals(this.elder)) { return; } // Determine who is the elder... InternalDistributedMember candidate = getElderCandidate(); if (candidate == null) { changeElder(null); return; // No valid elder in current context } // Carefully switch to new elder synchronized (this.elderMonitor) { if (!candidate.equals(this.elder)) { if (logger.isDebugEnabled()) { logger.debug("The elder is: {} (was {})", candidate, this.elder); } changeElder(candidate); } } // synchronized } private String prettifyReason(String r) { final String str = "java.io.IOException:"; if (r.startsWith(str)) { return r.substring(str.length()); } return r; } /** * Returns true if id was removed. Returns false if it was not in the list of managers. 
*/ private boolean removeManager(InternalDistributedMember theId, boolean crashed, String p_reason) { String reason = p_reason; boolean result = false; // initialization shouldn't be required, but... // Test once before acquiring the lock, fault tolerance for potentially // recursive (and deadlock) conditions -- bug33626 // Note that it is always safe to _read_ {@link members} without locking if (isCurrentMember(theId)) { // Destroy underlying member's resources reason = prettifyReason(reason); synchronized (this.membersLock) { if (logger.isDebugEnabled()) { logger.debug("DistributionManager: removing member <{}>; crashed {}; reason = {}", theId, crashed, reason); } Map<InternalDistributedMember, InternalDistributedMember> tmp = new HashMap(this.members); if (tmp.remove(theId) != null) { // Note we don't modify in place. This allows reader to get snapshots // without locking. if (tmp.isEmpty()) { tmp = Collections.EMPTY_MAP; } else { tmp = Collections.unmodifiableMap(tmp); } this.members = tmp; result = true; } else { result = false; // Don't get upset since this can happen twice due to // an explicit remove followed by an implicit one caused // by a JavaGroup view change } Set tmp2 = new HashSet(this.membersAndAdmin); if (tmp2.remove(theId)) { if (tmp2.isEmpty()) { tmp2 = Collections.EMPTY_SET; } else { tmp2 = Collections.unmodifiableSet(tmp2); } this.membersAndAdmin = tmp2; } this.removeHostedLocators(theId); } // synchronized } // if // In any event, make sure that this member is no longer an elder. if (!theId.equals(myid) && theId.equals(elder)) { try { selectElder(); } catch (DistributedSystemDisconnectedException e) { // ignore } } redundancyZones.remove(theId); return result; } /** * Makes note of a new distribution manager that has started up in the distributed cache. Invokes * the appropriately listeners. 
* * @param theId The id of the distribution manager starting up * */ private void handleManagerStartup(InternalDistributedMember theId) { HashMap<InternalDistributedMember, InternalDistributedMember> tmp = null; synchronized (this.membersLock) { // Note test is under membersLock if (members.containsKey(theId)) { return; // already accounted for } // Note we don't modify in place. This allows reader to get snapshots // without locking. tmp = new HashMap(this.members); tmp.put(theId, theId); this.members = Collections.unmodifiableMap(tmp); Set stmp = new HashSet(this.membersAndAdmin); stmp.add(theId); this.membersAndAdmin = Collections.unmodifiableSet(stmp); } // synchronized if (theId.getVmKind() != DistributionManager.LOCATOR_DM_TYPE) { this.stats.incNodes(1); } logger.info(LocalizedMessage.create( LocalizedStrings.DistributionManager_ADMITTING_MEMBER_0_NOW_THERE_ARE_1_NONADMIN_MEMBERS, new Object[] {theId, Integer.valueOf(tmp.size())})); addMemberEvent(new MemberJoinedEvent(theId)); } /** * Return true if id is a current member of our system. */ public boolean isCurrentMember(InternalDistributedMember id) { Set m; synchronized (this.membersLock) { // access to members synchronized under membersLock in order to // ensure serialization m = this.membersAndAdmin; } return m.contains(id); } /** * Makes note of a new console that has started up in the distributed cache. * */ private void handleConsoleStartup(InternalDistributedMember theId) { // if we have an all listener then notify it NOW. HashSet tmp = null; synchronized (this.membersLock) { // Note test is under membersLock if (membersAndAdmin.contains(theId)) return; // already accounted for // Note we don't modify in place. This allows reader to get snapshots // without locking. 
tmp = new HashSet(this.membersAndAdmin); tmp.add(theId); this.membersAndAdmin = Collections.unmodifiableSet(tmp); } // synchronized for (Iterator iter = allMembershipListeners.iterator(); iter.hasNext();) { MembershipListener listener = (MembershipListener) iter.next(); listener.memberJoined(theId); } logger.info(LocalizedMessage.create( LocalizedStrings.DistributionManager_DMMEMBERSHIP_ADMITTING_NEW_ADMINISTRATION_MEMBER__0_, theId)); // Note that we don't add the member to the list of admin consoles until // we receive a message from them. } /** * Process an incoming distribution message. This includes scheduling it correctly based on the * message's nioPriority (executor type) */ public void handleIncomingDMsg(DistributionMessage message) { /* * disabled - not being used if (message instanceof OutgoingMessageWrapper) { * putOutgoing(((OutgoingMessageWrapper)message).getMessage()); return; } */ // long latency = message.getLatency(); // this.stats.incMessageTransitTime(latency * 1000000L); // message.resetTimestamp(); stats.incReceivedMessages(1L); stats.incReceivedBytes(message.getBytesRead()); stats.incMessageChannelTime(message.resetTimestamp()); // message.setRecipient(DistributionManager.this.getId()); if (logger.isDebugEnabled()) { logger.debug("Received message '{}' from <{}>", message, message.getSender()); } scheduleIncomingMessage(message); } /** * Makes note of a console that has shut down. * * @param theId The id of the console shutting down * @param crashed only true if we detect this id to be gone from a javagroup view * * @see AdminConsoleDisconnectMessage#process */ public void handleConsoleShutdown(InternalDistributedMember theId, boolean crashed, String reason) { boolean removedConsole = false; boolean removedMember = false; synchronized (this.membersLock) { // to fix bug 39747 we can only remove this member from // membersAndAdmin if he is not in members. // This happens when we have an admin guy colocated with a normal DS. 
// In this case we need for the normal DS to shutdown or crash. if (!this.members.containsKey(theId)) { if (logger.isDebugEnabled()) logger.debug("DistributionManager: removing admin member <{}>; crashed = {}; reason = {}", theId, crashed, reason); Set tmp = new HashSet(this.membersAndAdmin); if (tmp.remove(theId)) { // Note we don't modify in place. This allows reader to get snapshots // without locking. if (tmp.isEmpty()) { tmp = Collections.EMPTY_SET; } else { tmp = Collections.unmodifiableSet(tmp); } this.membersAndAdmin = tmp; removedMember = true; } else { // Don't get upset since this can happen twice due to // an explicit remove followed by an implicit one caused // by a JavaGroup view change } } removeHostedLocators(theId); } synchronized (this.adminConsolesLock) { if (this.adminConsoles.contains(theId)) { removedConsole = true; Set tmp = new HashSet(this.adminConsoles); tmp.remove(theId); if (tmp.isEmpty()) { tmp = Collections.EMPTY_SET; } else { tmp = Collections.unmodifiableSet(tmp); } this.adminConsoles = tmp; } } if (removedMember) { for (Iterator iter = allMembershipListeners.iterator(); iter.hasNext();) { MembershipListener listener = (MembershipListener) iter.next(); listener.memberDeparted(theId, crashed); } } if (removedConsole) { StringId msg = null; if (crashed) { msg = LocalizedStrings.DistributionManager_ADMINISTRATION_MEMBER_AT_0_CRASHED_1; } else { msg = LocalizedStrings.DistributionManager_ADMINISTRATION_MEMBER_AT_0_CLOSED_1; } logger.info(LocalizedMessage.create(msg, new Object[] {theId, reason})); } redundancyZones.remove(theId); } public void shutdownMessageReceived(InternalDistributedMember theId, String reason) { this.membershipManager.shutdownMessageReceived(theId, reason); handleManagerDeparture(theId, false, LocalizedStrings.ShutdownMessage_SHUTDOWN_MESSAGE_RECEIVED.toLocalizedString()); } /** * used by the DistributedMembershipListener and startup and shutdown operations, this method * decrements the number of nodes and handles 
lower-level clean up of the resources used by the * departed manager */ public void handleManagerDeparture(InternalDistributedMember theId, boolean p_crashed, String p_reason) { boolean crashed = p_crashed; String reason = p_reason; AlertAppender.getInstance().removeAlertListener(theId); // this fixes a race introduced in 5.0.1 by the fact that an explicit // shutdown will cause a member to no longer be in our DM membership // but still in the javagroup view. try { selectElder(); } catch (DistributedSystemDisconnectedException e) { // keep going } int vmType = theId.getVmKind(); if (vmType == ADMIN_ONLY_DM_TYPE) { removeUnfinishedStartup(theId, true); handleConsoleShutdown(theId, crashed, reason); return; } // not an admin VM... if (!isCurrentMember(theId)) { return; // fault tolerance } removeUnfinishedStartup(theId, true); if (removeManager(theId, crashed, reason)) { if (theId.getVmKind() != DistributionManager.LOCATOR_DM_TYPE) { this.stats.incNodes(-1); } StringId msg; if (crashed && !isCloseInProgress()) { msg = LocalizedStrings.DistributionManager_MEMBER_AT_0_UNEXPECTEDLY_LEFT_THE_DISTRIBUTED_CACHE_1; addMemberEvent(new MemberCrashedEvent(theId, reason)); } else { msg = LocalizedStrings.DistributionManager_MEMBER_AT_0_GRACEFULLY_LEFT_THE_DISTRIBUTED_CACHE_1; addMemberEvent(new MemberDepartedEvent(theId, reason)); } logger.info(LocalizedMessage.create(msg, new Object[] {theId, prettifyReason(reason)})); // Remove this manager from the serialQueueExecutor. 
if (this.serialQueuedExecutorPool != null) { serialQueuedExecutorPool.handleMemberDeparture(theId); } } } public void handleManagerSuspect(InternalDistributedMember suspect, InternalDistributedMember whoSuspected, String reason) { if (!isCurrentMember(suspect)) { return; // fault tolerance } int vmType = suspect.getVmKind(); if (vmType == ADMIN_ONLY_DM_TYPE) { return; } addMemberEvent(new MemberSuspectEvent(suspect, whoSuspected, reason)); } public void handleViewInstalled(NetView view) { addMemberEvent(new ViewInstalledEvent(view)); } public void handleQuorumLost(Set<InternalDistributedMember> failures, List<InternalDistributedMember> remaining) { addMemberEvent(new QuorumLostEvent(failures, remaining)); } /** * Sends the shutdown message. Not all DistributionManagers need to do this. */ protected void sendShutdownMessage() { if (getDMType() == ADMIN_ONLY_DM_TYPE && Locator.getLocators().size() == 0) { // [bruce] changed above "if" to have ShutdownMessage sent by locators. // Otherwise the system can hang because an admin member does not trigger // member-left notification unless a new view is received showing the departure. // If two locators are simultaneously shut down this may not occur. 
return; } ShutdownMessage m = new ShutdownMessage(); InternalDistributedMember theId = this.getDistributionManagerId(); m.setDistributionManagerId(theId); Set allOthers = new HashSet(getViewMembers()); allOthers.remove(getDistributionManagerId()); // ReplyProcessor21 rp = new ReplyProcessor21(this, allOthers); // m.setProcessorId(rp.getProcessorId()); // m.setMulticast(system.getConfig().getMcastPort() != 0); m.setRecipients(allOthers); // Address recipient = (Address) m.getRecipient(); if (logger.isTraceEnabled()) { logger.trace("{} Sending {} to {}", this.getDistributionManagerId(), m, m.getRecipientsDescription()); } try { // m.resetTimestamp(); // nanotimers across systems don't match long startTime = DistributionStats.getStatTime(); channel.send(m.getRecipients(), m, this, stats); this.stats.incSentMessages(1L); if (DistributionStats.enableClockStats) { stats.incSentMessagesTime(DistributionStats.getStatTime() - startTime); } } catch (CancelException e) { logger.debug("CancelException caught sending shutdown: {}", e.getMessage(), e); } catch (Exception ex2) { logger.fatal(LocalizedMessage .create(LocalizedStrings.DistributionManager_WHILE_SENDING_SHUTDOWN_MESSAGE), ex2); } finally { // Even if the message wasn't sent, *lie* about it, so that // everyone believes that message distribution is done. this.shutdownMsgSent = true; } } /** * Returns the executor for the given type of processor. 
* */ public final Executor getExecutor(int processorType, InternalDistributedMember sender) { switch (processorType) { case STANDARD_EXECUTOR: return getThreadPool(); case SERIAL_EXECUTOR: return getSerialExecutor(sender); case VIEW_EXECUTOR: return this.viewThread; case HIGH_PRIORITY_EXECUTOR: return getHighPriorityThreadPool(); case WAITING_POOL_EXECUTOR: return getWaitingThreadPool(); case PARTITIONED_REGION_EXECUTOR: return getPartitionedRegionExcecutor(); case REGION_FUNCTION_EXECUTION_EXECUTOR: return getFunctionExcecutor(); default: throw new InternalGemFireError(LocalizedStrings.DistributionManager_UNKNOWN_PROCESSOR_TYPE .toLocalizedString(processorType)); } } // /** // * Return a shortened name of a class that excludes the package // */ // private static String shortenClassName(String className) { // int index = className.lastIndexOf('.'); // if (index != -1) { // return className.substring(index + 1); // // } else { // return className; // } // } /** * Send a message that is guaranteed to be serialized * * @param msg * @return the recipients who did not receive the message */ protected Set sendOutgoingSerialized(DistributionMessage msg) { try { return sendOutgoing(msg); } catch (NotSerializableException e) { throw new InternalGemFireException(e); } catch (ToDataException e) { // exception from user code throw e; } } /** * Actually does the work of sending a message out over the distribution channel. * * @param message the message to send * @return list of recipients that did not receive the message because they left the view (null if * all received it or it was sent to {@link DistributionMessage#ALL_RECIPIENTS}. 
* @throws NotSerializableException If <code>message</code> cannot be serialized */ protected Set sendOutgoing(DistributionMessage message) throws NotSerializableException { long startTime = DistributionStats.getStatTime(); Set result = channel.send(message.getRecipients(), message, DistributionManager.this, this.stats); long endTime = 0L; if (DistributionStats.enableClockStats) { endTime = NanoTimer.getTime(); } boolean sentToAll = message.forAll(); if (sentToAll) { stats.incBroadcastMessages(1L); if (DistributionStats.enableClockStats) { stats.incBroadcastMessagesTime(endTime - startTime); } } stats.incSentMessages(1L); if (DistributionStats.enableClockStats) { stats.incSentMessagesTime(endTime - startTime); stats.incDistributeMessageTime(endTime - message.getTimestamp()); } return result; } /** * @return recipients who did not receive the message * @throws NotSerializableException If <codE>message</code> cannot be serialized */ Set sendMessage(DistributionMessage message) throws NotSerializableException { Set result = null; try { // Verify we're not too far into the shutdown stopper.checkCancelInProgress(null); // avoid race condition during startup waitUntilReadyToSendMsgs(message); result = sendOutgoing(message); } catch (NotSerializableException ex) { throw ex; // serialization error in user data } catch (ToDataException ex) { throw ex; // serialization error in user data } catch (ReenteredConnectException ex) { throw ex; // Recursively tried to get the same connection } catch (CancelException ex) { throw ex; // bug 37194, shutdown conditions } catch (InvalidDeltaException ide) { logger.info( LocalizedMessage .create(LocalizedStrings.DistributionManager_CAUGHT_EXCEPTION_WHILE_SENDING_DELTA), ide.getCause()); throw (RuntimeException) ide.getCause(); } catch (Exception ex) { DistributionManager.this.exceptionInThreads = true; String receiver = "NULL"; if (message != null) { receiver = message.getRecipientsDescription(); } logger.fatal( 
LocalizedMessage.create(LocalizedStrings.DistributionManager_WHILE_PUSHING_MESSAGE_0_TO_1, new Object[] {message, receiver}), ex); if (message == null || message.forAll()) return null; result = new HashSet(); for (int i = 0; i < message.getRecipients().length; i++) result.add(message.getRecipients()[i]); return result; /* * if (ex instanceof org.apache.geode.GemFireIpcResourceException) { return; } */ } return result; } /** * Schedule a given message appropriately, depending upon its executor kind. * * @param message */ protected void scheduleIncomingMessage(DistributionMessage message) { /* * Potential race condition between starting up and getting other distribution manager ids -- DM * will only be initialized upto the point at which it called startThreads */ waitUntilReadyForMessages(); message.schedule(DistributionManager.this); } /** * Mutex to control access to {@link #waitingForElderChange} or {@link #elder}. */ protected final Object elderMonitor = new Object(); /** * Must be read/written while holding {@link #elderMonitor} * * @see #elderChangeWait() */ private boolean waitingForElderChange = false; /** * @see DM#isAdam() */ private boolean adam = false; /** * This is the "elder" member of the distributed system, responsible for certain types of * arbitration. * * Must hold {@link #elderMonitor} in order to change this. 
* * @see #getElderId() */ protected volatile InternalDistributedMember elder = null; public boolean isAdam() { return this.adam; } public InternalDistributedMember getElderId() throws DistributedSystemDisconnectedException { // membershipManager.waitForEventProcessing(); if (closeInProgress) { throw new DistributedSystemDisconnectedException( LocalizedStrings.DistributionManager_NO_VALID_ELDER_WHEN_SYSTEM_IS_SHUTTING_DOWN .toLocalizedString(), this.getRootCause()); } getSystem().getCancelCriterion().checkCancelInProgress(null); // Cache a recent value of the elder InternalDistributedMember result = elder; if (result != null && membershipManager.memberExists(result)) { return result; } logger.info(LocalizedMessage.create( LocalizedStrings.DistributionManager_ELDER__0__IS_NOT_CURRENTLY_AN_ACTIVE_MEMBER_SELECTING_NEW_ELDER, elder)); selectElder(); // ShutdownException can be thrown here logger.info(LocalizedMessage .create(LocalizedStrings.DistributionManager_NEWLY_SELECTED_ELDER_IS_NOW__0_, elder)); return elder; } public boolean isElder() { return getId().equals(elder); } public boolean isLoner() { return false; } private final StoppableReentrantLock elderLock; private ElderState elderState; private volatile boolean elderStateInitialized; public ElderState getElderState(boolean force, boolean useTryLock) { if (force) { if (logger.isDebugEnabled()) { if (!this.myid.equals(this.elder)) { logger.debug("Forcing myself, {}, to be the elder.", this.myid); } } changeElder(this.myid); } if (force || this.myid.equals(elder)) { // we are the elder if (this.elderStateInitialized) { return this.elderState; } return getElderStateWithTryLock(useTryLock); } else { // we are not the elder so return null return null; } } /** * Usage: GrantorRequestProcessor calls getElderState with useTryLock set to true if the * becomeGrantor Collaboration is already acquired. * <p> * This tryLock is attempted and if it fails, an exception is thrown to cause a Doug Lea style * back-off (p. 149). 
It throws an exception because it needs to back down a couple of packages
   * and I didn't want to couple this pkg too tightly with the dlock pkg.
   * <p>
   * GrantorRequestProcessor catches the exception, releases and reacquires the Collaboration, and
   * then comes back here to attempt the tryLock again. Currently nothing will stop it from
   * re-attempting forever. It has to get the ElderState and cannot give up, but it can free up the
   * Collaboration and then re-enter it. The other thread holding the elder lock will hold it only
   * briefly. I've added a volatile called elderStateInitialized which should cause this back-off to
   * occur only once in the life of a vm... once the elder, always the elder.
   * <p>
   *
   * @param useTryLock true to make a timed attempt at the elder lock; false to block until the
   *        lock is acquired
   * @return the elder state, created here if it did not exist yet
   * @throws IllegalStateException if useTryLock is true and the lock could not be acquired
   */
  private ElderState getElderStateWithTryLock(boolean useTryLock) {
    boolean locked = false;
    if (useTryLock) {
      // Clear the interrupt flag but remember it, so it can be restored in the finally block.
      boolean interrupted = Thread.interrupted();
      try {
        // NOTE(review): the timeout unit is defined by StoppableReentrantLock, which is not
        // visible here; presumably milliseconds - confirm.
        locked = this.elderLock.tryLock(2000);
      } catch (InterruptedException e) {
        interrupted = true;
        getCancelCriterion().checkCancelInProgress(e);
        // one last attempt and then allow it to fail for back-off...
        locked = this.elderLock.tryLock();
      } finally {
        if (interrupted) {
          Thread.currentThread().interrupt();
        }
      }
    } else {
      locked = true;
      this.elderLock.lock();
    }
    if (!locked) {
      // try-lock must have failed
      throw new IllegalStateException(
          LocalizedStrings.DistributionManager_POSSIBLE_DEADLOCK_DETECTED.toLocalizedString());
    }
    try {
      // Create the elder state only if it does not exist yet; this runs with elderLock held.
      if (this.elderState == null) {
        this.elderState = new ElderState(this);
      }
    } finally {
      this.elderLock.unlock();
    }
    // Set only after the state exists, so getElderState can return it without taking the lock.
    this.elderStateInitialized = true;
    // if (Thread.currentThread().isInterrupted())
    // throw new RuntimeException("Interrupted");
    return this.elderState;
  }

  /**
   * Waits until the elder is desiredElder or desiredElder is no longer a member
   *
   * @return true if desiredElder is the elder; false if he is no longer a member, we are the
   *         elder, or a close is in progress.
*/
  public boolean waitForElder(final InternalDistributedMember desiredElder) {
    // Registered lazily, the first time we actually have to wait; removed in the finally block.
    MembershipListener l = null;
    try {
      // Assert.assertTrue(
      // desiredElder.getVmKind() != DistributionManager.ADMIN_ONLY_DM_TYPE);
      synchronized (this.elderMonitor) {
        // Each pass re-checks every exit condition after a wake-up from elderChangeWait().
        while (true) {
          if (closeInProgress)
            return false;
          InternalDistributedMember currentElder = this.elder;
          // Assert.assertTrue(
          // currentElder.getVmKind() != DistributionManager.ADMIN_ONLY_DM_TYPE);
          if (desiredElder.equals(currentElder)) {
            return true;
          }
          if (!isCurrentMember(desiredElder)) {
            return false; // no longer present
          }
          if (this.myid.equals(currentElder)) {
            // Once we become the elder we no longer allow anyone else to be the
            // elder so don't let them wait anymore.
            return false;
          }
          if (l == null) {
            // Wake the waiter when desiredElder departs, so the loop can notice it is gone.
            l = new MembershipListener() {
              public void memberJoined(InternalDistributedMember theId) {
                // nothing needed
              }

              public void memberDeparted(InternalDistributedMember theId, boolean crashed) {
                if (desiredElder.equals(theId)) {
                  notifyElderChangeWaiters();
                }
              }

              public void memberSuspect(InternalDistributedMember id,
                  InternalDistributedMember whoSuspected, String reason) {}

              public void viewInstalled(NetView view) {}

              public void quorumLost(Set<InternalDistributedMember> failures,
                  List<InternalDistributedMember> remaining) {}
            };
            addMembershipListener(l);
          }
          logger.info(LocalizedMessage.create(
              LocalizedStrings.DistributionManager_CHANGING_ELDER_FROM_0_TO_1,
              new Object[] {currentElder, desiredElder}));
          elderChangeWait();
        } // while true
      }
    } finally {
      if (l != null) {
        removeMembershipListener(l);
      }
    }
  }

  /**
   * Set the elder to newElder and notify anyone waiting for it to change. The change is skipped
   * when this member is already the elder and newElder is a different, non-null member.
   *
   * @param newElder the member to record as the elder
   */
  protected void changeElder(InternalDistributedMember newElder) {
    synchronized (this.elderMonitor) {
      if (newElder != null && this.myid != null && !this.myid.equals(newElder)) {
        if (this.myid.equals(this.elder)) {
          // someone else changed the elder while this thread was off cpu
          if (logger.isDebugEnabled()) {
            logger.debug("changeElder found this VM to be the elder and is taking an early out");
          }
          return;
        }
      }
      this.elder = newElder;
      if (this.waitingForElderChange) {
        this.waitingForElderChange = false;
        this.elderMonitor.notifyAll();
      }
    }
  }

  /**
   * Used to wakeup someone in elderChangeWait even though the elder has not changed
   */
  protected void notifyElderChangeWaiters() {
    synchronized (this.elderMonitor) {
      if (this.waitingForElderChange) {
        this.waitingForElderChange = false;
        this.elderMonitor.notifyAll();
      }
    }
  }

  /**
   * Waits on {@link #elderMonitor} until woken. Must be called holding {@link #elderMonitor} lock
   */
  private void elderChangeWait() {
    // This is OK since we're holding the elderMonitor lock, so no
    // new events will come through until the wait() below.
    this.waitingForElderChange = true;

    while (this.waitingForElderChange) {
      stopper.checkCancelInProgress(null);
      // Clear the interrupt flag but remember it, so it can be restored in the finally block.
      boolean interrupted = Thread.interrupted();
      try {
        this.elderMonitor.wait();
        // Any return from wait() ends the loop; the caller re-checks its own conditions.
        break;
      } catch (InterruptedException ignore) {
        // Interrupted while waiting: note it and go around the loop again.
        interrupted = true;
      } finally {
        if (interrupted) {
          Thread.currentThread().interrupt();
        }
      }
    } // while
  }

  /**
   * getThreadPool gets this distribution manager's message-processing thread pool
   */
  public ExecutorService getThreadPool() {
    return this.threadPool;
  }

  /**
   * Return the high-priority message-processing executor
   */
  public ExecutorService getHighPriorityThreadPool() {
    return this.highPriorityPool;
  }

  /**
   * Return the waiting message-processing executor
   */
  public ExecutorService getWaitingThreadPool() {
    return this.waitingPool;
  }

  /**
   * Return the executor service held in {@code prMetaDataCleanupThreadPool} (presumably used for
   * partitioned region meta-data cleanup - confirm against its users)
   */
  public ExecutorService getPrMetaDataCleanupThreadPool() {
    return this.prMetaDataCleanupThreadPool;
  }

  /**
   * Return the partitioned region executor: {@code partitionedRegionThread} when it is set,
   * otherwise {@code partitionedRegionPool}
   */
  public Executor getPartitionedRegionExcecutor() {
    if (this.partitionedRegionThread != null) {
      return this.partitionedRegionThread;
    } else {
      return this.partitionedRegionPool;
    }
  }

  /**
   * Return the function execution executor: {@code functionExecutionThread} when it is set,
   * otherwise {@code functionExecutionPool}
   */
  public Executor getFunctionExcecutor() {
    if (this.functionExecutionThread != null) {
      return this.functionExecutionThread;
    } else {
      return this.functionExecutionPool;
    }
  }

  /**
   * Returns the serial executor for the given sender: a throttled per-sender executor from the
   * pool when MULTI_SERIAL_EXECUTORS is set, otherwise the single {@code serialThread}.
   */
  private Executor getSerialExecutor(InternalDistributedMember sender) {
    if (MULTI_SERIAL_EXECUTORS) {
      return this.serialQueuedExecutorPool.getThrottledSerialExecutor(sender);
    } else {
      return this.serialThread;
    }
  }

  /** returns the serialThread's queue if throttling is being used, null if not */
  public OverflowQueueWithDMStats getSerialQueue(InternalDistributedMember sender) {
    if (MULTI_SERIAL_EXECUTORS) {
      return this.serialQueuedExecutorPool.getSerialQueue(sender);
    } else {
      return this.serialQueue;
    }
  }

  /**
   * Sets the administration agent associated with this distribution manager.
   *
   * @param agent the agent to associate, or null to clear the current association
   * @throws IllegalStateException if an agent is given while one is already set, or if null is
   *         given while no agent is set
   *
   * @since GemFire 4.0
   */
  public void setAgent(RemoteGfManagerAgent agent) {
    // Don't let the agent be set twice. There should be a one-to-one
    // correspondence between admin agent and distribution manager.
    if (agent != null) {
      if (this.agent != null) {
        throw new IllegalStateException(
            LocalizedStrings.DistributionManager_THERE_IS_ALREADY_AN_ADMIN_AGENT_ASSOCIATED_WITH_THIS_DISTRIBUTION_MANAGER
                .toLocalizedString());
      }

    } else {
      if (this.agent == null) {
        throw new IllegalStateException(
            LocalizedStrings.DistributionManager_THERE_WAS_NEVER_AN_ADMIN_AGENT_ASSOCIATED_WITH_THIS_DISTRIBUTION_MANAGER
                .toLocalizedString());
      }
    }
    this.agent = agent;
  }

  /**
   * Returns the agent that owns this distribution manager. (in ConsoleDistributionManager)
   *
   * @since GemFire 3.5
   */
  public RemoteGfManagerAgent getAgent() {
    return this.agent;
  }

  /**
   * Returns a description of the distribution configuration used for this distribution manager.
(in * ConsoleDistributionManager) * * @return <code>null</code> if no admin {@linkplain #getAgent agent} is associated with this * distribution manager * * @since GemFire 3.5 */ public String getDistributionConfigDescription() { if (this.agent == null) { return null; } else { return this.agent.getTransport().toString(); } } /* -----------------------------Health Monitor------------------------------ */ private final ConcurrentMap hmMap = new ConcurrentHashMap(); /** * Returns the health monitor for this distribution manager and owner. * * @param owner the agent that owns the returned monitor * @return the health monitor created by the owner; <code>null</code> if the owner has now created * a monitor. * @since GemFire 3.5 */ public HealthMonitor getHealthMonitor(InternalDistributedMember owner) { return (HealthMonitor) this.hmMap.get(owner); } /** * Returns the health monitor for this distribution manager. * * @param owner the agent that owns the created monitor * @param cfg the configuration to use when creating the monitor * @since GemFire 3.5 */ public void createHealthMonitor(InternalDistributedMember owner, GemFireHealthConfig cfg) { if (closeInProgress) { return; } { final HealthMonitor hm = getHealthMonitor(owner); if (hm != null) { hm.stop(); this.hmMap.remove(owner); } } { HealthMonitorImpl newHm = new HealthMonitorImpl(owner, cfg, this); newHm.start(); this.hmMap.put(owner, newHm); } } /** * Remove a monitor that was previously created. 
* * @param owner the agent that owns the monitor to remove */ public void removeHealthMonitor(InternalDistributedMember owner, int theId) { final HealthMonitor hm = getHealthMonitor(owner); if (hm != null && hm.getId() == theId) { hm.stop(); this.hmMap.remove(owner); } } public void removeAllHealthMonitors() { Iterator it = this.hmMap.values().iterator(); while (it.hasNext()) { HealthMonitor hm = (HealthMonitor) it.next(); hm.stop(); it.remove(); } } // For feature request #32887 public Set getAdminMemberSet() { return this.adminConsoles; } /** Returns count of members filling the specified role */ public int getRoleCount(Role role) { int count = 0; Set mbrs = getDistributionManagerIds(); for (Iterator mbrIter = mbrs.iterator(); mbrIter.hasNext();) { Set roles = ((InternalDistributedMember) mbrIter.next()).getRoles(); for (Iterator rolesIter = roles.iterator(); rolesIter.hasNext();) { Role mbrRole = (Role) rolesIter.next(); if (mbrRole.equals(role)) { count++; break; } } } return count; } /** Returns true if at least one member is filling the specified role */ public boolean isRolePresent(Role role) { Set mbrs = getDistributionManagerIds(); for (Iterator mbrIter = mbrs.iterator(); mbrIter.hasNext();) { Set roles = ((InternalDistributedMember) mbrIter.next()).getRoles(); for (Iterator rolesIter = roles.iterator(); rolesIter.hasNext();) { Role mbrRole = (Role) rolesIter.next(); if (mbrRole.equals(role)) { return true; } } } return false; } /** Returns a set of all roles currently in the distributed system. */ public Set getAllRoles() { Set allRoles = new HashSet(); Set mbrs = getDistributionManagerIds(); for (Iterator mbrIter = mbrs.iterator(); mbrIter.hasNext();) { Set roles = ((InternalDistributedMember) mbrIter.next()).getRoles(); for (Iterator rolesIter = roles.iterator(); rolesIter.hasNext();) { Role mbrRole = (Role) rolesIter.next(); allRoles.add(mbrRole); } } return allRoles; } /** * Returns the membership manager for this distributed system. 
The membership manager owns the
   * membership set and handles all communications. The manager should NOT be used to bypass
   * DistributionManager to send or receive messages.
   * <p>
   * This method was added to allow hydra to obtain thread-local data for transport from one thread
   * to another.
   *
   * @return the membership manager held by this distribution manager
   */
  public MembershipManager getMembershipManager() {
    // NOTE: do not add cancellation checks here. This method is
    // used during auto-reconnect after the DS has been closed
    return membershipManager;
  }


  ////////////////////// Inner Classes //////////////////////


  /**
   * This class is used for DM's multi serial executor. The serial messages are managed/executed by
   * multiple serial threads. This class takes care of executing messages related to a sender using
   * the same thread.
   */
  static private class SerialQueuedExecutorPool {
    /** To store the serial threads: maps a queue id to its serial executor */
    ConcurrentMap serialQueuedExecutorMap = new ConcurrentHashMap(MAX_SERIAL_QUEUE_THREAD);

    /** To store the queue associated with each thread: maps a queue id to its queue */
    Map serialQueuedMap = new HashMap(MAX_SERIAL_QUEUE_THREAD);

    /**
     * Holds the mapping from a sender to its serial thread-id (queue id). getQueueId synchronizes
     * on this map while reading and updating it.
     */
    Map senderToSerialQueueIdMap = new HashMap();

    /**
     * Holds the ids of unused threads; a thread is marked unused when the member associated with
     * it has left the distributed system. getQueueId hands these ids out again, first one first.
     */
    ArrayList threadMarkedForUse = new ArrayList();

    /** Statistics updated by this pool */
    DistributionStats stats;

    /** Thread group that the serial threads are created in */
    ThreadGroup threadGroup;

    /** When true, new serial queues are created without throttling */
    final boolean throttlingDisabled;

    /**
     * Constructor.
     *
     * @param group thread group to which the threads will belong.
     * @param stats the statistics object updated by this pool
     * @param throttlingDisabled true to create serial queues without throttling
     */
    SerialQueuedExecutorPool(ThreadGroup group, DistributionStats stats,
        boolean throttlingDisabled) {
      this.threadGroup = group;
      this.stats = stats;
      this.throttlingDisabled = throttlingDisabled;
    }

    /*
     * Returns the id of the thread in serialQueuedExecutorMap that is mapped to the given sender.
     *
     * @param sender
     *
     * @param createNew boolean flag to indicate whether to create a new id, if the id does not
     * exist.
*/ private Integer getQueueId(InternalDistributedMember sender, boolean createNew) { // Create a new Id. Integer queueId; synchronized (senderToSerialQueueIdMap) { // Check if there is a executor associated with this sender. queueId = (Integer) senderToSerialQueueIdMap.get(sender); if (!createNew || queueId != null) { return queueId; } // Create new. // Check if any threads are availabe that is marked for Use. if (!threadMarkedForUse.isEmpty()) { queueId = (Integer) threadMarkedForUse.remove(0); } // If Map is full, use the threads in round-robin fashion. if (queueId == null) { queueId = Integer.valueOf((serialQueuedExecutorMap.size() + 1) % MAX_SERIAL_QUEUE_THREAD); } senderToSerialQueueIdMap.put(sender, queueId); } return queueId; } /* * Returns the queue associated with this sender. Used in FlowControl for throttling (based on * queue size). */ public OverflowQueueWithDMStats getSerialQueue(InternalDistributedMember sender) { Integer queueId = getQueueId(sender, false); if (queueId == null) { return null; } return (OverflowQueueWithDMStats) serialQueuedMap.get(queueId); } /* * Returns the serial queue executor, before returning the thread this applies throttling, based * on the total serial queue size (total - sum of all the serial queue size). The throttling is * applied during put event, this doesnt block the extract operation on the queue. * */ public SerialQueuedExecutorWithDMStats getThrottledSerialExecutor( InternalDistributedMember sender) { SerialQueuedExecutorWithDMStats executor = getSerialExecutor(sender); // Get the total serial queue size. int totalSerialQueueMemSize = stats.getSerialQueueBytes(); // for tcp socket reader threads, this code throttles the thread // to keep the sender-side from overwhelming the receiver. 
// UDP readers are throttled in the FC protocol, which queries // the queue to see if it should throttle if (stats.getSerialQueueBytes() > TOTAL_SERIAL_QUEUE_THROTTLE && !DistributionMessage.isPreciousThread()) { do { boolean interrupted = Thread.interrupted(); try { float throttlePercent = (float) (totalSerialQueueMemSize - TOTAL_SERIAL_QUEUE_THROTTLE) / (float) (TOTAL_SERIAL_QUEUE_BYTE_LIMIT - TOTAL_SERIAL_QUEUE_THROTTLE); int sleep = (int) (100.0 * throttlePercent); sleep = Math.max(sleep, 1); Thread.sleep(sleep); } catch (InterruptedException ex) { interrupted = true; // FIXME-InterruptedException // Perhaps we should return null here? } finally { if (interrupted) { Thread.currentThread().interrupt(); } } this.stats.getSerialQueueHelper().incThrottleCount(); } while (stats.getSerialQueueBytes() >= TOTAL_SERIAL_QUEUE_BYTE_LIMIT); } return executor; } /* * Returns the serial queue executor for the given sender. */ public SerialQueuedExecutorWithDMStats getSerialExecutor(InternalDistributedMember sender) { SerialQueuedExecutorWithDMStats executor = null; Integer queueId = getQueueId(sender, true); if ((executor = (SerialQueuedExecutorWithDMStats) serialQueuedExecutorMap.get(queueId)) != null) { return executor; } // If executor doesn't exists for this sender, create one. executor = createSerialExecutor(queueId); serialQueuedExecutorMap.put(queueId, executor); if (logger.isDebugEnabled()) { logger.debug( "Created Serial Queued Executor With queueId {}. Total number of live Serial Threads :{}", queueId, serialQueuedExecutorMap.size()); } stats.incSerialPooledThread(); return executor; } /* * Creates a serial queue executor. 
*/ private SerialQueuedExecutorWithDMStats createSerialExecutor(final Integer id) { BlockingQueue poolQueue; if (SERIAL_QUEUE_BYTE_LIMIT == 0 || this.throttlingDisabled) { poolQueue = new OverflowQueueWithDMStats(stats.getSerialQueueHelper()); } else { poolQueue = new ThrottlingMemLinkedQueueWithDMStats(SERIAL_QUEUE_BYTE_LIMIT, SERIAL_QUEUE_THROTTLE, SERIAL_QUEUE_SIZE_LIMIT, SERIAL_QUEUE_SIZE_THROTTLE, this.stats.getSerialQueueHelper()); } serialQueuedMap.put(id, poolQueue); ThreadFactory tf = new ThreadFactory() { public Thread newThread(final Runnable command) { SerialQueuedExecutorPool.this.stats.incSerialPooledThreadStarts(); final Runnable r = new Runnable() { public void run() { ConnectionTable.threadWantsSharedResources(); Connection.makeReaderThread(); try { command.run(); } finally { ConnectionTable.releaseThreadsSockets(); } } }; Thread thread = new Thread(threadGroup, r, "Pooled Serial Message Processor " + id); thread.setDaemon(true); return thread; } }; return new SerialQueuedExecutorWithDMStats(poolQueue, this.stats.getSerialPooledProcessorHelper(), tf); } /* * Does cleanup relating to this member. And marks the serial executor associated with this * member for re-use. */ public void handleMemberDeparture(InternalDistributedMember member) { Integer queueId = getQueueId(member, false); if (queueId == null) { return; } boolean isUsed = false; synchronized (senderToSerialQueueIdMap) { senderToSerialQueueIdMap.remove(member); // Check if any other members are using the same executor. for (Iterator iter = senderToSerialQueueIdMap.values().iterator(); iter.hasNext();) { Integer value = (Integer) iter.next(); if (value.equals(queueId)) { isUsed = true; break; } } // If not used mark this as unused. 
if (!isUsed) { if (logger.isInfoEnabled(LogMarker.DM)) logger.info(LogMarker.DM, LocalizedMessage.create( LocalizedStrings.DistributionManager_MARKING_THE_SERIALQUEUEDEXECUTOR_WITH_ID__0__USED_BY_THE_MEMBER__1__TO_BE_UNUSED, new Object[] {queueId, member})); threadMarkedForUse.add(queueId); } } } public void awaitTermination(long time, TimeUnit unit) throws InterruptedException { long timeNanos = unit.toNanos(time); long remainingNanos = timeNanos; long start = System.nanoTime(); for (Iterator iter = serialQueuedExecutorMap.values().iterator(); iter.hasNext();) { ExecutorService executor = (ExecutorService) iter.next(); executor.awaitTermination(remainingNanos, TimeUnit.NANOSECONDS); remainingNanos = timeNanos = (System.nanoTime() - start); if (remainingNanos <= 0) { return; } } } protected void shutdown() { for (Iterator iter = serialQueuedExecutorMap.values().iterator(); iter.hasNext();) { ExecutorService executor = (ExecutorService) iter.next(); executor.shutdown(); } } } /** * A simple class used for locking the list of members of the distributed system. We give this * lock its own class so that it shows up nicely in stack traces. */ private static final class MembersLock { protected MembersLock() { } } /** * A simple class used for locking the list of membership listeners. We give this lock its own * class so that it shows up nicely in stack traces. */ private static final class MembershipListenersLock { protected MembershipListenersLock() {} } /** * This is the listener implementation for responding from events from the Membership Manager. 
* */ private final class MyListener implements DistributedMembershipListener { DistributionManager dm; public MyListener(DistributionManager dm) { this.dm = dm; } public boolean isShutdownMsgSent() { return shutdownMsgSent; } public void membershipFailure(String reason, Throwable t) { exceptionInThreads = true; DistributionManager.this.rootCause = t; getSystem().disconnect(reason, t, true); } public void messageReceived(DistributionMessage message) { handleIncomingDMsg(message); } public void newMemberConnected(InternalDistributedMember member) { // Do not elect the elder here as surprise members invoke this callback // without holding the view lock. That can cause a race condition and // subsequent deadlock (#45566). Elder selection is now done when a view // is installed. dm.addNewMember(member); } public void memberDeparted(InternalDistributedMember theId, boolean crashed, String reason) { boolean wasAdmin = getAdminMemberSet().contains(theId); if (wasAdmin) { // Pretend we received an AdminConsoleDisconnectMessage from the console that // is no longer in the JavaGroup view. // He must have died without sending a ShutdownMessage. // This fixes bug 28454. 
AdminConsoleDisconnectMessage message = new AdminConsoleDisconnectMessage(); message.setSender(theId); message.setCrashed(crashed); message.setAlertListenerExpected(true); message.setIgnoreAlertListenerRemovalFailure(true); // we don't know if it was a listener so // don't issue a warning message.setRecipient(myid); message.setReason(reason); // added for #37950 handleIncomingDMsg(message); } dm.handleManagerDeparture(theId, crashed, reason); } public void memberSuspect(InternalDistributedMember suspect, InternalDistributedMember whoSuspected, String reason) { dm.handleManagerSuspect(suspect, whoSuspected, reason); } public void viewInstalled(NetView view) { processElderSelection(); dm.handleViewInstalled(view); } /** this is invoked when quorum is being lost, before the view has been installed */ public void quorumLost(Set<InternalDistributedMember> failures, List<InternalDistributedMember> remaining) { dm.handleQuorumLost(failures, remaining); } public DistributionManager getDM() { return dm; } private void processElderSelection() { // If we currently had no elder, this member might be the elder; // go through the selection process and decide now. 
try { dm.selectElder(); } catch (DistributedSystemDisconnectedException e) { // ignore } } } private static abstract class MemberEvent { private InternalDistributedMember id; MemberEvent(InternalDistributedMember id) { this.id = id; } public InternalDistributedMember getId() { return this.id; } public void handleEvent(DistributionManager manager) { handleEvent(manager, manager.membershipListeners.keySet()); handleEvent(manager, manager.allMembershipListeners); } protected abstract void handleEvent(MembershipListener listener); protected void handleEvent(DistributionManager manager, Set<MembershipListener> membershipListeners) { for (MembershipListener listener : membershipListeners) { try { handleEvent(listener); } catch (CancelException e) { if (manager.isCloseInProgress()) { if (logger.isTraceEnabled()) { logger.trace("MemberEventInvoker: cancelled"); } } else { logger.warn(LocalizedMessage .create(LocalizedStrings.DistributionManager_UNEXPECTED_CANCELLATION), e); } break; } catch (VirtualMachineError err) { SystemFailure.initiateFailure(err); // If this ever returns, rethrow the error. We're poisoned // now, so don't let this thread continue. throw err; } catch (Throwable t) { // Whenever you catch Error or Throwable, you must also // catch VirtualMachineError (see above). However, there is // _still_ a possibility that you are dealing with a cascading // error condition, so you also need to check to see if the JVM // is still usable: SystemFailure.checkFailure(); logger.warn(LocalizedMessage.create( LocalizedStrings.DistributionManager_EXCEPTION_WHILE_CALLING_MEMBERSHIP_LISTENER_FOR_EVENT__0, this), t); } } } } /** * This is an event reflecting that a InternalDistributedMember has joined the system. 
*/
  private static final class MemberJoinedEvent extends MemberEvent {
    MemberJoinedEvent(InternalDistributedMember id) {
      super(id);
    }

    @Override
    public String toString() {
      final StringBuilder text = new StringBuilder("member ");
      text.append(getId()).append(" joined");
      return text.toString();
    }

    /** Tells the listener that the member joined. */
    @Override
    protected void handleEvent(MembershipListener listener) {
      final InternalDistributedMember joined = getId();
      listener.memberJoined(joined);
    }
  }

  /**
   * Event signalling that an InternalDistributedMember has left the system; listeners are told the
   * departure was not a crash.
   */
  private static final class MemberDepartedEvent extends MemberEvent {
    String reason;

    MemberDepartedEvent(InternalDistributedMember id, String r) {
      super(id);
      this.reason = r;
    }

    @Override
    public String toString() {
      final StringBuilder text = new StringBuilder("member ");
      text.append(getId()).append(" departed (").append(this.reason).append(")");
      return text.toString();
    }

    @Override
    protected void handleEvent(MembershipListener listener) {
      final boolean crashed = false;
      listener.memberDeparted(getId(), crashed);
    }
  }

  /**
   * Event signalling that an InternalDistributedMember has left the system in an unexpected way;
   * listeners are told the departure was a crash.
   */
  private static final class MemberCrashedEvent extends MemberEvent {
    String reason;

    MemberCrashedEvent(InternalDistributedMember id, String r) {
      super(id);
      this.reason = r;
    }

    @Override
    public String toString() {
      final StringBuilder text = new StringBuilder("member ");
      text.append(getId()).append(" crashed: ").append(this.reason);
      return text.toString();
    }

    @Override
    protected void handleEvent(MembershipListener listener) {
      final boolean crashed = true;
      listener.memberDeparted(getId(), crashed);
    }
  }

  /**
   * This is an event reflecting that a InternalDistributedMember may be missing but has not yet
   * left the system.
*/ private static final class MemberSuspectEvent extends MemberEvent { InternalDistributedMember whoSuspected; String reason; MemberSuspectEvent(InternalDistributedMember suspect, InternalDistributedMember whoSuspected, String reason) { super(suspect); this.whoSuspected = whoSuspected; this.reason = reason; } public InternalDistributedMember whoSuspected() { return this.whoSuspected; } public String getReason() { return this.reason; } @Override public String toString() { return "member " + getId() + " suspected by: " + this.whoSuspected + " reason: " + reason; } @Override protected void handleEvent(MembershipListener listener) { listener.memberSuspect(getId(), whoSuspected(), reason); } } private static final class ViewInstalledEvent extends MemberEvent { NetView view; ViewInstalledEvent(NetView view) { super(null); this.view = view; } public long getViewId() { return view.getViewId(); } @Override public String toString() { return "view installed: " + this.view; } @Override public void handleEvent(DistributionManager manager) { manager.handleViewInstalledEvent(this); } @Override protected void handleEvent(MembershipListener listener) { throw new UnsupportedOperationException(); } } private static final class QuorumLostEvent extends MemberEvent { Set<InternalDistributedMember> failures; List<InternalDistributedMember> remaining; QuorumLostEvent(Set<InternalDistributedMember> failures, List<InternalDistributedMember> remaining) { super(null); this.failures = failures; this.remaining = remaining; } public Set<InternalDistributedMember> getFailures() { return this.failures; } public List<InternalDistributedMember> getRemaining() { return this.remaining; } @Override public String toString() { return "quorum lost. 
failures=" + failures + "; remaining=" + remaining; } @Override protected void handleEvent(MembershipListener listener) { listener.quorumLost(getFailures(), getRemaining()); } } /* * (non-Javadoc) * * @see org.apache.geode.distributed.internal.DM#getRootCause() */ public Throwable getRootCause() { return this.rootCause; } /* * (non-Javadoc) * * @see org.apache.geode.distributed.internal.DM#setRootCause(java.lang.Throwable) */ public void setRootCause(Throwable t) { this.rootCause = t; } /* * (non-Javadoc) * * @see org.apache.geode.distributed.internal.DM#getMembersOnThisHost() * * @since GemFire 5.9 */ public Set<InternalDistributedMember> getMembersInThisZone() { return getMembersInSameZone(getDistributionManagerId()); } public Set<InternalDistributedMember> getMembersInSameZone( InternalDistributedMember targetMember) { Set<InternalDistributedMember> buddyMembers = new HashSet<InternalDistributedMember>(); if (!redundancyZones.isEmpty()) { synchronized (redundancyZones) { String targetZone = redundancyZones.get(targetMember); for (Map.Entry<InternalDistributedMember, String> entry : redundancyZones.entrySet()) { if (entry.getValue().equals(targetZone)) { buddyMembers.add(entry.getKey()); } } } } else { buddyMembers.add(targetMember); Set targetAddrs = getEquivalents(targetMember.getInetAddress()); for (Iterator i = getDistributionManagerIds().iterator(); i.hasNext();) { InternalDistributedMember o = (InternalDistributedMember) i.next(); if (SetUtils.intersectsWith(targetAddrs, getEquivalents(o.getInetAddress()))) { buddyMembers.add(o); } } } return buddyMembers; } public boolean areInSameZone(InternalDistributedMember member1, InternalDistributedMember member2) { if (!redundancyZones.isEmpty()) { String zone1 = redundancyZones.get(member1); String zone2 = redundancyZones.get(member2); return zone1 != null && zone1.equals(zone2); } else { return areOnEquivalentHost(member1, member2); } } public void acquireGIIPermitUninterruptibly() { 
this.parallelGIIs.acquireUninterruptibly(); this.stats.incInitialImageRequestsInProgress(1); } public void releaseGIIPermit() { this.stats.incInitialImageRequestsInProgress(-1); this.parallelGIIs.release(); } public void setDistributedSystemId(int distributedSystemId) { if (distributedSystemId != -1) { this.distributedSystemId = distributedSystemId; } } public int getDistributedSystemId() { return this.distributedSystemId; } /** * this causes all members in the system to log thread dumps If useNative is true we attempt to * use OSProcess native code for the dumps. This goes to stdout instead of the system.log files. */ public void printDistributedSystemStacks(boolean useNative) { printStacks(new HashSet(getDistributionManagerIds()), useNative); } /** * this causes the given InternalDistributedMembers to log thread dumps. If useNative is true we * attempt to use OSProcess native code for the dumps. This goes to stdout instead of the * system.log files. */ public void printStacks(Collection ids, boolean useNative) { Set requiresMessage = new HashSet(); if (ids.contains(myid)) { OSProcess.printStacks(0, useNative); } if (useNative) { requiresMessage.addAll(ids); ids.remove(myid); } else { for (Iterator it = ids.iterator(); it.hasNext();) { InternalDistributedMember mbr = (InternalDistributedMember) it.next(); if (mbr.getProcessId() > 0 && mbr.getInetAddress().equals(this.myid.getInetAddress())) { if (!mbr.equals(myid)) { if (!OSProcess.printStacks(mbr.getProcessId(), false)) { requiresMessage.add(mbr); } } } else { requiresMessage.add(mbr); } } } if (requiresMessage.size() > 0) { HighPriorityAckedMessage msg = new HighPriorityAckedMessage(); msg.dumpStacks(requiresMessage, useNative, false); } } public Set<DistributedMember> getGroupMembers(String group) { HashSet<DistributedMember> result = null; for (DistributedMember m : (Set<DistributedMember>) getDistributionManagerIdsIncludingAdmin()) { if (m.getGroups().contains(group)) { if (result == null) { result = new 
HashSet<DistributedMember>(); } result.add(m); } } if (result == null) { return Collections.emptySet(); } else { return result; } } @Override public Set getNormalDistributionManagerIds() { // access to members synchronized under membersLock in order to // ensure serialization synchronized (this.membersLock) { HashSet<InternalDistributedMember> result = new HashSet<InternalDistributedMember>(); for (InternalDistributedMember m : this.members.keySet()) { if (m.getVmKind() != DistributionManager.LOCATOR_DM_TYPE) { result.add(m); } } return result; } } public Set<InternalDistributedMember> getLocatorDistributionManagerIds() { // access to members synchronized under membersLock in order to // ensure serialization synchronized (this.membersLock) { HashSet<InternalDistributedMember> result = new HashSet<InternalDistributedMember>(); for (InternalDistributedMember m : this.members.keySet()) { if (m.getVmKind() == DistributionManager.LOCATOR_DM_TYPE) { result.add(m); } } return result; } } }