/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ignite.spi.communication.tcp; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.ConnectException; import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.SocketTimeoutException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.SelectableChannel; import java.nio.channels.SocketChannel; import java.nio.channels.spi.AbstractInterruptibleChannel; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CountDownLatch; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import javax.net.ssl.SSLEngine; import javax.net.ssl.SSLException; import org.apache.ignite.Ignite; import 
org.apache.ignite.IgniteCheckedException; import org.apache.ignite.IgniteClientDisconnectedException; import org.apache.ignite.IgniteException; import org.apache.ignite.IgniteLogger; import org.apache.ignite.IgniteSystemProperties; import org.apache.ignite.cluster.ClusterNode; import org.apache.ignite.configuration.AddressResolver; import org.apache.ignite.configuration.IgniteConfiguration; import org.apache.ignite.events.DiscoveryEvent; import org.apache.ignite.events.Event; import org.apache.ignite.internal.IgniteClientDisconnectedCheckedException; import org.apache.ignite.internal.IgniteInternalFuture; import org.apache.ignite.internal.IgniteInterruptedCheckedException; import org.apache.ignite.internal.cluster.ClusterTopologyCheckedException; import org.apache.ignite.internal.managers.eventstorage.GridLocalEventListener; import org.apache.ignite.internal.util.GridConcurrentFactory; import org.apache.ignite.internal.util.GridSpinReadWriteLock; import org.apache.ignite.internal.util.future.GridFutureAdapter; import org.apache.ignite.internal.util.ipc.IpcEndpoint; import org.apache.ignite.internal.util.ipc.IpcToNioAdapter; import org.apache.ignite.internal.util.ipc.shmem.IpcOutOfSystemResourcesException; import org.apache.ignite.internal.util.ipc.shmem.IpcSharedMemoryServerEndpoint; import org.apache.ignite.internal.util.lang.IgniteInClosure2X; import org.apache.ignite.internal.util.nio.GridCommunicationClient; import org.apache.ignite.internal.util.nio.GridConnectionBytesVerifyFilter; import org.apache.ignite.internal.util.nio.GridDirectParser; import org.apache.ignite.internal.util.nio.GridNioCodecFilter; import org.apache.ignite.internal.util.nio.GridNioFilter; import org.apache.ignite.internal.util.nio.GridNioMessageReaderFactory; import org.apache.ignite.internal.util.nio.GridNioMessageTracker; import org.apache.ignite.internal.util.nio.GridNioMessageWriterFactory; import org.apache.ignite.internal.util.nio.GridNioMetricsListener; import 
org.apache.ignite.internal.util.nio.GridNioRecoveryDescriptor; import org.apache.ignite.internal.util.nio.GridNioServer; import org.apache.ignite.internal.util.nio.GridNioServerListener; import org.apache.ignite.internal.util.nio.GridNioServerListenerAdapter; import org.apache.ignite.internal.util.nio.GridNioSession; import org.apache.ignite.internal.util.nio.GridNioSessionMetaKey; import org.apache.ignite.internal.util.nio.GridShmemCommunicationClient; import org.apache.ignite.internal.util.nio.GridTcpNioCommunicationClient; import org.apache.ignite.internal.util.nio.ssl.BlockingSslHandler; import org.apache.ignite.internal.util.nio.ssl.GridNioSslFilter; import org.apache.ignite.internal.util.nio.ssl.GridSslMeta; import org.apache.ignite.internal.util.typedef.CI2; import org.apache.ignite.internal.util.typedef.F; import org.apache.ignite.internal.util.typedef.X; import org.apache.ignite.internal.util.typedef.internal.CU; import org.apache.ignite.internal.util.typedef.internal.LT; import org.apache.ignite.internal.util.typedef.internal.S; import org.apache.ignite.internal.util.typedef.internal.U; import org.apache.ignite.internal.util.worker.GridWorker; import org.apache.ignite.lang.IgniteBiInClosure; import org.apache.ignite.lang.IgniteBiTuple; import org.apache.ignite.lang.IgniteFuture; import org.apache.ignite.lang.IgniteInClosure; import org.apache.ignite.lang.IgnitePredicate; import org.apache.ignite.lang.IgniteRunnable; import org.apache.ignite.lang.IgniteUuid; import org.apache.ignite.plugin.extensions.communication.Message; import org.apache.ignite.plugin.extensions.communication.MessageFactory; import org.apache.ignite.plugin.extensions.communication.MessageFormatter; import org.apache.ignite.plugin.extensions.communication.MessageReader; import org.apache.ignite.plugin.extensions.communication.MessageWriter; import org.apache.ignite.resources.IgniteInstanceResource; import org.apache.ignite.resources.LoggerResource; import 
org.apache.ignite.spi.IgnitePortProtocol; import org.apache.ignite.spi.IgniteSpiAdapter; import org.apache.ignite.spi.IgniteSpiConfiguration; import org.apache.ignite.spi.IgniteSpiConsistencyChecked; import org.apache.ignite.spi.IgniteSpiContext; import org.apache.ignite.spi.IgniteSpiException; import org.apache.ignite.spi.IgniteSpiMBeanAdapter; import org.apache.ignite.spi.IgniteSpiMultipleInstancesSupport; import org.apache.ignite.spi.IgniteSpiOperationTimeoutException; import org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper; import org.apache.ignite.spi.IgniteSpiThread; import org.apache.ignite.spi.IgniteSpiTimeoutObject; import org.apache.ignite.spi.communication.CommunicationListener; import org.apache.ignite.spi.communication.CommunicationSpi; import org.apache.ignite.thread.IgniteThread; import org.jetbrains.annotations.Nullable; import org.jsr166.ConcurrentLinkedDeque8; import org.jsr166.LongAdder8; import static org.apache.ignite.events.EventType.EVT_NODE_FAILED; import static org.apache.ignite.events.EventType.EVT_NODE_LEFT; import static org.apache.ignite.internal.util.nio.GridNioSessionMetaKey.SSL_META; /** * <tt>TcpCommunicationSpi</tt> is default communication SPI which uses * TCP/IP protocol and Java NIO to communicate with other nodes. * <p> * To enable communication with other nodes, this SPI adds {@link #ATTR_ADDRS} * and {@link #ATTR_PORT} local node attributes (see {@link ClusterNode#attributes()}. * <p> * At startup, this SPI tries to start listening to local port specified by * {@link #setLocalPort(int)} method. If local port is occupied, then SPI will * automatically increment the port number until it can successfully bind for * listening. {@link #setLocalPortRange(int)} configuration parameter controls * maximum number of ports that SPI will try before it fails. Port range comes * very handy when starting multiple grid nodes on the same machine or even * in the same VM. 
 * In this case all nodes can be brought up without a single
 * change in configuration.
 * <p>
 * This SPI caches connections to remote nodes so it does not have to reconnect every
 * time a message is sent. By default, idle connections are kept active for
 * {@link #DFLT_IDLE_CONN_TIMEOUT} period and then are closed. Use
 * {@link #setIdleConnectionTimeout(long)} configuration parameter to configure
 * your own idle connection timeout.
 * <h1 class="header">Failure Detection</h1>
 * Configuration defaults (see Configuration section below and
 * {@link IgniteConfiguration#getFailureDetectionTimeout()} for details) are chosen so that the
 * communication SPI works reliably on most hardware and virtual deployments, at the cost of a longer
 * failure detection time.
 * <p>
 * If failure detection needs to be tuned, it is highly recommended to do so using
 * {@link IgniteConfiguration#setFailureDetectionTimeout(long)}. This failure timeout automatically controls the
 * following parameters: {@link #getConnectTimeout()}, {@link #getMaxConnectTimeout()},
 * {@link #getReconnectCount()}. If any of those parameters is set explicitly, then the failure timeout setting will be
 * ignored.
 * <p>
 * If advanced failure detection settings are required and
 * {@link IgniteConfiguration#getFailureDetectionTimeout()} is unsuitable, then various {@code TcpCommunicationSpi}
 * configuration parameters may be used.
 * <h1 class="header">Configuration</h1>
 * <h2 class="header">Mandatory</h2>
 * This SPI has no mandatory configuration parameters.
 * <h2 class="header">Optional</h2>
 * The following configuration parameters are optional:
 * <ul>
 * <li>Node local IP address (see {@link #setLocalAddress(String)})</li>
 * <li>Node local port number (see {@link #setLocalPort(int)})</li>
 * <li>Local port range (see {@link #setLocalPortRange(int)})</li>
 * <li>Connections per node (see {@link #setConnectionsPerNode(int)})</li>
 * <li>Idle connection timeout (see {@link #setIdleConnectionTimeout(long)})</li>
 * <li>Direct or heap buffer allocation (see {@link #setDirectBuffer(boolean)})</li>
 * <li>Direct or heap buffer allocation for sending (see {@link #setDirectSendBuffer(boolean)})</li>
 * <li>Count of selectors and selector threads for NIO server (see {@link #setSelectorsCount(int)})</li>
 * <li>{@code TCP_NODELAY} socket option for sockets (see {@link #setTcpNoDelay(boolean)})</li>
 * <li>Message queue limit (see {@link #setMessageQueueLimit(int)})</li>
 * <li>Connect timeout (see {@link #setConnectTimeout(long)})</li>
 * <li>Maximum connect timeout (see {@link #setMaxConnectTimeout(long)})</li>
 * <li>Reconnect attempts count (see {@link #setReconnectCount(int)})</li>
 * <li>Socket receive buffer size (see {@link #setSocketReceiveBuffer(int)})</li>
 * <li>Socket send buffer size (see {@link #setSocketSendBuffer(int)})</li>
 * <li>Socket write timeout (see {@link #setSocketWriteTimeout(long)})</li>
 * <li>Number of received messages after which acknowledgment is sent (see {@link #setAckSendThreshold(int)})</li>
 * <li>Maximum number of unacknowledged messages (see {@link #setUnacknowledgedMessagesBufferSize(int)})</li>
 * </ul>
 * <h2 class="header">Java Example</h2>
 * TcpCommunicationSpi is used by default and should be explicitly configured
 * only if some SPI configuration parameters need to be overridden.
 * <pre name="code" class="java">
 * TcpCommunicationSpi commSpi = new TcpCommunicationSpi();
 *
 * // Override local port.
* commSpi.setLocalPort(4321); * * IgniteConfiguration cfg = new IgniteConfiguration(); * * // Override default communication SPI. * cfg.setCommunicationSpi(commSpi); * * // Start grid. * Ignition.start(cfg); * </pre> * <h2 class="header">Spring Example</h2> * TcpCommunicationSpi can be configured from Spring XML configuration file: * <pre name="code" class="xml"> * <bean id="grid.custom.cfg" class="org.apache.ignite.configuration.IgniteConfiguration" singleton="true"> * ... * <property name="communicationSpi"> * <bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi"> * <!-- Override local port. --> * <property name="localPort" value="4321"/> * </bean> * </property> * ... * </bean> * </pre> * <p> * <img src="http://ignite.apache.org/images/spring-small.png"> * <br> * For information about Spring framework visit <a href="http://www.springframework.org/">www.springframework.org</a> * @see CommunicationSpi */ @IgniteSpiMultipleInstancesSupport(true) @IgniteSpiConsistencyChecked(optional = false) public class TcpCommunicationSpi extends IgniteSpiAdapter implements CommunicationSpi<Message> { /** IPC error message. */ public static final String OUT_OF_RESOURCES_TCP_MSG = "Failed to allocate shared memory segment " + "(switching to TCP, may be slower)."; /** Node attribute that is mapped to node IP addresses (value is <tt>comm.tcp.addrs</tt>). */ public static final String ATTR_ADDRS = "comm.tcp.addrs"; /** Node attribute that is mapped to node host names (value is <tt>comm.tcp.host.names</tt>). */ public static final String ATTR_HOST_NAMES = "comm.tcp.host.names"; /** Node attribute that is mapped to node port number (value is <tt>comm.tcp.port</tt>). */ public static final String ATTR_PORT = "comm.tcp.port"; /** Node attribute that is mapped to node port number (value is <tt>comm.shmem.tcp.port</tt>). 
*/
    public static final String ATTR_SHMEM_PORT = "comm.shmem.tcp.port";

    /** Node attribute that is mapped to node's external addresses (value is <tt>comm.tcp.ext-addrs</tt>). */
    public static final String ATTR_EXT_ADDRS = "comm.tcp.ext-addrs";

    /**
     * Node attribute name for the paired-connection setting (value is <tt>comm.tcp.pairedConnection</tt>).
     * NOTE(review): presumably advertises whether the node uses paired connections - confirm.
     */
    public static final String ATTR_PAIRED_CONN = "comm.tcp.pairedConnection";

    /** Default port which node sets listener to (value is <tt>47100</tt>). */
    public static final int DFLT_PORT = 47100;

    /**
     * Default port which node sets listener for shared memory connections (value is <tt>-1</tt>).
     * NOTE(review): a negative value presumably means the shared memory endpoint is not started
     * by default - confirm against the code that reads this port.
     */
    public static final int DFLT_SHMEM_PORT = -1;

    /** Default idle connection timeout (value is <tt>30000</tt>ms). */
    public static final long DFLT_IDLE_CONN_TIMEOUT = 30000;

    /** Default socket send and receive buffer size (value is <tt>32768</tt>). */
    public static final int DFLT_SOCK_BUF_SIZE = 32 * 1024;

    /** Default connection timeout (value is <tt>5000</tt>ms). */
    public static final long DFLT_CONN_TIMEOUT = 5000;

    /** Default maximum connection timeout (value is <tt>600,000</tt>ms). */
    public static final long DFLT_MAX_CONN_TIMEOUT = 10 * 60 * 1000;

    /** Default reconnect attempts count (value is <tt>10</tt>). */
    public static final int DFLT_RECONNECT_CNT = 10;

    /**
     * Default message queue limit per connection (for incoming and outgoing messages).
     * Equals {@link GridNioServer#DFLT_SEND_QUEUE_LIMIT}.
     */
    public static final int DFLT_MSG_QUEUE_LIMIT = GridNioServer.DFLT_SEND_QUEUE_LIMIT;

    /**
     * Default count of selectors for TCP server equals to
     * {@code "Math.max(4, Runtime.getRuntime().availableProcessors() / 2)"}.
     */
    public static final int DFLT_SELECTORS_CNT = Math.max(4, Runtime.getRuntime().availableProcessors() / 2);

    /** Session meta key under which the connection key of a session is stored. */
    private static final int CONN_IDX_META = GridNioSessionMetaKey.nextUniqueKey();

    /** Session meta key under which the message tracker of a session is stored. */
    private static final int TRACKER_META = GridNioSessionMetaKey.nextUniqueKey();

    /**
     * Default local port range (value is <tt>100</tt>).
     * See {@link #setLocalPortRange(int)} for details.
*/
    public static final int DFLT_PORT_RANGE = 100;

    /** Default value for {@code TCP_NODELAY} socket option (value is <tt>true</tt>). */
    public static final boolean DFLT_TCP_NODELAY = true;

    /** Default received messages threshold for sending ack (value is <tt>32</tt>). */
    public static final int DFLT_ACK_SND_THRESHOLD = 32;

    /** Default socket write timeout (value is <tt>2000</tt>). */
    public static final long DFLT_SOCK_WRITE_TIMEOUT = 2000;

    /** Default connections per node (value is <tt>1</tt>). */
    public static final int DFLT_CONN_PER_NODE = 1;

    /** No-op runnable, passed to the listener as the message callback when no message tracker is used. */
    private static final IgniteRunnable NOOP = new IgniteRunnable() {
        @Override public void run() {
            // No-op.
        }
    };

    /** Node ID message type. */
    public static final short NODE_ID_MSG_TYPE = -1;

    /** Recovery last received ID message type. */
    public static final short RECOVERY_LAST_ID_MSG_TYPE = -2;

    /** Handshake message type. */
    public static final short HANDSHAKE_MSG_TYPE = -3;

    /** Gateway that must be entered before the first message of an accepted session is processed. */
    private ConnectGateway connectGate;

    /** Connection policy. NOTE(review): not used in the visible part of the file - confirm its role. */
    private ConnectionPolicy connPlc;

    /** Server listener. */
    private final GridNioServerListener<Message> srvLsnr =
        new GridNioServerListenerAdapter<Message>() {
        /**
         * Logs a warning about the write timeout and closes the session.
         *
         * @param ses Session whose write timed out.
         */
        @Override public void onSessionWriteTimeout(GridNioSession ses) {
            LT.warn(log,"Communication SPI session write timed out (consider increasing " +
                "'socketWriteTimeout' " + "configuration property) [remoteAddr=" + ses.remoteAddress() +
                ", writeTimeout=" + sockWriteTimeout + ']');

            if (log.isDebugEnabled())
                log.debug("Closing communication SPI session on write timeout [remoteAddr=" + ses.remoteAddress() +
                    ", writeTimeout=" + sockWriteTimeout + ']');

            ses.close();
        }

        /**
         * Logs the new connection. For an accepted (incoming) session, additionally sends the
         * local node ID message to the remote side; a send failure is logged and not rethrown.
         *
         * @param ses Connected session.
         */
        @Override public void onConnected(GridNioSession ses) {
            if (ses.accepted()) {
                if (log.isInfoEnabled())
                    log.info("Accepted incoming communication connection [locAddr=" + ses.localAddress() +
                        ", rmtAddr=" + ses.remoteAddress() + ']');

                if (log.isDebugEnabled())
                    log.debug("Sending local node ID to newly accepted session: " + ses);

                try {
                    ses.sendNoFuture(nodeIdMessage(), null);
                }
                catch (IgniteCheckedException e) {
                    U.error(log, "Failed to send message: " + e, e);
                }
            }
            else {
                if (log.isInfoEnabled())
                    log.info("Established outgoing communication connection [locAddr=" + ses.localAddress() +
                        ", rmtAddr=" + ses.remoteAddress() + ']');
            }
        }

        /**
         * Cleans up after a closed session. Does nothing if the session never got a connection
         * key (i.e. no first message was processed for it).
         *
         * @param ses Disconnected session.
         * @param e Exception that caused the disconnect, if any (not used here).
         */
        @Override public void onDisconnected(GridNioSession ses, @Nullable Exception e) {
            ConnectionKey connId = ses.meta(CONN_IDX_META);

            if (connId != null) {
                UUID id = connId.nodeId();

                GridCommunicationClient[] nodeClients = clients.get(id);

                if (nodeClients != null) {
                    // Close and unregister only the TCP client that is bound to this very session.
                    for (GridCommunicationClient client : nodeClients) {
                        if (client instanceof GridTcpNioCommunicationClient &&
                            ((GridTcpNioCommunicationClient)client).session() == ses) {
                            client.close();

                            removeNodeClient(id, client);
                        }
                    }
                }

                if (!stopping) {
                    GridNioRecoveryDescriptor outDesc = ses.outRecoveryDescriptor();

                    if (outDesc != null) {
                        if (outDesc.nodeAlive(getSpiContext().node(id))) {
                            // Node is still alive: hand unacknowledged messages over to the
                            // communication worker so that it can process the disconnect.
                            if (!outDesc.messagesRequests().isEmpty()) {
                                if (log.isDebugEnabled())
                                    log.debug("Session was closed but there are unacknowledged messages, " +
                                        "will try to reconnect [rmtNode=" + outDesc.node().id() + ']');

                                DisconnectedSessionInfo disconnectData =
                                    new DisconnectedSessionInfo(outDesc, connId.connectionIndex());

                                commWorker.addProcessDisconnectRequest(disconnectData);
                            }
                        }
                        else
                            outDesc.onNodeLeft();
                    }
                }

                // Read the volatile listener once so that the null check and the call see the same value.
                CommunicationListener<Message> lsnr0 = lsnr;

                if (lsnr0 != null)
                    lsnr0.onDisconnected(id);
            }
        }

        /**
         * @param ses Session.
         * @param msg Message.
*/
        private void onFirstMessage(GridNioSession ses, Message msg) {
            // Handles the first message of an accepted session: resolves the sender, stores the
            // connection key in the session meta and then either accepts or rejects the handshake.
            UUID sndId;

            ConnectionKey connKey;

            if (msg instanceof NodeIdMessage) {
                sndId = U.bytesToUuid(((NodeIdMessage) msg).nodeIdBytes, 0);
                connKey = new ConnectionKey(sndId, 0, -1);
            }
            else {
                assert msg instanceof HandshakeMessage : msg;

                HandshakeMessage msg0 = (HandshakeMessage)msg;

                sndId = ((HandshakeMessage)msg).nodeId();
                connKey = new ConnectionKey(sndId, msg0.connectionIndex(), msg0.connectCount());
            }

            if (log.isDebugEnabled())
                log.debug("Remote node ID received: " + sndId);

            final ClusterNode rmtNode = getSpiContext().node(sndId);

            if (rmtNode == null) {
                if (log.isDebugEnabled())
                    log.debug("Close incoming connection, unknown node: " + sndId);

                ses.close();

                return;
            }

            // From now on onMessage() treats this session as initialized.
            final ConnectionKey old = ses.addMeta(CONN_IDX_META, connKey);

            assert old == null;

            ClusterNode locNode = getSpiContext().localNode();

            // NOTE(review): a session without a remote address is presumably a shared memory
            // session, for which no recovery handshake is performed - confirm.
            if (ses.remoteAddress() == null)
                return;

            assert msg instanceof HandshakeMessage : msg;

            HandshakeMessage msg0 = (HandshakeMessage)msg;

            if (log.isDebugEnabled())
                log.debug("Received handshake message [locNodeId=" + locNode.id() +
                    ", rmtNodeId=" + sndId +
                    ", msg=" + msg0 + ']');

            if (usePairedConnections(rmtNode)) {
                final GridNioRecoveryDescriptor recoveryDesc = inRecoveryDescriptor(rmtNode, connKey);

                ConnectClosureNew c = new ConnectClosureNew(ses, recoveryDesc, rmtNode);

                boolean reserve = recoveryDesc.tryReserve(msg0.connectCount(), c);

                if (reserve)
                    connectedNew(recoveryDesc, ses, true);
                else {
                    if (c.failed) {
                        // Reject this handshake (received count -1) and close older accepted sessions
                        // of the same node and connection index that have a lower connect count.
                        ses.send(new RecoveryLastReceivedMessage(-1));

                        for (GridNioSession ses0 : nioSrvr.sessions()) {
                            ConnectionKey key0 = ses0.meta(CONN_IDX_META);

                            if (ses0.accepted() && key0 != null &&
                                key0.nodeId().equals(connKey.nodeId()) &&
                                key0.connectionIndex() == connKey.connectionIndex() &&
                                key0.connectCount() < connKey.connectCount())
                                ses0.close();
                        }
                    }
                }
            }
            else {
                assert connKey.connectionIndex() >= 0 : connKey;

                GridCommunicationClient[] curClients = clients.get(sndId);

                GridCommunicationClient oldClient =
                    curClients != null && connKey.connectionIndex() < curClients.length ?
                        curClients[connKey.connectionIndex()] :
                        null;

                boolean hasShmemClient = false;

                if (oldClient != null) {
                    if (oldClient instanceof GridTcpNioCommunicationClient) {
                        if (log.isDebugEnabled())
                            log.debug("Received incoming connection when already connected " +
                                "to this node, rejecting [locNode=" + locNode.id() +
                                ", rmtNode=" + sndId + ']');

                        ses.send(new RecoveryLastReceivedMessage(-1));

                        return;
                    }
                    else {
                        assert oldClient instanceof GridShmemCommunicationClient;

                        hasShmemClient = true;
                    }
                }

                GridFutureAdapter<GridCommunicationClient> fut = new GridFutureAdapter<>();

                // Only one connect attempt per connection key may be in progress at a time.
                GridFutureAdapter<GridCommunicationClient> oldFut = clientFuts.putIfAbsent(connKey, fut);

                final GridNioRecoveryDescriptor recoveryDesc = inRecoveryDescriptor(rmtNode, connKey);

                if (oldFut == null) {
                    // Re-check the registered client now that the future is published.
                    curClients = clients.get(sndId);

                    oldClient = curClients != null && connKey.connectionIndex() < curClients.length ?
                        curClients[connKey.connectionIndex()] : null;

                    if (oldClient != null) {
                        if (oldClient instanceof GridTcpNioCommunicationClient) {
                            assert oldClient.connectionIndex() == connKey.connectionIndex() : oldClient;

                            if (log.isDebugEnabled())
                                log.debug("Received incoming connection when already connected " +
                                    "to this node, rejecting [locNode=" + locNode.id() +
                                    ", rmtNode=" + sndId + ']');

                            ses.send(new RecoveryLastReceivedMessage(-1));

                            // NOTE(review): the future is completed here but, unlike the other paths,
                            // it is not removed from clientFuts - verify that this is intended.
                            fut.onDone(oldClient);

                            return;
                        }
                        else {
                            assert oldClient instanceof GridShmemCommunicationClient;

                            hasShmemClient = true;
                        }
                    }

                    boolean reserved = recoveryDesc.tryReserve(msg0.connectCount(),
                        new ConnectClosure(ses, recoveryDesc, rmtNode, connKey, msg0, !hasShmemClient, fut));

                    if (log.isDebugEnabled())
                        log.debug("Received incoming connection from remote node " +
                            "[rmtNode=" + rmtNode.id() + ", reserved=" + reserved +
                            ", recovery=" + recoveryDesc + ']');

                    if (reserved) {
                        try {
                            GridTcpNioCommunicationClient client =
                                connected(recoveryDesc, ses, rmtNode, msg0.received(), true, !hasShmemClient);

                            fut.onDone(client);
                        }
                        finally {
                            clientFuts.remove(connKey, fut);
                        }
                    }
                }
                else {
                    // Another connect attempt for this key is already registered. If it is a local
                    // outgoing attempt and the local node order is lower, the incoming one is rejected.
                    if (oldFut instanceof ConnectFuture && locNode.order() < rmtNode.order()) {
                        if (log.isDebugEnabled()) {
                            log.debug("Received incoming connection from remote node while " +
                                "connecting to this node, rejecting [locNode=" + locNode.id() +
                                ", locNodeOrder=" + locNode.order() + ", rmtNode=" + rmtNode.id() +
                                ", rmtNodeOrder=" + rmtNode.order() + ']');
                        }

                        ses.send(new RecoveryLastReceivedMessage(-1));
                    }
                    else {
                        // The code below causes a race condition between shmem and TCP (see IGNITE-1294)
                        // NOTE(review): 'fut' was not put into clientFuts on this path (oldFut != null),
                        // so the closure's clientFuts.remove(connKey, fut) cannot match it - confirm.
                        boolean reserved = recoveryDesc.tryReserve(msg0.connectCount(),
                            new ConnectClosure(ses, recoveryDesc, rmtNode, connKey, msg0, !hasShmemClient, fut));

                        if (reserved)
                            connected(recoveryDesc, ses, rmtNode, msg0.received(), true, !hasShmemClient);
                    }
                }
            }
        }

        /**
         * Dispatches an incoming message. The first message of a session (no connection key in
         * the session meta yet) is routed to {@code onFirstMessage} under the connect gateway;
         * any later message updates recovery state and is passed to the SPI listener.
         *
         * @param ses Session the message was received on.
         * @param msg Received message.
         */
        @Override public void onMessage(GridNioSession ses, Message msg) {
            ConnectionKey connKey = ses.meta(CONN_IDX_META);

            if (connKey == null) {
                assert ses.accepted() : ses;

                if (!connectGate.tryEnter()) {
                    if (log.isDebugEnabled())
                        log.debug("Close incoming connection, failed to enter gateway.");

                    ses.close();

                    return;
                }

                try {
                    onFirstMessage(ses, msg);
                }
                finally {
                    connectGate.leave();
                }
            }
            else {
                rcvdMsgsCnt.increment();

                if (msg instanceof RecoveryLastReceivedMessage) {
                    GridNioRecoveryDescriptor recovery = ses.outRecoveryDescriptor();

                    if (recovery != null) {
                        RecoveryLastReceivedMessage msg0 = (RecoveryLastReceivedMessage)msg;

                        if (log.isDebugEnabled()) {
                            log.debug("Received recovery acknowledgement [rmtNode=" + connKey.nodeId() +
                                ", connIdx=" + connKey.connectionIndex() +
                                ", rcvCnt=" + msg0.received() + ']');
                        }

                        recovery.ackReceived(msg0.received());

                        // An acknowledgement consumed by the recovery descriptor is not passed to the listener.
                        return;
                    }
                }
                else {
                    GridNioRecoveryDescriptor recovery = ses.inRecoveryDescriptor();

                    if (recovery != null) {
                        long rcvCnt = recovery.onReceived();

                        // Acknowledge every ackSndThreshold-th received message.
                        if (rcvCnt % ackSndThreshold == 0) {
                            if (log.isDebugEnabled()) {
                                log.debug("Send recovery acknowledgement [rmtNode=" + connKey.nodeId() +
                                    ", connIdx=" + connKey.connectionIndex() +
                                    ", rcvCnt=" + rcvCnt + ']');
                            }

                            ses.systemMessage(new RecoveryLastReceivedMessage(rcvCnt));

                            recovery.lastAcknowledged(rcvCnt);
                        }
                    }
                }

                IgniteRunnable c;

                if (msgQueueLimit > 0) {
                    // Lazily create the per-session message tracker; it doubles as the message callback.
                    GridNioMessageTracker tracker = ses.meta(TRACKER_META);

                    if (tracker == null) {
                        GridNioMessageTracker old = ses.addMeta(TRACKER_META, tracker =
                            new GridNioMessageTracker(ses, msgQueueLimit));

                        assert old == null;
                    }

                    tracker.onMessageReceived();

                    c = tracker;
                }
                else
                    c = NOOP;

                notifyListener(connKey.nodeId(), msg, c);
            }
        }

        /**
         * Completes an accepted (non-paired) connection: records the handshake in the recovery
         * descriptor, attaches the descriptor to the session in both directions, asks the NIO
         * server to resend for the session, optionally replies with the received count and
         * optionally registers a NIO client for the node.
         *
         * @param recovery Recovery descriptor.
         * @param ses Session.
         * @param node Node.
         * @param rcvCnt Number of received messages.
         * @param sndRes If {@code true} sends response for recovery handshake.
         * @param createClient If {@code true} creates NIO communication client.
         * @return Client, or {@code null} if {@code createClient} is {@code false}.
         */
        private GridTcpNioCommunicationClient connected(
            GridNioRecoveryDescriptor recovery,
            GridNioSession ses,
            ClusterNode node,
            long rcvCnt,
            boolean sndRes,
            boolean createClient) {
            ConnectionKey connKey = ses.meta(CONN_IDX_META);

            assert connKey != null && connKey.connectionIndex() >= 0 : connKey;
            assert !usePairedConnections(node);

            recovery.onHandshake(rcvCnt);

            ses.inRecoveryDescriptor(recovery);
            ses.outRecoveryDescriptor(recovery);

            nioSrvr.resend(ses);

            try {
                if (sndRes)
                    nioSrvr.sendSystem(ses, new RecoveryLastReceivedMessage(recovery.received()));
            }
            catch (IgniteCheckedException e) {
                // A failed response is only logged; the connection is still marked as connected below.
                U.error(log, "Failed to send message: " + e, e);
            }

            recovery.onConnected();

            GridTcpNioCommunicationClient client = null;

            if (createClient) {
                client = new GridTcpNioCommunicationClient(connKey.connectionIndex(), ses, log);

                addNodeClient(node, connKey.connectionIndex(), client);
            }

            return client;
        }

        /**
         * @param recovery Recovery descriptor.
         * @param ses Session.
         * @param sndRes If {@code true} sends response for recovery handshake.
*/
        private void connectedNew(
            GridNioRecoveryDescriptor recovery,
            GridNioSession ses,
            boolean sndRes) {
            // Paired-connection counterpart of connected(): only the inbound recovery descriptor
            // is attached to the session and no communication client is created.
            try {
                ses.inRecoveryDescriptor(recovery);

                if (sndRes)
                    nioSrvr.sendSystem(ses, new RecoveryLastReceivedMessage(recovery.received()));

                // Not reached if sendSystem() throws: the failure is logged below instead.
                recovery.onConnected();
            }
            catch (IgniteCheckedException e) {
                U.error(log, "Failed to send message: " + e, e);
            }
        }

        /**
         * Closure passed to {@code GridNioRecoveryDescriptor.tryReserve(..)} for paired connections.
         * NOTE(review): presumably invoked by the descriptor when a reservation that could not be
         * granted immediately is resolved - confirm against {@code GridNioRecoveryDescriptor}.
         */
        class ConnectClosureNew implements IgniteInClosure<Boolean> {
            /** */
            private static final long serialVersionUID = 0L;

            /** Incoming session. */
            private final GridNioSession ses;

            /** Recovery descriptor being reserved. */
            private final GridNioRecoveryDescriptor recoveryDesc;

            /** Remote node. */
            private final ClusterNode rmtNode;

            /** Set to {@code true} when the closure was applied with {@code success == false}. */
            private boolean failed;

            /**
             * @param ses Incoming session.
             * @param recoveryDesc Recovery descriptor.
             * @param rmtNode Remote node.
             */
            ConnectClosureNew(GridNioSession ses,
                GridNioRecoveryDescriptor recoveryDesc,
                ClusterNode rmtNode) {
                this.ses = ses;
                this.recoveryDesc = recoveryDesc;
                this.rmtNode = rmtNode;
            }

            /** {@inheritDoc} */
            @Override public void apply(Boolean success) {
                try {
                    failed = !success;

                    if (success) {
                        // Finish the connection only after the handshake response has been sent.
                        IgniteInClosure<IgniteInternalFuture<?>> lsnr = new IgniteInClosure<IgniteInternalFuture<?>>() {
                            @Override public void apply(IgniteInternalFuture<?> msgFut) {
                                try {
                                    msgFut.get();

                                    connectedNew(recoveryDesc, ses, false);
                                }
                                catch (IgniteCheckedException e) {
                                    if (log.isDebugEnabled())
                                        log.debug("Failed to send recovery handshake " +
                                            "[rmtNode=" + rmtNode.id() + ", err=" + e + ']');

                                    recoveryDesc.release();
                                }
                            }
                        };

                        nioSrvr.sendSystem(ses, new RecoveryLastReceivedMessage(recoveryDesc.received()), lsnr);
                    }
                    else
                        // Reservation failed: reply with received count -1.
                        nioSrvr.sendSystem(ses, new RecoveryLastReceivedMessage(-1));
                }
                catch (IgniteCheckedException e) {
                    U.error(log, "Failed to send message: " + e, e);
                }
            }
        }

        /**
         * Closure passed to {@code GridNioRecoveryDescriptor.tryReserve(..)} for non-paired connections.
         * On success it sends the handshake response and then completes the connection and the
         * connect future; on failure it completes the future without a client.
         */
        @SuppressWarnings("PackageVisibleInnerClass")
        class ConnectClosure implements IgniteInClosure<Boolean> {
            /** */
            private static final long serialVersionUID = 0L;

            /** Incoming session. */
            private final GridNioSession ses;

            /** Recovery descriptor being reserved. */
            private final GridNioRecoveryDescriptor recoveryDesc;

            /** Remote node. */
            private final ClusterNode rmtNode;

            /** Handshake message. */
            private final HandshakeMessage msg;

            /** Connect future. */
            private final GridFutureAdapter<GridCommunicationClient> fut;

            /** If {@code true} a NIO communication client is created on connect. */
            private final boolean createClient;

            /** Connection key. */
            private final ConnectionKey connKey;

            /**
             * @param ses Incoming session.
             * @param recoveryDesc Recovery descriptor.
             * @param rmtNode Remote node.
             * @param connKey Connection key.
             * @param msg Handshake message.
             * @param createClient If {@code true} creates NIO communication client.
             * @param fut Connect future.
             */
            ConnectClosure(GridNioSession ses,
                GridNioRecoveryDescriptor recoveryDesc,
                ClusterNode rmtNode,
                ConnectionKey connKey,
                HandshakeMessage msg,
                boolean createClient,
                GridFutureAdapter<GridCommunicationClient> fut) {
                this.ses = ses;
                this.recoveryDesc = recoveryDesc;
                this.rmtNode = rmtNode;
                this.connKey = connKey;
                this.msg = msg;
                this.createClient = createClient;
                this.fut = fut;
            }

            /** {@inheritDoc} */
            @Override public void apply(Boolean success) {
                if (success) {
                    try {
                        IgniteInClosure<IgniteInternalFuture<?>> lsnr = new IgniteInClosure<IgniteInternalFuture<?>>() {
                            @Override public void apply(IgniteInternalFuture<?> msgFut) {
                                try {
                                    msgFut.get();

                                    GridTcpNioCommunicationClient client =
                                        connected(recoveryDesc, ses, rmtNode, msg.received(), false, createClient);

                                    fut.onDone(client);
                                }
                                catch (IgniteCheckedException e) {
                                    if (log.isDebugEnabled())
                                        log.debug("Failed to send recovery handshake " +
                                            "[rmtNode=" + rmtNode.id() + ", err=" + e + ']');

                                    recoveryDesc.release();

                                    fut.onDone();
                                }
                                finally {
                                    clientFuts.remove(connKey, fut);
                                }
                            }
                        };

                        nioSrvr.sendSystem(ses, new RecoveryLastReceivedMessage(recoveryDesc.received()), lsnr);
                    }
                    catch (IgniteCheckedException e) {
                        // NOTE(review): on this path the future is neither completed nor removed
                        // from clientFuts - verify that callers cannot wait on it indefinitely.
                        U.error(log, "Failed to send message: " + e, e);
                    }
                }
                else {
                    try {
                        fut.onDone();
                    }
                    finally {
                        clientFuts.remove(connKey, fut);
                    }
                }
            }
        }
    };

    /** Logger. */
    @LoggerResource
    private IgniteLogger log;

    /** Local IP address. */
    private String locAddr;

    /** Complex variable that represents this node IP address.
*/
    private volatile InetAddress locHost;

    /** Local port which node uses. */
    private int locPort = DFLT_PORT;

    /** Local port range. */
    private int locPortRange = DFLT_PORT_RANGE;

    /** Local port which node uses to accept shared memory connections. */
    private int shmemPort = DFLT_SHMEM_PORT;

    /** Allocate direct buffer or heap buffer. */
    private boolean directBuf = true;

    /** Allocate direct buffer or heap buffer for sending. */
    private boolean directSndBuf;

    /** Idle connection timeout. */
    private long idleConnTimeout = DFLT_IDLE_CONN_TIMEOUT;

    /** Connect timeout. */
    private long connTimeout = DFLT_CONN_TIMEOUT;

    /** Maximum connect timeout. */
    private long maxConnTimeout = DFLT_MAX_CONN_TIMEOUT;

    /** Reconnect attempts count. */
    @SuppressWarnings({"FieldAccessedSynchronizedAndUnsynchronized"})
    private int reconCnt = DFLT_RECONNECT_CNT;

    /** Socket send buffer. */
    private int sockSndBuf = DFLT_SOCK_BUF_SIZE;

    /** Socket receive buffer. */
    private int sockRcvBuf = DFLT_SOCK_BUF_SIZE;

    /** Message queue limit; a message tracker is used per session only when it is positive. */
    private int msgQueueLimit = DFLT_MSG_QUEUE_LIMIT;

    /** Slow client queue limit. */
    private int slowClientQueueLimit;

    /** NIO server. */
    private GridNioServer<Message> nioSrvr;

    /** Shared memory server. */
    private IpcSharedMemoryServerEndpoint shmemSrv;

    /** Paired connections flag. NOTE(review): exact semantics are defined outside this part of the file. */
    private boolean usePairedConnections;

    /** Connections per node. */
    private int connectionsPerNode = DFLT_CONN_PER_NODE;

    /** {@code TCP_NODELAY} option value for created sockets. */
    private boolean tcpNoDelay = DFLT_TCP_NODELAY;

    /** Number of received messages after which acknowledgment is sent. */
    private int ackSndThreshold = DFLT_ACK_SND_THRESHOLD;

    /** Maximum number of unacknowledged messages. */
    private int unackedMsgsBufSize;

    /** Socket write timeout. */
    private long sockWriteTimeout = DFLT_SOCK_WRITE_TIMEOUT;

    /** Recovery and idle clients handler. */
    private CommunicationWorker commWorker;

    /** Shared memory accept worker. */
    private ShmemAcceptWorker shmemAcceptWorker;

    /** Shared memory workers.
*/
    private final Collection<ShmemWorker> shmemWorkers = new ConcurrentLinkedDeque8<>();

    /** Clients by remote node ID; array slots may be {@code null}. */
    private final ConcurrentMap<UUID, GridCommunicationClient[]> clients = GridConcurrentFactory.newMap();

    /** SPI listener. */
    private volatile CommunicationListener<Message> lsnr;

    /** Bound TCP port ({@code -1} while no NIO server is bound). */
    private int boundTcpPort = -1;

    /** Bound port for shared memory server ({@code -1} while no shared memory server is bound). */
    private int boundTcpShmemPort = -1;

    /** Count of selectors to use in TCP server. */
    private int selectorsCnt = DFLT_SELECTORS_CNT;

    /**
     * Defines how many non-blocking {@code selector.selectNow()} should be made before
     * falling into {@code selector.select(long)} in NIO server. Long value. Default is {@code 0}.
     * Can be set to {@code Long.MAX_VALUE} so selector threads will never block.
     * The initial value is read from the {@code IGNITE_SELECTOR_SPINS} system property.
     */
    private long selectorSpins = IgniteSystemProperties.getLong("IGNITE_SELECTOR_SPINS", 0L);

    /** Address resolver. */
    private AddressResolver addrRslvr;

    /** Received messages count. */
    private final LongAdder8 rcvdMsgsCnt = new LongAdder8();

    /** Sent messages count. */
    private final LongAdder8 sentMsgsCnt = new LongAdder8();

    /** Received bytes count. */
    private final LongAdder8 rcvdBytesCnt = new LongAdder8();

    /** Sent bytes count. */
    private final LongAdder8 sentBytesCnt = new LongAdder8();

    /** Context initialization latch. */
    private final CountDownLatch ctxInitLatch = new CountDownLatch(1);

    /** Stopping flag (set to {@code true} when SPI gets stopping signal). */
    private volatile boolean stopping;

    /** Metrics listener: accumulates sent and received byte counts into the corresponding counters. */
    private final GridNioMetricsListener metricsLsnr = new GridNioMetricsListener() {
        @Override public void onBytesSent(int bytesCnt) {
            sentBytesCnt.add(bytesCnt);
        }

        @Override public void onBytesReceived(int bytesCnt) {
            rcvdBytesCnt.add(bytesCnt);
        }
    };

    /** Client connect futures.
*/ private final ConcurrentMap<ConnectionKey, GridFutureAdapter<GridCommunicationClient>> clientFuts = GridConcurrentFactory.newMap(); /** */ private final ConcurrentMap<ConnectionKey, GridNioRecoveryDescriptor> recoveryDescs = GridConcurrentFactory.newMap(); /** */ private final ConcurrentMap<ConnectionKey, GridNioRecoveryDescriptor> outRecDescs = GridConcurrentFactory.newMap(); /** */ private final ConcurrentMap<ConnectionKey, GridNioRecoveryDescriptor> inRecDescs = GridConcurrentFactory.newMap(); /** Discovery listener. */ private final GridLocalEventListener discoLsnr = new GridLocalEventListener() { @Override public void onEvent(Event evt) { assert evt instanceof DiscoveryEvent : evt; assert evt.type() == EVT_NODE_LEFT || evt.type() == EVT_NODE_FAILED ; onNodeLeft(((DiscoveryEvent)evt).eventNode().id()); } }; /** * @return {@code True} if ssl enabled. */ private boolean isSslEnabled() { return ignite.configuration().getSslContextFactory() != null; } /** * Sets address resolver. * * @param addrRslvr Address resolver. * @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setAddressResolver(AddressResolver addrRslvr) { // Injection should not override value already set by Spring or user. if (this.addrRslvr == null) this.addrRslvr = addrRslvr; return this; } /** * Injects resources. * * @param ignite Ignite. */ @IgniteInstanceResource @Override protected void injectResources(Ignite ignite) { super.injectResources(ignite); if (ignite != null) { setAddressResolver(ignite.configuration().getAddressResolver()); setLocalAddress(ignite.configuration().getLocalHost()); } } /** * Sets local host address for socket binding. Note that one node could have * additional addresses beside the loopback one. This configuration * parameter is optional. * * @param locAddr IP address. Default value is any available local * IP address. * @return {@code this} for chaining. 
*/ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setLocalAddress(String locAddr) { // Injection should not override value already set by Spring or user. if (this.locAddr == null) this.locAddr = locAddr; return this; } /** * See {@link #setLocalAddress(String)}. * * @return Grid node IP address. */ public String getLocalAddress() { return locAddr; } /** * Sets local port for socket binding. * <p> * If not provided, default value is {@link #DFLT_PORT}. * * @param locPort Port number. * @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setLocalPort(int locPort) { this.locPort = locPort; return this; } /** * See {@link #setLocalPort(int)}. * * @return Port number. */ public int getLocalPort() { return locPort; } /** * Sets local port range for local host ports (value must greater than or equal to <tt>0</tt>). * If provided local port (see {@link #setLocalPort(int)}} is occupied, * implementation will try to increment the port number for as long as it is less than * initial value plus this range. * <p> * If port range value is <tt>0</tt>, then implementation will try bind only to the port provided by * {@link #setLocalPort(int)} method and fail if binding to this port did not succeed. * <p> * Local port range is very useful during development when more than one grid nodes need to run * on the same physical machine. * <p> * If not provided, default value is {@link #DFLT_PORT_RANGE}. * * @param locPortRange New local port range. * @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setLocalPortRange(int locPortRange) { this.locPortRange = locPortRange; return this; } /** * See {@link #setLocalPortRange(int)}. * * @return Local Port range. */ public int getLocalPortRange() { return locPortRange; } /** * See {@link #setUsePairedConnections(boolean)}. * * @return {@code true} to use paired connections and {@code false} otherwise. 
*/
    public boolean isUsePairedConnections() {
        return this.usePairedConnections;
    }

    /**
     * Set this to {@code true} if {@code TcpCommunicationSpi} should
     * maintain connection for outgoing and incoming messages separately.
     * In this case total number of connections between local and each remote node
     * is {@link #getConnectionsPerNode()} * 2.
     * <p>
     * Set this to {@code false} if each connection of {@link #getConnectionsPerNode()}
     * should be used for outgoing and incoming messages. In this case total number
     * of connections between local and each remote node is {@link #getConnectionsPerNode()}.
     * <p>
     * Default is {@code false}.
     *
     * @param usePairedConnections {@code true} to use paired connections and {@code false} otherwise.
     * @return {@code this} for chaining.
     * @see #getConnectionsPerNode()
     */
    public TcpCommunicationSpi setUsePairedConnections(boolean usePairedConnections) {
        this.usePairedConnections = usePairedConnections;

        return this;
    }

    /**
     * Sets number of connections to each remote node. If {@link #isUsePairedConnections()}
     * is {@code true} then number of connections is doubled and half is used for incoming and
     * half for outgoing messages.
     *
     * @param maxConnectionsPerNode Number of connections per node.
     * @return {@code this} for chaining.
     * @see #isUsePairedConnections()
     */
    public TcpCommunicationSpi setConnectionsPerNode(int maxConnectionsPerNode) {
        this.connectionsPerNode = maxConnectionsPerNode;

        return this;
    }

    /**
     * Returns the number of connections per node, see {@link #setConnectionsPerNode(int)}.
     *
     * @return Number of connections per node.
     */
    public int getConnectionsPerNode() {
        return this.connectionsPerNode;
    }

    /**
     * Sets local port to accept shared memory connections.
     * <p>
     * If set to {@code -1} shared memory communication will be disabled.
     * <p>
     * If not provided, default value is {@link #DFLT_SHMEM_PORT}.
     *
     * @param shmemPort Port number.
     * @return {@code this} for chaining.
*/ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setSharedMemoryPort(int shmemPort) { this.shmemPort = shmemPort; return this; } /** * See {@link #setSharedMemoryPort(int)}. * * @return Port number. */ public int getSharedMemoryPort() { return shmemPort; } /** * Sets maximum idle connection timeout upon which a connection * to client will be closed. * <p> * If not provided, default value is {@link #DFLT_IDLE_CONN_TIMEOUT}. * * @param idleConnTimeout Maximum idle connection time. * @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setIdleConnectionTimeout(long idleConnTimeout) { this.idleConnTimeout = idleConnTimeout; return this; } /** * See {@link #setIdleConnectionTimeout(long)}. * * @return Maximum idle connection time. */ public long getIdleConnectionTimeout() { return idleConnTimeout; } /** * See {@link #setSocketWriteTimeout(long)}. * * @return Socket write timeout for TCP connections. */ public long getSocketWriteTimeout() { return sockWriteTimeout; } /** * Sets socket write timeout for TCP connection. If message can not be written to * socket within this time then connection is closed and reconnect is attempted. * <p> * Default to {@link #DFLT_SOCK_WRITE_TIMEOUT}. * * @param sockWriteTimeout Socket write timeout for TCP connection. * @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setSocketWriteTimeout(long sockWriteTimeout) { this.sockWriteTimeout = sockWriteTimeout; return this; } /** * See {@link #setAckSendThreshold(int)}. * * @return Number of received messages after which acknowledgment is sent. */ public int getAckSendThreshold() { return ackSndThreshold; } /** * Sets number of received messages per connection to node after which acknowledgment message is sent. * <p> * Default to {@link #DFLT_ACK_SND_THRESHOLD}. * * @param ackSndThreshold Number of received messages after which acknowledgment is sent. 
* @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setAckSendThreshold(int ackSndThreshold) { this.ackSndThreshold = ackSndThreshold; return this; } /** * See {@link #setUnacknowledgedMessagesBufferSize(int)}. * * @return Maximum number of unacknowledged messages. */ public int getUnacknowledgedMessagesBufferSize() { return unackedMsgsBufSize; } /** * Sets maximum number of stored unacknowledged messages per connection to node. * If number of unacknowledged messages exceeds this number then connection to node is * closed and reconnect is attempted. * * @param unackedMsgsBufSize Maximum number of unacknowledged messages. * @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setUnacknowledgedMessagesBufferSize(int unackedMsgsBufSize) { this.unackedMsgsBufSize = unackedMsgsBufSize; return this; } /** * Sets connect timeout used when establishing connection * with remote nodes. * <p> * {@code 0} is interpreted as infinite timeout. * <p> * If not provided, default value is {@link #DFLT_CONN_TIMEOUT}. * <p> * When this property is explicitly set {@link IgniteConfiguration#getFailureDetectionTimeout()} is ignored. * * @param connTimeout Connect timeout. * @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setConnectTimeout(long connTimeout) { this.connTimeout = connTimeout; failureDetectionTimeoutEnabled(false); return this; } /** * See {@link #setConnectTimeout(long)}. * * @return Connect timeout. */public long getConnectTimeout() { return connTimeout; } /** * Sets maximum connect timeout. If handshake is not established within connect timeout, * then SPI tries to repeat handshake procedure with increased connect timeout. * Connect timeout can grow till maximum timeout value, * if maximum timeout value is reached then the handshake is considered as failed. 
* <p> * {@code 0} is interpreted as infinite timeout. * <p> * If not provided, default value is {@link #DFLT_MAX_CONN_TIMEOUT}. * <p> * When this property is explicitly set {@link IgniteConfiguration#getFailureDetectionTimeout()} is ignored. * * @param maxConnTimeout Maximum connect timeout. * @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setMaxConnectTimeout(long maxConnTimeout) { this.maxConnTimeout = maxConnTimeout; failureDetectionTimeoutEnabled(false); return this; } /** * Gets maximum connect timeout. * * @return Maximum connect timeout. */ public long getMaxConnectTimeout() { return maxConnTimeout; } /** * Sets maximum number of reconnect attempts used when establishing connection * with remote nodes. * <p> * If not provided, default value is {@link #DFLT_RECONNECT_CNT}. * <p> * When this property is explicitly set {@link IgniteConfiguration#getFailureDetectionTimeout()} is ignored. * * @param reconCnt Maximum number of reconnection attempts. * @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setReconnectCount(int reconCnt) { this.reconCnt = reconCnt; failureDetectionTimeoutEnabled(false); return this; } /** * Gets maximum number of reconnect attempts used when establishing connection * with remote nodes. * * @return Reconnects count. */ public int getReconnectCount() { return reconCnt; } /** * Sets flag to allocate direct or heap buffer in SPI. * If value is {@code true}, then SPI will use {@link ByteBuffer#allocateDirect(int)} call. * Otherwise, SPI will use {@link ByteBuffer#allocate(int)} call. * <p> * If not provided, default value is {@code true}. * * @param directBuf Flag indicates to allocate direct or heap buffer in SPI. * @return {@code this} for chaining. 
*/
    @IgniteSpiConfiguration(optional = true)
    public TcpCommunicationSpi setDirectBuffer(boolean directBuf) {
        this.directBuf = directBuf;

        return this;
    }

    /**
     * Tells whether direct or heap allocated buffer is used, see {@link #setDirectBuffer(boolean)}.
     *
     * @return Flag that indicates whether direct or heap allocated buffer is used.
     */
    public boolean isDirectBuffer() {
        return this.directBuf;
    }

    /**
     * Tells whether direct send buffer should be used, see {@link #setDirectSendBuffer(boolean)}.
     *
     * @return {@code True} if direct buffers should be used.
     */
    public boolean isDirectSendBuffer() {
        return this.directSndBuf;
    }

    /**
     * Sets whether to use direct buffer for sending.
     * <p>
     * If not provided default is {@code false}.
     *
     * @param directSndBuf {@code True} to use direct buffers for send.
     * @return {@code this} for chaining.
     */
    @IgniteSpiConfiguration(optional = true)
    public TcpCommunicationSpi setDirectSendBuffer(boolean directSndBuf) {
        this.directSndBuf = directSndBuf;

        return this;
    }

    /**
     * Sets the count of selectors to be used in TCP server.
     * <p>
     * If not provided, default value is {@link #DFLT_SELECTORS_CNT}.
     *
     * @param selectorsCnt Selectors count.
     * @return {@code this} for chaining.
     */
    @IgniteSpiConfiguration(optional = true)
    public TcpCommunicationSpi setSelectorsCount(int selectorsCnt) {
        this.selectorsCnt = selectorsCnt;

        return this;
    }

    /**
     * Returns the selectors count, see {@link #setSelectorsCount(int)}.
     *
     * @return Count of selectors in TCP server.
     */
    public int getSelectorsCount() {
        return this.selectorsCnt;
    }

    /**
     * Returns the selector spins value, see {@link #setSelectorSpins(long)}.
     *
     * @return Selector thread busy-loop iterations.
     */
    public long getSelectorSpins() {
        return this.selectorSpins;
    }

    /**
     * Defines how many non-blocking {@code selector.selectNow()} should be made before
     * falling into {@code selector.select(long)} in NIO server. Long value. Default is {@code 0}.
     * Can be set to {@code Long.MAX_VALUE} so selector threads will never block.
     *
     * @param selectorSpins Selector thread busy-loop iterations.
     * @return {@code this} for chaining.
*/ public TcpCommunicationSpi setSelectorSpins(long selectorSpins) { this.selectorSpins = selectorSpins; return this; } /** * Sets value for {@code TCP_NODELAY} socket option. Each * socket will be opened using provided value. * <p> * Setting this option to {@code true} disables Nagle's algorithm * for socket decreasing latency and delivery time for small messages. * <p> * For systems that work under heavy network load it is advisable to * set this value to {@code false}. * <p> * If not provided, default value is {@link #DFLT_TCP_NODELAY}. * * @param tcpNoDelay {@code True} to disable TCP delay. * @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setTcpNoDelay(boolean tcpNoDelay) { this.tcpNoDelay = tcpNoDelay; return this; } /** * Gets value for {@code TCP_NODELAY} socket option. * * @return {@code True} if TCP delay is disabled. */ public boolean isTcpNoDelay() { return tcpNoDelay; } /** * Sets receive buffer size for sockets created or accepted by this SPI. * <p> * If not provided, default is {@link #DFLT_SOCK_BUF_SIZE}. * * @param sockRcvBuf Socket receive buffer size. * @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setSocketReceiveBuffer(int sockRcvBuf) { this.sockRcvBuf = sockRcvBuf; return this; } /** * See {@link #setSocketReceiveBuffer(int)}. * * @return Socket receive buffer size. */ public int getSocketReceiveBuffer() { return sockRcvBuf; } /** * Sets send buffer size for sockets created or accepted by this SPI. * <p> * If not provided, default is {@link #DFLT_SOCK_BUF_SIZE}. * * @param sockSndBuf Socket send buffer size. * @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public TcpCommunicationSpi setSocketSendBuffer(int sockSndBuf) { this.sockSndBuf = sockSndBuf; return this; } /** * See {@link #setSocketSendBuffer(int)}. * * @return Socket send buffer size. 
*/
    public int getSocketSendBuffer() {
        return this.sockSndBuf;
    }

    /**
     * Sets message queue limit for incoming and outgoing messages.
     * <p>
     * When set to positive number send queue is limited to the configured value.
     * {@code 0} disables the size limitations.
     * <p>
     * If not provided, default is {@link #DFLT_MSG_QUEUE_LIMIT}.
     *
     * @param msgQueueLimit Send queue size limit.
     * @return {@code this} for chaining.
     */
    @IgniteSpiConfiguration(optional = true)
    public TcpCommunicationSpi setMessageQueueLimit(int msgQueueLimit) {
        this.msgQueueLimit = msgQueueLimit;

        return this;
    }

    /**
     * Returns the message queue limit for incoming and outgoing messages,
     * see {@link #setMessageQueueLimit(int)}.
     *
     * @return Send queue size limit.
     */
    public int getMessageQueueLimit() {
        return this.msgQueueLimit;
    }

    /**
     * Returns the slow client queue limit, see {@link #setSlowClientQueueLimit(int)}.
     *
     * @return Slow client queue limit.
     */
    public int getSlowClientQueueLimit() {
        return this.slowClientQueueLimit;
    }

    /**
     * Sets slow client queue limit.
     * <p>
     * When set to a positive number, communication SPI will monitor clients outbound message queue sizes and will drop
     * those clients whose queue exceeded this limit.
     * <p>
     * Usually this value should be set to the same value as {@link #getMessageQueueLimit()} which controls
     * message back-pressure for server nodes. The default value for this parameter is {@code 0}
     * which means {@code unlimited}.
     *
     * @param slowClientQueueLimit Slow client queue limit.
     * @return {@code this} for chaining.
     */
    public TcpCommunicationSpi setSlowClientQueueLimit(int slowClientQueueLimit) {
        this.slowClientQueueLimit = slowClientQueueLimit;

        return this;
    }

    /** {@inheritDoc} */
    @Override public void setListener(CommunicationListener<Message> lsnr) {
        this.lsnr = lsnr;
    }

    /**
     * @return Listener.
*/ public CommunicationListener getListener() { return lsnr; } /** {@inheritDoc} */ @Override public int getSentMessagesCount() { return sentMsgsCnt.intValue(); } /** {@inheritDoc} */ @Override public long getSentBytesCount() { return sentBytesCnt.longValue(); } /** {@inheritDoc} */ @Override public int getReceivedMessagesCount() { return rcvdMsgsCnt.intValue(); } /** {@inheritDoc} */ @Override public long getReceivedBytesCount() { return rcvdBytesCnt.longValue(); } /** {@inheritDoc} */ @Override public int getOutboundMessagesQueueSize() { GridNioServer<Message> srv = nioSrvr; return srv != null ? srv.outboundMessagesQueueSize() : 0; } /** {@inheritDoc} */ @Override public void resetMetrics() { // Can't use 'reset' method because it is not thread-safe // according to javadoc. sentMsgsCnt.add(-sentMsgsCnt.sum()); rcvdMsgsCnt.add(-rcvdMsgsCnt.sum()); sentBytesCnt.add(-sentBytesCnt.sum()); rcvdBytesCnt.add(-rcvdBytesCnt.sum()); } /** * Dumps SPI per-connection stats to logs. */ public void dumpStats() { IgniteLogger log = this.log; if (log != null) { StringBuilder sb = new StringBuilder("Communication SPI recovery descriptors: ").append(U.nl()); for (Map.Entry<ConnectionKey, GridNioRecoveryDescriptor> entry : recoveryDescs.entrySet()) { GridNioRecoveryDescriptor desc = entry.getValue(); sb.append(" [key=").append(entry.getKey()) .append(", msgsSent=").append(desc.sent()) .append(", msgsAckedByRmt=").append(desc.acked()) .append(", msgsRcvd=").append(desc.received()) .append(", lastAcked=").append(desc.lastAcknowledged()) .append(", reserveCnt=").append(desc.reserveCount()) .append(", descIdHash=").append(System.identityHashCode(desc)) .append(']').append(U.nl()); } for (Map.Entry<ConnectionKey, GridNioRecoveryDescriptor> entry : outRecDescs.entrySet()) { GridNioRecoveryDescriptor desc = entry.getValue(); sb.append(" [key=").append(entry.getKey()) .append(", msgsSent=").append(desc.sent()) .append(", msgsAckedByRmt=").append(desc.acked()) .append(", 
reserveCnt=").append(desc.reserveCount()) .append(", connected=").append(desc.connected()) .append(", reserved=").append(desc.reserved()) .append(", descIdHash=").append(System.identityHashCode(desc)) .append(']').append(U.nl()); } for (Map.Entry<ConnectionKey, GridNioRecoveryDescriptor> entry : inRecDescs.entrySet()) { GridNioRecoveryDescriptor desc = entry.getValue(); sb.append(" [key=").append(entry.getKey()) .append(", msgsRcvd=").append(desc.received()) .append(", lastAcked=").append(desc.lastAcknowledged()) .append(", reserveCnt=").append(desc.reserveCount()) .append(", connected=").append(desc.connected()) .append(", reserved=").append(desc.reserved()) .append(", handshakeIdx=").append(desc.handshakeIndex()) .append(", descIdHash=").append(System.identityHashCode(desc)) .append(']').append(U.nl()); } sb.append("Communication SPI clients: ").append(U.nl()); for (Map.Entry<UUID, GridCommunicationClient[]> entry : clients.entrySet()) { UUID nodeId = entry.getKey(); GridCommunicationClient[] clients0 = entry.getValue(); for (GridCommunicationClient client : clients0) { if (client != null) { sb.append(" [node=").append(nodeId) .append(", client=").append(client) .append(']').append(U.nl()); } } } U.warn(log, sb.toString()); } GridNioServer<Message> nioSrvr = this.nioSrvr; if (nioSrvr != null) nioSrvr.dumpStats(); } /** */ private final ThreadLocal<Integer> threadConnIdx = new ThreadLocal<>(); /** */ private final AtomicInteger connIdx = new AtomicInteger(); /** {@inheritDoc} */ @Override public Map<String, Object> getNodeAttributes() throws IgniteSpiException { initFailureDetectionTimeout(); assertParameter(locPort > 1023, "locPort > 1023"); assertParameter(locPort <= 0xffff, "locPort < 0xffff"); assertParameter(locPortRange >= 0, "locPortRange >= 0"); assertParameter(idleConnTimeout > 0, "idleConnTimeout > 0"); assertParameter(sockRcvBuf >= 0, "sockRcvBuf >= 0"); assertParameter(sockSndBuf >= 0, "sockSndBuf >= 0"); assertParameter(msgQueueLimit >= 0, 
"msgQueueLimit >= 0"); assertParameter(shmemPort > 0 || shmemPort == -1, "shmemPort > 0 || shmemPort == -1"); assertParameter(selectorsCnt > 0, "selectorsCnt > 0"); assertParameter(connectionsPerNode > 0, "connectionsPerNode > 0"); assertParameter(connectionsPerNode <= 1024, "connectionsPerNode <= 1024"); if (!failureDetectionTimeoutEnabled()) { assertParameter(reconCnt > 0, "reconnectCnt > 0"); assertParameter(connTimeout >= 0, "connTimeout >= 0"); assertParameter(maxConnTimeout >= connTimeout, "maxConnTimeout >= connTimeout"); } assertParameter(sockWriteTimeout >= 0, "sockWriteTimeout >= 0"); assertParameter(ackSndThreshold > 0, "ackSndThreshold > 0"); assertParameter(unackedMsgsBufSize >= 0, "unackedMsgsBufSize >= 0"); if (unackedMsgsBufSize > 0) { assertParameter(unackedMsgsBufSize >= msgQueueLimit * 5, "Specified 'unackedMsgsBufSize' is too low, it should be at least 'msgQueueLimit * 5'."); assertParameter(unackedMsgsBufSize >= ackSndThreshold * 5, "Specified 'unackedMsgsBufSize' is too low, it should be at least 'ackSndThreshold * 5'."); } if (connectionsPerNode > 1) { connPlc = new ConnectionPolicy() { @Override public int connectionIndex() { return (int)(U.safeAbs(Thread.currentThread().getId()) % connectionsPerNode); } }; } else { connPlc = new ConnectionPolicy() { @Override public int connectionIndex() { return 0; } }; } try { locHost = U.resolveLocalHost(locAddr); } catch (IOException e) { throw new IgniteSpiException("Failed to initialize local address: " + locAddr, e); } try { shmemSrv = resetShmemServer(); } catch (IgniteCheckedException e) { U.warn(log, "Failed to start shared memory communication server.", e); } try { // This method potentially resets local port to the value // local node was bound to. nioSrvr = resetNioServer(); } catch (IgniteCheckedException e) { throw new IgniteSpiException("Failed to initialize TCP server: " + locHost, e); } // Set local node attributes. 
try { IgniteBiTuple<Collection<String>, Collection<String>> addrs = U.resolveLocalAddresses(locHost); Collection<InetSocketAddress> extAddrs = addrRslvr == null ? null : U.resolveAddresses(addrRslvr, F.flat(Arrays.asList(addrs.get1(), addrs.get2())), boundTcpPort); HashMap<String, Object> res = new HashMap<>(5); res.put(createSpiAttributeName(ATTR_ADDRS), addrs.get1()); res.put(createSpiAttributeName(ATTR_HOST_NAMES), addrs.get2()); res.put(createSpiAttributeName(ATTR_PORT), boundTcpPort); res.put(createSpiAttributeName(ATTR_SHMEM_PORT), boundTcpShmemPort >= 0 ? boundTcpShmemPort : null); res.put(createSpiAttributeName(ATTR_EXT_ADDRS), extAddrs); res.put(createSpiAttributeName(ATTR_PAIRED_CONN), usePairedConnections); return res; } catch (IOException | IgniteCheckedException e) { throw new IgniteSpiException("Failed to resolve local host to addresses: " + locHost, e); } } /** {@inheritDoc} */ @Override public void spiStart(String igniteInstanceName) throws IgniteSpiException { assert locHost != null; // Start SPI start stopwatch. startStopwatch(); // Ack parameters. 
if (log.isDebugEnabled()) { log.debug(configInfo("locAddr", locAddr)); log.debug(configInfo("locPort", locPort)); log.debug(configInfo("locPortRange", locPortRange)); log.debug(configInfo("idleConnTimeout", idleConnTimeout)); log.debug(configInfo("directBuf", directBuf)); log.debug(configInfo("directSendBuf", directSndBuf)); log.debug(configInfo("selectorsCnt", selectorsCnt)); log.debug(configInfo("tcpNoDelay", tcpNoDelay)); log.debug(configInfo("sockSndBuf", sockSndBuf)); log.debug(configInfo("sockRcvBuf", sockRcvBuf)); log.debug(configInfo("shmemPort", shmemPort)); log.debug(configInfo("msgQueueLimit", msgQueueLimit)); log.debug(configInfo("connectionsPerNode", connectionsPerNode)); if (failureDetectionTimeoutEnabled()) { log.debug(configInfo("connTimeout", connTimeout)); log.debug(configInfo("maxConnTimeout", maxConnTimeout)); log.debug(configInfo("reconCnt", reconCnt)); } else log.debug(configInfo("failureDetectionTimeout", failureDetectionTimeout())); log.debug(configInfo("sockWriteTimeout", sockWriteTimeout)); log.debug(configInfo("ackSndThreshold", ackSndThreshold)); log.debug(configInfo("unackedMsgsBufSize", unackedMsgsBufSize)); } if (!tcpNoDelay) U.quietAndWarn(log, "'TCP_NO_DELAY' for communication is off, which should be used with caution " + "since may produce significant delays with some scenarios."); if (slowClientQueueLimit > 0 && msgQueueLimit > 0 && slowClientQueueLimit >= msgQueueLimit) { U.quietAndWarn(log, "Slow client queue limit is set to a value greater than message queue limit " + "(slow client queue limit will have no effect) [msgQueueLimit=" + msgQueueLimit + ", slowClientQueueLimit=" + slowClientQueueLimit + ']'); } if (msgQueueLimit == 0) U.quietAndWarn(log, "Message queue limit is set to 0 which may lead to " + "potential OOMEs when running cache operations in FULL_ASYNC or PRIMARY_SYNC modes " + "due to message queues growth on sender and receiver sides."); registerMBean(igniteInstanceName, new TcpCommunicationSpiMBeanImpl(this), 
TcpCommunicationSpiMBean.class); connectGate = new ConnectGateway(); if (shmemSrv != null) { shmemAcceptWorker = new ShmemAcceptWorker(shmemSrv); new IgniteThread(shmemAcceptWorker).start(); } nioSrvr.start(); commWorker = new CommunicationWorker(igniteInstanceName); commWorker.start(); // Ack start. if (log.isDebugEnabled()) log.debug(startInfo()); } /** {@inheritDoc} }*/ @Override public void onContextInitialized0(IgniteSpiContext spiCtx) throws IgniteSpiException { spiCtx.registerPort(boundTcpPort, IgnitePortProtocol.TCP); // SPI can start without shmem port. if (boundTcpShmemPort > 0) spiCtx.registerPort(boundTcpShmemPort, IgnitePortProtocol.TCP); spiCtx.addLocalEventListener(discoLsnr, EVT_NODE_LEFT, EVT_NODE_FAILED); ctxInitLatch.countDown(); } /** {@inheritDoc} */ @Override public IgniteSpiContext getSpiContext() { if (ctxInitLatch.getCount() > 0) { if (log.isDebugEnabled()) log.debug("Waiting for context initialization."); try { U.await(ctxInitLatch); if (log.isDebugEnabled()) log.debug("Context has been initialized."); } catch (IgniteInterruptedCheckedException e) { U.warn(log, "Thread has been interrupted while waiting for SPI context initialization.", e); } } return super.getSpiContext(); } /** * Recreates tpcSrvr socket instance. * * @return Server instance. * @throws IgniteCheckedException Thrown if it's not possible to create server. */ private GridNioServer<Message> resetNioServer() throws IgniteCheckedException { if (boundTcpPort >= 0) throw new IgniteCheckedException("Tcp NIO server was already created on port " + boundTcpPort); IgniteCheckedException lastEx = null; // If configured TCP port is busy, find first available in range. int lastPort = locPortRange == 0 ? 
locPort : locPort + locPortRange - 1; for (int port = locPort; port <= lastPort; port++) { try { MessageFactory msgFactory = new MessageFactory() { private MessageFactory impl; @Nullable @Override public Message create(short type) { if (impl == null) impl = getSpiContext().messageFactory(); assert impl != null; return impl.create(type); } }; GridNioMessageReaderFactory readerFactory = new GridNioMessageReaderFactory() { private MessageFormatter formatter; @Override public MessageReader reader(GridNioSession ses, MessageFactory msgFactory) throws IgniteCheckedException { if (formatter == null) formatter = getSpiContext().messageFormatter(); assert formatter != null; ConnectionKey key = ses.meta(CONN_IDX_META); return key != null ? formatter.reader(key.nodeId(), msgFactory) : null; } }; GridNioMessageWriterFactory writerFactory = new GridNioMessageWriterFactory() { private MessageFormatter formatter; @Override public MessageWriter writer(GridNioSession ses) throws IgniteCheckedException { if (formatter == null) formatter = getSpiContext().messageFormatter(); assert formatter != null; ConnectionKey key = ses.meta(CONN_IDX_META); return key != null ? formatter.writer(key.nodeId()) : null; } }; GridDirectParser parser = new GridDirectParser(log.getLogger(GridDirectParser.class), msgFactory, readerFactory); IgnitePredicate<Message> skipRecoveryPred = new IgnitePredicate<Message>() { @Override public boolean apply(Message msg) { return msg instanceof RecoveryLastReceivedMessage; } }; boolean clientMode = Boolean.TRUE.equals(ignite.configuration().isClientMode()); IgniteBiInClosure<GridNioSession, Integer> queueSizeMonitor = !clientMode && slowClientQueueLimit > 0 ? 
new CI2<GridNioSession, Integer>() { @Override public void apply(GridNioSession ses, Integer qSize) { checkClientQueueSize(ses, qSize); } } : null; GridNioFilter[] filters; if (isSslEnabled()) { GridNioSslFilter sslFilter = new GridNioSslFilter(ignite.configuration().getSslContextFactory().create(), true, ByteOrder.nativeOrder(), log); sslFilter.directMode(true); sslFilter.wantClientAuth(true); sslFilter.needClientAuth(true); filters = new GridNioFilter[] { new GridNioCodecFilter(parser, log, true), new GridConnectionBytesVerifyFilter(log), sslFilter }; } else filters = new GridNioFilter[] { new GridNioCodecFilter(parser, log, true), new GridConnectionBytesVerifyFilter(log) }; GridNioServer<Message> srvr = GridNioServer.<Message>builder() .address(locHost) .port(port) .listener(srvLsnr) .logger(log) .selectorCount(selectorsCnt) .igniteInstanceName(igniteInstanceName) .serverName("tcp-comm") .tcpNoDelay(tcpNoDelay) .directBuffer(directBuf) .byteOrder(ByteOrder.nativeOrder()) .socketSendBufferSize(sockSndBuf) .socketReceiveBufferSize(sockRcvBuf) .sendQueueLimit(msgQueueLimit) .directMode(true) .metricsListener(metricsLsnr) .writeTimeout(sockWriteTimeout) .selectorSpins(selectorSpins) .filters(filters) .writerFactory(writerFactory) .skipRecoveryPredicate(skipRecoveryPred) .messageQueueSizeListener(queueSizeMonitor) .readWriteSelectorsAssign(usePairedConnections) .build(); boundTcpPort = port; // Ack Port the TCP server was bound to. if (log.isInfoEnabled()) { log.info("Successfully bound communication NIO server to TCP port " + "[port=" + boundTcpPort + ", locHost=" + locHost + ", selectorsCnt=" + selectorsCnt + ", selectorSpins=" + srvr.selectorSpins() + ", pairedConn=" + usePairedConnections + ']'); } srvr.idleTimeout(idleConnTimeout); return srvr; } catch (IgniteCheckedException e) { if (X.hasCause(e, SSLException.class)) throw new IgniteSpiException("Failed to create SSL context. 
SSL factory: " + ignite.configuration().getSslContextFactory() + '.', e); lastEx = e; if (log.isDebugEnabled()) log.debug("Failed to bind to local port (will try next port within range) [port=" + port + ", locHost=" + locHost + ']'); onException("Failed to bind to local port (will try next port within range) [port=" + port + ", locHost=" + locHost + ']', e); } } // If free port wasn't found. throw new IgniteCheckedException("Failed to bind to any port within range [startPort=" + locPort + ", portRange=" + locPortRange + ", locHost=" + locHost + ']', lastEx); } /** * Creates new shared memory communication server. * * @return Server. * @throws IgniteCheckedException If failed. */ @Nullable private IpcSharedMemoryServerEndpoint resetShmemServer() throws IgniteCheckedException { if (boundTcpShmemPort >= 0) throw new IgniteCheckedException("Shared memory server was already created on port " + boundTcpShmemPort); if (shmemPort == -1 || U.isWindows()) return null; IgniteCheckedException lastEx = null; // If configured TCP port is busy, find first available in range. for (int port = shmemPort; port < shmemPort + locPortRange; port++) { try { IgniteConfiguration cfg = ignite.configuration(); IpcSharedMemoryServerEndpoint srv = new IpcSharedMemoryServerEndpoint(log, cfg.getNodeId(), igniteInstanceName, cfg.getWorkDirectory()); srv.setPort(port); srv.omitOutOfResourcesWarning(true); srv.start(); boundTcpShmemPort = port; // Ack Port the TCP server was bound to. if (log.isInfoEnabled()) log.info("Successfully bound shared memory communication to TCP port [port=" + boundTcpShmemPort + ", locHost=" + locHost + ']'); return srv; } catch (IgniteCheckedException e) { lastEx = e; if (log.isDebugEnabled()) log.debug("Failed to bind to local port (will try next port within range) [port=" + port + ", locHost=" + locHost + ']'); } } // If free port wasn't found. 
throw new IgniteCheckedException("Failed to bind shared memory communication to any port within range [startPort=" + locPort + ", portRange=" + locPortRange + ", locHost=" + locHost + ']', lastEx); } /** {@inheritDoc} */ @Override public void spiStop() throws IgniteSpiException { assert stopping; unregisterMBean(); // Stop TCP server. if (nioSrvr != null) nioSrvr.stop(); U.interrupt(commWorker); U.join(commWorker, log); U.cancel(shmemAcceptWorker); U.join(shmemAcceptWorker, log); U.cancel(shmemWorkers); U.join(shmemWorkers, log); shmemWorkers.clear(); // Force closing on stop (safety). for (GridCommunicationClient[] clients0 : clients.values()) { for (GridCommunicationClient client : clients0) { if (client != null) client.forceClose(); } } // Clear resources. nioSrvr = null; commWorker = null; boundTcpPort = -1; // Ack stop. if (log.isDebugEnabled()) log.debug(stopInfo()); } /** {@inheritDoc} */ @Override protected void onContextDestroyed0() { stopping = true; if (ctxInitLatch.getCount() > 0) // Safety. ctxInitLatch.countDown(); if (connectGate != null) connectGate.stopped(); // Force closing. 
for (GridCommunicationClient[] clients0 : clients.values()) { for (GridCommunicationClient client : clients0) { if (client != null) client.forceClose(); } } getSpiContext().deregisterPorts(); getSpiContext().removeLocalEventListener(discoLsnr); } /** {@inheritDoc} */ @Override public void onClientDisconnected(IgniteFuture<?> reconnectFut) { connectGate.disconnected(reconnectFut); for (GridCommunicationClient[] clients0 : clients.values()) { for (GridCommunicationClient client : clients0) { if (client != null) client.forceClose(); } } IgniteClientDisconnectedCheckedException err = new IgniteClientDisconnectedCheckedException(reconnectFut, "Failed to connect client node disconnected."); for (GridFutureAdapter<GridCommunicationClient> clientFut : clientFuts.values()) clientFut.onDone(err); recoveryDescs.clear(); inRecDescs.clear(); outRecDescs.clear(); } /** {@inheritDoc} */ @Override public void onClientReconnected(boolean clusterRestarted) { connectGate.reconnected(); } /** * @param nodeId Left node ID. */ void onNodeLeft(UUID nodeId) { assert nodeId != null; GridCommunicationClient[] clients0 = clients.remove(nodeId); if (clients0 != null) { for (GridCommunicationClient client : clients0) { if (client != null) { if (log.isDebugEnabled()) log.debug("Forcing NIO client close since node has left [nodeId=" + nodeId + ", client=" + client + ']'); client.forceClose(); } } } } /** {@inheritDoc} */ @Override protected void checkConfigurationConsistency0(IgniteSpiContext spiCtx, ClusterNode node, boolean starting) throws IgniteSpiException { // These attributes are set on node startup in any case, so we MUST receive them. checkAttributePresence(node, createSpiAttributeName(ATTR_ADDRS)); checkAttributePresence(node, createSpiAttributeName(ATTR_HOST_NAMES)); checkAttributePresence(node, createSpiAttributeName(ATTR_PORT)); } /** * Checks that node has specified attribute and prints warning if it does not. * * @param node Node to check. * @param attrName Name of the attribute. 
*/ private void checkAttributePresence(ClusterNode node, String attrName) { if (node.attribute(attrName) == null) U.warn(log, "Remote node has inconsistent configuration (required attribute was not found) " + "[attrName=" + attrName + ", nodeId=" + node.id() + "spiCls=" + U.getSimpleName(TcpCommunicationSpi.class) + ']'); } /** {@inheritDoc} */ @Override public void sendMessage(ClusterNode node, Message msg) throws IgniteSpiException { sendMessage0(node, msg, null); } /** * Sends given message to destination node. Note that characteristics of the * exchange such as durability, guaranteed delivery or error notification is * dependant on SPI implementation. * * @param node Destination node. * @param msg Message to send. * @param ackC Ack closure. * @throws org.apache.ignite.spi.IgniteSpiException Thrown in case of any error during sending the message. * Note that this is not guaranteed that failed communication will result * in thrown exception as this is dependant on SPI implementation. */ public void sendMessage(ClusterNode node, Message msg, IgniteInClosure<IgniteException> ackC) throws IgniteSpiException { sendMessage0(node, msg, ackC); } /** * @param node Destination node. * @param msg Message to send. * @param ackC Ack closure. * @throws org.apache.ignite.spi.IgniteSpiException Thrown in case of any error during sending the message. * Note that this is not guaranteed that failed communication will result * in thrown exception as this is dependant on SPI implementation. 
*/ private void sendMessage0(ClusterNode node, Message msg, IgniteInClosure<IgniteException> ackC) throws IgniteSpiException { assert node != null; assert msg != null; if (log.isTraceEnabled()) log.trace("Sending message with ack to node [node=" + node + ", msg=" + msg + ']'); ClusterNode locNode = getLocalNode(); if (locNode == null) throw new IgniteSpiException("Local node has not been started or fully initialized " + "[isStopping=" + getSpiContext().isStopping() + ']'); if (node.id().equals(locNode.id())) notifyListener(node.id(), msg, NOOP); else { GridCommunicationClient client = null; int connIdx = connPlc.connectionIndex(); try { boolean retry; do { client = reserveClient(node, connIdx); UUID nodeId = null; if (!client.async()) nodeId = node.id(); retry = client.sendMessage(nodeId, msg, ackC); client.release(); if (!retry) sentMsgsCnt.increment(); else { removeNodeClient(node.id(), client); ClusterNode node0 = getSpiContext().node(node.id()); if (node0 == null) throw new IgniteCheckedException("Failed to send message to remote node " + "(node has left the grid): " + node.id()); } client = null; } while (retry); } catch (IgniteCheckedException e) { throw new IgniteSpiException("Failed to send message to remote node: " + node, e); } finally { if (client != null && removeNodeClient(node.id(), client)) client.forceClose(); } } } /** * @param nodeId Node ID. * @param rmvClient Client to remove. * @return {@code True} if client was removed. */ private boolean removeNodeClient(UUID nodeId, GridCommunicationClient rmvClient) { for (;;) { GridCommunicationClient[] curClients = clients.get(nodeId); if (curClients == null || rmvClient.connectionIndex() >= curClients.length || curClients[rmvClient.connectionIndex()] != rmvClient) return false; GridCommunicationClient[] newClients = Arrays.copyOf(curClients, curClients.length); newClients[rmvClient.connectionIndex()] = null; if (clients.replace(nodeId, curClients, newClients)) return true; } } /** * @param node Node. 
* @param connIdx Connection index.
     * @param addClient Client to add.
     */
    private void addNodeClient(ClusterNode node, int connIdx, GridCommunicationClient addClient) {
        assert connectionsPerNode > 0 : connectionsPerNode;
        assert connIdx == addClient.connectionIndex() : addClient;

        // Indexes outside of the configured per-node range are not stored in the map.
        if (connIdx >= connectionsPerNode) {
            assert !usePairedConnections(node);

            return;
        }

        // Copy-and-swap loop: retried until the map accepts the new array.
        for (;;) {
            GridCommunicationClient[] curClients = clients.get(node.id());

            assert curClients == null || curClients[connIdx] == null : "Client already created [node=" + node.id() +
                ", connIdx=" + connIdx +
                ", client=" + addClient +
                ", oldClient=" + curClients[connIdx] + ']';

            GridCommunicationClient[] newClients;

            if (curClients == null) {
                // First client for this node: create the array sized by 'connectionsPerNode'.
                newClients = new GridCommunicationClient[connectionsPerNode];
                newClients[connIdx] = addClient;

                if (clients.putIfAbsent(node.id(), newClients) == null)
                    break;
            }
            else {
                // Never mutate the published array in place; replace it with a modified copy.
                newClients = Arrays.copyOf(curClients, curClients.length);
                newClients[connIdx] = addClient;

                if (clients.replace(node.id(), curClients, newClients))
                    break;
            }
        }
    }

    /**
     * Returns existing or just created client to node.
     * <p>
     * The returned client has been successfully reserved via {@code reserve()}.
     *
     * @param node Node to which client should be open.
     * @param connIdx Connection index.
     * @return The existing or just created client.
     * @throws IgniteCheckedException Thrown if any exception occurs.
     */
    private GridCommunicationClient reserveClient(ClusterNode node, int connIdx) throws IgniteCheckedException {
        assert node != null;
        assert (connIdx >= 0 && connIdx < connectionsPerNode) || !usePairedConnections(node) : connIdx;

        UUID nodeId = node.id();

        while (true) {
            GridCommunicationClient[] curClients = clients.get(nodeId);

            GridCommunicationClient client = curClients != null && connIdx < curClients.length ?
                curClients[connIdx] : null;

            if (client == null) {
                if (stopping)
                    throw new IgniteSpiException("Node is stopping.");

                // Do not allow concurrent connects.
                GridFutureAdapter<GridCommunicationClient> fut = new ConnectFuture();

                ConnectionKey connKey = new ConnectionKey(nodeId, connIdx, -1);

                GridFutureAdapter<GridCommunicationClient> oldFut = clientFuts.putIfAbsent(connKey, fut);

                if (oldFut == null) {
                    // This thread registered the future, so it performs the connect.
                    try {
                        // Re-check the map: a client may have been added after the first lookup.
                        GridCommunicationClient[] curClients0 = clients.get(nodeId);

                        GridCommunicationClient client0 = curClients0 != null && connIdx < curClients0.length ?
                            curClients0[connIdx] : null;

                        if (client0 == null) {
                            client0 = createNioClient(node, connIdx);

                            if (client0 != null) {
                                addNodeClient(node, connIdx, client0);

                                if (client0 instanceof GridTcpNioCommunicationClient) {
                                    GridTcpNioCommunicationClient tcpClient = ((GridTcpNioCommunicationClient)client0);

                                    // Session got closed between creation and registration: unregister and retry.
                                    if (tcpClient.session().closeTime() > 0 && removeNodeClient(nodeId, client0)) {
                                        if (log.isDebugEnabled())
                                            log.debug("Session was closed after client creation, will retry " +
                                                "[node=" + node + ", client=" + client0 + ']');

                                        client0 = null;
                                    }
                                }
                            }
                            else
                                // No client was created; pause before the outer loop retries.
                                U.sleep(200);
                        }

                        fut.onDone(client0);
                    }
                    catch (Throwable e) {
                        // Propagate the failure to every waiter; it is rethrown to this thread by fut.get() below.
                        fut.onDone(e);

                        if (e instanceof Error)
                            throw (Error)e;
                    }
                    finally {
                        clientFuts.remove(connKey, fut);
                    }
                }
                else
                    // Another thread is already connecting: wait on its future instead.
                    fut = oldFut;

                client = fut.get();

                // Future completed without a client - start over.
                if (client == null)
                    continue;

                if (getSpiContext().node(nodeId) == null) {
                    if (removeNodeClient(nodeId, client))
                        client.forceClose();

                    throw new IgniteSpiException("Destination node is not in topology: " + node.id());
                }
            }

            assert connIdx == client.connectionIndex() : client;

            if (client.reserve())
                return client;
            else
                // Client has just been closed by idle worker. Help it and try again.
                removeNodeClient(nodeId, client);
        }
    }

    /**
     * Creates a client to the given node: a shared memory client is tried first when the remote node
     * advertises a shared memory port and has the same MACs, otherwise (or on failure) a TCP client.
     *
     * @param node Node to create client for.
     * @param connIdx Connection index.
     * @return Client.
     * @throws IgniteCheckedException If failed.
*/ @Nullable private GridCommunicationClient createNioClient(ClusterNode node, int connIdx) throws IgniteCheckedException { assert node != null; Integer shmemPort = node.attribute(createSpiAttributeName(ATTR_SHMEM_PORT)); ClusterNode locNode = getSpiContext().localNode(); if (locNode == null) throw new IgniteCheckedException("Failed to create NIO client (local node is stopping)"); if (log.isDebugEnabled()) log.debug("Creating NIO client to node: " + node); // If remote node has shared memory server enabled and has the same set of MACs // then we are likely to run on the same host and shared memory communication could be tried. if (shmemPort != null && U.sameMacs(locNode, node)) { try { GridCommunicationClient client = createShmemClient( node, connIdx, shmemPort); if (log.isDebugEnabled()) log.debug("Shmem client created: " + client); return client; } catch (IgniteCheckedException e) { if (e.hasCause(IpcOutOfSystemResourcesException.class)) // Has cause or is itself the IpcOutOfSystemResourcesException. LT.warn(log, OUT_OF_RESOURCES_TCP_MSG); else if (getSpiContext().node(node.id()) != null) LT.warn(log, e.getMessage()); else if (log.isDebugEnabled()) log.debug("Failed to establish shared memory connection with local node (node has left): " + node.id()); } } connectGate.enter(); try { GridCommunicationClient client = createTcpClient(node, connIdx); if (log.isDebugEnabled()) log.debug("TCP client created: " + client); return client; } finally { connectGate.leave(); } } /** * @param node Node. * @param port Port. * @param connIdx Connection index. * @return Client. * @throws IgniteCheckedException If failed. 
*/ @Nullable private GridCommunicationClient createShmemClient(ClusterNode node, int connIdx, Integer port) throws IgniteCheckedException { int attempt = 1; int connectAttempts = 1; long connTimeout0 = connTimeout; IgniteSpiOperationTimeoutHelper timeoutHelper = new IgniteSpiOperationTimeoutHelper(this, !node.isClient()); while (true) { GridCommunicationClient client; try { client = new GridShmemCommunicationClient( connIdx, metricsLsnr, port, timeoutHelper.nextTimeoutChunk(connTimeout), log, getSpiContext().messageFormatter()); } catch (IgniteCheckedException e) { if (timeoutHelper.checkFailureTimeoutReached(e)) throw e; // Reconnect for the second time, if connection is not established. if (connectAttempts < 2 && X.hasCause(e, ConnectException.class)) { connectAttempts++; continue; } throw e; } try { safeHandshake(client, null, node.id(), timeoutHelper.nextTimeoutChunk(connTimeout0), null, null); } catch (HandshakeTimeoutException | IgniteSpiOperationTimeoutException e) { client.forceClose(); if (failureDetectionTimeoutEnabled() && (e instanceof HandshakeTimeoutException || timeoutHelper.checkFailureTimeoutReached(e))) { if (log.isDebugEnabled()) log.debug("Handshake timed out (failure threshold reached) [failureDetectionTimeout=" + failureDetectionTimeout() + ", err=" + e.getMessage() + ", client=" + client + ']'); throw e; } assert !failureDetectionTimeoutEnabled(); if (log.isDebugEnabled()) log.debug("Handshake timed out (will retry with increased timeout) [timeout=" + connTimeout0 + ", err=" + e.getMessage() + ", client=" + client + ']'); if (attempt == reconCnt || connTimeout0 > maxConnTimeout) { if (log.isDebugEnabled()) log.debug("Handshake timedout (will stop attempts to perform the handshake) " + "[timeout=" + connTimeout0 + ", maxConnTimeout=" + maxConnTimeout + ", attempt=" + attempt + ", reconCnt=" + reconCnt + ", err=" + e.getMessage() + ", client=" + client + ']'); throw e; } else { attempt++; connTimeout0 *= 2; continue; } } catch 
(IgniteCheckedException | RuntimeException | Error e) { if (log.isDebugEnabled()) log.debug( "Caught exception (will close client) [err=" + e.getMessage() + ", client=" + client + ']'); client.forceClose(); throw e; } return client; } } /** * Checks client message queue size and initiates client drop if message queue size exceeds the configured limit. * * @param ses Node communication session. * @param msgQueueSize Message queue size. */ private void checkClientQueueSize(GridNioSession ses, int msgQueueSize) { if (slowClientQueueLimit > 0 && msgQueueSize > slowClientQueueLimit) { ConnectionKey id = ses.meta(CONN_IDX_META); if (id != null) { ClusterNode node = getSpiContext().node(id.nodeId); if (node != null && node.isClient()) { String msg = "Client node outbound message queue size exceeded slowClientQueueLimit, " + "the client will be dropped " + "(consider changing 'slowClientQueueLimit' configuration property) " + "[srvNode=" + getSpiContext().localNode().id() + ", clientNode=" + node + ", slowClientQueueLimit=" + slowClientQueueLimit + ']'; U.quietAndWarn(log, msg); getSpiContext().failNode(id.nodeId(), msg); } } } } /** * Establish TCP connection to remote node and returns client. * * @param node Remote node. * @param connIdx Connection index. * @return Client. * @throws IgniteCheckedException If failed. 
*/
    protected GridCommunicationClient createTcpClient(ClusterNode node, int connIdx) throws IgniteCheckedException {
        // NOTE(review): the method can return null (recovery descriptor not reserved, or the handshake
        // answered -1) although it is not annotated as nullable.
        Collection<String> rmtAddrs0 = node.attribute(createSpiAttributeName(ATTR_ADDRS));
        Collection<String> rmtHostNames0 = node.attribute(createSpiAttributeName(ATTR_HOST_NAMES));
        Integer boundPort = node.attribute(createSpiAttributeName(ATTR_PORT));
        Collection<InetSocketAddress> extAddrs = node.attribute(createSpiAttributeName(ATTR_EXT_ADDRS));

        boolean isRmtAddrsExist = (!F.isEmpty(rmtAddrs0) && boundPort != null);
        boolean isExtAddrsExist = !F.isEmpty(extAddrs);

        if (!isRmtAddrsExist && !isExtAddrsExist)
            throw new IgniteCheckedException("Failed to send message to the destination node. Node doesn't have any " +
                "TCP communication addresses or mapped external addresses. Check configuration and make sure " +
                "that you use the same communication SPI on all nodes. Remote node id: " + node.id());

        // Insertion-ordered set: the order defines the order of connection attempts.
        LinkedHashSet<InetSocketAddress> addrs;

        // Try to connect first on bound addresses.
        if (isRmtAddrsExist) {
            List<InetSocketAddress> addrs0 = new ArrayList<>(U.toSocketAddresses(rmtAddrs0, rmtHostNames0, boundPort));

            boolean sameHost = U.sameMacs(getSpiContext().localNode(), node);

            Collections.sort(addrs0, U.inetAddressesComparator(sameHost));

            addrs = new LinkedHashSet<>(addrs0);
        }
        else
            addrs = new LinkedHashSet<>();

        // Then on mapped external addresses.
        if (isExtAddrsExist)
            addrs.addAll(extAddrs);

        Set<InetAddress> allInetAddrs = U.newHashSet(addrs.size());

        for (InetSocketAddress addr : addrs)
            allInetAddrs.add(addr.getAddress());

        List<InetAddress> reachableInetAddrs = U.filterReachable(allInetAddrs);

        // Some addresses were filtered out: reorder so that reachable ones are tried first,
        // keeping the relative order inside each group.
        if (reachableInetAddrs.size() < allInetAddrs.size()) {
            LinkedHashSet<InetSocketAddress> addrs0 = U.newLinkedHashSet(addrs.size());

            for (InetSocketAddress addr : addrs) {
                if (reachableInetAddrs.contains(addr.getAddress()))
                    addrs0.add(addr);
            }
            for (InetSocketAddress addr : addrs) {
                if (!reachableInetAddrs.contains(addr.getAddress()))
                    addrs0.add(addr);
            }

            addrs = addrs0;
        }

        if (log.isDebugEnabled())
            log.debug("Addresses to connect for node [rmtNode=" + node.id() + ", addrs=" + addrs.toString() + ']');

        boolean conn = false;
        GridCommunicationClient client = null;
        IgniteCheckedException errs = null;

        // Shared across all addresses: the "second connect attempt" is granted once per call.
        int connectAttempts = 1;

        for (InetSocketAddress addr : addrs) {
            // Handshake timeout and attempt counter are reset for every address.
            long connTimeout0 = connTimeout;

            int attempt = 1;

            IgniteSpiOperationTimeoutHelper timeoutHelper = new IgniteSpiOperationTimeoutHelper(this,
                !node.isClient());

            while (!conn) { // Reconnection on handshake timeout.
                try {
                    SocketChannel ch = SocketChannel.open();

                    ch.configureBlocking(true);

                    ch.socket().setTcpNoDelay(tcpNoDelay);
                    ch.socket().setKeepAlive(true);

                    if (sockRcvBuf > 0)
                        ch.socket().setReceiveBufferSize(sockRcvBuf);

                    if (sockSndBuf > 0)
                        ch.socket().setSendBufferSize(sockSndBuf);

                    if (getSpiContext().node(node.id()) == null) {
                        U.closeQuiet(ch);

                        throw new ClusterTopologyCheckedException("Failed to send message " +
                            "(node left topology): " + node);
                    }

                    ConnectionKey connKey = new ConnectionKey(node.id(), connIdx, -1);

                    GridNioRecoveryDescriptor recoveryDesc = outRecoveryDescriptor(node, connKey);

                    // Descriptor could not be reserved: give up without a client.
                    if (!recoveryDesc.reserve()) {
                        U.closeQuiet(ch);

                        return null;
                    }

                    // Stays -1 until the handshake returns a value; -1 makes the finally block release the descriptor.
                    long rcvCnt = -1;

                    Map<Integer, Object> meta = new HashMap<>();

                    GridSslMeta sslMeta = null;

                    try {
                        ch.socket().connect(addr, (int)timeoutHelper.nextTimeoutChunk(connTimeout));

                        if (isSslEnabled()) {
                            meta.put(SSL_META.ordinal(), sslMeta = new GridSslMeta());

                            SSLEngine sslEngine = ignite.configuration().getSslContextFactory().create().createSSLEngine();

                            sslEngine.setUseClientMode(true);

                            sslMeta.sslEngine(sslEngine);
                        }

                        Integer handshakeConnIdx = connIdx;

                        rcvCnt = safeHandshake(ch,
                            recoveryDesc,
                            node.id(),
                            timeoutHelper.nextTimeoutChunk(connTimeout0),
                            sslMeta,
                            handshakeConnIdx);

                        // Handshake answered -1: no client is created for this call.
                        if (rcvCnt == -1)
                            return null;
                    }
                    finally {
                        if (recoveryDesc != null && rcvCnt == -1)
                            recoveryDesc.release();
                    }

                    try {
                        meta.put(CONN_IDX_META, connKey);

                        if (recoveryDesc != null) {
                            recoveryDesc.onHandshake(rcvCnt);

                            meta.put(-1, recoveryDesc);
                        }

                        GridNioSession ses = nioSrvr.createSession(ch, meta).get();

                        client = new GridTcpNioCommunicationClient(connIdx, ses, log);

                        conn = true;
                    }
                    finally {
                        // Session creation failed: give the reserved descriptor back.
                        if (!conn) {
                            if (recoveryDesc != null)
                                recoveryDesc.release();
                        }
                    }
                }
                catch (HandshakeTimeoutException | IgniteSpiOperationTimeoutException e) {
                    if (client != null) {
                        client.forceClose();

                        client = null;
                    }

                    if (failureDetectionTimeoutEnabled() && (e instanceof HandshakeTimeoutException ||
                        timeoutHelper.checkFailureTimeoutReached(e))) {

                        String msg = "Handshake timed out (failure detection timeout is reached) " +
                            "[failureDetectionTimeout=" + failureDetectionTimeout() + ", addr=" + addr + ']';

                        onException(msg, e);

                        if (log.isDebugEnabled())
                            log.debug(msg);

                        if (errs == null)
                            errs = new IgniteCheckedException("Failed to connect to node (is node still alive?). " +
                                "Make sure that each ComputeTask and cache Transaction has a timeout set " +
                                "in order to prevent parties from waiting forever in case of network issues " +
                                "[nodeId=" + node.id() + ", addrs=" + addrs + ']');

                        errs.addSuppressed(new IgniteCheckedException("Failed to connect to address: " + addr, e));

                        // Stop retrying this address; the outer loop moves on to the next one.
                        break;
                    }

                    assert !failureDetectionTimeoutEnabled();

                    onException("Handshake timed out (will retry with increased timeout) [timeout=" + connTimeout0 +
                        ", addr=" + addr + ']', e);

                    if (log.isDebugEnabled())
                        log.debug(
                            "Handshake timed out (will retry with increased timeout) [timeout=" + connTimeout0 +
                                ", addr=" + addr + ", err=" + e + ']');

                    if (attempt == reconCnt || connTimeout0 > maxConnTimeout) {
                        if (log.isDebugEnabled())
                            log.debug("Handshake timedout (will stop attempts to perform the handshake) " +
                                "[timeout=" + connTimeout0 + ", maxConnTimeout=" + maxConnTimeout +
                                ", attempt=" + attempt + ", reconCnt=" + reconCnt +
                                ", err=" + e.getMessage() + ", addr=" + addr + ']');

                        if (errs == null)
                            errs = new IgniteCheckedException("Failed to connect to node (is node still alive?). " +
                                "Make sure that each ComputeTask and cache Transaction has a timeout set " +
                                "in order to prevent parties from waiting forever in case of network issues " +
                                "[nodeId=" + node.id() + ", addrs=" + addrs + ']');

                        errs.addSuppressed(new IgniteCheckedException("Failed to connect to address: " + addr, e));

                        break;
                    }
                    else {
                        attempt++;

                        connTimeout0 *= 2;

                        // Continue loop.
                    }
                }
                catch (Exception e) {
                    if (client != null) {
                        client.forceClose();

                        client = null;
                    }

                    onException("Client creation failed [addr=" + addr + ", err=" + e + ']', e);

                    if (log.isDebugEnabled())
                        log.debug("Client creation failed [addr=" + addr + ", err=" + e + ']');

                    boolean failureDetThrReached = timeoutHelper.checkFailureTimeoutReached(e);

                    if (failureDetThrReached)
                        LT.warn(log, "Connect timed out (consider increasing 'failureDetectionTimeout' " +
                            "configuration property) [addr=" + addr + ", failureDetectionTimeout=" +
                            failureDetectionTimeout() + ']');
                    else if (X.hasCause(e, SocketTimeoutException.class))
                        LT.warn(log, "Connect timed out (consider increasing 'connTimeout' " +
                            "configuration property) [addr=" + addr + ", connTimeout=" + connTimeout + ']');

                    if (errs == null)
                        errs = new IgniteCheckedException("Failed to connect to node (is node still alive?). " +
                            "Make sure that each ComputeTask and cache Transaction has a timeout set " +
                            "in order to prevent parties from waiting forever in case of network issues " +
                            "[nodeId=" + node.id() + ", addrs=" + addrs + ']');

                    errs.addSuppressed(new IgniteCheckedException("Failed to connect to address: " + addr, e));

                    // Reconnect for the second time, if connection is not established.
                    if (!failureDetThrReached && connectAttempts < 2 &&
                        (e instanceof ConnectException || X.hasCause(e, ConnectException.class))) {
                        connectAttempts++;

                        continue;
                    }

                    break;
                }
            }

            // Connected on this address: no need to try the remaining ones.
            if (conn)
                break;
        }

        if (client == null) {
            assert errs != null;

            if (X.hasCause(errs, ConnectException.class))
                LT.warn(log, "Failed to connect to a remote node " +
                    "(make sure that destination node is alive and " +
                    "operating system firewall is disabled on local and remote hosts) " +
                    "[addrs=" + addrs + ']');

            // The remote node is failed only if it is still in topology and either it is a client
            // or the local node is not a client.
            if (getSpiContext().node(node.id()) != null && (CU.clientNode(node) || !CU.clientNode(getLocalNode())) &&
                X.hasCause(errs, ConnectException.class, SocketTimeoutException.class, HandshakeTimeoutException.class,
                    IgniteSpiOperationTimeoutException.class)) {
                LT.warn(log, "TcpCommunicationSpi failed to establish connection to node, node will be dropped from " +
                    "cluster [" +
                    "rmtNode=" + node +
                    ", err=" + errs +
                    ", connectErrs=" + Arrays.toString(errs.getSuppressed()) + ']');

                getSpiContext().failNode(node.id(), "TcpCommunicationSpi failed to establish connection to node [" +
                    "rmtNode=" + node +
                    ", errs=" + errs +
                    ", connectErrs=" + Arrays.toString(errs.getSuppressed()) + ']');
            }

            throw errs;
        }

        return client;
    }

    /**
     * Performs handshake in timeout-safe way.
     *
     * @param client Client.
     * @param recovery Recovery descriptor if use recovery handshake, otherwise {@code null}.
     * @param rmtNodeId Remote node.
     * @param timeout Timeout for handshake.
     * @param sslMeta Session meta.
     * @param handshakeConnIdx Non null connection index if need send it in handshake.
     * @throws IgniteCheckedException If handshake failed or wasn't completed within timeout.
     * @return Handshake response.
*/
    @SuppressWarnings("ThrowFromFinallyBlock")
    private <T> long safeHandshake(
        T client,
        @Nullable GridNioRecoveryDescriptor recovery,
        UUID rmtNodeId,
        long timeout,
        GridSslMeta sslMeta,
        @Nullable Integer handshakeConnIdx
    ) throws IgniteCheckedException {
        // Timeout object is registered for the duration of the handshake and cancelled in the outer finally block.
        HandshakeTimeoutObject<T> obj = new HandshakeTimeoutObject<>(client, U.currentTimeMillis() + timeout);

        addTimeoutObject(obj);

        // Stays 0 unless a recovery handshake response is read from the channel.
        long rcvCnt = 0;

        try {
            if (client instanceof GridCommunicationClient)
                ((GridCommunicationClient)client).doHandshake(new HandshakeClosure(rmtNodeId));
            else {
                // Any non-GridCommunicationClient argument is treated as a raw socket channel.
                SocketChannel ch = (SocketChannel)client;

                boolean success = false;

                try {
                    BlockingSslHandler sslHnd = null;

                    ByteBuffer buf;

                    // Step 1: read the remote node ID message.
                    if (isSslEnabled()) {
                        assert sslMeta != null;

                        sslHnd = new BlockingSslHandler(sslMeta.sslEngine(), ch, directBuf, ByteOrder.nativeOrder(), log);

                        if (!sslHnd.handshake())
                            throw new IgniteCheckedException("SSL handshake is not completed.");

                        ByteBuffer handBuff = sslHnd.applicationBuffer();

                        // Not enough application data left over from the SSL handshake: read and decode more.
                        if (handBuff.remaining() < NodeIdMessage.MESSAGE_FULL_SIZE) {
                            buf = ByteBuffer.allocate(1000);

                            int read = ch.read(buf);

                            if (read == -1)
                                throw new IgniteCheckedException("Failed to read remote node ID (connection closed).");

                            buf.flip();

                            buf = sslHnd.decode(buf);
                        }
                        else
                            buf = handBuff;
                    }
                    else {
                        buf = ByteBuffer.allocate(NodeIdMessage.MESSAGE_FULL_SIZE);

                        // Keep reading until the whole fixed-size message has arrived.
                        for (int i = 0; i < NodeIdMessage.MESSAGE_FULL_SIZE; ) {
                            int read = ch.read(buf);

                            if (read == -1)
                                throw new IgniteCheckedException("Failed to read remote node ID (connection closed).");

                            i += read;
                        }
                    }

                    // Node ID bytes follow the message type prefix.
                    UUID rmtNodeId0 = U.bytesToUuid(buf.array(), Message.DIRECT_TYPE_SIZE);

                    if (!rmtNodeId.equals(rmtNodeId0))
                        throw new IgniteCheckedException("Remote node ID is not as expected [expected=" + rmtNodeId +
                            ", rcvd=" + rmtNodeId0 + ']');
                    else if (log.isDebugEnabled())
                        log.debug("Received remote node ID: " + rmtNodeId0);

                    // Step 2: send the Ignite header.
                    if (isSslEnabled()) {
                        assert sslHnd != null;

                        ch.write(sslHnd.encrypt(ByteBuffer.wrap(U.IGNITE_HEADER)));
                    }
                    else
                        ch.write(ByteBuffer.wrap(U.IGNITE_HEADER));

                    ClusterNode locNode = getLocalNode();

                    if (locNode == null)
                        throw new IgniteCheckedException("Local node has not been started or " +
                            "fully initialized [isStopping=" + getSpiContext().isStopping() + ']');

                    // Step 3: send either a recovery handshake message or just the local node ID.
                    if (recovery != null) {
                        HandshakeMessage msg;

                        int msgSize = HandshakeMessage.MESSAGE_FULL_SIZE;

                        if (handshakeConnIdx != null) {
                            msg = new HandshakeMessage2(locNode.id(),
                                recovery.incrementConnectCount(),
                                recovery.received(),
                                handshakeConnIdx);

                            // Four extra bytes for the connection index.
                            msgSize += 4;
                        }
                        else {
                            msg = new HandshakeMessage(locNode.id(),
                                recovery.incrementConnectCount(),
                                recovery.received());
                        }

                        if (log.isDebugEnabled())
                            log.debug("Writing handshake message [locNodeId=" + locNode.id() +
                                ", rmtNode=" + rmtNodeId + ", msg=" + msg + ']');

                        buf = ByteBuffer.allocate(msgSize);

                        buf.order(ByteOrder.nativeOrder());

                        boolean written = msg.writeTo(buf, null);

                        assert written;

                        buf.flip();

                        if (isSslEnabled()) {
                            assert sslHnd != null;

                            ch.write(sslHnd.encrypt(buf));
                        }
                        else
                            ch.write(buf);
                    }
                    else {
                        if (isSslEnabled()) {
                            assert sslHnd != null;

                            ch.write(sslHnd.encrypt(ByteBuffer.wrap(nodeIdMessage().nodeIdBytesWithType)));
                        }
                        else
                            ch.write(ByteBuffer.wrap(nodeIdMessage().nodeIdBytesWithType));
                    }

                    // Step 4 (recovery handshake only): read the remote "last received" response.
                    if (recovery != null) {
                        if (log.isDebugEnabled())
                            log.debug("Waiting for handshake [rmtNode=" + rmtNodeId + ']');

                        if (isSslEnabled()) {
                            assert sslHnd != null;

                            buf = ByteBuffer.allocate(1000);
                            buf.order(ByteOrder.nativeOrder());

                            // Accumulates decoded bytes across reads; grown on demand.
                            ByteBuffer decode = ByteBuffer.allocate(2 * buf.capacity());
                            decode.order(ByteOrder.nativeOrder());

                            for (int i = 0; i < RecoveryLastReceivedMessage.MESSAGE_FULL_SIZE; ) {
                                int read = ch.read(buf);

                                if (read == -1)
                                    throw new IgniteCheckedException("Failed to read remote node recovery handshake " +
                                        "(connection closed).");

                                buf.flip();

                                ByteBuffer decode0 = sslHnd.decode(buf);

                                i += decode0.remaining();

                                decode = appendAndResizeIfNeeded(decode, decode0);

                                buf.clear();
                            }

                            decode.flip();

                            rcvCnt = decode.getLong(Message.DIRECT_TYPE_SIZE);

                            // Decoded bytes past the handshake response are handed over through the SSL meta.
                            if (decode.limit() > RecoveryLastReceivedMessage.MESSAGE_FULL_SIZE) {
                                decode.position(RecoveryLastReceivedMessage.MESSAGE_FULL_SIZE);

                                sslMeta.decodedBuffer(decode);
                            }

                            ByteBuffer inBuf = sslHnd.inputBuffer();

                            // Same for data still sitting in the SSL handler's input buffer.
                            if (inBuf.position() > 0)
                                sslMeta.encodedBuffer(inBuf);
                        }
                        else {
                            buf = ByteBuffer.allocate(RecoveryLastReceivedMessage.MESSAGE_FULL_SIZE);

                            buf.order(ByteOrder.nativeOrder());

                            for (int i = 0; i < RecoveryLastReceivedMessage.MESSAGE_FULL_SIZE; ) {
                                int read = ch.read(buf);

                                if (read == -1)
                                    throw new IgniteCheckedException("Failed to read remote node recovery handshake " +
                                        "(connection closed).");

                                i += read;
                            }

                            rcvCnt = buf.getLong(Message.DIRECT_TYPE_SIZE);
                        }

                        if (log.isDebugEnabled())
                            log.debug("Received handshake message [rmtNode=" + rmtNodeId + ", rcvCnt=" + rcvCnt + ']');

                        // -1 means the connection was rejected: 'success' stays false, so the channel is closed below.
                        if (rcvCnt == -1) {
                            if (log.isDebugEnabled())
                                log.debug("Connection rejected, will retry client creation [rmtNode=" + rmtNodeId + ']');
                        }
                        else
                            success = true;
                    }
                    else
                        success = true;
                }
                catch (IOException e) {
                    if (log.isDebugEnabled())
                        log.debug("Failed to read from channel: " + e);

                    throw new IgniteCheckedException("Failed to read from channel.", e);
                }
                finally {
                    if (!success)
                        U.closeQuiet(ch);
                }
            }
        }
        finally {
            boolean cancelled = obj.cancel();

            if (cancelled)
                removeTimeoutObject(obj);

            // Ignoring whatever happened after timeout - reporting only timeout event.
            if (!cancelled)
                throw new HandshakeTimeoutException("Failed to perform handshake due to timeout (consider increasing " +
                    "'connectionTimeout' configuration property).");
        }

        return rcvCnt;
    }

    /**
     * Passes the message to the registered communication listener, if any.
     *
     * @param sndId Sender ID.
     * @param msg Communication message.
     * @param msgC Closure to call when message processing finished.
     */
    protected void notifyListener(UUID sndId, Message msg, IgniteRunnable msgC) {
        // Read the field once so that the null check and the call use the same listener instance.
        CommunicationListener<Message> lsnr = this.lsnr;

        if (lsnr != null)
            // Notify listener of a new message.
            lsnr.onMessage(sndId, msg, msgC);
        else if (log.isDebugEnabled())
            log.debug("Received communication message without any registered listeners (will ignore, " +
                "is node stopping?) [senderNodeId=" + sndId + ", msg=" + msg + ']');
    }

    /**
     * Appends the remaining bytes of {@code src} to {@code target}, switching to a larger buffer
     * when {@code target} has not enough room.
     *
     * @param target Target buffer to append to.
     * @param src Source buffer to get data.
     * @return Original or expanded buffer.
     */
    private ByteBuffer appendAndResizeIfNeeded(ByteBuffer target, ByteBuffer src) {
        if (target.remaining() < src.remaining()) {
            // At least double the capacity, and always enough to hold 'src'.
            int newSize = Math.max(target.capacity() * 2, target.capacity() + src.remaining());

            ByteBuffer tmp = ByteBuffer.allocate(newSize);

            tmp.order(target.order());

            // Switch 'target' to reading so that the bytes written so far are copied over.
            target.flip();

            tmp.put(target);

            target = tmp;
        }

        target.put(src);

        return target;
    }

    /**
     * Stops service threads to simulate node failure.
     *
     * FOR TEST PURPOSES ONLY!!!
     */
    public void simulateNodeFailure() {
        if (nioSrvr != null)
            nioSrvr.stop();

        U.interrupt(commWorker);

        U.join(commWorker, log);

        for (GridCommunicationClient[] clients0 : clients.values()) {
            for (GridCommunicationClient client : clients0) {
                if (client != null)
                    client.forceClose();
            }
        }
    }

    /**
     * @param node Node.
     * @param key Connection key.
     * @return Recovery descriptor for outgoing connection.
     */
    private GridNioRecoveryDescriptor outRecoveryDescriptor(ClusterNode node, ConnectionKey key) {
        // Paired connections keep separate descriptor maps for outgoing and incoming directions.
        if (usePairedConnections(node))
            return recoveryDescriptor(outRecDescs, true, node, key);
        else
            return recoveryDescriptor(recoveryDescs, false, node, key);
    }

    /**
     * @param node Node.
     * @param key Connection key.
     * @return Recovery descriptor for incoming connection.
     */
    private GridNioRecoveryDescriptor inRecoveryDescriptor(ClusterNode node, ConnectionKey key) {
        if (usePairedConnections(node))
            return recoveryDescriptor(inRecDescs, true, node, key);
        else
            return recoveryDescriptor(recoveryDescs, false, node, key);
    }

    /**
     * @param node Node.
     * @return {@code True} if can use in/out connection pair for communication.
     */
    private boolean usePairedConnections(ClusterNode node) {
        // Both the local setting and the remote node's attribute must enable paired connections.
        if (usePairedConnections) {
            Boolean attr = node.attribute(createSpiAttributeName(ATTR_PAIRED_CONN));

            return attr != null && attr;
        }

        return false;
    }

    /**
     * Returns the recovery descriptor stored under the given key, creating and registering a new one if absent.
     *
     * @param recoveryDescs Descriptors map.
* @param pairedConnections {@code True} if in/out connections pair is used for communication with node. * @param node Node. * @param key Connection key. * @return Recovery receive data for given node. */ private GridNioRecoveryDescriptor recoveryDescriptor( ConcurrentMap<ConnectionKey, GridNioRecoveryDescriptor> recoveryDescs, boolean pairedConnections, ClusterNode node, ConnectionKey key) { GridNioRecoveryDescriptor recovery = recoveryDescs.get(key); if (recovery == null) { if (log.isDebugEnabled()) log.debug("Missing recovery descriptor for the node (will create a new one) " + "[locNodeId=" + getLocalNode().id() + ", key=" + key + ", rmtNode=" + node + ']'); int maxSize = Math.max(msgQueueLimit, ackSndThreshold); int queueLimit = unackedMsgsBufSize != 0 ? unackedMsgsBufSize : (maxSize * 128); GridNioRecoveryDescriptor old = recoveryDescs.putIfAbsent(key, recovery = new GridNioRecoveryDescriptor(pairedConnections, queueLimit, node, log)); if (old != null) { recovery = old; if (log.isDebugEnabled()) log.debug("Will use existing recovery descriptor: " + recovery); } else { if (log.isDebugEnabled()) log.debug("Initialized recovery descriptor [desc=" + recovery + ", maxSize=" + maxSize + ", queueLimit=" + queueLimit + ']'); } } return recovery; } /** * @param msg Error message. * @param e Exception. */ private void onException(String msg, Exception e) { getExceptionRegistry().onException(msg, e); } /** * @return Node ID message. 
*/ private NodeIdMessage nodeIdMessage() { ClusterNode locNode = getLocalNode(); UUID id; if (locNode == null) { U.warn(log, "Local node is not started or fully initialized [isStopping=" + getSpiContext().isStopping() + ']'); id = new UUID(0, 0); } else id = locNode.id(); return new NodeIdMessage(id); } /** {@inheritDoc} */ @Override public TcpCommunicationSpi setName(String name) { super.setName(name); return this; } /** {@inheritDoc} */ @Override public String toString() { return S.toString(TcpCommunicationSpi.class, this); } /** Internal exception class for proper timeout handling. */ private static class HandshakeTimeoutException extends IgniteCheckedException { /** */ private static final long serialVersionUID = 0L; /** * @param msg Message. */ HandshakeTimeoutException(String msg) { super(msg); } } /** * This worker takes responsibility to shut the server down when stopping, * No other thread shall stop passed server. */ private class ShmemAcceptWorker extends GridWorker { /** */ private final IpcSharedMemoryServerEndpoint srv; /** * @param srv Server. */ ShmemAcceptWorker(IpcSharedMemoryServerEndpoint srv) { super(igniteInstanceName, "shmem-communication-acceptor", TcpCommunicationSpi.this.log); this.srv = srv; } /** {@inheritDoc} */ @Override protected void body() throws InterruptedException { try { while (!Thread.interrupted()) { ShmemWorker e = new ShmemWorker(srv.accept()); shmemWorkers.add(e); new IgniteThread(e).start(); } } catch (IgniteCheckedException e) { if (!isCancelled()) U.error(log, "Shmem server failed.", e); } finally { srv.close(); } } /** {@inheritDoc} */ @Override public void cancel() { super.cancel(); srv.close(); } } /** * Write message type to output stream. * * @param os Output stream. * @param type Message type. * @throws IOException On error. 
*/
    private static void writeMessageType(OutputStream os, short type) throws IOException {
        // Low byte first, then high byte.
        os.write((byte)(type & 0xFF));
        os.write((byte)((type >> 8) & 0xFF));
    }

    /**
     * Write message type to byte buffer as two bytes, low byte first.
     *
     * @param buf Byte buffer.
     * @param type Message type.
     */
    private static void writeMessageType(ByteBuffer buf, short type) {
        buf.put((byte)(type & 0xFF));
        buf.put((byte)((type >> 8) & 0xFF));
    }

    /**
     * Worker that serves one IPC endpoint by wrapping it into an {@link IpcToNioAdapter} wired to
     * the SPI server listener. The endpoint is closed when the worker finishes or is cancelled.
     */
    private class ShmemWorker extends GridWorker {
        /** Endpoint served by this worker. */
        private final IpcEndpoint endpoint;

        /**
         * @param endpoint Endpoint.
         */
        private ShmemWorker(IpcEndpoint endpoint) {
            super(igniteInstanceName, "shmem-worker", TcpCommunicationSpi.this.log);

            this.endpoint = endpoint;
        }

        /** {@inheritDoc} */
        @Override protected void body() throws InterruptedException {
            try {
                // Message factory that resolves the SPI context's factory lazily, on first use.
                MessageFactory msgFactory = new MessageFactory() {
                    private MessageFactory impl;

                    @Nullable @Override public Message create(short type) {
                        if (impl == null)
                            impl = getSpiContext().messageFactory();

                        assert impl != null;

                        return impl.create(type);
                    }
                };

                // Writer factory: the formatter is resolved lazily; no writer is produced for a session
                // that has no connection key in its meta.
                GridNioMessageWriterFactory writerFactory = new GridNioMessageWriterFactory() {
                    private MessageFormatter formatter;

                    @Override public MessageWriter writer(GridNioSession ses) throws IgniteCheckedException {
                        if (formatter == null)
                            formatter = getSpiContext().messageFormatter();

                        assert formatter != null;

                        ConnectionKey connKey = ses.meta(CONN_IDX_META);

                        return connKey != null ? formatter.writer(connKey.nodeId()) : null;
                    }
                };

                // Reader factory: mirrors the writer factory above.
                GridNioMessageReaderFactory readerFactory = new GridNioMessageReaderFactory() {
                    private MessageFormatter formatter;

                    @Override public MessageReader reader(GridNioSession ses, MessageFactory msgFactory)
                        throws IgniteCheckedException {
                        if (formatter == null)
                            formatter = getSpiContext().messageFormatter();

                        assert formatter != null;

                        ConnectionKey connKey = ses.meta(CONN_IDX_META);

                        return connKey != null ? formatter.reader(connKey.nodeId(), msgFactory) : null;
                    }
                };

                IpcToNioAdapter<Message> adapter = new IpcToNioAdapter<>(
                    metricsLsnr,
                    log,
                    endpoint,
                    srvLsnr,
                    writerFactory,
                    new GridNioCodecFilter(
                        new GridDirectParser(log.getLogger(GridDirectParser.class),msgFactory, readerFactory),
                        log,
                        true),
                    new GridConnectionBytesVerifyFilter(log)
                );

                adapter.serve();
            }
            finally {
                // Unregister this worker and release the endpoint however serving ended.
                shmemWorkers.remove(this);

                endpoint.close();
            }
        }

        /** {@inheritDoc} */
        @Override public void cancel() {
            super.cancel();

            endpoint.close();
        }

        /** {@inheritDoc} */
        @Override protected void cleanup() {
            super.cleanup();

            endpoint.close();
        }

        /** {@inheritDoc} */
        @Override public String toString() {
            return S.toString(ShmemWorker.class, this);
        }
    }

    /**
     * Background thread that processes queued disconnect notifications and, whenever the queue
     * stays empty for {@code idleConnTimeout} milliseconds, performs idle-connection housekeeping.
     */
    private class CommunicationWorker extends IgniteSpiThread {
        /** Queue of disconnected sessions awaiting a reconnect attempt. */
        private final BlockingQueue<DisconnectedSessionInfo> q = new LinkedBlockingQueue<>();

        /**
         * @param igniteInstanceName Ignite instance name.
         */
        private CommunicationWorker(String igniteInstanceName) {
            super(igniteInstanceName, "tcp-comm-worker", log);
        }

        /** {@inheritDoc} */
        @Override protected void body() throws InterruptedException {
            if (log.isDebugEnabled())
                log.debug("Tcp communication worker has been started.");

            while (!isInterrupted()) {
                // A poll that times out (returns null) triggers the idle processing pass.
                DisconnectedSessionInfo disconnectData = q.poll(idleConnTimeout, TimeUnit.MILLISECONDS);

                if (disconnectData != null)
                    processDisconnect(disconnectData);
                else
                    processIdle();
            }
        }

        /**
         * Idle pass: cleans up recovery descriptors, closes clients of nodes that are no longer known,
         * sends pending receive acknowledgements and closes connections idle for at least
         * {@code idleConnTimeout}, then sends acknowledgements on paired incoming sessions.
         */
        private void processIdle() {
            cleanupRecovery();

            for (Map.Entry<UUID, GridCommunicationClient[]> e : clients.entrySet()) {
                UUID nodeId = e.getKey();

                for (GridCommunicationClient client : e.getValue()) {
                    if (client == null)
                        continue;

                    ClusterNode node = getSpiContext().node(nodeId);

                    if (node == null) {
                        if (log.isDebugEnabled())
                            log.debug("Forcing close of non-existent node connection: " + nodeId);

                        client.forceClose();

                        removeNodeClient(nodeId, client);

                        continue;
                    }

                    GridNioRecoveryDescriptor recovery = null;

                    if (!usePairedConnections(node) && client instanceof GridTcpNioCommunicationClient) {
                        recovery = recoveryDescs.get(new ConnectionKey(node.id(), client.connectionIndex(), -1));

                        // Received messages not yet acknowledged: send the acknowledgement now and
                        // skip the idle check for this client in this pass.
                        if (recovery != null && recovery.lastAcknowledged() != recovery.received()) {
                            RecoveryLastReceivedMessage msg = new RecoveryLastReceivedMessage(recovery.received());

                            if (log.isDebugEnabled())
                                log.debug("Send recovery acknowledgement on timeout [rmtNode=" + nodeId +
                                    ", rcvCnt=" + msg.received() + ']');

                            try {
                                nioSrvr.sendSystem(((GridTcpNioCommunicationClient)client).session(), msg);

                                recovery.lastAcknowledged(msg.received());
                            }
                            catch (IgniteCheckedException err) {
                                U.error(log, "Failed to send message: " + err, err);
                            }

                            continue;
                        }
                    }

                    long idleTime = client.getIdleTime();

                    if (idleTime >= idleConnTimeout) {
                        if (recovery == null && usePairedConnections(node))
                            recovery = outRecDescs.get(new ConnectionKey(node.id(), client.connectionIndex(), -1));

                        // Keep the connection while the node is alive and message requests are still pending.
                        if (recovery != null &&
                            recovery.nodeAlive(getSpiContext().node(nodeId)) &&
                            !recovery.messagesRequests().isEmpty()) {
                            if (log.isDebugEnabled())
                                log.debug("Node connection is idle, but there are unacknowledged messages, " +
                                    "will wait: " + nodeId);

                            continue;
                        }

                        if (log.isDebugEnabled())
                            log.debug("Closing idle node connection: " + nodeId);

                        if (client.close() || client.closed())
                            removeNodeClient(nodeId, client);
                    }
                }
            }

            // Sessions whose incoming recovery descriptor belongs to a paired-connection node.
            for (GridNioSession ses : nioSrvr.sessions()) {
                GridNioRecoveryDescriptor recovery = ses.inRecoveryDescriptor();

                if (recovery != null && usePairedConnections(recovery.node())) {
                    assert ses.accepted() : ses;

                    sendAckOnTimeout(recovery, ses);
                }
            }
        }

        /**
         * @param recovery Recovery descriptor.
         * @param ses Session.
*/ private void sendAckOnTimeout(GridNioRecoveryDescriptor recovery, GridNioSession ses) { if (recovery != null && recovery.lastAcknowledged() != recovery.received()) { RecoveryLastReceivedMessage msg = new RecoveryLastReceivedMessage(recovery.received()); if (log.isDebugEnabled()) { log.debug("Send recovery acknowledgement on timeout [rmtNode=" + recovery.node().id() + ", rcvCnt=" + msg.received() + ", lastAcked=" + recovery.lastAcknowledged() + ']'); } try { nioSrvr.sendSystem(ses, msg); recovery.lastAcknowledged(msg.received()); } catch (IgniteCheckedException e) { U.error(log, "Failed to send message: " + e, e); } } } /** * */ private void cleanupRecovery() { cleanupRecovery(recoveryDescs); cleanupRecovery(inRecDescs); cleanupRecovery(outRecDescs); } /** * @param recoveryDescs Recovery descriptors to cleanup. */ private void cleanupRecovery(ConcurrentMap<ConnectionKey, GridNioRecoveryDescriptor> recoveryDescs) { Set<ConnectionKey> left = null; for (Map.Entry<ConnectionKey, GridNioRecoveryDescriptor> e : recoveryDescs.entrySet()) { if (left != null && left.contains(e.getKey())) continue; GridNioRecoveryDescriptor recoveryDesc = e.getValue(); if (!recoveryDesc.nodeAlive(getSpiContext().node(e.getKey().nodeId()))) { if (left == null) left = new HashSet<>(); left.add(e.getKey()); } } if (left != null) { assert !left.isEmpty(); for (ConnectionKey id : left) { GridNioRecoveryDescriptor recoveryDesc = recoveryDescs.get(id); if (recoveryDesc != null && recoveryDesc.onNodeLeft()) recoveryDescs.remove(id, recoveryDesc); } } } /** * @param sesInfo Disconnected session information. 
*/ private void processDisconnect(DisconnectedSessionInfo sesInfo) { GridNioRecoveryDescriptor recoveryDesc = sesInfo.recoveryDesc; ClusterNode node = recoveryDesc.node(); if (!recoveryDesc.nodeAlive(getSpiContext().node(node.id()))) return; try { if (log.isDebugEnabled()) log.debug("Recovery reconnect [rmtNode=" + recoveryDesc.node().id() + ']'); GridCommunicationClient client = reserveClient(node, sesInfo.connIdx); client.release(); } catch (IgniteCheckedException | IgniteException e) { try { if (recoveryDesc.nodeAlive(getSpiContext().node(node.id())) && getSpiContext().pingNode(node.id())) { if (log.isDebugEnabled()) log.debug("Recovery reconnect failed, will retry " + "[rmtNode=" + recoveryDesc.node().id() + ", err=" + e + ']'); addProcessDisconnectRequest(sesInfo); } else { if (log.isDebugEnabled()) log.debug("Recovery reconnect failed, " + "node left [rmtNode=" + recoveryDesc.node().id() + ", err=" + e + ']'); onException("Recovery reconnect failed, node left [rmtNode=" + recoveryDesc.node().id() + "]", e); } } catch (IgniteClientDisconnectedException ignored) { if (log.isDebugEnabled()) log.debug("Failed to ping node, client disconnected."); } } } /** * @param sesInfo Disconnected session information. */ void addProcessDisconnectRequest(DisconnectedSessionInfo sesInfo) { boolean add = q.add(sesInfo); assert add; } } /** * */ private static class ConnectFuture extends GridFutureAdapter<GridCommunicationClient> { /** */ private static final long serialVersionUID = 0L; // No-op. } /** * */ private static class HandshakeTimeoutObject<T> implements IgniteSpiTimeoutObject { /** */ private final IgniteUuid id = IgniteUuid.randomUuid(); /** */ private final T obj; /** */ private final long endTime; /** */ private final AtomicBoolean done = new AtomicBoolean(); /** * @param obj Client. * @param endTime End time. 
*/
        private HandshakeTimeoutObject(T obj, long endTime) {
            assert obj != null;
            assert obj instanceof GridCommunicationClient || obj instanceof SelectableChannel;
            assert endTime > 0;

            this.obj = obj;
            this.endTime = endTime;
        }

        /**
         * Marks the handshake as finished so that a later timeout does nothing.
         *
         * @return {@code True} if object has not yet been timed out.
         */
        boolean cancel() {
            return done.compareAndSet(false, true);
        }

        /** {@inheritDoc} */
        @Override public void onTimeout() {
            // Only the first of cancel() and onTimeout() wins the flag.
            if (done.compareAndSet(false, true)) {
                // Close socket - timeout occurred.
                if (obj instanceof GridCommunicationClient)
                    ((GridCommunicationClient)obj).forceClose();
                else
                    U.closeQuiet((AbstractInterruptibleChannel)obj);
            }
        }

        /** {@inheritDoc} */
        @Override public long endTime() {
            return endTime;
        }

        /** {@inheritDoc} */
        @Override public IgniteUuid id() {
            return id;
        }

        /** {@inheritDoc} */
        @Override public String toString() {
            return S.toString(HandshakeTimeoutObject.class, this);
        }
    }

    /**
     * Stream-based handshake: reads the remote node ID message, verifies it against the expected
     * remote node ID, then writes the local node ID message.
     */
    private class HandshakeClosure extends IgniteInClosure2X<InputStream, OutputStream> {
        /** */
        private static final long serialVersionUID = 0L;

        /** Expected remote node ID. */
        private final UUID rmtNodeId;

        /**
         * @param rmtNodeId Remote node ID.
         */
        private HandshakeClosure(UUID rmtNodeId) {
            this.rmtNodeId = rmtNodeId;
        }

        /** {@inheritDoc} */
        @SuppressWarnings("ThrowFromFinallyBlock")
        @Override public void applyx(InputStream in, OutputStream out) throws IgniteCheckedException {
            try {
                // Handshake.
                byte[] b = new byte[NodeIdMessage.MESSAGE_FULL_SIZE];

                int n = 0;

                // Keep reading until the whole node ID message has arrived.
                while (n < NodeIdMessage.MESSAGE_FULL_SIZE) {
                    int cnt = in.read(b, n, NodeIdMessage.MESSAGE_FULL_SIZE - n);

                    if (cnt < 0)
                        throw new IgniteCheckedException("Failed to get remote node ID (end of stream reached)");

                    n += cnt;
                }

                // The node ID starts after the leading message-type bytes (Message.DIRECT_TYPE_SIZE).
                UUID id = U.bytesToUuid(b, Message.DIRECT_TYPE_SIZE);

                if (!rmtNodeId.equals(id))
                    throw new IgniteCheckedException("Remote node ID is not as expected [expected=" + rmtNodeId +
                        ", rcvd=" + id + ']');
                else if (log.isDebugEnabled())
                    log.debug("Received remote node ID: " + id);
            }
            catch (SocketTimeoutException e) {
                throw new IgniteCheckedException("Failed to perform handshake due to timeout (consider increasing " +
                    "'connectionTimeout' configuration property).", e);
            }
            catch (IOException e) {
                throw new IgniteCheckedException("Failed to perform handshake.", e);
            }

            try {
                ClusterNode localNode = getLocalNode();

                if (localNode == null)
                    throw new IgniteSpiException("Local node has not been started or fully initialized " +
                        "[isStopping=" + getSpiContext().isStopping() + ']');

                UUID id = localNode.id();

                NodeIdMessage msg = new NodeIdMessage(id);

                // Reply layout: Ignite header, message type, raw node ID bytes.
                out.write(U.IGNITE_HEADER);
                writeMessageType(out, NODE_ID_MSG_TYPE);
                out.write(msg.nodeIdBytes);

                out.flush();

                if (log.isDebugEnabled())
                    log.debug("Sent local node ID [locNodeId=" + id + ", rmtNodeId=" + rmtNodeId + ']');
            }
            catch (IOException e) {
                throw new IgniteCheckedException("Failed to perform handshake.", e);
            }
        }
    }

    /**
     * Handshake message.
     */
    @SuppressWarnings("PublicInnerClass")
    public static class HandshakeMessage implements Message {
        /** */
        private static final long serialVersionUID = 0L;

        /** Message body size in bytes. */
        private static final int MESSAGE_SIZE = 32;

        /** Full message size (with message type) in bytes. */
        public static final int MESSAGE_FULL_SIZE = MESSAGE_SIZE + DIRECT_TYPE_SIZE;

        /** Node ID. */
        private UUID nodeId;

        /** Number of received messages. */
        private long rcvCnt;

        /** Connect count. */
        private long connectCnt;

        /**
         * Default constructor required by {@link Message}.
         */
        public HandshakeMessage() {
            // No-op.
        }

        /**
         * @param nodeId Node ID.
         * @param connectCnt Connect count.
         * @param rcvCnt Number of received messages.
*/ public HandshakeMessage(UUID nodeId, long connectCnt, long rcvCnt) { assert nodeId != null; assert rcvCnt >= 0 : rcvCnt; this.nodeId = nodeId; this.connectCnt = connectCnt; this.rcvCnt = rcvCnt; } /** * @return Connection index. */ public int connectionIndex() { return 0; } /** * @return Connect count. */ public long connectCount() { return connectCnt; } /** * @return Number of received messages. */ public long received() { return rcvCnt; } /** * @return Node ID. */ public UUID nodeId() { return nodeId; } /** {@inheritDoc} */ @Override public void onAckReceived() { // No-op. } /** {@inheritDoc} */ @Override public boolean writeTo(ByteBuffer buf, MessageWriter writer) { if (buf.remaining() < MESSAGE_FULL_SIZE) return false; writeMessageType(buf, directType()); byte[] bytes = U.uuidToBytes(nodeId); assert bytes.length == 16 : bytes.length; buf.put(bytes); buf.putLong(rcvCnt); buf.putLong(connectCnt); return true; } /** {@inheritDoc} */ @Override public boolean readFrom(ByteBuffer buf, MessageReader reader) { if (buf.remaining() < MESSAGE_SIZE) return false; byte[] nodeIdBytes = new byte[NodeIdMessage.MESSAGE_SIZE]; buf.get(nodeIdBytes); nodeId = U.bytesToUuid(nodeIdBytes, 0); rcvCnt = buf.getLong(); connectCnt = buf.getLong(); return true; } /** {@inheritDoc} */ @Override public short directType() { return HANDSHAKE_MSG_TYPE; } /** {@inheritDoc} */ @Override public byte fieldsCount() { throw new UnsupportedOperationException(); } /** {@inheritDoc} */ @Override public String toString() { return S.toString(HandshakeMessage.class, this); } } /** * Updated handshake message. */ @SuppressWarnings("PublicInnerClass") public static class HandshakeMessage2 extends HandshakeMessage { /** */ private static final long serialVersionUID = 0L; /** */ private int connIdx; /** * */ public HandshakeMessage2() { // No-op. } /** * @param nodeId Node ID. * @param connectCnt Connect count. * @param rcvCnt Number of received messages. * @param connIdx Connection index. 
*/ HandshakeMessage2(UUID nodeId, long connectCnt, long rcvCnt, int connIdx) { super(nodeId, connectCnt, rcvCnt); this.connIdx = connIdx; } /** {@inheritDoc} */ @Override public short directType() { return -44; } /** {@inheritDoc} */ @Override public int connectionIndex() { return connIdx; } /** {@inheritDoc} */ @Override public boolean writeTo(ByteBuffer buf, MessageWriter writer) { if (!super.writeTo(buf, writer)) return false; if (buf.remaining() < 4) return false; buf.putInt(connIdx); return true; } /** {@inheritDoc} */ @Override public boolean readFrom(ByteBuffer buf, MessageReader reader) { if (!super.readFrom(buf, reader)) return false; if (buf.remaining() < 4) return false; connIdx = buf.getInt(); return true; } /** {@inheritDoc} */ @Override public String toString() { return S.toString(HandshakeMessage2.class, this); } } /** * Recovery acknowledgment message. */ @SuppressWarnings("PublicInnerClass") public static class RecoveryLastReceivedMessage implements Message { /** */ private static final long serialVersionUID = 0L; /** Message body size in bytes. */ private static final int MESSAGE_SIZE = 8; /** Full message size (with message type) in bytes. */ public static final int MESSAGE_FULL_SIZE = MESSAGE_SIZE + DIRECT_TYPE_SIZE; /** */ private long rcvCnt; /** * Default constructor required by {@link Message}. */ public RecoveryLastReceivedMessage() { // No-op. } /** * @param rcvCnt Number of received messages. */ public RecoveryLastReceivedMessage(long rcvCnt) { this.rcvCnt = rcvCnt; } /** * @return Number of received messages. */ public long received() { return rcvCnt; } /** {@inheritDoc} */ @Override public void onAckReceived() { // No-op. 
} /** {@inheritDoc} */ @Override public boolean writeTo(ByteBuffer buf, MessageWriter writer) { if (buf.remaining() < MESSAGE_FULL_SIZE) return false; writeMessageType(buf, directType()); buf.putLong(rcvCnt); return true; } /** {@inheritDoc} */ @Override public boolean readFrom(ByteBuffer buf, MessageReader reader) { if (buf.remaining() < MESSAGE_SIZE) return false; rcvCnt = buf.getLong(); return true; } /** {@inheritDoc} */ @Override public short directType() { return RECOVERY_LAST_ID_MSG_TYPE; } /** {@inheritDoc} */ @Override public byte fieldsCount() { return 0; } /** {@inheritDoc} */ @Override public String toString() { return S.toString(RecoveryLastReceivedMessage.class, this); } } /** * Node ID message. */ @SuppressWarnings("PublicInnerClass") public static class NodeIdMessage implements Message { /** */ private static final long serialVersionUID = 0L; /** Message body size (with message type) in bytes. */ private static final int MESSAGE_SIZE = 16; /** Full message size (with message type) in bytes. */ public static final int MESSAGE_FULL_SIZE = MESSAGE_SIZE + DIRECT_TYPE_SIZE; /** */ private byte[] nodeIdBytes; /** */ private byte[] nodeIdBytesWithType; /** */ public NodeIdMessage() { // No-op. } /** * @param nodeId Node ID. */ private NodeIdMessage(UUID nodeId) { assert nodeId != null; nodeIdBytes = U.uuidToBytes(nodeId); assert nodeIdBytes.length == MESSAGE_SIZE : "Node ID size must be " + MESSAGE_SIZE; nodeIdBytesWithType = new byte[MESSAGE_FULL_SIZE]; nodeIdBytesWithType[0] = (byte)(NODE_ID_MSG_TYPE & 0xFF); nodeIdBytesWithType[1] = (byte)((NODE_ID_MSG_TYPE >> 8) & 0xFF); System.arraycopy(nodeIdBytes, 0, nodeIdBytesWithType, 2, nodeIdBytes.length); } /** {@inheritDoc} */ @Override public void onAckReceived() { // No-op. 
} /** {@inheritDoc} */ @Override public boolean writeTo(ByteBuffer buf, MessageWriter writer) { assert nodeIdBytes.length == MESSAGE_SIZE; if (buf.remaining() < MESSAGE_FULL_SIZE) return false; writeMessageType(buf, directType()); buf.put(nodeIdBytes); return true; } /** {@inheritDoc} */ @Override public boolean readFrom(ByteBuffer buf, MessageReader reader) { if (buf.remaining() < MESSAGE_SIZE) return false; nodeIdBytes = new byte[MESSAGE_SIZE]; buf.get(nodeIdBytes); return true; } /** {@inheritDoc} */ @Override public short directType() { return NODE_ID_MSG_TYPE; } /** {@inheritDoc} */ @Override public byte fieldsCount() { return 0; } /** {@inheritDoc} */ @Override public String toString() { return S.toString(NodeIdMessage.class, this); } } /** * */ private class ConnectGateway { /** */ private GridSpinReadWriteLock lock = new GridSpinReadWriteLock(); /** */ private IgniteException err; /** * */ void enter() { lock.readLock(); if (err != null) { lock.readUnlock(); throw err; } } /** * @return {@code True} if entered gateway. */ boolean tryEnter() { lock.readLock(); boolean res = err == null; if (!res) lock.readUnlock(); return res; } /** * */ void leave() { lock.readUnlock(); } /** * @param reconnectFut Reconnect future. */ void disconnected(IgniteFuture<?> reconnectFut) { lock.writeLock(); err = new IgniteClientDisconnectedException(reconnectFut, "Failed to connect, client node disconnected."); lock.writeUnlock(); } /** * */ void reconnected() { lock.writeLock(); try { if (err instanceof IgniteClientDisconnectedException) err = null; } finally { lock.writeUnlock(); } } /** * */ void stopped() { lock.readLock(); err = new IgniteException("Failed to connect, node stopped."); lock.readUnlock(); } } /** * */ private static class DisconnectedSessionInfo { /** */ private final GridNioRecoveryDescriptor recoveryDesc; /** */ private int connIdx; /** * @param recoveryDesc Recovery descriptor. * @param connIdx Connection index. 
*/ DisconnectedSessionInfo(@Nullable GridNioRecoveryDescriptor recoveryDesc, int connIdx) { this.recoveryDesc = recoveryDesc; this.connIdx = connIdx; } /** {@inheritDoc} */ @Override public String toString() { return S.toString(DisconnectedSessionInfo.class, this); } } /** * */ private static class ConnectionKey { /** */ private final UUID nodeId; /** */ private final int idx; /** */ private final long connCnt; /** * @param nodeId Node ID. * @param idx Connection index. * @param connCnt Connection counter (set only for incoming connections). */ ConnectionKey(UUID nodeId, int idx, long connCnt) { this.nodeId = nodeId; this.idx = idx; this.connCnt = connCnt; } /** * @return Connection counter. */ long connectCount() { return connCnt; } /** * @return Node ID. */ UUID nodeId() { return nodeId; } /** * @return Connection index. */ int connectionIndex() { return idx; } /** {@inheritDoc} */ @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; ConnectionKey key = (ConnectionKey) o; return idx == key.idx && nodeId.equals(key.nodeId); } /** {@inheritDoc} */ @Override public int hashCode() { int res = nodeId.hashCode(); res = 31 * res + idx; return res; } /** {@inheritDoc} */ @Override public String toString() { return S.toString(ConnectionKey.class, this); } } /** * */ interface ConnectionPolicy { /** * @return Thread connection index. */ int connectionIndex(); } /** * MBean implementation for TcpCommunicationSpi. 
*/
    private class TcpCommunicationSpiMBeanImpl extends IgniteSpiMBeanAdapter implements TcpCommunicationSpiMBean {
        /**
         * @param spiAdapter SPI adapter passed to the superclass constructor.
         */
        TcpCommunicationSpiMBeanImpl(IgniteSpiAdapter spiAdapter) {
            super(spiAdapter);
        }

        // Every method below delegates to the same-named method of the enclosing SPI. The explicit
        // 'TcpCommunicationSpi.this.' qualifier is required: an unqualified call would recurse.

        /** {@inheritDoc} */
        @Override public String getLocalAddress() {
            return TcpCommunicationSpi.this.getLocalAddress();
        }

        /** {@inheritDoc} */
        @Override public int getLocalPort() {
            return TcpCommunicationSpi.this.getLocalPort();
        }

        /** {@inheritDoc} */
        @Override public int getLocalPortRange() {
            return TcpCommunicationSpi.this.getLocalPortRange();
        }

        /** {@inheritDoc} */
        @Override public boolean isUsePairedConnections() {
            return TcpCommunicationSpi.this.isUsePairedConnections();
        }

        /** {@inheritDoc} */
        @Override public int getConnectionsPerNode() {
            return TcpCommunicationSpi.this.getConnectionsPerNode();
        }

        /** {@inheritDoc} */
        @Override public int getSharedMemoryPort() {
            return TcpCommunicationSpi.this.getSharedMemoryPort();
        }

        /** {@inheritDoc} */
        @Override public long getIdleConnectionTimeout() {
            return TcpCommunicationSpi.this.getIdleConnectionTimeout();
        }

        /** {@inheritDoc} */
        @Override public long getSocketWriteTimeout() {
            return TcpCommunicationSpi.this.getSocketWriteTimeout();
        }

        /** {@inheritDoc} */
        @Override public int getAckSendThreshold() {
            return TcpCommunicationSpi.this.getAckSendThreshold();
        }

        /** {@inheritDoc} */
        @Override public int getUnacknowledgedMessagesBufferSize() {
            return TcpCommunicationSpi.this.getUnacknowledgedMessagesBufferSize();
        }

        /** {@inheritDoc} */
        @Override public long getConnectTimeout() {
            return TcpCommunicationSpi.this.getConnectTimeout();
        }

        /** {@inheritDoc} */
        @Override public long getMaxConnectTimeout() {
            return TcpCommunicationSpi.this.getMaxConnectTimeout();
        }

        /** {@inheritDoc} */
        @Override public int getReconnectCount() {
            return TcpCommunicationSpi.this.getReconnectCount();
        }

        /** {@inheritDoc} */
        @Override public boolean isDirectBuffer() {
            return TcpCommunicationSpi.this.isDirectBuffer();
        }

        /** {@inheritDoc} */
        @Override public boolean isDirectSendBuffer() {
            return TcpCommunicationSpi.this.isDirectSendBuffer();
        }

        /** {@inheritDoc} */
        @Override public int getSelectorsCount() {
            return TcpCommunicationSpi.this.getSelectorsCount();
        }

        /** {@inheritDoc} */
        @Override public long getSelectorSpins() {
            return TcpCommunicationSpi.this.getSelectorSpins();
        }

        /** {@inheritDoc} */
        @Override public boolean isTcpNoDelay() {
            return TcpCommunicationSpi.this.isTcpNoDelay();
        }

        /** {@inheritDoc} */
        @Override public int getSocketReceiveBuffer() {
            return TcpCommunicationSpi.this.getSocketReceiveBuffer();
        }

        /** {@inheritDoc} */
        @Override public int getSocketSendBuffer() {
            return TcpCommunicationSpi.this.getSocketSendBuffer();
        }

        /** {@inheritDoc} */
        @Override public int getMessageQueueLimit() {
            return TcpCommunicationSpi.this.getMessageQueueLimit();
        }

        /** {@inheritDoc} */
        @Override public int getSlowClientQueueLimit() {
            return TcpCommunicationSpi.this.getSlowClientQueueLimit();
        }

        /** {@inheritDoc} */
        @Override public void dumpStats() {
            TcpCommunicationSpi.this.dumpStats();
        }

        /** {@inheritDoc} */
        @Override public int getSentMessagesCount() {
            return TcpCommunicationSpi.this.getSentMessagesCount();
        }

        /** {@inheritDoc} */
        @Override public long getSentBytesCount() {
            return TcpCommunicationSpi.this.getSentBytesCount();
        }

        /** {@inheritDoc} */
        @Override public int getReceivedMessagesCount() {
            return TcpCommunicationSpi.this.getReceivedMessagesCount();
        }

        /** {@inheritDoc} */
        @Override public long getReceivedBytesCount() {
            return TcpCommunicationSpi.this.getReceivedBytesCount();
        }

        /** {@inheritDoc} */
        @Override public int getOutboundMessagesQueueSize() {
            return TcpCommunicationSpi.this.getOutboundMessagesQueueSize();
        }
    }
}