/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ignite.spi.communication.tcp; import java.net.BindException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; import org.apache.ignite.IgniteCheckedException; import org.apache.ignite.cluster.ClusterNode; import org.apache.ignite.configuration.IgniteConfiguration; import org.apache.ignite.internal.IgniteInternalFuture; import org.apache.ignite.internal.managers.communication.GridIoMessageFactory; import org.apache.ignite.internal.util.lang.GridAbsPredicate; import org.apache.ignite.internal.util.nio.GridNioServer; import org.apache.ignite.internal.util.nio.GridNioSession; import org.apache.ignite.internal.util.typedef.CO; import org.apache.ignite.internal.util.typedef.internal.U; import org.apache.ignite.lang.IgniteRunnable; import org.apache.ignite.plugin.extensions.communication.Message; import org.apache.ignite.spi.IgniteSpiAdapter; import org.apache.ignite.spi.IgniteSpiException; import org.apache.ignite.spi.communication.CommunicationListener; import org.apache.ignite.spi.communication.CommunicationSpi; import org.apache.ignite.spi.communication.GridTestMessage; import org.apache.ignite.testframework.GridSpiTestContext; import org.apache.ignite.testframework.GridTestNode; import org.apache.ignite.testframework.GridTestUtils; import org.apache.ignite.testframework.junits.IgniteMock; import org.apache.ignite.testframework.junits.IgniteTestResources; import org.apache.ignite.testframework.junits.spi.GridSpiAbstractTest; import org.apache.ignite.testframework.junits.spi.GridSpiTest; import org.eclipse.jetty.util.ConcurrentHashSet; /** * */ @SuppressWarnings("unchecked") @GridSpiTest(spi = TcpCommunicationSpi.class, group = "Communication SPI") public class GridTcpCommunicationSpiRecoverySelfTest<T extends CommunicationSpi> extends GridSpiAbstractTest<T> { /** */ private static final Collection<IgniteTestResources> spiRsrcs = new ArrayList<>(); /** */ protected static final List<TcpCommunicationSpi> spis = new ArrayList<>(); /** */ protected static final List<ClusterNode> nodes = new ArrayList<>(); /** */ private static final int SPI_CNT = 2; /** */ private static final int ITERS = 10; /** */ protected static int port = 30_000; /** Use ssl. */ protected boolean useSsl; /** * */ static { GridIoMessageFactory.registerCustom(GridTestMessage.DIRECT_TYPE, new CO<Message>() { @Override public Message apply() { return new GridTestMessage(); } }); } /** * Disable SPI auto-start. */ public GridTcpCommunicationSpiRecoverySelfTest() { super(false); } /** */ @SuppressWarnings({"deprecation"}) private class TestListener implements CommunicationListener<Message> { /** */ private boolean block; /** */ private CountDownLatch blockLatch; /** */ private ConcurrentHashSet<Long> msgIds = new ConcurrentHashSet<>(); /** */ private AtomicInteger rcvCnt = new AtomicInteger(); /** {@inheritDoc} */ @Override public void onMessage(UUID nodeId, Message msg, IgniteRunnable msgC) { // info("Test listener received message: " + msg); assertTrue("Unexpected message: " + msg, msg instanceof GridTestMessage); GridTestMessage msg0 = (GridTestMessage)msg; assertTrue("Duplicated message received: " + msg0, msgIds.add(msg0.getMsgId())); rcvCnt.incrementAndGet(); msgC.run(); try { synchronized (this) { while (block) { info("Test listener blocks."); assert blockLatch != null; blockLatch.countDown(); wait(); if (block) continue; info("Test listener throws exception."); throw new RuntimeException("Test exception."); } } } catch (InterruptedException e) { fail("Unexpected error: " + e); } } /** * */ void block() { synchronized (this) { block = true; blockLatch = new CountDownLatch(1); } } /** * */ void unblock() { synchronized (this) { block = false; notifyAll(); } } /** {@inheritDoc} */ @Override public void onDisconnected(UUID nodeId) { // No-op. } } /** * Time to wait for socket write timeout. * * @return Timeout. */ protected long awaitForSocketWriteTimeout() { return 8000; } /** * @throws Exception If failed. */ public void testBlockListener() throws Exception { // Test listener throws exception and stops selector thread, so must restart SPI. for (int i = 0; i < ITERS; i++) { log.info("Creating SPIs: " + i); createSpis(); try { checkBlockListener(); } finally { stopSpis(); } } } /** * @throws Exception If failed. */ @SuppressWarnings("BusyWait") private void checkBlockListener() throws Exception { TcpCommunicationSpi spi0 = spis.get(0); TcpCommunicationSpi spi1 = spis.get(1); final TestListener lsnr0 = (TestListener)spi0.getListener(); final TestListener lsnr1 = (TestListener)spi1.getListener(); ClusterNode node0 = nodes.get(0); ClusterNode node1 = nodes.get(1); lsnr1.block(); int msgId = 0; for (int j = 0; j < 10; j++) { spi0.sendMessage(node1, new GridTestMessage(node0.id(), ++msgId, 0)); spi1.sendMessage(node0, new GridTestMessage(node1.id(), ++msgId, 0)); } lsnr1.blockLatch.await(); lsnr1.unblock(); Thread.sleep(500); int errCnt = 0; int msgs = 0; while (true) { try { int id = msgId + 1; spi0.sendMessage(node1, new GridTestMessage(node0.id(), id, 0)); msgId++; msgs++; if (msgs == 10) break; } catch (IgniteSpiException e) { errCnt++; if (errCnt > 10) fail("Failed to send message: " + e); } } for (int j = 0; j < 10; j++) spi1.sendMessage(node0, new GridTestMessage(node1.id(), ++msgId, 0)); final int expMsgs = 20; GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { return lsnr0.rcvCnt.get() >= expMsgs && lsnr1.rcvCnt.get() >= expMsgs; } }, awaitForSocketWriteTimeout()); assertEquals(expMsgs, lsnr0.rcvCnt.get()); assertEquals(expMsgs, lsnr1.rcvCnt.get()); } /** * @throws Exception If failed. */ public void testBlockRead1() throws Exception { createSpis(); try { final TcpCommunicationSpi spi0 = spis.get(0); final TcpCommunicationSpi spi1 = spis.get(1); final TestListener lsnr1 = (TestListener)spi1.getListener(); final ClusterNode node0 = nodes.get(0); final ClusterNode node1 = nodes.get(1); final AtomicInteger msgId = new AtomicInteger(); // Send message to establish connection. spi0.sendMessage(node1, new GridTestMessage(node0.id(), msgId.incrementAndGet(), 0)); final AtomicInteger sentCnt = new AtomicInteger(1); int errCnt = 0; for (int i = 0; i < ITERS; i++) { log.info("Iteration: " + i); try { final GridNioSession ses0 = communicationSession(spi0, false); final GridNioSession ses1 = communicationSession(spi1, true); ses1.pauseReads().get(); IgniteInternalFuture<?> sndFut = GridTestUtils.runAsync(new Callable<Void>() { @Override public Void call() throws Exception { for (int i = 0; i < 6000; i++) { spi0.sendMessage(node1, new GridTestMessage(node0.id(), msgId.incrementAndGet(), 0)); sentCnt.incrementAndGet(); } return null; } }); // Wait when session is closed because of write timeout. GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { return ses0.closeTime() != 0; } }, awaitForSocketWriteTimeout()); assertTrue("Failed to wait for session close", ses0.closeTime() != 0); try { ses1.resumeReads().get(); } catch (IgniteCheckedException ignore) { // Can fail is ses1 was closed. } for (int j = 0; j < 100; j++) { spi0.sendMessage(node1, new GridTestMessage(node0.id(), msgId.incrementAndGet(), 0)); sentCnt.incrementAndGet(); } sndFut.get(); final int expMsgs = sentCnt.get(); GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { return lsnr1.rcvCnt.get() >= expMsgs; } }, 60_000); assertEquals(expMsgs, lsnr1.rcvCnt.get()); } catch (IgniteCheckedException e) { if (e.hasCause(BindException.class)) { errCnt++; if (errCnt > 3) { log.warning("Got exception > 3 times, test fails."); throw e; } if (i < ITERS - 1) { info("Got exception caused by BindException, will retry after delay: " + e); U.sleep(10_000); } else info("Got exception caused by BindException, will ignore: " + e); } else { log.warning("Unexpected exception: " + e, e); throw e; } } } } finally { stopSpis(); } } /** * @throws Exception If failed. */ public void testBlockRead2() throws Exception { createSpis(); try { final TcpCommunicationSpi spi0 = spis.get(0); final TcpCommunicationSpi spi1 = spis.get(1); final TestListener lsnr0 = (TestListener)spi0.getListener(); final TestListener lsnr1 = (TestListener)spi1.getListener(); final ClusterNode node0 = nodes.get(0); final ClusterNode node1 = nodes.get(1); final AtomicInteger msgId = new AtomicInteger(); final AtomicInteger expCnt0 = new AtomicInteger(); final AtomicInteger expCnt1 = new AtomicInteger(); // Send message to establish connection. spi0.sendMessage(node1, new GridTestMessage(node0.id(), msgId.incrementAndGet(), 0)); expCnt1.incrementAndGet(); int errCnt = 0; for (int i = 0; i < ITERS; i++) { log.info("Iteration: " + i); try { final GridNioSession ses0 = communicationSession(spi0, false); final GridNioSession ses1 = communicationSession(spi1, true); ses1.pauseReads().get(); IgniteInternalFuture<?> sndFut = GridTestUtils.runAsync(new Callable<Void>() { @Override public Void call() throws Exception { for (int i = 0; i < 6000; i++) { spi0.sendMessage(node1, new GridTestMessage(node0.id(), msgId.incrementAndGet(), 0)); expCnt1.incrementAndGet(); } return null; } }); // Wait when session is closed because of write timeout. GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { return ses0.closeTime() != 0; } }, awaitForSocketWriteTimeout()); assertTrue("Failed to wait for session close", ses0.closeTime() != 0); try { ses1.resumeReads().get(); } catch (IgniteCheckedException ignore) { // Can fail is ses1 was closed. } // Wait when session is closed, then try to open new connection from node1. GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { return ses1.closeTime() != 0; } }, awaitForSocketWriteTimeout()); assertTrue("Failed to wait for session close", ses1.closeTime() != 0); for (int j = 0; j < 100; j++) { spi1.sendMessage(node0, new GridTestMessage(node1.id(), msgId.incrementAndGet(), 0)); expCnt0.incrementAndGet(); } sndFut.get(); final int expMsgs0 = expCnt0.get(); final int expMsgs1 = expCnt1.get(); GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { return lsnr0.rcvCnt.get() >= expMsgs0 && lsnr1.rcvCnt.get() >= expMsgs1; } }, 60_000); assertEquals(expMsgs0, lsnr0.rcvCnt.get()); assertEquals(expMsgs1, lsnr1.rcvCnt.get()); } catch (IgniteCheckedException e) { if (e.hasCause(BindException.class)) { errCnt++; if (errCnt > 3) { log.warning("Got exception > 3 times, test fails."); throw e; } if (i < ITERS - 1) { info("Got exception caused by BindException, will retry after delay: " + e); U.sleep(10_000); } else info("Got exception caused by BindException, will ignore: " + e); } else { log.warning("Unexpected exception: " + e, e); throw e; } } } } finally { stopSpis(); } } /** * @throws Exception If failed. */ public void testBlockRead3() throws Exception { createSpis(); try { final TcpCommunicationSpi spi0 = spis.get(0); final TcpCommunicationSpi spi1 = spis.get(1); final TestListener lsnr1 = (TestListener)spi1.getListener(); final ClusterNode node0 = nodes.get(0); final ClusterNode node1 = nodes.get(1); final AtomicInteger msgId = new AtomicInteger(); // Send message to establish connection. spi0.sendMessage(node1, new GridTestMessage(node0.id(), msgId.incrementAndGet(), 0)); final AtomicInteger sentCnt = new AtomicInteger(1); int errCnt = 0; for (int i = 0; i < ITERS; i++) { log.info("Iteration: " + i); try { final GridNioSession ses0 = communicationSession(spi0, false); final GridNioSession ses1 = communicationSession(spi1, true); ses1.pauseReads().get(); IgniteInternalFuture<?> sndFut = GridTestUtils.runAsync(new Callable<Void>() { @Override public Void call() throws Exception { for (int i = 0; i < 6000; i++) { spi0.sendMessage(node1, new GridTestMessage(node0.id(), msgId.incrementAndGet(), 0)); sentCnt.incrementAndGet(); } return null; } }); // Wait when session is closed because of write timeout. GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { return ses0.closeTime() != 0; } }, awaitForSocketWriteTimeout()); assertTrue("Failed to wait for session close", ses0.closeTime() != 0); try { ses1.resumeReads().get(); } catch (IgniteCheckedException ignore) { // Can fail is ses1 was closed. } sndFut.get(); final int expMsgs = sentCnt.get(); GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { return lsnr1.rcvCnt.get() >= expMsgs; } }, 60_000); assertEquals(expMsgs, lsnr1.rcvCnt.get()); } catch (IgniteCheckedException e) { if (e.hasCause(BindException.class)) { errCnt++; if (errCnt > 3) { log.warning("Got exception > 3 times, test fails."); throw e; } if (i < ITERS - 1) { info("Got exception caused by BindException, will retry after delay: " + e); U.sleep(10_000); } else info("Got exception caused by BindException, will ignore: " + e); } else { log.warning("Unexpected exception: " + e, e); throw e; } } } } finally { stopSpis(); } } /** * @param spi SPI. * @param in {@code True} if need find inbound session. * @return Session. * @throws Exception If failed. */ @SuppressWarnings("unchecked") private GridNioSession communicationSession(TcpCommunicationSpi spi, boolean in) throws Exception { final GridNioServer srv = U.field(spi, "nioSrvr"); GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { Collection<? extends GridNioSession> sessions = GridTestUtils.getFieldValue(srv, "sessions"); return !sessions.isEmpty(); } }, awaitForSocketWriteTimeout()); Collection<? extends GridNioSession> sessions = GridTestUtils.getFieldValue(srv, "sessions"); for (GridNioSession ses : sessions) { if (in == ses.accepted()) return ses; } fail("Failed to find session"); return null; } /** * @return {@code True}. */ protected boolean usePairedConnections() { return true; } /** * @param idx SPI index. * @return SPI instance. */ protected TcpCommunicationSpi getSpi(int idx) { TcpCommunicationSpi spi = new TcpCommunicationSpi(); spi.setSharedMemoryPort(-1); spi.setLocalPort(port++); spi.setIdleConnectionTimeout(10_000); spi.setConnectTimeout(10_000); spi.setAckSendThreshold(5); spi.setSocketWriteTimeout(1000); spi.setSocketSendBuffer(512); spi.setSocketReceiveBuffer(512); spi.setConnectionsPerNode(1); spi.setUsePairedConnections(usePairedConnections()); return spi; } /** * @throws Exception If failed. */ private void startSpis() throws Exception { spis.clear(); nodes.clear(); spiRsrcs.clear(); Map<ClusterNode, GridSpiTestContext> ctxs = new HashMap<>(); for (int i = 0; i < SPI_CNT; i++) { TcpCommunicationSpi spi = getSpi(i); GridTestUtils.setFieldValue(spi, IgniteSpiAdapter.class, "igniteInstanceName", "grid-" + i); IgniteTestResources rsrcs = new IgniteTestResources(); GridTestNode node = new GridTestNode(rsrcs.getNodeId()); node.order(i); GridSpiTestContext ctx = initSpiContext(); ctx.setLocalNode(node); spiRsrcs.add(rsrcs); rsrcs.inject(spi); if (useSsl) { IgniteMock ignite = GridTestUtils.getFieldValue(spi, IgniteSpiAdapter.class, "ignite"); IgniteConfiguration cfg = ignite.configuration() .setSslContextFactory(GridTestUtils.sslFactory()); ignite.setStaticCfg(cfg); } spi.setListener(new TestListener()); node.setAttributes(spi.getNodeAttributes()); nodes.add(node); spi.spiStart(getTestIgniteInstanceName() + (i + 1)); spis.add(spi); spi.onContextInitialized(ctx); ctxs.put(node, ctx); } // For each context set remote nodes. for (Map.Entry<ClusterNode, GridSpiTestContext> e : ctxs.entrySet()) { for (ClusterNode n : nodes) { if (!n.equals(e.getKey())) e.getValue().remoteNodes().add(n); } } } /** * @throws Exception If failed. */ private void createSpis() throws Exception { for (int i = 0; i < 3; i++) { try { startSpis(); break; } catch (IgniteCheckedException e) { if (e.hasCause(BindException.class)) { if (i < 2) { info("Failed to start SPIs because of BindException, will retry after delay."); stopSpis(); U.sleep(10_000); } else throw e; } else throw e; } } } /** * @throws Exception If failed. */ private void stopSpis() throws Exception { for (CommunicationSpi<Message> spi : spis) { spi.onContextDestroyed(); spi.setListener(null); spi.spiStop(); } for (IgniteTestResources rsrcs : spiRsrcs) rsrcs.stopThreads(); spis.clear(); nodes.clear(); spiRsrcs.clear(); } }