/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.cluster.integration;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import org.apache.nifi.cluster.coordination.node.ClusterRoles;
import org.apache.nifi.cluster.coordination.node.DisconnectionCode;
import org.apache.nifi.cluster.coordination.node.NodeConnectionState;
import org.apache.nifi.cluster.coordination.node.NodeConnectionStatus;
import org.apache.nifi.cluster.protocol.NodeIdentifier;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Integration tests that exercise node connection, disconnection, reconnection and
 * role election using the test-scoped {@code Cluster} and {@code Node} helpers.
 *
 * <p>A fresh {@code Cluster} is created and started before every test and stopped
 * afterwards, so each test builds the set of nodes it needs via
 * {@code cluster.createNode()}.</p>
 */
public class ClusterConnectionIT {

    private Cluster cluster;

    /**
     * Points the {@code nifi.properties.file.path} system property at the test
     * configuration before any test in this class runs.
     */
    @BeforeClass
    public static void setup() {
        System.setProperty("nifi.properties.file.path", "src/test/resources/conf/nifi.properties");
    }

    /**
     * Creates and starts a new, empty cluster for the test about to run.
     *
     * @throws IOException declared for the cluster start-up performed here
     */
    @Before
    public void createCluster() throws IOException {
        cluster = new Cluster();
        cluster.start();
    }

    /**
     * Stops the cluster created by {@link #createCluster()}, if one was created.
     */
    @After
    public void destroyCluster() {
        // createCluster() may have failed before assigning the field
        if (cluster != null) {
            cluster.stop();
        }
    }

    /**
     * A single node must connect and be elected to both the Cluster Coordinator
     * and the Primary Node roles.
     */
    @Test(timeout = 20000)
    public void testSingleNode() throws InterruptedException {
        final Node firstNode = cluster.createNode();
        firstNode.waitUntilConnected(10, TimeUnit.SECONDS);

        firstNode.waitUntilElectedForRole(ClusterRoles.CLUSTER_COORDINATOR, 10, TimeUnit.SECONDS);
        firstNode.waitUntilElectedForRole(ClusterRoles.PRIMARY_NODE, 10, TimeUnit.SECONDS);
    }

    /**
     * Three nodes must all connect, and both a Cluster Coordinator and a Primary
     * Node must be elected.
     */
    @Test(timeout = 60000)
    public void testThreeNodeCluster() throws InterruptedException {
        cluster.createNode();
        cluster.createNode();
        cluster.createNode();
        cluster.waitUntilAllNodesConnected(10, TimeUnit.SECONDS);

        final Node clusterCoordinator = cluster.waitForClusterCoordinator(10, TimeUnit.SECONDS);
        final Node primaryNode = cluster.waitForPrimaryNode(10, TimeUnit.SECONDS);

        System.out.println("\n\n");
        System.out.println("Cluster Coordinator = " + clusterCoordinator);
        System.out.println("Primary Node = " + primaryNode);
        System.out.println("\n\n");
    }

    /**
     * When the current Cluster Coordinator is stopped, the remaining node of a
     * two-node cluster must be elected Cluster Coordinator.
     */
    @Test(timeout = 60000)
    public void testNewCoordinatorElected() throws IOException {
        final Node firstNode = cluster.createNode();
        final Node secondNode = cluster.createNode();
        cluster.waitUntilAllNodesConnected(10, TimeUnit.SECONDS);

        final Node clusterCoordinator = cluster.waitForClusterCoordinator(10, TimeUnit.SECONDS);
        clusterCoordinator.stop();

        // whichever node was not the coordinator is the only candidate left
        final Node otherNode = firstNode == clusterCoordinator ? secondNode : firstNode;
        otherNode.waitUntilElectedForRole(ClusterRoles.CLUSTER_COORDINATOR, 10, TimeUnit.SECONDS);
    }

    /**
     * Stops and restarts the second node of a three-node cluster and verifies that,
     * after it reconnects, every node sees all three nodes as connected and that
     * both cluster roles are elected.
     */
    @Test(timeout = 60000)
    public void testReconnectGetsCorrectClusterTopology() throws IOException {
        final Node firstNode = cluster.createNode();
        final Node secondNode = cluster.createNode();
        final Node thirdNode = cluster.createNode();
        cluster.waitUntilAllNodesConnected(10, TimeUnit.SECONDS);

        // shutdown node
        secondNode.stop();
        System.out.println("\n\nNode 2 Shut Down\n\n");

        // wait for node 1 and 3 to recognize that node 2 is gone
        Stream.of(firstNode, thirdNode).forEach(node -> {
            node.assertNodeDisconnects(secondNode.getIdentifier(), 10, TimeUnit.SECONDS);
        });

        // restart node
        secondNode.start();
        System.out.println("\n\nNode 2 Restarted\n\n");

        secondNode.waitUntilConnected(20, TimeUnit.SECONDS);
        System.out.println("\n\nNode 2 Reconnected\n\n");

        // wait for all 3 nodes to agree that node 2 is connected
        waitUntilAllSeeNodeConnected(secondNode, firstNode, secondNode, thirdNode);

        // Ensure that all 3 nodes see a cluster of 3 connected nodes.
        Stream.of(firstNode, secondNode, thirdNode).forEach(node -> {
            node.assertNodeIsConnected(firstNode.getIdentifier());
            node.assertNodeIsConnected(secondNode.getIdentifier());
            node.assertNodeIsConnected(thirdNode.getIdentifier());
        });

        // Ensure that we get both a cluster coordinator and a primary node elected
        cluster.waitForClusterCoordinator(10, TimeUnit.SECONDS);
        cluster.waitForPrimaryNode(10, TimeUnit.SECONDS);
    }

    /**
     * Stops all three nodes, restarts them in a different order, and verifies that
     * they all reconnect, see each other as connected, and elect both cluster roles.
     */
    @Test(timeout = 60000)
    public void testRestartAllNodes() throws IOException, InterruptedException {
        final Node firstNode = cluster.createNode();
        final Node secondNode = cluster.createNode();
        final Node thirdNode = cluster.createNode();

        firstNode.waitUntilConnected(10, TimeUnit.SECONDS);
        System.out.println("**** Node 1 Connected ****");
        secondNode.waitUntilConnected(10, TimeUnit.SECONDS);
        System.out.println("**** Node 2 Connected ****");
        thirdNode.waitUntilConnected(10, TimeUnit.SECONDS);
        System.out.println("**** Node 3 Connected ****");

        // shutdown all nodes
        firstNode.stop();
        secondNode.stop();
        thirdNode.stop();

        System.out.println("\n\nRestarting all nodes\n\n");
        // start order deliberately differs from creation order
        thirdNode.start();
        firstNode.start();
        secondNode.start();

        firstNode.waitUntilConnected(20, TimeUnit.SECONDS);
        System.out.println("\n\n\n**** Node 1 Re-Connected ****\n\n\n");
        secondNode.waitUntilConnected(10, TimeUnit.SECONDS);
        System.out.println("**** Node 2 Re-Connected ****");
        thirdNode.waitUntilConnected(10, TimeUnit.SECONDS);
        System.out.println("**** Node 3 Re-Connected ****");

        // wait for all 3 nodes to agree that node 2 is connected
        waitUntilAllSeeNodeConnected(secondNode, firstNode, secondNode, thirdNode);

        // Ensure that all 3 nodes see a cluster of 3 connected nodes.
        Stream.of(firstNode, secondNode, thirdNode).forEach(node -> {
            node.assertNodeConnects(firstNode.getIdentifier(), 10, TimeUnit.SECONDS);
            node.assertNodeConnects(secondNode.getIdentifier(), 10, TimeUnit.SECONDS);
            node.assertNodeConnects(thirdNode.getIdentifier(), 10, TimeUnit.SECONDS);
        });

        // Ensure that we get both a cluster coordinator and a primary node elected
        cluster.waitForClusterCoordinator(10, TimeUnit.SECONDS);
        cluster.waitForPrimaryNode(10, TimeUnit.SECONDS);
    }

    /**
     * A node that stops heartbeating must be seen as disconnected by the other
     * node, and seen as connected again once heartbeating resumes.
     */
    @Test(timeout = 30000)
    public void testHeartbeatsMonitored() throws IOException {
        final Node firstNode = cluster.createNode();
        final Node secondNode = cluster.createNode();
        cluster.waitUntilAllNodesConnected(10, TimeUnit.SECONDS);

        final Node nodeToSuspend = firstNode;
        final Node otherNode = secondNode;

        nodeToSuspend.suspendHeartbeating();

        // Heartbeat interval in nifi.properties is set to 1 sec. This means that the node should be kicked out
        // due to lack of heartbeat after 8 times this amount of time, or 8 seconds.
        otherNode.assertNodeDisconnects(nodeToSuspend.getIdentifier(), 12, TimeUnit.SECONDS);

        nodeToSuspend.resumeHeartbeating();
        otherNode.assertNodeConnects(nodeToSuspend.getIdentifier(), 10, TimeUnit.SECONDS);
    }

    /**
     * Replaces the coordinator's node statuses with a fabricated view (node 1
     * user-disconnected plus an extra node that is not really in the cluster) and
     * verifies that, after a pause for heartbeats, every node reports the same
     * connection statuses as the coordinator.
     */
    @Test(timeout = 60000)
    public void testNodeInheritsClusterTopologyOnHeartbeat() throws InterruptedException {
        final Node node1 = cluster.createNode();
        final Node node2 = cluster.createNode();
        final Node node3 = cluster.createNode();

        cluster.waitUntilAllNodesConnected(10, TimeUnit.SECONDS);
        final Node coordinator = cluster.waitForClusterCoordinator(10, TimeUnit.SECONDS);

        final NodeIdentifier node4NotReallyInCluster = new NodeIdentifier(UUID.randomUUID().toString(), "localhost", 9283, "localhost", 9284, "localhost", 9285, null, false, null);

        final Map<NodeIdentifier, NodeConnectionStatus> replacementStatuses = new HashMap<>();
        replacementStatuses.put(node1.getIdentifier(), new NodeConnectionStatus(node1.getIdentifier(), DisconnectionCode.USER_DISCONNECTED));
        replacementStatuses.put(node4NotReallyInCluster, new NodeConnectionStatus(node4NotReallyInCluster, NodeConnectionState.CONNECTING));

        // reset coordinator status so that other nodes will get its now-fake view of the cluster
        coordinator.getClusterCoordinator().resetNodeStatuses(replacementStatuses);
        final List<NodeConnectionStatus> expectedStatuses = coordinator.getClusterCoordinator().getConnectionStatuses();

        // give nodes a bit to heartbeat in. We need to wait long enough that each node heartbeats.
        // But we need to not wait more than 8 seconds because that's when nodes start getting kicked out.
        Thread.sleep(6000L);

        for (final Node node : new Node[] {node1, node2, node3}) {
            assertEquals(expectedStatuses, node.getClusterCoordinator().getConnectionStatuses());
        }
    }

    /**
     * Polls, for up to 5 seconds per observer, until the observer's own cluster
     * coordinator reports {@code target} as {@link NodeConnectionState#CONNECTED}.
     *
     * <p>Each observer is queried through its own coordinator so that the wait
     * really covers every node's view, not just the first node's. A missing
     * (null) status is treated as "not yet connected" so polling continues
     * rather than failing with a {@link NullPointerException}.</p>
     *
     * <p>NOTE(review): what happens on timeout is decided by
     * {@code ClusterUtils.waitUntilConditionMet}, which is not visible here;
     * presumably it fails the test, but confirm against that helper.</p>
     *
     * @param target the node whose connection state is being awaited
     * @param observers the nodes whose view of {@code target} must become CONNECTED
     */
    private static void waitUntilAllSeeNodeConnected(final Node target, final Node... observers) {
        for (final Node observer : observers) {
            ClusterUtils.waitUntilConditionMet(5, TimeUnit.SECONDS, () -> {
                final NodeConnectionStatus status = observer.getClusterCoordinator().getConnectionStatus(target.getIdentifier());
                return status != null && status.getState() == NodeConnectionState.CONNECTED;
            });
        }
    }
}