/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.discovery.zen;

import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.logging.log4j.util.Supplier;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.NotMasterException;
import org.elasticsearch.cluster.block.ClusterBlocks;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.IndexRoutingTable;
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.cluster.routing.TestShardRouting;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.cluster.service.MasterService;
import org.elasticsearch.cluster.service.MasterServiceTests;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.BaseFuture;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.test.ClusterServiceUtils;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.VersionUtils;
import org.elasticsearch.test.junit.annotations.TestLogging;
import org.elasticsearch.threadpool.TestThreadPool;
import org.elasticsearch.threadpool.ThreadPool;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import static java.util.Collections.emptyMap;
import static java.util.Collections.emptySet;
import static java.util.Collections.shuffle;
import static org.elasticsearch.cluster.ESAllocationTestCase.createAllocationService;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_CREATION_DATE;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_VERSION_CREATED;
import static org.elasticsearch.cluster.routing.RoutingTableTests.updateActiveAllocations;
import static org.elasticsearch.cluster.service.MasterServiceTests.discoveryState;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;

/**
 * Tests for {@link NodeJoinController}: handling of join requests when the local node is / is not master,
 * accumulation of joins inside an election context, master election based on a required number of joins,
 * and rejection of joins that conflict with nodes already present in the cluster state.
 */
@TestLogging("org.elasticsearch.discovery.zen:TRACE,org.elasticsearch.cluster.service:TRACE")
public class NodeJoinControllerTests extends ESTestCase {

    /** Thread pool shared by all tests of this class; created in {@link #beforeClass()}, terminated in {@link #afterClass()}. */
    private static ThreadPool threadPool;

    /** Per-test master service, created by {@link #setupMasterServiceAndNodeJoinController(ClusterState)}. */
    private MasterService masterService;
    /** Per-test controller under test, created by {@link #setupMasterServiceAndNodeJoinController(ClusterState)}. */
    private NodeJoinController nodeJoinController;

    @BeforeClass
    public static void beforeClass() {
        threadPool = new TestThreadPool("NodeJoinControllerTests");
    }

    @AfterClass
    public static void afterClass() {
        ThreadPool.terminate(threadPool, 30, TimeUnit.SECONDS);
        threadPool = null;
    }

    @Before
    public void setUp() throws Exception {
        super.setUp();
    }

    /**
     * Closes the master service created by the test. The close happens in a {@code finally} so it is not
     * skipped when {@code super.tearDown()} throws, and is null-guarded so that a test which failed before
     * creating the service does not mask its real failure with a {@link NullPointerException}.
     */
    @After
    public void tearDown() throws Exception {
        try {
            super.tearDown();
        } finally {
            if (masterService != null) {
                masterService.close();
            }
        }
    }

    /**
     * Builds a cluster state containing only the local node (with all roles).
     *
     * @param withMaster if {@code true} the local node is also set as the master node, otherwise no master is set
     */
    private static ClusterState initialState(boolean withMaster) {
        DiscoveryNode localNode = new DiscoveryNode("node", ESTestCase.buildNewFakeTransportAddress(), Collections.emptyMap(),
            new HashSet<>(Arrays.asList(DiscoveryNode.Role.values())), Version.CURRENT);
        return ClusterState.builder(new ClusterName(ClusterServiceUtils.class.getSimpleName()))
            .nodes(DiscoveryNodes.builder()
                .add(localNode)
                .localNodeId(localNode.getId())
                .masterNodeId(withMaster ? localNode.getId() : null))
            .blocks(ClusterBlocks.EMPTY_CLUSTER_BLOCK).build();
    }

    /**
     * Creates {@link #masterService} and {@link #nodeJoinController} on top of the given initial state.
     *
     * @throws IllegalStateException if called more than once in a single test
     */
    private void setupMasterServiceAndNodeJoinController(ClusterState initialState) {
        if (masterService != null || nodeJoinController != null) {
            throw new IllegalStateException("method setupMasterServiceAndNodeJoinController can only be called once");
        }
        masterService = ClusterServiceUtils.createMasterService(threadPool, initialState);
        nodeJoinController = new NodeJoinController(masterService, createAllocationService(Settings.EMPTY),
            new ElectMasterService(Settings.EMPTY), Settings.EMPTY);
    }

    /**
     * Joins nodes before, during and after an election context while the local node is already master and
     * verifies that every join, including the ones accumulated during the election context, completes.
     */
    public void testSimpleJoinAccumulation() throws InterruptedException, ExecutionException {
        setupMasterServiceAndNodeJoinController(initialState(true));
        List<DiscoveryNode> nodes = new ArrayList<>();
        nodes.add(discoveryState(masterService).nodes().getLocalNode());

        int nodeId = 0;
        for (int i = randomInt(5); i > 0; i--) {
            DiscoveryNode node = newNode(nodeId++);
            nodes.add(node);
            joinNode(node);
        }
        nodeJoinController.startElectionContext();
        List<Future<Void>> pendingJoins = new ArrayList<>();
        for (int i = randomInt(5); i > 0; i--) {
            DiscoveryNode node = newNode(nodeId++);
            nodes.add(node);
            pendingJoins.add(joinNodeAsync(node));
        }
        nodeJoinController.stopElectionContext("test");
        boolean hadSyncJoin = false;
        for (int i = randomInt(5); i > 0; i--) {
            DiscoveryNode node = newNode(nodeId++);
            nodes.add(node);
            joinNode(node);
            hadSyncJoin = true;
        }
        if (hadSyncJoin) {
            // a synchronous join completed after the accumulated ones were released, so those must be done by now
            for (Future<Void> joinFuture : pendingJoins) {
                assertThat(joinFuture.isDone(), equalTo(true));
            }
        }
        for (Future<Void> joinFuture : pendingJoins) {
            joinFuture.get();
        }
    }

    /**
     * Verifies that joins fail with {@link NotMasterException} when the local node is not master, both when
     * sent directly and when accumulated in an election context that is then stopped.
     */
    public void testFailingJoinsWhenNotMaster() throws ExecutionException, InterruptedException {
        setupMasterServiceAndNodeJoinController(initialState(false));
        int nodeId = 0;
        try {
            joinNode(newNode(nodeId++));
            fail("failed to fail node join when not a master");
        } catch (ExecutionException e) {
            assertThat(e.getCause(), instanceOf(NotMasterException.class));
        }

        logger.debug("--> testing joins fail post accumulation");
        List<Future<Void>> pendingJoins = new ArrayList<>();
        nodeJoinController.startElectionContext();
        for (int i = 1 + randomInt(5); i > 0; i--) {
            DiscoveryNode node = newNode(nodeId++);
            final Future<Void> future = joinNodeAsync(node);
            pendingJoins.add(future);
            assertThat(future.isDone(), equalTo(false));
        }
        nodeJoinController.stopElectionContext("test");
        for (Future<Void> future : pendingJoins) {
            try {
                future.get();
                fail("failed to fail accumulated node join when not a master");
            } catch (ExecutionException e) {
                assertThat(e.getCause(), instanceOf(NotMasterException.class));
            }
        }
    }

    /**
     * Verifies that an election with zero required joins completes without any node joining.
     */
    public void testSimpleMasterElectionWithoutRequiredJoins() throws InterruptedException, ExecutionException {
        setupMasterServiceAndNodeJoinController(initialState(false));
        int nodeId = 0;
        final int requiredJoins = 0;
        logger.debug("--> using requiredJoins [{}]", requiredJoins);
        // initial (failing) joins shouldn't count
        for (int i = randomInt(5); i > 0; i--) {
            try {
                joinNode(newNode(nodeId++));
                fail("failed to fail node join when not a master");
            } catch (ExecutionException e) {
                assertThat(e.getCause(), instanceOf(NotMasterException.class));
            }
        }

        nodeJoinController.startElectionContext();
        final SimpleFuture electionFuture = new SimpleFuture("master election");
        startMasterElection(requiredJoins, electionFuture);

        logger.debug("--> requiredJoins is set to 0. verifying election finished");
        electionFuture.get();
    }

    /**
     * Verifies that the election only completes once the required number of unique master-eligible nodes
     * has joined: joins sent before the election context, repeated joins of the same node and joins of
     * non-master nodes are sent as well and must not complete the election early.
     */
    public void testSimpleMasterElection() throws InterruptedException, ExecutionException {
        setupMasterServiceAndNodeJoinController(initialState(false));
        int nodeId = 0;
        final int requiredJoins = 1 + randomInt(5);
        logger.debug("--> using requiredJoins [{}]", requiredJoins);
        // initial (failing) joins shouldn't count
        for (int i = randomInt(5); i > 0; i--) {
            try {
                joinNode(newNode(nodeId++));
                fail("failed to fail node join when not a master");
            } catch (ExecutionException e) {
                assertThat(e.getCause(), instanceOf(NotMasterException.class));
            }
        }

        nodeJoinController.startElectionContext();
        final SimpleFuture electionFuture = new SimpleFuture("master election");
        startMasterElection(requiredJoins, electionFuture);
        assertThat("election finished immediately but required joins is [" + requiredJoins + "]", electionFuture.isDone(),
            equalTo(false));

        final int initialJoins = randomIntBetween(0, requiredJoins - 1);
        final List<SimpleFuture> pendingJoins = new ArrayList<>();
        List<DiscoveryNode> nodesToJoin = new ArrayList<>();
        for (int i = 0; i < initialJoins; i++) {
            DiscoveryNode node = newNode(nodeId++, true);
            for (int j = 1 + randomInt(3); j > 0; j--) {
                nodesToJoin.add(node);
            }
        }

        // data nodes shouldn't count
        for (int i = 0; i < requiredJoins; i++) {
            DiscoveryNode node = newNode(nodeId++, false);
            for (int j = 1 + randomInt(3); j > 0; j--) {
                nodesToJoin.add(node);
            }
        }

        // send the join requests in random order
        shuffle(nodesToJoin, random());
        logger.debug("--> joining [{}] unique master nodes. Total of [{}] join requests", initialJoins, nodesToJoin.size());
        for (DiscoveryNode node : nodesToJoin) {
            pendingJoins.add(joinNodeAsync(node));
        }

        logger.debug("--> asserting master election didn't finish yet");
        assertThat("election finished after [" + initialJoins + "] master nodes but required joins is [" + requiredJoins + "]",
            electionFuture.isDone(), equalTo(false));

        final int finalJoins = requiredJoins - initialJoins + randomInt(5);
        nodesToJoin.clear();
        for (int i = 0; i < finalJoins; i++) {
            DiscoveryNode node = newNode(nodeId++, true);
            for (int j = 1 + randomInt(3); j > 0; j--) {
                nodesToJoin.add(node);
            }
        }

        for (int i = 0; i < requiredJoins; i++) {
            DiscoveryNode node = newNode(nodeId++, false);
            for (int j = 1 + randomInt(3); j > 0; j--) {
                nodesToJoin.add(node);
            }
        }

        shuffle(nodesToJoin, random());
        logger.debug("--> joining [{}] nodes, with repetition a total of [{}]", finalJoins, nodesToJoin.size());
        for (DiscoveryNode node : nodesToJoin) {
            pendingJoins.add(joinNodeAsync(node));
        }
        logger.debug("--> waiting for master election to with no exception");
        electionFuture.get();

        logger.debug("--> waiting on all joins to be processed");
        for (SimpleFuture future : pendingJoins) {
            logger.debug("waiting on {}", future);
            future.get(); // throw any exception
        }

        logger.debug("--> testing accumulation stopped");
        nodeJoinController.startElectionContext();
        nodeJoinController.stopElectionContext("test");
    }

    /**
     * Verifies that an election which does not receive enough joins within its timeout fails with
     * {@link NotMasterException} and that all joins accumulated up to that point are failed the same way.
     */
    public void testMasterElectionTimeout() throws InterruptedException {
        setupMasterServiceAndNodeJoinController(initialState(false));
        int nodeId = 0;
        final int requiredJoins = 1 + randomInt(5);
        logger.debug("--> using requiredJoins [{}]", requiredJoins);
        // initial (failing) joins shouldn't count
        for (int i = randomInt(5); i > 0; i--) {
            try {
                joinNode(newNode(nodeId++));
                fail("failed to fail node join when not a master");
            } catch (ExecutionException e) {
                assertThat(e.getCause(), instanceOf(NotMasterException.class));
            }
        }

        nodeJoinController.startElectionContext();
        final int initialJoins = randomIntBetween(0, requiredJoins - 1);
        final List<SimpleFuture> pendingJoins = new ArrayList<>();
        List<DiscoveryNode> nodesToJoin = new ArrayList<>();
        for (int i = 0; i < initialJoins; i++) {
            DiscoveryNode node = newNode(nodeId++);
            for (int j = 1 + randomInt(3); j > 0; j--) {
                nodesToJoin.add(node);
            }
        }
        shuffle(nodesToJoin, random());
        logger.debug("--> joining [{}] nodes, with repetition a total of [{}]", initialJoins, nodesToJoin.size());
        for (DiscoveryNode node : nodesToJoin) {
            pendingJoins.add(joinNodeAsync(node));
        }

        final AtomicReference<Throwable> failure = new AtomicReference<>();
        final CountDownLatch latch = new CountDownLatch(1);
        nodeJoinController.waitToBeElectedAsMaster(requiredJoins, TimeValue.timeValueMillis(1),
            new NodeJoinController.ElectionCallback() {
                @Override
                public void onElectedAsMaster(ClusterState state) {
                    assertThat("callback called with elected as master, but state disagrees",
                        state.nodes().isLocalNodeElectedMaster(), equalTo(true));
                    latch.countDown();
                }

                @Override
                public void onFailure(Throwable t) {
                    failure.set(t);
                    latch.countDown();
                }
            });
        latch.await();
        logger.debug("--> verifying election timed out");
        assertThat(failure.get(), instanceOf(NotMasterException.class));

        logger.debug("--> verifying all joins are failed");
        for (SimpleFuture future : pendingJoins) {
            logger.debug("waiting on {}", future);
            try {
                future.get(); // throw any exception
                fail("failed to fail node join [" + future + "]");
            } catch (ExecutionException e) {
                assertThat(e.getCause(), instanceOf(NotMasterException.class));
            }
        }
    }

    /**
     * Verifies that a join from a node that is already part of the cluster state still results in a new
     * cluster state instance.
     */
    public void testNewClusterStateOnExistingNodeJoin() throws InterruptedException, ExecutionException {
        ClusterState state = initialState(true);
        final DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(state.nodes());
        final DiscoveryNode other_node = new DiscoveryNode("other_node", buildNewFakeTransportAddress(),
            emptyMap(), emptySet(), Version.CURRENT);
        nodesBuilder.add(other_node);
        setupMasterServiceAndNodeJoinController(ClusterState.builder(state).nodes(nodesBuilder).build());

        state = discoveryState(masterService);
        joinNode(other_node);
        assertTrue("failed to publish a new state upon existing join", discoveryState(masterService) != state);
    }

    /**
     * Joins several nodes concurrently (some of them repeatedly) while the local node is master and verifies
     * that no join thread failed and that exactly the expected nodes end up in the cluster state.
     */
    public void testNormalConcurrentJoins() throws InterruptedException {
        setupMasterServiceAndNodeJoinController(initialState(true));
        Thread[] threads = new Thread[3 + randomInt(5)];
        List<DiscoveryNode> nodes = new ArrayList<>();
        nodes.add(discoveryState(masterService).nodes().getLocalNode());
        final CyclicBarrier barrier = new CyclicBarrier(threads.length);
        final List<Throwable> backgroundExceptions = new CopyOnWriteArrayList<>();
        for (int i = 0; i < threads.length; i++) {
            final DiscoveryNode node = newNode(i);
            final int iterations = rarely() ? randomIntBetween(1, 4) : 1;
            nodes.add(node);
            threads[i] = new Thread(new AbstractRunnable() {
                @Override
                public void onFailure(Exception e) {
                    logger.error("unexpected error in join thread", e);
                    backgroundExceptions.add(e);
                }

                @Override
                protected void doRun() throws Exception {
                    barrier.await();
                    for (int iteration = 0; iteration < iterations; iteration++) {
                        logger.debug("{} joining", node);
                        joinNode(node);
                    }
                }
            }, "t_" + i);
            threads[i].start();
        }

        logger.info("--> waiting for joins to complete");
        for (Thread thread : threads) {
            thread.join();
        }

        // surface failures collected on the join threads instead of silently dropping them
        assertThat("join threads reported failures", backgroundExceptions, empty());
        assertNodesInCurrentState(nodes);
    }

    /**
     * Joins several master-eligible nodes concurrently while an election is in progress and verifies that the
     * local node gets elected, that no join thread failed and that exactly the expected nodes end up in the
     * cluster state.
     */
    public void testElectionWithConcurrentJoins() throws InterruptedException, BrokenBarrierException {
        setupMasterServiceAndNodeJoinController(initialState(false));

        nodeJoinController.startElectionContext();

        Thread[] threads = new Thread[3 + randomInt(5)];
        final int requiredJoins = randomInt(threads.length);
        List<DiscoveryNode> nodes = new ArrayList<>();
        nodes.add(discoveryState(masterService).nodes().getLocalNode());
        // +1: the test thread takes part in the barrier as well
        final CyclicBarrier barrier = new CyclicBarrier(threads.length + 1);
        final List<Throwable> backgroundExceptions = new CopyOnWriteArrayList<>();
        for (int i = 0; i < threads.length; i++) {
            final DiscoveryNode node = newNode(i, true);
            final int iterations = rarely() ? randomIntBetween(1, 4) : 1;
            nodes.add(node);
            threads[i] = new Thread(new AbstractRunnable() {
                @Override
                public void onFailure(Exception e) {
                    logger.error("unexpected error in join thread", e);
                    backgroundExceptions.add(e);
                }

                @Override
                protected void doRun() throws Exception {
                    barrier.await();
                    for (int iteration = 0; iteration < iterations; iteration++) {
                        logger.debug("{} joining", node);
                        joinNode(node);
                    }
                }
            }, "t_" + i);
            threads[i].start();
        }

        barrier.await();
        logger.info("--> waiting to be elected as master (required joins [{}])", requiredJoins);
        final AtomicReference<Throwable> failure = new AtomicReference<>();
        final CountDownLatch latch = new CountDownLatch(1);

        nodeJoinController.waitToBeElectedAsMaster(requiredJoins, TimeValue.timeValueHours(30),
            new NodeJoinController.ElectionCallback() {
                @Override
                public void onElectedAsMaster(ClusterState state) {
                    assertThat("callback called with elected as master, but state disagrees",
                        state.nodes().isLocalNodeElectedMaster(), equalTo(true));
                    latch.countDown();
                }

                @Override
                public void onFailure(Throwable t) {
                    logger.error("unexpected error while waiting to be elected as master", t);
                    failure.set(t);
                    latch.countDown();
                }
            });
        latch.await();
        ExceptionsHelper.reThrowIfNotNull(failure.get());

        logger.info("--> waiting for joins to complete");
        for (Thread thread : threads) {
            thread.join();
        }

        // surface failures collected on the join threads instead of silently dropping them
        assertThat("join threads reported failures", backgroundExceptions, empty());
        assertNodesInCurrentState(nodes);
    }

    /**
     * Verifies that a join from a node with a new id but the transport address of an existing node is rejected.
     */
    public void testRejectingJoinWithSameAddressButDifferentId() throws InterruptedException, ExecutionException {
        addNodes(randomInt(5));
        ClusterState state = discoveryState(masterService);
        final DiscoveryNode existing = randomFrom(StreamSupport.stream(state.nodes().spliterator(), false).collect(Collectors.toList()));
        final DiscoveryNode other_node = new DiscoveryNode("other_node", existing.getAddress(), emptyMap(), emptySet(), Version.CURRENT);

        ExecutionException e = expectThrows(ExecutionException.class, () -> joinNode(other_node));
        assertThat(e.getMessage(), containsString("found existing node"));
    }

    /**
     * Verifies that a join from a node that reuses the id of an existing node, with randomly kept or changed
     * name, address, attributes, roles and version, is rejected.
     */
    public void testRejectingJoinWithSameIdButDifferentNode() throws InterruptedException, ExecutionException {
        addNodes(randomInt(5));
        ClusterState state = discoveryState(masterService);
        final DiscoveryNode existing = randomFrom(StreamSupport.stream(state.nodes().spliterator(), false).collect(Collectors.toList()));
        final DiscoveryNode other_node = new DiscoveryNode(
            randomBoolean() ? existing.getName() : "other_name",
            existing.getId(),
            randomBoolean() ? existing.getAddress() : buildNewFakeTransportAddress(),
            randomBoolean() ? existing.getAttributes() : Collections.singletonMap("attr", "other"),
            randomBoolean() ? existing.getRoles() : new HashSet<>(randomSubsetOf(Arrays.asList(DiscoveryNode.Role.values()))),
            randomBoolean() ? existing.getVersion() : VersionUtils.randomVersion(random()));

        ExecutionException e = expectThrows(ExecutionException.class, () -> joinNode(other_node));
        assertThat(e.getMessage(), containsString("found existing node"));
    }

    /**
     * Verifies that a re-join of an existing node is accepted, while a join from a freshly constructed node with
     * the same id, address, attributes and roles is rejected.
     */
    public void testRejectingRestartedNodeJoinsBeforeProcessingNodeLeft() throws InterruptedException, ExecutionException {
        addNodes(randomInt(5));
        ClusterState state = discoveryState(masterService);
        final DiscoveryNode existing = randomFrom(StreamSupport.stream(state.nodes().spliterator(), false).collect(Collectors.toList()));
        joinNode(existing); // OK

        final DiscoveryNode other_node = new DiscoveryNode(existing.getId(), existing.getAddress(), existing.getAttributes(),
            existing.getRoles(), Version.CURRENT);

        ExecutionException e = expectThrows(ExecutionException.class, () -> joinNode(other_node));
        assertThat(e.getMessage(), containsString("found existing node"));
    }

    /**
     * Tests that a node can become a master, even though the last cluster state it knows contains
     * nodes that conflict with the joins it got and needs to become a master
     */
    public void testElectionBasedOnConflictingNodes() throws InterruptedException, ExecutionException {
        ClusterState initialState = initialState(true);
        final DiscoveryNode masterNode = initialState.nodes().getLocalNode();
        final DiscoveryNode otherNode = new DiscoveryNode("other_node", buildNewFakeTransportAddress(), emptyMap(),
            EnumSet.allOf(DiscoveryNode.Role.class), Version.CURRENT);
        // simulate master going down with stale nodes in its cluster state (for example when min master nodes is set to 2)
        // also add some shards to that node
        DiscoveryNodes.Builder discoBuilder = DiscoveryNodes.builder(initialState.nodes());
        discoBuilder.masterNodeId(null);
        discoBuilder.add(otherNode);
        ClusterState.Builder stateBuilder = ClusterState.builder(initialState).nodes(discoBuilder);
        if (randomBoolean()) {
            IndexMetaData indexMetaData = IndexMetaData.builder("test").settings(Settings.builder()
                .put(SETTING_VERSION_CREATED, Version.CURRENT)
                .put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 1)
                .put(SETTING_CREATION_DATE, System.currentTimeMillis())).build();
            IndexRoutingTable.Builder indexRoutingTableBuilder = IndexRoutingTable.builder(indexMetaData.getIndex());
            RoutingTable.Builder routing = new RoutingTable.Builder();
            routing.addAsNew(indexMetaData);
            final ShardId shardId = new ShardId("test", "_na_", 0);
            IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(shardId);

            final DiscoveryNode primaryNode = randomBoolean() ? masterNode : otherNode;
            final DiscoveryNode replicaNode = primaryNode.equals(masterNode) ? otherNode : masterNode;
            final boolean primaryStarted = randomBoolean();
            indexShardRoutingBuilder.addShard(TestShardRouting.newShardRouting("test", 0, primaryNode.getId(), null, true,
                primaryStarted ? ShardRoutingState.STARTED : ShardRoutingState.INITIALIZING,
                primaryStarted ? null : new UnassignedInfo(UnassignedInfo.Reason.INDEX_REOPENED, "getting there")));
            if (primaryStarted) {
                boolean replicaStared = randomBoolean();
                indexShardRoutingBuilder.addShard(TestShardRouting.newShardRouting("test", 0, replicaNode.getId(), null, false,
                    replicaStared ? ShardRoutingState.STARTED : ShardRoutingState.INITIALIZING,
                    replicaStared ? null : new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "getting there")));
            } else {
                indexShardRoutingBuilder.addShard(TestShardRouting.newShardRouting("test", 0, null, null, false,
                    ShardRoutingState.UNASSIGNED, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "life sucks")));
            }
            indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilder.build());
            IndexRoutingTable indexRoutingTable = indexRoutingTableBuilder.build();
            IndexMetaData updatedIndexMetaData = updateActiveAllocations(indexRoutingTable, indexMetaData);
            stateBuilder.metaData(MetaData.builder().put(updatedIndexMetaData, false).generateClusterUuidIfNeeded())
                .routingTable(RoutingTable.builder().add(indexRoutingTable).build());
        }

        setupMasterServiceAndNodeJoinController(stateBuilder.build());

        // conflict on node id or address
        final DiscoveryNode conflictingNode = randomBoolean() ?
            new DiscoveryNode(otherNode.getId(), randomBoolean() ? otherNode.getAddress() : buildNewFakeTransportAddress(),
                otherNode.getAttributes(), otherNode.getRoles(), Version.CURRENT) :
            new DiscoveryNode("conflicting_address_node", otherNode.getAddress(), otherNode.getAttributes(),
                otherNode.getRoles(), Version.CURRENT);

        nodeJoinController.startElectionContext();
        final SimpleFuture joinFuture = joinNodeAsync(conflictingNode);
        final AtomicReference<Throwable> electionFailure = new AtomicReference<>();
        final CountDownLatch elected = new CountDownLatch(1);
        nodeJoinController.waitToBeElectedAsMaster(1, TimeValue.timeValueHours(5), new NodeJoinController.ElectionCallback() {
            @Override
            public void onElectedAsMaster(ClusterState state) {
                elected.countDown();
            }

            @Override
            public void onFailure(Throwable t) {
                logger.error("failed to be elected as master", t);
                // record the failure and release the latch; throwing here would only kill the callback thread
                // and leave the test thread blocked on the latch forever
                electionFailure.set(t);
                elected.countDown();
            }
        });

        elected.await();
        if (electionFailure.get() != null) {
            throw new AssertionError("failed to be elected as master", electionFailure.get());
        }

        joinFuture.get(); // throw any exception

        final ClusterState finalState = discoveryState(masterService);
        final DiscoveryNodes finalNodes = finalState.nodes();
        assertTrue(finalNodes.isLocalNodeElectedMaster());
        assertThat(finalNodes.getLocalNode(), equalTo(masterNode));
        assertThat(finalNodes.getSize(), equalTo(2));
        assertThat(finalNodes.get(conflictingNode.getId()), equalTo(conflictingNode));
        List<ShardRouting> activeShardsOnRestartedNode =
            StreamSupport.stream(finalState.getRoutingNodes().node(conflictingNode.getId()).spliterator(), false)
                .filter(ShardRouting::active).collect(Collectors.toList());
        assertThat(activeShardsOnRestartedNode, empty());
    }

    /**
     * Starts a background thread that calls {@link NodeJoinController#waitToBeElectedAsMaster} with the given
     * number of required joins and reports the outcome through {@code electionFuture}: the future is marked as
     * done when the local node is elected and marked as failed on any error.
     */
    private void startMasterElection(final int requiredJoins, final SimpleFuture electionFuture) {
        final Thread masterElection = new Thread(new AbstractRunnable() {
            @Override
            public void onFailure(Exception e) {
                logger.error("unexpected error from waitToBeElectedAsMaster", e);
                electionFuture.markAsFailed(e);
            }

            @Override
            protected void doRun() throws Exception {
                nodeJoinController.waitToBeElectedAsMaster(requiredJoins, TimeValue.timeValueHours(30),
                    new NodeJoinController.ElectionCallback() {
                        @Override
                        public void onElectedAsMaster(ClusterState state) {
                            assertThat("callback called with elected as master, but state disagrees",
                                state.nodes().isLocalNodeElectedMaster(), equalTo(true));
                            electionFuture.markAsDone();
                        }

                        @Override
                        public void onFailure(Throwable t) {
                            logger.error("unexpected error while waiting to be elected as master", t);
                            electionFuture.markAsFailed(t);
                        }
                    });
            }
        });
        masterElection.start();
    }

    /**
     * Sets up the master service and join controller with a state in which the local node is master and
     * {@code count} additional nodes with random roles are already part of the cluster.
     */
    private void addNodes(int count) {
        ClusterState state = initialState(true);
        final DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(state.nodes());
        for (int i = 0; i < count; i++) {
            final DiscoveryNode node = new DiscoveryNode("node_" + state.nodes().getSize() + i, buildNewFakeTransportAddress(),
                emptyMap(), new HashSet<>(randomSubsetOf(Arrays.asList(DiscoveryNode.Role.values()))), Version.CURRENT);
            nodesBuilder.add(node);
        }
        setupMasterServiceAndNodeJoinController(ClusterState.builder(state).nodes(nodesBuilder).build());
    }

    /**
     * Asserts that the current cluster state contains every expected node (looked up by id and compared with
     * {@code equals}) and no additional nodes.
     */
    protected void assertNodesInCurrentState(List<DiscoveryNode> expectedNodes) {
        final ClusterState state = discoveryState(masterService);
        logger.info("assert for [{}] in:\n{}", expectedNodes, state);
        DiscoveryNodes discoveryNodes = state.nodes();
        for (DiscoveryNode node : expectedNodes) {
            assertThat("missing " + node + "\n" + discoveryNodes, discoveryNodes.get(node.getId()), equalTo(node));
        }
        assertThat(discoveryNodes.getSize(), equalTo(expectedNodes.size()));
    }

    /** A {@link BaseFuture} with a human readable description that can be completed or failed explicitly. */
    static class SimpleFuture extends BaseFuture<Void> {
        final String description;

        SimpleFuture(String description) {
            this.description = description;
        }

        public void markAsDone() {
            set(null);
        }

        public void markAsFailed(Throwable t) {
            setException(t);
        }

        @Override
        public String toString() {
            return "future [" + description + "]";
        }
    }

    /** Counter used to give every join request a distinct id in its future description. */
    static final AtomicInteger joinId = new AtomicInteger();

    /**
     * Submits a join request for a clone of the given node and returns a future that is completed when the
     * join callback reports success and failed when it reports an error.
     */
    private SimpleFuture joinNodeAsync(final DiscoveryNode node) throws InterruptedException {
        final SimpleFuture future = new SimpleFuture("join of " + node + " (id [" + joinId.incrementAndGet() + "])");
        logger.debug("starting {}", future);
        // clone the node before submitting to simulate an incoming join, which is guaranteed to have a new
        // disco node object serialized off the network
        nodeJoinController.handleJoinRequest(cloneNode(node), new MembershipAction.JoinCallback() {
            @Override
            public void onSuccess() {
                logger.debug("{} completed", future);
                future.markAsDone();
            }

            @Override
            public void onFailure(Exception e) {
                logger.error((Supplier<?>) () -> new ParameterizedMessage("unexpected error for {}", future), e);
                future.markAsFailed(e);
            }
        });
        return future;
    }

    /**
     * creates an object clone of node, so it will be a different object instance
     */
    private DiscoveryNode cloneNode(DiscoveryNode node) {
        return new DiscoveryNode(node.getName(), node.getId(), node.getEphemeralId(), node.getHostName(), node.getHostAddress(),
            node.getAddress(), node.getAttributes(), node.getRoles(), node.getVersion());
    }

    /** Submits a join request for the given node and blocks until it has completed or failed. */
    private void joinNode(final DiscoveryNode node) throws InterruptedException, ExecutionException {
        joinNodeAsync(node).get();
    }

    /** Creates a node with id {@code i} that is randomly master-eligible or not. */
    protected DiscoveryNode newNode(int i) {
        return newNode(i, randomBoolean());
    }

    /**
     * Creates a node with id {@code i}. The node has the master role only if {@code master} is set; its name is
     * prefixed with {@code master_} or {@code data_} accordingly.
     */
    protected DiscoveryNode newNode(int i, boolean master) {
        Set<DiscoveryNode.Role> roles = new HashSet<>();
        if (master) {
            roles.add(DiscoveryNode.Role.MASTER);
        }
        final String prefix = master ? "master_" : "data_";
        return new DiscoveryNode(prefix + i, i + "", buildNewFakeTransportAddress(), emptyMap(), roles, Version.CURRENT);
    }
}