/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.test; import static junit.framework.Assert.fail; import static org.apache.lucene.util.LuceneTestCase.TEST_NIGHTLY; import static org.apache.lucene.util.LuceneTestCase.rarely; import static org.apache.lucene.util.LuceneTestCase.usually; import static org.elasticsearch.common.settings.Settings.settingsBuilder; import static org.elasticsearch.test.ESTestCase.assertBusy; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.junit.Assert.assertThat; import java.io.Closeable; import java.io.File; import java.io.IOException; import java.net.InetSocketAddress; import java.net.URL; import java.net.URLClassLoader; import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.Random; import java.util.Set; import java.util.TreeMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import 
java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.service.ElassandraDaemon; import org.apache.cassandra.service.StorageService; import org.apache.lucene.store.StoreRateLimiting; import org.apache.lucene.util.IOUtils; import org.elasticsearch.Version; import org.elasticsearch.action.admin.cluster.node.stats.NodeStats; import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; import org.elasticsearch.cache.recycler.PageCacheRecycler; import org.elasticsearch.client.Client; import org.elasticsearch.client.transport.TransportClient; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterService; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.action.index.MappingUpdatedAction; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.routing.OperationRouting; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.UnassignedInfo; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.SuppressForbidden; import org.elasticsearch.common.breaker.CircuitBreaker; import org.elasticsearch.common.io.FileSystemUtils; import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.network.NetworkAddress; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings.Builder; import org.elasticsearch.common.transport.InetSocketTransportAddress; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.unit.ByteSizeUnit; import 
org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.discovery.DiscoveryService; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.http.HttpServerTransport; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.cache.IndexCacheModule; import org.elasticsearch.index.engine.CommitStats; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.engine.EngineClosedException; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.shard.MockEngineFactoryPlugin; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.indices.breaker.HierarchyCircuitBreakerService; import org.elasticsearch.indices.cache.request.IndicesRequestCache; import org.elasticsearch.indices.fielddata.cache.IndicesFieldDataCache; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.indices.store.IndicesStore; import org.elasticsearch.node.MockNode; import org.elasticsearch.node.Node; import org.elasticsearch.node.NodeMocksPlugin; import org.elasticsearch.node.internal.InternalSettingsPreparer; import org.elasticsearch.node.service.NodeService; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.script.ScriptService; import org.elasticsearch.search.MockSearchService; import org.elasticsearch.search.SearchService; import org.elasticsearch.test.disruption.ServiceDisruptionScheme; import org.elasticsearch.test.store.MockFSIndexStore; import org.elasticsearch.test.transport.MockTransportService; import org.elasticsearch.transport.TransportService; import org.elasticsearch.transport.netty.NettyTransport; import org.junit.Assert; import com.carrotsearch.randomizedtesting.RandomizedTest; import 
com.carrotsearch.randomizedtesting.SysGlobals; import com.carrotsearch.randomizedtesting.generators.RandomInts; import com.carrotsearch.randomizedtesting.generators.RandomPicks; import com.carrotsearch.randomizedtesting.generators.RandomStrings; import com.google.common.base.Predicate; import com.google.common.base.Predicates; import com.google.common.collect.Collections2; import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.net.InetAddresses; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.SettableFuture; /** * ElassandraTestCluster is a singleton: a single cluster used for all tests. * The first node is created as a singleton by the ESSingleNodeTestCase. This first node is the seed for all nodes created thereafter by starting a JVM. * The first node listens on 127.0.0.1; the others listen on 127.0.0.n (n is increased when adding new nodes). * Clients are available to any node. * Between tests, all configuration is removed and we check that all nodes are running. * Plugin classes should be dynamically loaded on demand, but are currently loaded when node 1 is created. */ /** * InternalTestCluster manages a set of JVM private nodes and allows convenient access to them. * The cluster supports randomized configuration such that nodes started in the cluster will * automatically load asserting services tracking resources like file handles or open searchers. * <p> * The Cluster is bound to a test lifecycle where tests must call {@link #beforeTest(java.util.Random, double)} and * {@link #afterTest()} to initialize and reset the cluster in order to be more reproducible. The term "more" relates * to the async nature of Elasticsearch in combination with randomized testing. Once Threads and asynchronous calls * are involved reproducibility is very limited. 
This class should only be used through {@link ESIntegTestCase}. * </p> */ public final class InternalTestCluster extends TestCluster { private final ESLogger logger = Loggers.getLogger(getClass()); static NodeConfigurationSource DEFAULT_SETTINGS_SOURCE = NodeConfigurationSource.EMPTY; /** * A node level setting that holds a per node random seed that is consistent across node restarts */ public static final String SETTING_CLUSTER_NODE_SEED = "test.cluster.node.seed"; /** * The number of ports in the range used for this JVM */ public static final int PORTS_PER_JVM = 100; /** * The number of ports in the range used for this cluster */ public static final int PORTS_PER_CLUSTER = 20; private static final int GLOBAL_TRANSPORT_BASE_PORT = 9300; private static final int GLOBAL_HTTP_BASE_PORT = 19200; private static final int JVM_ORDINAL = Integer.parseInt(System.getProperty(SysGlobals.CHILDVM_SYSPROP_JVM_ID, "0")); /** a per-JVM unique offset to be used for calculating unique port ranges. */ public static final int JVM_BASE_PORT_OFFEST = PORTS_PER_JVM * (JVM_ORDINAL + 1); private static final AtomicInteger clusterOrdinal = new AtomicInteger(); private final int CLUSTER_BASE_PORT_OFFSET = JVM_BASE_PORT_OFFEST + (clusterOrdinal.getAndIncrement() * PORTS_PER_CLUSTER) % PORTS_PER_JVM; public final int TRANSPORT_BASE_PORT = GLOBAL_TRANSPORT_BASE_PORT + CLUSTER_BASE_PORT_OFFSET; public final int HTTP_BASE_PORT = GLOBAL_HTTP_BASE_PORT + CLUSTER_BASE_PORT_OFFSET; static final int DEFAULT_MIN_NUM_DATA_NODES = 1; static final int DEFAULT_MAX_NUM_DATA_NODES = TEST_NIGHTLY ? 
6 : 3; static final int DEFAULT_NUM_CLIENT_NODES = -1; static final int DEFAULT_MIN_NUM_CLIENT_NODES = 0; static final int DEFAULT_MAX_NUM_CLIENT_NODES = 1; static final boolean DEFAULT_ENABLE_HTTP_PIPELINING = true; /* sorted map to make traverse order reproducible, concurrent since we do checks on it not within a sync block */ private final NavigableMap<String, NodeAndClient> nodes = new TreeMap<>(); private final Set<Path> dataDirToClean = new HashSet<>(); private final AtomicBoolean open = new AtomicBoolean(true); private Settings defaultSettings; private AtomicInteger nextNodeId = new AtomicInteger(0); private NodeConfigurationSource nodeConfigurationSource; private final ExecutorService executor; private boolean enableMockModules; /** * All nodes started by the cluster will have their name set to nodePrefix followed by a positive number */ private String nodePrefix; private Path baseDir; private ServiceDisruptionScheme activeDisruptionScheme; private int numberOfNodes = 1; private static InternalTestCluster CLUSTER = null; InternalTestCluster(ESSingleNodeTestCase rootTest) { super(0); executor = EsExecutors.newCached("test runner", 0, TimeUnit.SECONDS, EsExecutors.daemonThreadFactory("test_" + getClusterName())); this.defaultSettings = rootTest.nodeSettings(1); } public static InternalTestCluster getTestCluster(ESSingleNodeTestCase rootTest) { if (CLUSTER == null) { CLUSTER = new InternalTestCluster(rootTest); } return CLUSTER; } @Override public String getClusterName() { return DatabaseDescriptor.getClusterName(); } public String[] getNodeNames() { return StorageService.instance.getEndpointToHostId().values().stream().map(uuid -> uuid.toString()).toArray(String[]::new); } private Settings getSettings(int nodeOrdinal, long nodeSeed, Settings others) { Builder builder = Settings.settingsBuilder() .put(defaultSettings) .put(getRandomNodeSettings(nodeSeed)); Settings settings = nodeConfigurationSource.nodeSettings(nodeOrdinal); if (settings != null) { if 
(settings.get(ClusterName.SETTING) != null) { throw new IllegalStateException("Tests must not set a '" + ClusterName.SETTING + "' as a node setting set '" + ClusterName.SETTING + "': [" + settings.get(ClusterName.SETTING) + "]"); } builder.put(settings); } if (others != null) { builder.put(others); } builder.put(ClusterName.SETTING, getClusterName()); return builder.build(); } private Collection<Class<? extends Plugin>> getPlugins(long seed) { Set<Class<? extends Plugin>> plugins = new HashSet<>(); if (nodeConfigurationSource != null) plugins.addAll(nodeConfigurationSource.nodePlugins()); Random random = new Random(seed); if (enableMockModules && usually(random)) { plugins.add(MockTransportService.TestPlugin.class); plugins.add(MockFSIndexStore.TestPlugin.class); plugins.add(NodeMocksPlugin.class); plugins.add(MockEngineFactoryPlugin.class); plugins.add(MockSearchService.TestPlugin.class); /* if (isLocalTransportConfigured()) { plugins.add(AssertingLocalTransport.TestPlugin.class); } */ } return plugins; } private Settings getRandomNodeSettings(long seed) { Random random = new Random(seed); Builder builder = Settings.settingsBuilder() .put(SETTING_CLUSTER_NODE_SEED, seed); /* if (isLocalTransportConfigured() == false) { builder.put(Transport.TransportSettings.TRANSPORT_TCP_COMPRESS, rarely(random)); } */ if (random.nextBoolean()) { builder.put("cache.recycler.page.type", RandomPicks.randomFrom(random, PageCacheRecycler.Type.values())); } if (random.nextInt(10) == 0) { // 10% of the nodes have a very frequent check interval builder.put(SearchService.KEEPALIVE_INTERVAL_KEY, TimeValue.timeValueMillis(10 + random.nextInt(2000))); } else if (random.nextInt(10) != 0) { // 90% of the time - 10% of the time we don't set anything builder.put(SearchService.KEEPALIVE_INTERVAL_KEY, TimeValue.timeValueSeconds(10 + random.nextInt(5 * 60))); } if (random.nextBoolean()) { // sometimes set a builder.put(SearchService.DEFAULT_KEEPALIVE_KEY, TimeValue.timeValueSeconds(100 + 
random.nextInt(5 * 60))); } if (random.nextInt(10) == 0) { // node gets an extra cpu this time builder.put(EsExecutors.PROCESSORS, 1 + EsExecutors.boundedNumberOfProcessors(Settings.EMPTY)); } if (random.nextBoolean()) { if (random.nextBoolean()) { builder.put("indices.fielddata.cache.size", 1 + random.nextInt(1000), ByteSizeUnit.MB); } } // randomize netty settings if (random.nextBoolean()) { builder.put(NettyTransport.WORKER_COUNT, random.nextInt(3) + 1); builder.put(NettyTransport.CONNECTIONS_PER_NODE_RECOVERY, random.nextInt(2) + 1); builder.put(NettyTransport.CONNECTIONS_PER_NODE_BULK, random.nextInt(3) + 1); builder.put(NettyTransport.CONNECTIONS_PER_NODE_REG, random.nextInt(6) + 1); } if (random.nextBoolean()) { builder.put(MappingUpdatedAction.INDICES_MAPPING_DYNAMIC_TIMEOUT, new TimeValue(RandomInts.randomIntBetween(random, 10, 30), TimeUnit.SECONDS)); } if (random.nextInt(10) == 0) { builder.put(HierarchyCircuitBreakerService.REQUEST_CIRCUIT_BREAKER_TYPE_SETTING, "noop"); builder.put(HierarchyCircuitBreakerService.FIELDDATA_CIRCUIT_BREAKER_TYPE_SETTING, "noop"); } if (random.nextBoolean()) { builder.put(IndexCacheModule.QUERY_CACHE_TYPE, random.nextBoolean() ? 
IndexCacheModule.INDEX_QUERY_CACHE : IndexCacheModule.NONE_QUERY_CACHE); } if (random.nextBoolean()) { builder.put(IndexCacheModule.QUERY_CACHE_EVERYTHING, random.nextBoolean()); } if (random.nextBoolean()) { if (random.nextInt(10) == 0) { // do something crazy slow here builder.put(IndicesStore.INDICES_STORE_THROTTLE_MAX_BYTES_PER_SEC, new ByteSizeValue(RandomInts.randomIntBetween(random, 1, 10), ByteSizeUnit.MB)); } else { builder.put(IndicesStore.INDICES_STORE_THROTTLE_MAX_BYTES_PER_SEC, new ByteSizeValue(RandomInts.randomIntBetween(random, 10, 200), ByteSizeUnit.MB)); } } if (random.nextBoolean()) { builder.put(IndicesStore.INDICES_STORE_THROTTLE_TYPE, RandomPicks.randomFrom(random, StoreRateLimiting.Type.values())); } if (random.nextBoolean()) { if (random.nextInt(10) == 0) { // do something crazy slow here builder.put(RecoverySettings.INDICES_RECOVERY_MAX_BYTES_PER_SEC, new ByteSizeValue(RandomInts.randomIntBetween(random, 1, 10), ByteSizeUnit.MB)); } else { builder.put(RecoverySettings.INDICES_RECOVERY_MAX_BYTES_PER_SEC, new ByteSizeValue(RandomInts.randomIntBetween(random, 10, 200), ByteSizeUnit.MB)); } } if (random.nextBoolean()) { builder.put(RecoverySettings.INDICES_RECOVERY_COMPRESS, random.nextBoolean()); } if (random.nextBoolean()) { builder.put(IndicesRequestCache.INDICES_CACHE_QUERY_CONCURRENCY_LEVEL, RandomInts.randomIntBetween(random, 1, 32)); builder.put(IndicesFieldDataCache.FIELDDATA_CACHE_CONCURRENCY_LEVEL, RandomInts.randomIntBetween(random, 1, 32)); } if (random.nextBoolean()) { builder.put(NettyTransport.PING_SCHEDULE, RandomInts.randomIntBetween(random, 100, 2000) + "ms"); } if (random.nextBoolean()) { builder.put(ScriptService.SCRIPT_CACHE_SIZE_SETTING, RandomInts.randomIntBetween(random, -100, 2000)); } if (random.nextBoolean()) { builder.put(ScriptService.SCRIPT_CACHE_EXPIRE_SETTING, TimeValue.timeValueMillis(RandomInts.randomIntBetween(random, 750, 10000000))); } // always default delayed allocation to 0 to make sure we have tests are 
not delayed builder.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, 0); return builder.build(); } private void ensureOpen() { if (!open.get()) { throw new RuntimeException("Cluster is already closed"); } } private synchronized NodeAndClient getOrBuildRandomNode() { ensureOpen(); NodeAndClient randomNodeAndClient = getRandomNodeAndClient(); if (randomNodeAndClient != null) { return randomNodeAndClient; } NodeAndClient buildNode = buildNode(); buildNode.node().start(); publishNode(buildNode); return buildNode; } private synchronized NodeAndClient getRandomNodeAndClient() { Predicate<NodeAndClient> all = Predicates.alwaysTrue(); return getRandomNodeAndClient(all); } private synchronized NodeAndClient getRandomNodeAndClient(Predicate<NodeAndClient> predicate) { ensureOpen(); Collection<NodeAndClient> values = Collections2.filter(nodes.values(), predicate); if (!values.isEmpty()) { int whichOne = random.nextInt(values.size()); for (NodeAndClient nodeAndClient : values) { if (whichOne-- == 0) { return nodeAndClient; } } } return null; } /** * Ensures that at least <code>n</code> data nodes are present in the cluster. * if more nodes than <code>n</code> are present this method will not * stop any of the running nodes. 
*/ @SuppressForbidden(reason="File used to fork jvms") public void ensureAtLeastNumDataNodes(int n) { if (this.numberOfNodes < n) { while (this.numberOfNodes < n) { // starting child nodes, once per minute for gossip logger.warn("Starting Elassandra node {}", numberOfNodes+1); String classpath = Arrays.stream( ((URLClassLoader)Thread.currentThread().getContextClassLoader()).getURLs()) .map(URL::getFile) .collect(Collectors.joining(File.pathSeparator)); List<String> cmd = new ArrayList<String>(); cmd.add( System.getProperty("java.home")+File.separator+"bin"+File.separator+"java" ); //cmd.add("-cp"); //cmd.add(classpath); for(Object p : System.getProperties().keySet()) if (!p.equals("cassandra.jmx.local.port")) cmd.add("-D"+p+"="+System.getProperty(p.toString())); cmd.add("-Dcassandra.jmx.local.port="+(7198+this.numberOfNodes)); cmd.add("-XX:+DisableExplicitGC"); cmd.add(ElassandraDaemon.class.getCanonicalName()); logger.warn("Starting Elassandra node #{}, cmd={}", numberOfNodes, String.join(" ", cmd)); try { Runtime.getRuntime().exec(cmd.toArray(new String[cmd.size()]), new String[] { "cassandra.node_ordinal="+this.numberOfNodes }, null); } catch (IOException e) { throw new RuntimeException(e); } try { Thread.sleep(61*1000); } catch (InterruptedException e) { throw new RuntimeException(e); } this.numberOfNodes++; } } logger.info("Cluster nodes = {}", String.join(",",getNodeNames())); assertThat(getNodeNames().length, org.hamcrest.Matchers.greaterThanOrEqualTo(n)); } /** * Ensures that at most <code>n</code> are up and running. * If less nodes that <code>n</code> are running this method * will not start any additional nodes. */ public synchronized void ensureAtMostNumDataNodes(int n) throws IOException { /* int size = numDataNodes(); if (size <= n) { return; } // prevent killing the master if possible and client nodes final Iterator<NodeAndClient> values = n == 0 ? 
nodes.values().iterator() : Iterators.filter(nodes.values().iterator(), Predicates.and(new DataNodePredicate(), Predicates.not(new MasterNodePredicate(getMasterName())))); final Iterator<NodeAndClient> limit = Iterators.limit(values, size - n); logger.info("changing cluster size from {} to {}, {} data nodes", size(), n + numSharedClientNodes, n); Set<NodeAndClient> nodesToRemove = new HashSet<>(); while (limit.hasNext()) { NodeAndClient next = limit.next(); nodesToRemove.add(next); //removeDisruptionSchemeFromNode(next); next.close(); } for (NodeAndClient toRemove : nodesToRemove) { nodes.remove(toRemove.name); } if (!nodesToRemove.isEmpty() && size() > 0) { assertNoTimeout(client().admin().cluster().prepareHealth().setWaitForNodes(Integer.toString(nodes.size())).get()); } */ } private NodeAndClient buildNode(Settings settings, Version version) { int ord = nextNodeId.getAndIncrement(); return buildNode(ord, random.nextLong(), settings, version); } private NodeAndClient buildNode() { int ord = nextNodeId.getAndIncrement(); return buildNode(ord, random.nextLong(), null, Version.CURRENT); } private NodeAndClient buildNode(int nodeId, long seed, Settings settings, Version version) { assert Thread.holdsLock(this); ensureOpen(); settings = getSettings(nodeId, seed, settings); Collection<Class<? extends Plugin>> plugins = getPlugins(seed); String name = buildNodeName(nodeId); assert !nodes.containsKey(name); Settings finalSettings = settingsBuilder() .put("path.home", baseDir) // allow overriding path.home .put(settings) .put("name", name) .put(DiscoveryService.SETTING_DISCOVERY_SEED, seed) .build(); MockNode node = new MockNode(finalSettings, version, plugins); return new NodeAndClient(name, node); } private String buildNodeName(int id) { return nodePrefix + id; } /** * Returns the common node name prefix for this test cluster. 
*/ public String nodePrefix() { return nodePrefix; } @Override public synchronized Client client() { ensureOpen(); /* Randomly return a client to one of the nodes in the cluster */ return getOrBuildRandomNode().client(random); } /** * Returns a node client to a data node in the cluster. * Note: use this with care tests should not rely on a certain nodes client. */ public synchronized Client dataNodeClient() { ensureOpen(); /* Randomly return a client to one of the nodes in the cluster */ return getRandomNodeAndClient(new DataNodePredicate()).client(random); } /** * Returns a node client to the current master node. * Note: use this with care tests should not rely on a certain nodes client. */ public synchronized Client masterClient() { ensureOpen(); NodeAndClient randomNodeAndClient = getRandomNodeAndClient(new MasterNodePredicate(getMasterName())); if (randomNodeAndClient != null) { return randomNodeAndClient.nodeClient(); // ensure node client master is requested } Assert.fail("No master client found"); return null; // can't happen } /** * Returns a node client to random node but not the master. This method will fail if no non-master client is available. 
*/ public synchronized Client nonMasterClient() { ensureOpen(); NodeAndClient randomNodeAndClient = getRandomNodeAndClient(Predicates.not(new MasterNodePredicate(getMasterName()))); if (randomNodeAndClient != null) { return randomNodeAndClient.nodeClient(); // ensure node client non-master is requested } Assert.fail("No non-master client found"); return null; // can't happen } /** * Returns a client to a node started with "node.client: true" */ public synchronized Client clientNodeClient() { ensureOpen(); NodeAndClient randomNodeAndClient = getRandomNodeAndClient(new ClientNodePredicate()); if (randomNodeAndClient != null) { return randomNodeAndClient.client(random); } int nodeId = nextNodeId.getAndIncrement(); Settings settings = getSettings(nodeId, random.nextLong(), Settings.EMPTY); startNodeClient(settings); return getRandomNodeAndClient(new ClientNodePredicate()).client(random); } public synchronized Client startNodeClient(Settings settings) { ensureOpen(); // currently unused Builder builder = settingsBuilder().put(settings).put("node.client", true); if (size() == 0) { // if we are the first node - don't wait for a state builder.put("discovery.initial_state_timeout", 0); } String name = startNode(builder); return nodes.get(name).nodeClient(); } /** * Returns a transport client */ public synchronized Client transportClient() { ensureOpen(); // randomly return a transport client going to one of the nodes in the cluster return getOrBuildRandomNode().transportClient(); } /** * Returns a node client to a given node. 
*/ public synchronized Client client(String nodeName) { ensureOpen(); NodeAndClient nodeAndClient = nodes.get(nodeName); if (nodeAndClient != null) { return nodeAndClient.client(random); } Assert.fail("No node found with name: [" + nodeName + "]"); return null; // can't happen } /** * Returns a "smart" node client to a random node in the cluster */ public synchronized Client smartClient() { NodeAndClient randomNodeAndClient = getRandomNodeAndClient(); if (randomNodeAndClient != null) { return randomNodeAndClient.nodeClient(); } Assert.fail("No smart client found"); return null; // can't happen } /** * Returns a random node that applies to the given predicate. * The predicate can filter nodes based on the nodes settings. * If all nodes are filtered out this method will return <code>null</code> */ public synchronized Client client(final Predicate<Settings> filterPredicate) { ensureOpen(); final NodeAndClient randomNodeAndClient = getRandomNodeAndClient(new Predicate<NodeAndClient>() { @Override public boolean apply(NodeAndClient nodeAndClient) { return filterPredicate.apply(nodeAndClient.node.settings()); } }); if (randomNodeAndClient != null) { return randomNodeAndClient.client(random); } return null; } @Override public void close() { if (this.open.compareAndSet(true, false)) { if (activeDisruptionScheme != null) { activeDisruptionScheme.testClusterClosed(); activeDisruptionScheme = null; } IOUtils.closeWhileHandlingException(nodes.values()); nodes.clear(); executor.shutdownNow(); } } public String getNodeMode() { return "network"; } private final class NodeAndClient implements Closeable { private MockNode node; private Client nodeClient; private Client transportClient; private final AtomicBoolean closed = new AtomicBoolean(false); private final String name; NodeAndClient(String name, MockNode node) { this.node = node; this.name = name; } Node node() { if (closed.get()) { throw new RuntimeException("already closed"); } return node; } Client client(Random random) { 
if (closed.get()) { throw new RuntimeException("already closed"); } double nextDouble = random.nextDouble(); if (nextDouble < transportClientRatio) { if (logger.isTraceEnabled()) { logger.trace("Using transport client for node [{}] sniff: [{}]", node.settings().get("name"), false); } return getOrBuildTransportClient(); } else { return getOrBuildNodeClient(); } } Client nodeClient() { if (closed.get()) { throw new RuntimeException("already closed"); } return getOrBuildNodeClient(); } Client transportClient() { if (closed.get()) { throw new RuntimeException("already closed"); } return getOrBuildTransportClient(); } private Client getOrBuildNodeClient() { if (nodeClient != null) { return nodeClient; } return nodeClient = node.client(); } private Client getOrBuildTransportClient() { if (transportClient != null) { return transportClient; } /* no sniff client for now - doesn't work will all tests since it might throw NoNodeAvailableException if nodes are shut down. * we first need support of transportClientRatio as annotations or so */ return transportClient = new TransportClientFactory(false, nodeConfigurationSource.transportClientSettings(), baseDir, "network", nodeConfigurationSource.transportClientPlugins()).client(node, getClusterName()); } void resetClient() throws IOException { if (closed.get() == false) { Releasables.close(nodeClient, transportClient); nodeClient = null; transportClient = null; } } void closeNode() { registerDataPath(); node.close(); } void restart(RestartCallback callback) throws Exception { assert callback != null; resetClient(); if (!node.isClosed()) { closeNode(); } Settings newSettings = callback.onNodeStopped(name); if (newSettings == null) { newSettings = Settings.EMPTY; } if (callback.clearData(name)) { NodeEnvironment nodeEnv = getInstanceFromNode(NodeEnvironment.class, node); if (nodeEnv.hasNodeFile()) { IOUtils.rm(nodeEnv.nodeDataPaths()); } } final long newIdSeed = node.settings().getAsLong(DiscoveryService.SETTING_DISCOVERY_SEED, 0l) 
+ 1; // use a new seed to make sure we have new node id Settings finalSettings = Settings.builder().put(node.settings()).put(newSettings).put(DiscoveryService.SETTING_DISCOVERY_SEED, newIdSeed).build(); Collection<Class<? extends Plugin>> plugins = node.getPlugins(); Version version = node.getVersion(); node = new MockNode(finalSettings, version, plugins); node.start(); } void registerDataPath() { NodeEnvironment nodeEnv = getInstanceFromNode(NodeEnvironment.class, node); if (nodeEnv.hasNodeFile()) { dataDirToClean.addAll(Arrays.asList(nodeEnv.nodeDataPaths())); } } @Override public void close() throws IOException { resetClient(); closed.set(true); closeNode(); } } public static final String TRANSPORT_CLIENT_PREFIX = "transport_client_"; static class TransportClientFactory { private final boolean sniff; private final Settings settings; private final Path baseDir; private final String nodeMode; private final Collection<Class<? extends Plugin>> plugins; TransportClientFactory(boolean sniff, Settings settings, Path baseDir, String nodeMode, Collection<Class<? extends Plugin>> plugins) { this.sniff = sniff; this.settings = settings != null ? 
settings : Settings.EMPTY; this.baseDir = baseDir; this.nodeMode = nodeMode; this.plugins = plugins; } public Client client(Node node, String clusterName) { TransportAddress addr = node.injector().getInstance(TransportService.class).boundAddress().publishAddress(); Settings nodeSettings = node.settings(); Builder builder = settingsBuilder() .put("client.transport.nodes_sampler_interval", "1s") .put("path.home", baseDir) .put("name", TRANSPORT_CLIENT_PREFIX + node.settings().get("name")) .put(ClusterName.SETTING, clusterName).put("client.transport.sniff", sniff) .put("node.mode", nodeSettings.get("node.mode", nodeMode)) .put("node.local", nodeSettings.get("node.local", "")) .put("logger.prefix", nodeSettings.get("logger.prefix", "")) .put("logger.level", nodeSettings.get("logger.level", "INFO")) .put(InternalSettingsPreparer.IGNORE_SYSTEM_PROPERTIES_SETTING, true) .put(settings); TransportClient.Builder clientBuilder = TransportClient.builder().settings(builder.build()); for (Class<? extends Plugin> plugin : plugins) { clientBuilder.addPlugin(plugin); } TransportClient client = clientBuilder.build(); client.addTransportAddress(addr); return client; } } @Override public synchronized void beforeTest(Random random, double transportClientRatio) throws IOException, InterruptedException { super.beforeTest(random, transportClientRatio); reset(true); } private synchronized void reset(boolean wipeData) throws IOException { // clear all rules for mock transport services /* for (NodeAndClient nodeAndClient : nodes.values()) { TransportService transportService = nodeAndClient.node.injector().getInstance(TransportService.class); if (transportService instanceof MockTransportService) { final MockTransportService mockTransportService = (MockTransportService) transportService; mockTransportService.clearAllRules(); mockTransportService.clearTracers(); } } randomlyResetClients(); if (wipeData) { wipeDataDirectories(); } if (nextNodeId.get() == sharedNodesSeeds.length && nodes.size() 
== sharedNodesSeeds.length) { logger.debug("Cluster hasn't changed - moving out - nodes: [{}] nextNodeId: [{}] numSharedNodes: [{}]", nodes.keySet(), nextNodeId.get(), sharedNodesSeeds.length); return; } logger.debug("Cluster is NOT consistent - restarting shared nodes - nodes: [{}] nextNodeId: [{}] numSharedNodes: [{}]", nodes.keySet(), nextNodeId.get(), sharedNodesSeeds.length); Set<NodeAndClient> sharedNodes = new HashSet<>(); assert sharedNodesSeeds.length == numSharedDataNodes + numSharedClientNodes; boolean changed = false; for (int i = 0; i < numSharedDataNodes; i++) { String buildNodeName = buildNodeName(i); NodeAndClient nodeAndClient = nodes.get(buildNodeName); if (nodeAndClient == null) { changed = true; nodeAndClient = buildNode(i, sharedNodesSeeds[i], null, Version.CURRENT); nodeAndClient.node.start(); logger.info("Start Shared Node [{}] not shared", nodeAndClient.name); } sharedNodes.add(nodeAndClient); } for (int i = numSharedDataNodes; i < numSharedDataNodes + numSharedClientNodes; i++) { String buildNodeName = buildNodeName(i); NodeAndClient nodeAndClient = nodes.get(buildNodeName); if (nodeAndClient == null) { changed = true; Builder clientSettingsBuilder = Settings.builder().put("node.client", true); nodeAndClient = buildNode(i, sharedNodesSeeds[i], clientSettingsBuilder.build(), Version.CURRENT); nodeAndClient.node.start(); logger.info("Start Shared Node [{}] not shared", nodeAndClient.name); } sharedNodes.add(nodeAndClient); } if (!changed && sharedNodes.size() == nodes.size()) { logger.debug("Cluster is consistent - moving out - nodes: [{}] nextNodeId: [{}] numSharedNodes: [{}]", nodes.keySet(), nextNodeId.get(), sharedNodesSeeds.length); if (size() > 0) { client().admin().cluster().prepareHealth().setWaitForNodes(Integer.toString(sharedNodesSeeds.length)).get(); } return; // we are consistent - return } for (NodeAndClient nodeAndClient : sharedNodes) { nodes.remove(nodeAndClient.name); } // trash the remaining nodes final 
Collection<NodeAndClient> toShutDown = nodes.values(); for (NodeAndClient nodeAndClient : toShutDown) { logger.debug("Close Node [{}] not shared", nodeAndClient.name); nodeAndClient.close(); } nodes.clear(); for (NodeAndClient nodeAndClient : sharedNodes) { publishNode(nodeAndClient); } nextNodeId.set(sharedNodesSeeds.length); assert size() == sharedNodesSeeds.length; if (size() > 0) { client().admin().cluster().prepareHealth().setWaitForNodes(Integer.toString(sharedNodesSeeds.length)).get(); } logger.debug("Cluster is consistent again - nodes: [{}] nextNodeId: [{}] numSharedNodes: [{}]", nodes.keySet(), nextNodeId.get(), sharedNodesSeeds.length); */ } @Override public synchronized void afterTest() throws IOException { wipeDataDirectories(); randomlyResetClients(); /* reset all clients - each test gets its own client based on the Random instance created above. */ } @Override public void beforeIndexDeletion() { // Check that the operations counter on index shard has reached 0. // The assumption here is that after a test there are no ongoing write operations. // test that have ongoing write operations after the test (for example because ttl is used // and not all docs have been purged after the test) and inherit from // ElasticsearchIntegrationTest must override beforeIndexDeletion() to avoid failures. 
assertShardIndexCounter(); //check that shards that have same sync id also contain same number of documents assertSameSyncIdSameDocs(); } private void assertSameSyncIdSameDocs() { Map<String, Long> docsOnShards = new HashMap<>(); final Collection<NodeAndClient> nodesAndClients = nodes.values(); for (NodeAndClient nodeAndClient : nodesAndClients) { IndicesService indexServices = getInstance(IndicesService.class, nodeAndClient.name); for (IndexService indexService : indexServices) { for (IndexShard indexShard : indexService) { try { CommitStats commitStats = indexShard.engine().commitStats(); String syncId = commitStats.getUserData().get(Engine.SYNC_COMMIT_ID); if (syncId != null) { long liveDocsOnShard = commitStats.getNumDocs(); if (docsOnShards.get(syncId) != null) { assertThat("sync id is equal but number of docs does not match on node " + nodeAndClient.name + ". expected " + docsOnShards.get(syncId) + " but got " + liveDocsOnShard, docsOnShards.get(syncId), equalTo(liveDocsOnShard)); } else { docsOnShards.put(syncId, liveDocsOnShard); } } } catch (EngineClosedException e) { // nothing to do, shard is closed } } } } } private void assertShardIndexCounter() { final Collection<NodeAndClient> nodesAndClients = nodes.values(); for (NodeAndClient nodeAndClient : nodesAndClients) { IndicesService indexServices = getInstance(IndicesService.class, nodeAndClient.name); for (IndexService indexService : indexServices) { for (IndexShard indexShard : indexService) { assertThat("index shard counter on shard " + indexShard.shardId() + " on node " + nodeAndClient.name + " not 0", indexShard.getOperationsCount(), equalTo(0)); } } } } private void randomlyResetClients() throws IOException { // only reset the clients on nightly tests, it causes heavy load... 
if (RandomizedTest.isNightly() && rarely(random)) { final Collection<NodeAndClient> nodesAndClients = nodes.values(); for (NodeAndClient nodeAndClient : nodesAndClients) { nodeAndClient.resetClient(); } } } private void wipeDataDirectories() { if (!dataDirToClean.isEmpty()) { try { for (Path path : dataDirToClean) { try { FileSystemUtils.deleteSubDirectories(path); logger.info("Successfully wiped data directory for node location: {}", path); } catch (IOException e) { logger.info("Failed to wipe data directory for node location: {}", path); } } } finally { dataDirToClean.clear(); } } } /** * Returns a reference to a random node's {@link ClusterService} */ public ClusterService clusterService() { return clusterService(null); } /** * Returns a reference to a node's {@link ClusterService}. If the given node is null, a random node will be selected. */ public synchronized ClusterService clusterService(@Nullable String node) { return getInstance(ClusterService.class, node); } /** * Returns an Iterable to all instances for the given class >T< across all nodes in the cluster. */ public synchronized <T> Iterable<T> getInstances(Class<T> clazz) { List<T> instances = new ArrayList<>(nodes.size()); for (NodeAndClient nodeAndClient : nodes.values()) { instances.add(getInstanceFromNode(clazz, nodeAndClient.node)); } return instances; } /** * Returns an Iterable to all instances for the given class >T< across all data nodes in the cluster. 
*/ public synchronized <T> Iterable<T> getDataNodeInstances(Class<T> clazz) { return getInstances(clazz, new DataNodePredicate()); } private synchronized <T> Iterable<T> getInstances(Class<T> clazz, Predicate<NodeAndClient> predicate) { Iterable<NodeAndClient> filteredNodes = Iterables.filter(nodes.values(), predicate); List<T> instances = new ArrayList<>(); for (NodeAndClient nodeAndClient : filteredNodes) { instances.add(getInstanceFromNode(clazz, nodeAndClient.node)); } return instances; } /** * Returns a reference to the given nodes instances of the given class >T< */ public synchronized <T> T getInstance(Class<T> clazz, final String node) { final Predicate<InternalTestCluster.NodeAndClient> predicate; if (node != null) { predicate = new Predicate<InternalTestCluster.NodeAndClient>() { @Override public boolean apply(NodeAndClient nodeAndClient) { return node.equals(nodeAndClient.name); } }; } else { predicate = Predicates.alwaysTrue(); } return getInstance(clazz, predicate); } public synchronized <T> T getDataNodeInstance(Class<T> clazz) { return getInstance(clazz, new DataNodePredicate()); } private synchronized <T> T getInstance(Class<T> clazz, Predicate<NodeAndClient> predicate) { NodeAndClient randomNodeAndClient = getRandomNodeAndClient(predicate); assert randomNodeAndClient != null; return getInstanceFromNode(clazz, randomNodeAndClient.node); } /** * Returns a reference to a random nodes instances of the given class >T< */ public synchronized <T> T getInstance(Class<T> clazz) { return getInstance(clazz, Predicates.<NodeAndClient>alwaysTrue()); } private synchronized <T> T getInstanceFromNode(Class<T> clazz, Node node) { return node.injector().getInstance(clazz); } @Override public synchronized int size() { return this.numberOfNodes; } @Override public InetSocketAddress[] httpAddresses() { List<InetSocketAddress> addresses = new ArrayList<>(); for(int i=1; i <= this.numberOfNodes; i++) addresses.add(new 
InetSocketAddress(InetAddresses.forString("127.0.0."+i), 9200)); /* for (HttpServerTransport httpServerTransport : getInstances(HttpServerTransport.class)) { addresses.add(((InetSocketTransportAddress) httpServerTransport.boundAddress().publishAddress()).address()); } */ return addresses.toArray(new InetSocketAddress[addresses.size()]); } /** * Stops a random data node in the cluster. Returns true if a node was found to stop, false otherwise. */ public synchronized boolean stopRandomDataNode() throws IOException { ensureOpen(); NodeAndClient nodeAndClient = getRandomNodeAndClient(new DataNodePredicate()); if (nodeAndClient != null) { logger.info("Closing random node [{}] ", nodeAndClient.name); //removeDisruptionSchemeFromNode(nodeAndClient); nodes.remove(nodeAndClient.name); nodeAndClient.close(); return true; } return false; } /** * Stops a random node in the cluster that applies to the given filter or non if the non of the nodes applies to the * filter. */ public synchronized void stopRandomNode(final Predicate<Settings> filter) throws IOException { ensureOpen(); NodeAndClient nodeAndClient = getRandomNodeAndClient(new Predicate<InternalTestCluster.NodeAndClient>() { @Override public boolean apply(NodeAndClient nodeAndClient) { return filter.apply(nodeAndClient.node.settings()); } }); if (nodeAndClient != null) { logger.info("Closing filtered random node [{}] ", nodeAndClient.name); //removeDisruptionSchemeFromNode(nodeAndClient); nodes.remove(nodeAndClient.name); nodeAndClient.close(); } } /** * Stops the current master node forcefully */ public synchronized void stopCurrentMasterNode() throws IOException { ensureOpen(); assert size() > 0; String masterNodeName = getMasterName(); assert nodes.containsKey(masterNodeName); logger.info("Closing master node [{}] ", masterNodeName); //removeDisruptionSchemeFromNode(nodes.get(masterNodeName)); NodeAndClient remove = nodes.remove(masterNodeName); remove.close(); } /** * Stops the any of the current nodes but not the 
master node. */ public void stopRandomNonMasterNode() throws IOException { NodeAndClient nodeAndClient = getRandomNodeAndClient(Predicates.not(new MasterNodePredicate(getMasterName()))); if (nodeAndClient != null) { logger.info("Closing random non master node [{}] current master [{}] ", nodeAndClient.name, getMasterName()); //removeDisruptionSchemeFromNode(nodeAndClient); nodes.remove(nodeAndClient.name); nodeAndClient.close(); } } /** * Restarts a random node in the cluster */ public void restartRandomNode() throws Exception { restartRandomNode(EMPTY_CALLBACK); } /** * Restarts a random node in the cluster and calls the callback during restart. */ public void restartRandomNode(RestartCallback callback) throws Exception { restartRandomNode(Predicates.<NodeAndClient>alwaysTrue(), callback); } /** * Restarts a random data node in the cluster */ public void restartRandomDataNode() throws Exception { restartRandomDataNode(EMPTY_CALLBACK); } /** * Restarts a random data node in the cluster and calls the callback during restart. */ public void restartRandomDataNode(RestartCallback callback) throws Exception { restartRandomNode(new DataNodePredicate(), callback); } /** * Restarts a random node in the cluster and calls the callback during restart. */ private void restartRandomNode(Predicate<NodeAndClient> predicate, RestartCallback callback) throws Exception { ensureOpen(); NodeAndClient nodeAndClient = getRandomNodeAndClient(predicate); if (nodeAndClient != null) { logger.info("Restarting random node [{}] ", nodeAndClient.name); nodeAndClient.restart(callback); } } /** * Restarts a node and calls the callback during restart. 
*/ public void restartNode(String nodeName, RestartCallback callback) throws Exception { ensureOpen(); NodeAndClient nodeAndClient = nodes.get(nodeName); if (nodeAndClient != null) { logger.info("Restarting node [{}] ", nodeAndClient.name); nodeAndClient.restart(callback); } } private void restartAllNodes(boolean rollingRestart, RestartCallback callback) throws Exception { /* ensureOpen(); List<NodeAndClient> toRemove = new ArrayList<>(); try { for (NodeAndClient nodeAndClient : nodes.values()) { if (!callback.doRestart(nodeAndClient.name)) { logger.info("Closing node [{}] during restart", nodeAndClient.name); toRemove.add(nodeAndClient); if (activeDisruptionScheme != null) { activeDisruptionScheme.removeFromNode(nodeAndClient.name, this); } nodeAndClient.close(); } } } finally { for (NodeAndClient nodeAndClient : toRemove) { nodes.remove(nodeAndClient.name); } } logger.info("Restarting remaining nodes rollingRestart [{}]", rollingRestart); if (rollingRestart) { int numNodesRestarted = 0; for (NodeAndClient nodeAndClient : nodes.values()) { callback.doAfterNodes(numNodesRestarted++, nodeAndClient.nodeClient()); logger.info("Restarting node [{}] ", nodeAndClient.name); if (activeDisruptionScheme != null) { activeDisruptionScheme.removeFromNode(nodeAndClient.name, this); } nodeAndClient.restart(callback); if (activeDisruptionScheme != null) { activeDisruptionScheme.applyToNode(nodeAndClient.name, this); } } } else { int numNodesRestarted = 0; for (NodeAndClient nodeAndClient : nodes.values()) { callback.doAfterNodes(numNodesRestarted++, nodeAndClient.nodeClient()); logger.info("Stopping node [{}] ", nodeAndClient.name); if (activeDisruptionScheme != null) { activeDisruptionScheme.removeFromNode(nodeAndClient.name, this); } nodeAndClient.closeNode(); } for (NodeAndClient nodeAndClient : nodes.values()) { logger.info("Starting node [{}] ", nodeAndClient.name); if (activeDisruptionScheme != null) { activeDisruptionScheme.removeFromNode(nodeAndClient.name, this); } 
nodeAndClient.restart(callback); if (activeDisruptionScheme != null) { activeDisruptionScheme.applyToNode(nodeAndClient.name, this); } } } */ } public static final RestartCallback EMPTY_CALLBACK = new RestartCallback() { @Override public Settings onNodeStopped(String node) { return null; } }; /** * Restarts all nodes in the cluster. It first stops all nodes and then restarts all the nodes again. */ public void fullRestart() throws Exception { fullRestart(EMPTY_CALLBACK); } /** * Restarts all nodes in a rolling restart fashion ie. only restarts on node a time. */ public void rollingRestart() throws Exception { rollingRestart(EMPTY_CALLBACK); } /** * Restarts all nodes in a rolling restart fashion ie. only restarts on node a time. */ public void rollingRestart(RestartCallback function) throws Exception { restartAllNodes(true, function); } /** * Restarts all nodes in the cluster. It first stops all nodes and then restarts all the nodes again. */ public void fullRestart(RestartCallback function) throws Exception { restartAllNodes(false, function); } /** * Returns the name of the current master node in the cluster. */ public String getMasterName() { return getMasterName(null); } /** * Returns the name of the current master node in the cluster and executes the request via the node specified * in the viaNode parameter. If viaNode isn't specified a random node will be picked to the send the request to. */ public String getMasterName(@Nullable String viaNode) { try { Client client = viaNode != null ? 
client(viaNode) : client(); ClusterState state = client.admin().cluster().prepareState().execute().actionGet().getState(); return state.nodes().masterNode().name(); } catch (Throwable e) { logger.warn("Can't fetch cluster state", e); throw new RuntimeException("Can't get master node " + e.getMessage(), e); } } synchronized Set<String> allDataNodesButN(int numNodes) { return nRandomDataNodes(numDataNodes() - numNodes); } private synchronized Set<String> nRandomDataNodes(int numNodes) { assert size() >= numNodes; NavigableMap<String, NodeAndClient> dataNodes = Maps.filterEntries(nodes, new EntryNodePredicate(new DataNodePredicate())); return Sets.newHashSet(Iterators.limit(dataNodes.keySet().iterator(), numNodes)); } /** * Returns a set of nodes that have at least one shard of the given index. */ public synchronized Set<String> nodesInclude(String index) { if (clusterService().state().routingTable().hasIndex(index)) { List<ShardRouting> allShards = clusterService().state().routingTable().allShards(index); DiscoveryNodes discoveryNodes = clusterService().state().getNodes(); Set<String> nodes = new HashSet<>(); for (ShardRouting shardRouting : allShards) { if (shardRouting.assignedToNode()) { DiscoveryNode discoveryNode = discoveryNodes.get(shardRouting.currentNodeId()); nodes.add(discoveryNode.getName()); } } return nodes; } return Collections.emptySet(); } /** * Starts a node with default settings and returns it's name. */ public synchronized String startNode() { return startNode(Settings.EMPTY, Version.CURRENT); } /** * Starts a node with default settings ad the specified version and returns it's name. */ public synchronized String startNode(Version version) { return startNode(Settings.EMPTY, version); } /** * Starts a node with the given settings builder and returns it's name. */ public synchronized String startNode(Settings.Builder settings) { return startNode(settings.build(), Version.CURRENT); } /** * Starts a node with the given settings and returns it's name. 
*/ public synchronized String startNode(Settings settings) { return startNode(settings, Version.CURRENT); } /** * Starts a node with the given settings and version and returns it's name. */ public synchronized String startNode(Settings settings, Version version) { /* NodeAndClient buildNode = buildNode(settings, version); buildNode.node().start(); publishNode(buildNode); return buildNode.name; */ return ESSingleNodeTestCase.nodeName(); } public synchronized ListenableFuture<List<String>> startMasterOnlyNodesAsync(int numNodes) { return startMasterOnlyNodesAsync(numNodes, Settings.EMPTY); } public synchronized ListenableFuture<List<String>> startMasterOnlyNodesAsync(int numNodes, Settings settings) { Settings settings1 = Settings.builder().put(settings).put("node.master", true).put("node.data", false).build(); return startNodesAsync(numNodes, settings1, Version.CURRENT); } public synchronized ListenableFuture<List<String>> startDataOnlyNodesAsync(int numNodes) { return startDataOnlyNodesAsync(numNodes, Settings.EMPTY); } public synchronized ListenableFuture<List<String>> startDataOnlyNodesAsync(int numNodes, Settings settings) { Settings settings1 = Settings.builder().put(settings).put("node.master", false).put("node.data", true).build(); return startNodesAsync(numNodes, settings1, Version.CURRENT); } public synchronized ListenableFuture<String> startMasterOnlyNodeAsync() { return startMasterOnlyNodeAsync(Settings.EMPTY); } public synchronized ListenableFuture<String> startMasterOnlyNodeAsync(Settings settings) { Settings settings1 = Settings.builder().put(settings).put("node.master", true).put("node.data", false).build(); return startNodeAsync(settings1, Version.CURRENT); } public synchronized String startMasterOnlyNode(Settings settings) { Settings settings1 = Settings.builder().put(settings).put("node.master", true).put("node.data", false).build(); return startNode(settings1, Version.CURRENT); } public synchronized ListenableFuture<String> 
startDataOnlyNodeAsync() { return startDataOnlyNodeAsync(Settings.EMPTY); } public synchronized ListenableFuture<String> startDataOnlyNodeAsync(Settings settings) { Settings settings1 = Settings.builder().put(settings).put("node.master", false).put("node.data", true).build(); return startNodeAsync(settings1, Version.CURRENT); } public synchronized String startDataOnlyNode(Settings settings) { Settings settings1 = Settings.builder().put(settings).put("node.master", false).put("node.data", true).build(); return startNode(settings1, Version.CURRENT); } /** * Starts a node in an async manner with the given settings and returns future with its name. */ public synchronized ListenableFuture<String> startNodeAsync() { return startNodeAsync(Settings.EMPTY, Version.CURRENT); } /** * Starts a node in an async manner with the given settings and returns future with its name. */ public synchronized ListenableFuture<String> startNodeAsync(final Settings settings) { return startNodeAsync(settings, Version.CURRENT); } /** * Starts a node in an async manner with the given settings and version and returns future with its name. */ public synchronized ListenableFuture<String> startNodeAsync(final Settings settings, final Version version) { final SettableFuture<String> future = SettableFuture.create(); final NodeAndClient buildNode = buildNode(settings, version); Runnable startNode = new Runnable() { @Override public void run() { try { buildNode.node().start(); publishNode(buildNode); future.set(buildNode.name); } catch (Throwable t) { future.setException(t); } } }; executor.execute(startNode); return future; } /** * Starts multiple nodes in an async manner and returns future with its name. */ public synchronized ListenableFuture<List<String>> startNodesAsync(final int numNodes) { return startNodesAsync(numNodes, Settings.EMPTY, Version.CURRENT); } /** * Starts multiple nodes in an async manner with the given settings and returns future with its name. 
*/ public synchronized ListenableFuture<List<String>> startNodesAsync(final int numNodes, final Settings settings) { return startNodesAsync(numNodes, settings, Version.CURRENT); } /** * Starts multiple nodes in an async manner with the given settings and version and returns future with its name. */ public synchronized ListenableFuture<List<String>> startNodesAsync(final int numNodes, final Settings settings, final Version version) { List<ListenableFuture<String>> futures = new ArrayList<>(); for (int i = 0; i < numNodes; i++) { futures.add(startNodeAsync(settings, version)); } return Futures.allAsList(futures); } /** * Starts multiple nodes (based on the number of settings provided) in an async manner, with explicit settings for each node. * The order of the node names returned matches the order of the settings provided. */ public synchronized ListenableFuture<List<String>> startNodesAsync(final Settings... settings) { List<ListenableFuture<String>> futures = new ArrayList<>(); for (Settings setting : settings) { futures.add(startNodeAsync(setting, Version.CURRENT)); } return Futures.allAsList(futures); } private synchronized void publishNode(NodeAndClient nodeAndClient) { assert !nodeAndClient.node().isClosed(); NodeEnvironment nodeEnv = getInstanceFromNode(NodeEnvironment.class, nodeAndClient.node); if (nodeEnv.hasNodeFile()) { dataDirToClean.addAll(Arrays.asList(nodeEnv.nodeDataPaths())); } nodes.put(nodeAndClient.name, nodeAndClient); //applyDisruptionSchemeToNode(nodeAndClient); } public void closeNonSharedNodes(boolean wipeData) throws IOException { reset(wipeData); } @Override public int numDataNodes() { return dataNodeAndClients().size(); } @Override public int numDataAndMasterNodes() { return dataAndMasterNodes().size(); } public void setDisruptionScheme(ServiceDisruptionScheme scheme) { clearDisruptionScheme(); //scheme.applyToCluster(this); activeDisruptionScheme = scheme; } public void clearDisruptionScheme() { if (activeDisruptionScheme != null) { 
TimeValue expectedHealingTime = activeDisruptionScheme.expectedTimeToHeal(); logger.info("Clearing active scheme {}, expected healing time {}", activeDisruptionScheme, expectedHealingTime); //activeDisruptionScheme.removeAndEnsureHealthy(this); } activeDisruptionScheme = null; } /* private void applyDisruptionSchemeToNode(NodeAndClient nodeAndClient) { if (activeDisruptionScheme != null) { assert nodes.containsKey(nodeAndClient.name); activeDisruptionScheme.applyToNode(nodeAndClient.name, this); } } private void removeDisruptionSchemeFromNode(NodeAndClient nodeAndClient) { if (activeDisruptionScheme != null) { assert nodes.containsKey(nodeAndClient.name); activeDisruptionScheme.removeFromNode(nodeAndClient.name, this); } } */ private synchronized Collection<NodeAndClient> dataNodeAndClients() { return Collections2.filter(nodes.values(), new DataNodePredicate()); } private synchronized Collection<NodeAndClient> dataAndMasterNodes() { return Collections2.filter(nodes.values(), new DataOrMasterNodePredicate()); } private static final class DataNodePredicate implements Predicate<NodeAndClient> { @Override public boolean apply(NodeAndClient nodeAndClient) { return DiscoveryNode.dataNode(nodeAndClient.node.settings()); } } private static final class DataOrMasterNodePredicate implements Predicate<NodeAndClient> { @Override public boolean apply(NodeAndClient nodeAndClient) { return DiscoveryNode.dataNode(nodeAndClient.node.settings()) || DiscoveryNode.masterNode(nodeAndClient.node.settings()); } } private static final class MasterNodePredicate implements Predicate<NodeAndClient> { private final String masterNodeName; public MasterNodePredicate(String masterNodeName) { this.masterNodeName = masterNodeName; } @Override public boolean apply(NodeAndClient nodeAndClient) { return masterNodeName.equals(nodeAndClient.name); } } private static final class ClientNodePredicate implements Predicate<NodeAndClient> { @Override public boolean apply(NodeAndClient nodeAndClient) { return 
DiscoveryNode.clientNode(nodeAndClient.node.settings()); } } private static final class EntryNodePredicate implements Predicate<Map.Entry<String, NodeAndClient>> { private final Predicate<NodeAndClient> delegateNodePredicate; EntryNodePredicate(Predicate<NodeAndClient> delegateNodePredicate) { this.delegateNodePredicate = delegateNodePredicate; } @Override public boolean apply(Map.Entry<String, NodeAndClient> entry) { return delegateNodePredicate.apply(entry.getValue()); } } synchronized String routingKeyForShard(String index, String type, int shard, Random random) { assertThat(shard, greaterThanOrEqualTo(0)); assertThat(shard, greaterThanOrEqualTo(0)); for (NodeAndClient n : nodes.values()) { Node node = n.node; IndicesService indicesService = getInstanceFromNode(IndicesService.class, node); ClusterService clusterService = getInstanceFromNode(ClusterService.class, node); IndexService indexService = indicesService.indexService(index); if (indexService != null) { assertThat(indexService.indexSettings().getAsInt(IndexMetaData.SETTING_NUMBER_OF_SHARDS, -1), greaterThan(shard)); OperationRouting operationRouting = indexService.injector().getInstance(OperationRouting.class); while (true) { String routing = RandomStrings.randomAsciiOfLength(random, 10); final int targetShard = operationRouting.indexShards(clusterService.state(), index, type, null, routing).shardId().getId(); if (shard == targetShard) { return routing; } } } } fail("Could not find a node that holds " + index); return null; } @Override public synchronized Iterator<Client> iterator() { ensureOpen(); final Iterator<NodeAndClient> iterator = nodes.values().iterator(); return new Iterator<Client>() { @Override public boolean hasNext() { return iterator.hasNext(); } @Override public Client next() { return iterator.next().client(random); } @Override public void remove() { throw new UnsupportedOperationException(""); } }; } /** * Returns a predicate that only accepts settings of nodes with one of the given names. 
*/
    public static Predicate<Settings> nameFilter(String... nodeName) {
        return new NodeNamePredicate(new HashSet<>(Arrays.asList(nodeName)));
    }

    /** Accepts settings whose {@code name} value is one of the configured node names. */
    private static final class NodeNamePredicate implements Predicate<Settings> {
        private final HashSet<String> nodeNames;

        public NodeNamePredicate(HashSet<String> nodeNames) {
            this.nodeNames = nodeNames;
        }

        @Override
        public boolean apply(Settings settings) {
            return nodeNames.contains(settings.get("name"));
        }
    }

    /**
     * An abstract class that is called during {@link #rollingRestart(InternalTestCluster.RestartCallback)}
     * and / or {@link #fullRestart(InternalTestCluster.RestartCallback)} to execute actions at certain
     * stages of the restart.
     */
    public static class RestartCallback {

        /**
         * Executed once the given node name has been stopped.
         */
        public Settings onNodeStopped(String nodeName) throws Exception {
            return Settings.EMPTY;
        }

        /**
         * Executed for each node before the <tt>n+1</tt> node is restarted. The given client is
         * an active client to the node that will be restarted next.
         */
        public void doAfterNodes(int n, Client client) throws Exception {
        }

        /**
         * If this returns <code>true</code> all data for the node with the given node name will be cleared including
         * gateways and all index data. Returns <code>false</code> by default.
         */
        public boolean clearData(String nodeName) {
            return false;
        }

        /**
         * If this returns <code>false</code> the node with the given node name will not be restarted. It will be
         * closed and removed from the cluster. Returns <code>true</code> by default.
         */
        public boolean doRestart(String nodeName) {
            return true;
        }
    }

    public Settings getDefaultSettings() {
        return defaultSettings;
    }

    /**
     * For every node, asserts that the fielddata and request circuit breakers are back at 0 and
     * that the fielddata, query cache and bitset cache sizes reported by the node stats are 0.
     *
     * @throws AssertionError if a check fails, or (with the cause attached) if polling the
     *                        request breaker throws
     */
    @Override
    public void ensureEstimatedStats() {
        if (size() > 0) {
            // Checks that the breakers have been reset without incurring a
            // network request, because a network request can increment one
            // of the breakers
            for (NodeAndClient nodeAndClient : nodes.values()) {
                final IndicesFieldDataCache fdCache = getInstanceFromNode(IndicesFieldDataCache.class, nodeAndClient.node);
                // Clean up the cache, ensuring that entries' listeners have been called
                fdCache.getCache().cleanUp();

                final String name = nodeAndClient.name;
                final CircuitBreakerService breakerService = getInstanceFromNode(CircuitBreakerService.class, nodeAndClient.node);
                CircuitBreaker fdBreaker = breakerService.getBreaker(CircuitBreaker.FIELDDATA);
                assertThat("Fielddata breaker not reset to 0 on node: " + name, fdBreaker.getUsed(), equalTo(0L));
                // Anything that uses transport or HTTP can increase the
                // request breaker (because they use bigarrays), because of
                // that the breaker can sometimes be incremented from ping
                // requests from other clusters because Jenkins is running
                // multiple ES testing jobs in parallel on the same machine.
                // To combat this we check whether the breaker has reached 0
                // in an assertBusy loop, so it will try for 10 seconds and
                // fail if it never reached 0
                try {
                    assertBusy(new Runnable() {
                        @Override
                        public void run() {
                            CircuitBreaker reqBreaker = breakerService.getBreaker(CircuitBreaker.REQUEST);
                            assertThat("Request breaker not reset to 0 on node: " + name, reqBreaker.getUsed(), equalTo(0L));
                        }
                    });
                } catch (Exception e) {
                    // Keep the original exception as the cause so its stack trace is not lost.
                    throw new AssertionError("Exception during check for request breaker reset to 0: " + e, e);
                }

                NodeService nodeService = getInstanceFromNode(NodeService.class, nodeAndClient.node);
                NodeStats stats = nodeService.stats(CommonStatsFlags.ALL, false, false, false, false, false, false, false, false, false);
                assertThat("Fielddata size must be 0 on node: " + stats.getNode(), stats.getIndices().getFieldData().getMemorySizeInBytes(), equalTo(0L));
                assertThat("Query cache size must be 0 on node: " + stats.getNode(), stats.getIndices().getQueryCache().getMemorySizeInBytes(), equalTo(0L));
                assertThat("FixedBitSet cache size must be 0 on node: " + stats.getNode(), stats.getIndices().getSegments().getBitsetMemoryInBytes(), equalTo(0L));
            }
        }
    }

    /**
     * Runs the superclass checks, asserts that in-flight requests have finished, and verifies
     * that every shard lock still registered in a node environment can be acquired within 5 seconds.
     *
     * @throws AssertionError if a shard lock cannot be acquired (the {@link IOException} is attached as cause)
     */
    @Override
    public void assertAfterTest() throws IOException {
        super.assertAfterTest();
        assertRequestsFinished();
        for (NodeEnvironment env : this.getInstances(NodeEnvironment.class)) {
            Set<ShardId> shardIds = env.lockedShards();
            for (ShardId id : shardIds) {
                try {
                    env.shardLock(id, TimeUnit.SECONDS.toMillis(5)).close();
                } catch (IOException ex) {
                    throw new AssertionError("Shard " + id + " is still locked after 5 sec waiting", ex);
                }
            }
        }
    }

    /**
     * For every node, polls until the in-flight-requests circuit breaker reports 0 bytes used.
     *
     * @throws AssertionError if polling throws (the exception is logged and attached as cause)
     */
    private void assertRequestsFinished() {
        if (size() > 0) {
            for (final NodeAndClient nodeAndClient : nodes.values()) {
                final CircuitBreaker inFlightRequestsBreaker = getInstance(HierarchyCircuitBreakerService.class, nodeAndClient.name)
                        .getBreaker(CircuitBreaker.IN_FLIGHT_REQUESTS);
                try {
                    // see #ensureEstimatedStats()
                    assertBusy(new Runnable() {
                        @Override
                        public void run() {
                            // ensure that our size accounting on transport level is reset properly
                            long bytesUsed = inFlightRequestsBreaker.getUsed();
                            assertThat("All incoming requests on node [" + nodeAndClient.name + "] should have finished. Expected 0 but got " +
                                    bytesUsed, bytesUsed, equalTo(0L));
                        }
                    });
                } catch (Exception e) {
                    logger.error("Could not assert finished requests within timeout", e);
                    throw new AssertionError("Could not assert finished requests within timeout on node [" + nodeAndClient.name + "]", e);
                }
            }
        }
    }

    /**
     * Returns a comma-separated list of {@code localhost:<port>} entries, one per node, using the
     * port of each node's published transport address.
     */
    public String unicastHosts() {
        StringBuilder b = new StringBuilder();
        boolean first = true;
        for (NodeAndClient node : nodes.values()) {
            if (first) {
                first = false;
            } else {
                b.append(',');
            }
            b.append("localhost:").append(node.node().injector().getInstance(TransportService.class).boundAddress().publishAddress().getPort());
        }
        return b.toString();
    }

    @Override
    protected Settings settingsForRandomRepoPath() {
        return getDefaultSettings();
    }
}