/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.JSONTestUtil;
import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;
import org.apache.solr.update.UpdateLog;
import org.apache.solr.util.MockCoreContainer.MockCoreDescriptor;
import org.apache.solr.util.RTimer;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Simulates HTTP partitions between a leader and replica but the replica does
* not lose its ZooKeeper connection.
*/
@Slow
@SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776")
public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
// To prevent the test assertions firing too fast before cluster state
// recognizes (and propagates) partitions
protected static final long sleepMsBeforeHealPartition = 300;
// give plenty of time for replicas to recover when running in slow Jenkins test envs
protected static final int maxWaitSecsToSeeAllActive = 90;
private final boolean onlyLeaderIndexes = random().nextBoolean();
public HttpPartitionTest() {
super();
sliceCount = 2;
fixShardCount(3);
}
@Override
protected int getRealtimeReplicas() {
return onlyLeaderIndexes? 1 : -1;
}
/**
* We need to turn off directUpdatesToLeadersOnly due to SOLR-9512
*/
@Override
protected CloudSolrClient createCloudClient(String defaultCollection) {
CloudSolrClient client = new CloudSolrClient.Builder()
.withZkHost(zkServer.getZkAddress())
.sendDirectUpdatesToAnyShardReplica()
.build();
client.setParallelUpdates(random().nextBoolean());
if (defaultCollection != null) client.setDefaultCollection(defaultCollection);
client.getLbClient().setConnectionTimeout(30000);
client.getLbClient().setSoTimeout(60000);
return client;
}
/**
* Overrides the parent implementation to install a SocketProxy in-front of the Jetty server.
*/
@Override
public JettySolrRunner createJetty(File solrHome, String dataDir,
String shardList, String solrConfigOverride, String schemaOverride)
throws Exception
{
return createProxiedJetty(solrHome, dataDir, shardList, solrConfigOverride, schemaOverride);
}
@Test
public void test() throws Exception {
waitForThingsToLevelOut(30000);
testLeaderInitiatedRecoveryCRUD();
// Tests that if we set a minRf that's not satisfied, no recovery is requested, but if minRf is satisfied,
// recovery is requested
testMinRf();
waitForThingsToLevelOut(30000);
// test a 1x2 collection
testRf2();
waitForThingsToLevelOut(30000);
// now do similar for a 1x3 collection while taking 2 replicas on-and-off
// each time
testRf3();
waitForThingsToLevelOut(30000);
// have the leader lose its Zk session temporarily
testLeaderZkSessionLoss();
waitForThingsToLevelOut(30000);
log.info("HttpPartitionTest succeeded ... shutting down now!");
}
/**
* Tests handling of lir state znodes.
*/
protected void testLeaderInitiatedRecoveryCRUD() throws Exception {
String testCollectionName = "c8n_crud_1x2";
String shardId = "shard1";
createCollectionRetry(testCollectionName, 1, 2, 1);
cloudClient.setDefaultCollection(testCollectionName);
Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, shardId);
JettySolrRunner leaderJetty = getJettyOnPort(getReplicaPort(leader));
CoreContainer cores = leaderJetty.getCoreContainer();
ZkController zkController = cores.getZkController();
assertNotNull("ZkController is null", zkController);
Replica notLeader =
ensureAllReplicasAreActive(testCollectionName, shardId, 1, 2, maxWaitSecsToSeeAllActive).get(0);
ZkCoreNodeProps replicaCoreNodeProps = new ZkCoreNodeProps(notLeader);
String replicaUrl = replicaCoreNodeProps.getCoreUrl();
MockCoreDescriptor cd = new MockCoreDescriptor() {
public CloudDescriptor getCloudDescriptor() {
return new CloudDescriptor(leader.getStr(ZkStateReader.CORE_NAME_PROP), new Properties(), this) {
@Override
public String getCoreNodeName() {
return leader.getName();
}
@Override
public boolean isLeader() {
return true;
}
};
}
};
zkController.updateLeaderInitiatedRecoveryState(testCollectionName, shardId, notLeader.getName(), Replica.State.DOWN, cd, true);
Map<String,Object> lirStateMap = zkController.getLeaderInitiatedRecoveryStateObject(testCollectionName, shardId, notLeader.getName());
assertNotNull(lirStateMap);
assertSame(Replica.State.DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));
// test old non-json format handling
SolrZkClient zkClient = zkController.getZkClient();
String znodePath = zkController.getLeaderInitiatedRecoveryZnodePath(testCollectionName, shardId, notLeader.getName());
zkClient.setData(znodePath, "down".getBytes(StandardCharsets.UTF_8), true);
lirStateMap = zkController.getLeaderInitiatedRecoveryStateObject(testCollectionName, shardId, notLeader.getName());
assertNotNull(lirStateMap);
assertSame(Replica.State.DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));
zkClient.delete(znodePath, -1, false);
// try to clean up
try {
new CollectionAdminRequest.Delete()
.setCollectionName(testCollectionName).process(cloudClient);
} catch (Exception e) {
// don't fail the test
log.warn("Could not delete collection {} after test completed", testCollectionName);
}
}
protected void testMinRf() throws Exception {
// create a collection that has 1 shard and 3 replicas
String testCollectionName = "collMinRf_1x3";
createCollection(testCollectionName, 1, 3, 1);
cloudClient.setDefaultCollection(testCollectionName);
sendDoc(1, 2);
List<Replica> notLeaders =
ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
assertTrue("Expected 2 non-leader replicas for collection " + testCollectionName
+ " but found " + notLeaders.size() + "; clusterState: "
+ printClusterStateInfo(testCollectionName),
notLeaders.size() == 2);
assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 1);
// Now introduce a network partition between the leader and 1 replica, so a minRf of 2 is still achieved
SocketProxy proxy0 = getProxyForReplica(notLeaders.get(0));
proxy0.close();
// indexing during a partition
int achievedRf = sendDoc(2, 2);
assertEquals("Unexpected achieved replication factor", 2, achievedRf);
Thread.sleep(sleepMsBeforeHealPartition);
// Verify that the partitioned replica is DOWN
ZkStateReader zkr = cloudClient.getZkStateReader();
zkr.forceUpdateCollection(testCollectionName);; // force the state to be fresh
ClusterState cs = zkr.getClusterState();
Collection<Slice> slices = cs.getActiveSlices(testCollectionName);
Slice slice = slices.iterator().next();
Replica partitionedReplica = slice.getReplica(notLeaders.get(0).getName());
assertEquals("The partitioned replica did not get marked down",
Replica.State.DOWN.toString(), partitionedReplica.getStr(ZkStateReader.STATE_PROP));
proxy0.reopen();
notLeaders =
ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
// Since minRf is achieved, we expect recovery, so we expect seeing 2 documents
assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 2);
// Now introduce a network partition between the leader and both of its replicas, so a minRf of 2 is NOT achieved
proxy0 = getProxyForReplica(notLeaders.get(0));
proxy0.close();
SocketProxy proxy1 = getProxyForReplica(notLeaders.get(1));
proxy1.close();
achievedRf = sendDoc(3, 2);
assertEquals("Unexpected achieved replication factor", 1, achievedRf);
Thread.sleep(sleepMsBeforeHealPartition);
// Verify that the partitioned replicas are NOT DOWN since minRf wasn't achieved
ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, 1);
proxy0.reopen();
proxy1.reopen();
notLeaders =
ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
// Check that doc 3 is on the leader but not on the notLeaders
Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1", 10000);
try (HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName)) {
assertDocExists(leaderSolr, testCollectionName, "3");
}
for (Replica notLeader : notLeaders) {
try (HttpSolrClient notLeaderSolr = getHttpSolrClient(notLeader, testCollectionName)) {
assertDocNotExists(notLeaderSolr, testCollectionName, "3");
}
}
// Retry sending doc 3
achievedRf = sendDoc(3, 2);
assertEquals("Unexpected achieved replication factor", 3, achievedRf);
// Now doc 3 should be on all replicas
assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 3);
}
protected void testRf2() throws Exception {
// create a collection that has 1 shard but 2 replicas
String testCollectionName = "c8n_1x2";
createCollectionRetry(testCollectionName, 1, 2, 1);
cloudClient.setDefaultCollection(testCollectionName);
sendDoc(1);
Replica notLeader =
ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive).get(0);
// ok, now introduce a network partition between the leader and the replica
SocketProxy proxy = getProxyForReplica(notLeader);
proxy.close();
// indexing during a partition
sendDoc(2);
// Have the partition last at least 1 sec
// While this gives the impression that recovery is timing related, this is
// really only
// to give time for the state to be written to ZK before the test completes.
// In other words,
// without a brief pause, the test finishes so quickly that it doesn't give
// time for the recovery process to kick-in
Thread.sleep(sleepMsBeforeHealPartition);
proxy.reopen();
List<Replica> notLeaders =
ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive);
sendDoc(3);
// sent 3 docs in so far, verify they are on the leader and replica
assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 3);
// Get the max version from the replica core to make sure it gets updated after recovery (see SOLR-7625)
JettySolrRunner replicaJetty = getJettyOnPort(getReplicaPort(notLeader));
CoreContainer coreContainer = replicaJetty.getCoreContainer();
ZkCoreNodeProps replicaCoreNodeProps = new ZkCoreNodeProps(notLeader);
String coreName = replicaCoreNodeProps.getCoreName();
Long maxVersionBefore = null;
try (SolrCore core = coreContainer.getCore(coreName)) {
assertNotNull("Core '"+coreName+"' not found for replica: "+notLeader.getName(), core);
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
maxVersionBefore = ulog.getCurrentMaxVersion();
}
assertNotNull("max version bucket seed not set for core " + coreName, maxVersionBefore);
log.info("Looked up max version bucket seed "+maxVersionBefore+" for core "+coreName);
// now up the stakes and do more docs
int numDocs = TEST_NIGHTLY ? 1000 : 100;
boolean hasPartition = false;
for (int d = 0; d < numDocs; d++) {
// create / restore partition every 100 docs
if (d % 10 == 0) {
if (hasPartition) {
proxy.reopen();
hasPartition = false;
} else {
if (d >= 10) {
proxy.close();
hasPartition = true;
Thread.sleep(sleepMsBeforeHealPartition);
}
}
}
sendDoc(d + 4); // 4 is offset as we've already indexed 1-3
}
// restore connectivity if lost
if (hasPartition) {
proxy.reopen();
}
notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive);
try (SolrCore core = coreContainer.getCore(coreName)) {
assertNotNull("Core '" + coreName + "' not found for replica: " + notLeader.getName(), core);
Long currentMaxVersion = core.getUpdateHandler().getUpdateLog().getCurrentMaxVersion();
log.info("After recovery, looked up NEW max version bucket seed " + currentMaxVersion +
" for core " + coreName + ", was: " + maxVersionBefore);
assertTrue("max version bucket seed not updated after recovery!", currentMaxVersion > maxVersionBefore);
}
// verify all docs received
assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, numDocs + 3);
log.info("testRf2 succeeded ... deleting the "+testCollectionName+" collection");
// try to clean up
try {
new CollectionAdminRequest.Delete()
.setCollectionName(testCollectionName).process(cloudClient);
} catch (Exception e) {
// don't fail the test
log.warn("Could not delete collection {} after test completed", testCollectionName);
}
}
protected void testRf3() throws Exception {
// create a collection that has 1 shard but 2 replicas
String testCollectionName = "c8n_1x3";
createCollectionRetry(testCollectionName, 1, 3, 1);
cloudClient.setDefaultCollection(testCollectionName);
sendDoc(1);
List<Replica> notLeaders =
ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
assertTrue("Expected 2 replicas for collection " + testCollectionName
+ " but found " + notLeaders.size() + "; clusterState: "
+ printClusterStateInfo(testCollectionName),
notLeaders.size() == 2);
// ok, now introduce a network partition between the leader and the replica
SocketProxy proxy0 = getProxyForReplica(notLeaders.get(0));
proxy0.close();
// indexing during a partition
sendDoc(2);
Thread.sleep(sleepMsBeforeHealPartition);
proxy0.reopen();
SocketProxy proxy1 = getProxyForReplica(notLeaders.get(1));
proxy1.close();
sendDoc(3);
Thread.sleep(sleepMsBeforeHealPartition);
proxy1.reopen();
// sent 4 docs in so far, verify they are on the leader and replica
notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
sendDoc(4);
assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 4);
log.info("testRf3 succeeded ... deleting the "+testCollectionName+" collection");
// try to clean up
try {
CollectionAdminRequest.Delete req = new CollectionAdminRequest.Delete();
req.setCollectionName(testCollectionName);
req.process(cloudClient);
} catch (Exception e) {
// don't fail the test
log.warn("Could not delete collection {} after test completed", testCollectionName);
}
}
// test inspired by SOLR-6511
protected void testLeaderZkSessionLoss() throws Exception {
String testCollectionName = "c8n_1x2_leader_session_loss";
createCollectionRetry(testCollectionName, 1, 2, 1);
cloudClient.setDefaultCollection(testCollectionName);
sendDoc(1);
List<Replica> notLeaders =
ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive);
assertTrue("Expected 1 replicas for collection " + testCollectionName
+ " but found " + notLeaders.size() + "; clusterState: "
+ printClusterStateInfo(testCollectionName),
notLeaders.size() == 1);
Replica leader =
cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
String leaderNode = leader.getNodeName();
assertNotNull("Could not find leader for shard1 of "+
testCollectionName+"; clusterState: "+printClusterStateInfo(testCollectionName), leader);
JettySolrRunner leaderJetty = getJettyOnPort(getReplicaPort(leader));
SolrInputDocument doc = new SolrInputDocument();
doc.addField(id, String.valueOf(2));
doc.addField("a_t", "hello" + 2);
// cause leader migration by expiring the current leader's zk session
chaosMonkey.expireSession(leaderJetty);
String expectedNewLeaderCoreNodeName = notLeaders.get(0).getName();
long timeout = System.nanoTime() + TimeUnit.NANOSECONDS.convert(60, TimeUnit.SECONDS);
while (System.nanoTime() < timeout) {
String currentLeaderName = null;
try {
Replica currentLeader =
cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
currentLeaderName = currentLeader.getName();
} catch (Exception exc) {}
if (expectedNewLeaderCoreNodeName.equals(currentLeaderName))
break; // new leader was elected after zk session expiration
Thread.sleep(500);
}
Replica currentLeader =
cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
assertEquals(expectedNewLeaderCoreNodeName, currentLeader.getName());
// TODO: This test logic seems to be timing dependent and fails on Jenkins
// need to come up with a better approach
log.info("Sending doc 2 to old leader "+leader.getName());
try ( HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName)) {
leaderSolr.add(doc);
leaderSolr.close();
// if the add worked, then the doc must exist on the new leader
try (HttpSolrClient newLeaderSolr = getHttpSolrClient(currentLeader, testCollectionName)) {
assertDocExists(newLeaderSolr, testCollectionName, "2");
}
} catch (SolrException exc) {
// this is ok provided the doc doesn't exist on the current leader
try (HttpSolrClient client = getHttpSolrClient(currentLeader, testCollectionName)) {
client.add(doc); // this should work
}
}
List<Replica> participatingReplicas = getActiveOrRecoveringReplicas(testCollectionName, "shard1");
Set<String> replicasToCheck = new HashSet<>();
for (Replica stillUp : participatingReplicas)
replicasToCheck.add(stillUp.getName());
waitToSeeReplicasActive(testCollectionName, "shard1", replicasToCheck, 20);
assertDocsExistInAllReplicas(participatingReplicas, testCollectionName, 1, 2);
log.info("testLeaderZkSessionLoss succeeded ... deleting the "+testCollectionName+" collection");
// try to clean up
try {
CollectionAdminRequest.Delete req = new CollectionAdminRequest.Delete();
req.setCollectionName(testCollectionName);
req.process(cloudClient);
} catch (Exception e) {
// don't fail the test
log.warn("Could not delete collection {} after test completed", testCollectionName);
}
}
protected List<Replica> getActiveOrRecoveringReplicas(String testCollectionName, String shardId) throws Exception {
Map<String,Replica> activeReplicas = new HashMap<String,Replica>();
ZkStateReader zkr = cloudClient.getZkStateReader();
ClusterState cs = zkr.getClusterState();
assertNotNull(cs);
for (Slice shard : cs.getActiveSlices(testCollectionName)) {
if (shard.getName().equals(shardId)) {
for (Replica replica : shard.getReplicas()) {
final Replica.State state = replica.getState();
if (state == Replica.State.ACTIVE || state == Replica.State.RECOVERING) {
activeReplicas.put(replica.getName(), replica);
}
}
}
}
List<Replica> replicas = new ArrayList<Replica>();
replicas.addAll(activeReplicas.values());
return replicas;
}
protected void assertDocsExistInAllReplicas(List<Replica> notLeaders,
String testCollectionName, int firstDocId, int lastDocId)
throws Exception {
Replica leader =
cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1", 10000);
HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName);
List<HttpSolrClient> replicas =
new ArrayList<HttpSolrClient>(notLeaders.size());
for (Replica r : notLeaders) {
replicas.add(getHttpSolrClient(r, testCollectionName));
}
try {
for (int d = firstDocId; d <= lastDocId; d++) {
String docId = String.valueOf(d);
assertDocExists(leaderSolr, testCollectionName, docId);
for (HttpSolrClient replicaSolr : replicas) {
assertDocExists(replicaSolr, testCollectionName, docId);
}
}
} finally {
if (leaderSolr != null) {
leaderSolr.close();
}
for (HttpSolrClient replicaSolr : replicas) {
replicaSolr.close();
}
}
}
protected HttpSolrClient getHttpSolrClient(Replica replica, String coll) throws Exception {
ZkCoreNodeProps zkProps = new ZkCoreNodeProps(replica);
String url = zkProps.getBaseUrl() + "/" + coll;
return getHttpSolrClient(url);
}
protected int sendDoc(int docId) throws Exception {
return sendDoc(docId, null);
}
protected int sendDoc(int docId, Integer minRf) throws Exception {
SolrInputDocument doc = new SolrInputDocument();
doc.addField(id, String.valueOf(docId));
doc.addField("a_t", "hello" + docId);
UpdateRequest up = new UpdateRequest();
if (minRf != null) {
up.setParam(UpdateRequest.MIN_REPFACT, String.valueOf(minRf));
}
up.add(doc);
return cloudClient.getMinAchievedReplicationFactor(cloudClient.getDefaultCollection(), cloudClient.request(up));
}
/**
* Query the real-time get handler for a specific doc by ID to verify it
* exists in the provided server, using distrib=false so it doesn't route to another replica.
*/
@SuppressWarnings("rawtypes")
protected void assertDocExists(HttpSolrClient solr, String coll, String docId) throws Exception {
NamedList rsp = realTimeGetDocId(solr, docId);
String match = JSONTestUtil.matchObj("/id", rsp.get("doc"), new Integer(docId));
assertTrue("Doc with id=" + docId + " not found in " + solr.getBaseURL()
+ " due to: " + match + "; rsp="+rsp, match == null);
}
protected void assertDocNotExists(HttpSolrClient solr, String coll, String docId) throws Exception {
NamedList rsp = realTimeGetDocId(solr, docId);
String match = JSONTestUtil.matchObj("/id", rsp.get("doc"), new Integer(docId));
assertTrue("Doc with id=" + docId + " is found in " + solr.getBaseURL()
+ " due to: " + match + "; rsp="+rsp, match != null);
}
private NamedList realTimeGetDocId(HttpSolrClient solr, String docId) throws SolrServerException, IOException {
QueryRequest qr = new QueryRequest(params("qt", "/get", "id", docId, "distrib", "false"));
return solr.request(qr);
}
protected int getReplicaPort(Replica replica) {
String replicaNode = replica.getNodeName();
String tmp = replicaNode.substring(replicaNode.indexOf(':')+1);
if (tmp.indexOf('_') != -1)
tmp = tmp.substring(0,tmp.indexOf('_'));
return Integer.parseInt(tmp);
}
protected void waitToSeeReplicasActive(String testCollectionName, String shardId, Set<String> replicasToCheck, int maxWaitSecs) throws Exception {
final RTimer timer = new RTimer();
ZkStateReader zkr = cloudClient.getZkStateReader();
zkr.forceUpdateCollection(testCollectionName);
ClusterState cs = zkr.getClusterState();
Collection<Slice> slices = cs.getActiveSlices(testCollectionName);
boolean allReplicasUp = false;
long waitMs = 0L;
long maxWaitMs = maxWaitSecs * 1000L;
while (waitMs < maxWaitMs && !allReplicasUp) {
cs = cloudClient.getZkStateReader().getClusterState();
assertNotNull(cs);
Slice shard = cs.getSlice(testCollectionName, shardId);
assertNotNull("No Slice for "+shardId, shard);
allReplicasUp = true; // assume true
// wait to see all replicas are "active"
for (Replica replica : shard.getReplicas()) {
if (!replicasToCheck.contains(replica.getName()))
continue;
final Replica.State state = replica.getState();
if (state != Replica.State.ACTIVE) {
log.info("Replica " + replica.getName() + " is currently " + state);
allReplicasUp = false;
}
}
if (!allReplicasUp) {
try {
Thread.sleep(200L);
} catch (Exception ignoreMe) {}
waitMs += 200L;
}
} // end while
if (!allReplicasUp)
fail("Didn't see replicas "+ replicasToCheck +
" come up within " + maxWaitMs + " ms! ClusterState: " + printClusterStateInfo(testCollectionName));
log.info("Took {} ms to see replicas [{}] become active.", timer.getTime(), replicasToCheck);
}
}