package org.apache.solr.cloud;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CollectionParams.CollectionAction;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.servlet.SolrDispatchFilter;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
/**
* Test sync phase that occurs when Leader goes down and a new Leader is
* elected.
*/
@Slow
@Slow
public class SyncSliceTest extends AbstractFullDistribZkTestBase {
// Intentionally empty suite hooks: NOTE(review) — presumably these shadow
// base-class @BeforeClass/@AfterClass behavior to disable it for this test;
// confirm against AbstractFullDistribZkTestBase before changing.
@BeforeClass
public static void beforeSuperClass() {
}
@AfterClass
public static void afterSuperClass() {
}
@Before
@Override
public void setUp() throws Exception {
super.setUp();
// we expect this type of exception as shards go up and down...
//ignoreException(".*");
// Tell the test cluster how many shards to create (sliceCount set in ctor).
System.setProperty("numShards", Integer.toString(sliceCount));
}
@Override
@After
public void tearDown() throws Exception {
super.tearDown();
// Restore normal exception logging in case ignores were installed.
resetExceptionIgnores();
}
// One logical shard ("shard1") with several replicas: 7 jetties in nightly
// runs, 4 otherwise.
public SyncSliceTest() {
super();
sliceCount = 1;
shardCount = TEST_NIGHTLY ? 7 : 4;
}
/**
 * Drives the sync scenario: deliberately makes shard1's replicas inconsistent
 * by skipping update distribution to some of them, forces a SYNCSHARD via the
 * Collections API, then repeatedly kills leaders and verifies the shard
 * converges back to a consistent state.
 */
@Override
public void doTest() throws Exception {
handle.clear();
// QTime/timestamp legitimately differ across responses; don't compare them.
handle.put("QTime", SKIPVAL);
handle.put("timestamp", SKIPVAL);
waitForThingsToLevelOut(15);
del("*:*");
List<String> skipServers = new ArrayList<String>();
int docId = 0;
// First two docs are distributed to every replica.
indexDoc(skipServers, id, docId++, i1, 50, tlong, 50, t1,
"to come to the aid of their country.");
indexDoc(skipServers, id, docId++, i1, 50, tlong, 50, t1,
"old haven was blue.");
// Skip replica 1 for the third doc...
skipServers.add(shardToJetty.get("shard1").get(1).url + "/");
indexDoc(skipServers, id, docId++, i1, 50, tlong, 50, t1,
"but the song was fancy.");
// ...and replicas 1 and 2 for the fourth, leaving replicas inconsistent.
skipServers.add(shardToJetty.get("shard1").get(2).url + "/");
indexDoc(skipServers, id,docId++, i1, 50, tlong, 50, t1,
"under the moon and over the lake");
commit();
waitForRecoveriesToFinish(false);
// shard should be inconsistent
String shardFailMessage = checkShardConsistency("shard1", true);
assertNotNull(shardFailMessage);
// Issue a Collections API SYNCSHARD to make the leader sync its replicas.
ModifiableSolrParams params = new ModifiableSolrParams();
params.set("action", CollectionAction.SYNCSHARD.toString());
params.set("collection", "collection1");
params.set("shard", "shard1");
SolrRequest request = new QueryRequest(params);
request.setPath("/admin/collections");
// Strip the trailing collection name to get a node-level base URL for the
// admin request.
String baseUrl = ((HttpSolrServer) shardToJetty.get("shard1").get(2).client.solrClient)
.getBaseURL();
baseUrl = baseUrl.substring(0, baseUrl.length() - "collection1".length());
HttpSolrServer baseServer = new HttpSolrServer(baseUrl);
baseServer.request(request);
waitForThingsToLevelOut(15);
// After the forced sync all replicas must agree and hold all 4 docs.
checkShardConsistency(false, true);
long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();
assertEquals(4, cloudClientDocs);
// kill the leader - new leader could have all the docs or be missing one
CloudJettyRunner leaderJetty = shardToLeaderJetty.get("shard1");
skipServers = getRandomOtherJetty(leaderJetty, null); // but not the leader
// this doc won't be on one node
indexDoc(skipServers, id, docId++, i1, 50, tlong, 50, t1,
"to come to the aid of their country.");
Set<CloudJettyRunner> jetties = new HashSet<CloudJettyRunner>();
jetties.addAll(shardToJetty.get("shard1"));
jetties.remove(leaderJetty);
assertEquals(shardCount - 1, jetties.size());
chaosMonkey.killJetty(leaderJetty);
// we are careful to make sure the downed node is no longer in the state,
// because on some systems (especially freebsd w/ blackhole enabled), trying
// to talk to a downed node causes grief
waitToSeeDownInClusterState(leaderJetty, jetties);
waitForThingsToLevelOut(15);
// Leader election + sync should have propagated the 5th doc everywhere.
checkShardConsistency(false, true);
cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();
assertEquals(5, cloudClientDocs);
CloudJettyRunner deadJetty = leaderJetty;
// let's get the latest leader
while (deadJetty == leaderJetty) {
updateMappingsFromZk(this.jettys, this.clients);
leaderJetty = shardToLeaderJetty.get("shard1");
}
// bring back dead node
ChaosMonkey.start(deadJetty.jetty); // he is not the leader anymore
// give a moment to be sure it has started recovering
Thread.sleep(2000);
waitForThingsToLevelOut(15);
waitForRecoveriesToFinish(false);
// Build a skip list from two random non-leader replicas.
skipServers = getRandomOtherJetty(leaderJetty, null);
skipServers.addAll( getRandomOtherJetty(leaderJetty, null));
//System.out.println("leader:" + leaderJetty.url);
//System.out.println("skip list:" + skipServers);
// we are skipping two nodes
// NOTE(review): the two random picks can select the same jetty twice, in
// which case only one node is actually skipped; the size assertion below
// still passes because lists keep duplicates. Confirm this is intended.
assertEquals(2, skipServers.size());
// more docs than can peer sync
for (int i = 0; i < 300; i++) {
indexDoc(skipServers, id, docId++, i1, 50, tlong, 50, t1,
"to come to the aid of their country.");
}
commit();
waitForRecoveriesToFinish(false);
// shard should be inconsistent
shardFailMessage = checkShardConsistency("shard1", true);
assertNotNull(shardFailMessage);
jetties = new HashSet<CloudJettyRunner>();
jetties.addAll(shardToJetty.get("shard1"));
jetties.remove(leaderJetty);
assertEquals(shardCount - 1, jetties.size());
// kill the current leader
chaosMonkey.killJetty(leaderJetty);
waitToSeeDownInClusterState(leaderJetty, jetties);
Thread.sleep(4000);
waitForRecoveriesToFinish(false);
// The skipped replicas are too far behind for peer sync, so full replication
// recovery must run; verify convergence and fail the test if it never does.
checkShardConsistency(true, true);
}
// Convenience: pick any shard1 replica. NOTE(review): appears unused within
// this file — possibly kept for subclasses or future use; confirm before
// removing.
private List<String> getRandomJetty() {
return getRandomOtherJetty(null, null);
}
/**
 * Returns a mutable single-element list containing the URL (with trailing
 * slash) of one randomly chosen shard1 replica, excluding {@code leader}
 * and {@code down} when they are non-null. Callers rely on the returned
 * list being mutable (they addAll into it).
 */
private List<String> getRandomOtherJetty(CloudJettyRunner leader, CloudJettyRunner down) {
List<String> skipServers = new ArrayList<String>();
List<CloudJettyRunner> candidates = new ArrayList<CloudJettyRunner>();
candidates.addAll(shardToJetty.get("shard1"));
if (leader != null) {
candidates.remove(leader);
}
if (down != null) {
candidates.remove(down);
}
CloudJettyRunner cjetty = candidates.get(random().nextInt(candidates.size()));
skipServers.add(cjetty.url + "/");
return skipServers;
}
/**
 * Blocks until every surviving replica — and the cloud client — no longer
 * sees {@code leaderJetty} as a live node in the ZK cluster state.
 */
private void waitToSeeDownInClusterState(CloudJettyRunner leaderJetty,
Set<CloudJettyRunner> jetties) throws InterruptedException {
for (CloudJettyRunner cjetty : jetties) {
waitToSeeNotLive(((SolrDispatchFilter) cjetty.jetty.getDispatchFilter()
.getFilter()).getCores().getZkController().getZkStateReader(),
leaderJetty);
}
waitToSeeNotLive(cloudClient.getZkStateReader(), leaderJetty);
}
/**
 * Indexes one document to both the control client and the cloud cluster.
 * Each URL in {@code skipServers} is passed as a test.distrib.skip.servers
 * param, which asks the distributed-update path not to forward the update
 * to that replica — this is how the test makes replicas inconsistent on
 * purpose. Also tags every doc with rnd_b=true so test queries can find them.
 */
protected void indexDoc(List<String> skipServers, Object... fields) throws IOException,
SolrServerException {
SolrInputDocument doc = new SolrInputDocument();
addFields(doc, fields);
addFields(doc, "rnd_b", true);
controlClient.add(doc);
UpdateRequest ureq = new UpdateRequest();
ureq.add(doc);
ModifiableSolrParams params = new ModifiableSolrParams();
for (String skip : skipServers) {
params.add("test.distrib.skip.servers", skip);
}
ureq.setParams(params);
ureq.process(cloudClient);
}
// skip the randoms - they can deadlock...
protected void indexr(Object... fields) throws Exception {
SolrInputDocument doc = new SolrInputDocument();
addFields(doc, fields);
addFields(doc, "rnd_b", true);
indexDoc(doc);
}
}