/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.zookeeper.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.util.Collection;
import java.util.HashSet;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.log4j.Logger;
import org.apache.zookeeper.AsyncCallback;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.ZKTestCase;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.server.ZKDatabase;
import org.apache.zookeeper.server.quorum.Leader;
import org.apache.zookeeper.test.ClientBase.CountdownWatcher;
import org.junit.Test;

public class FollowerResyncConcurrencyTest extends ZKTestCase {
    private static final Logger LOG =
        Logger.getLogger(FollowerResyncConcurrencyTest.class);
    public static final long CONNECTION_TIMEOUT = ClientTest.CONNECTION_TIMEOUT;

    // Completion callbacks run on the event threads of several clients, so
    // plain (even volatile) int increments would race; use atomics instead.
    private final AtomicInteger counter = new AtomicInteger(0);
    private final AtomicInteger errors = new AtomicInteger(0);

    /**
     * See ZOOKEEPER-962. This tests for one of the bugs hit while fixing it:
     * setting the ZXID of the SNAP packet.
     *
     * Starts up 3 ZKs. Shuts down one follower, writes a node, then restarts
     * the follower while the non-leader ZKs keep writing to the cluster.
     * Shuts the follower down again and restarts it after its sessions have
     * expired, expecting it to receive a snapshot. Shuts it down once more,
     * runs some transactions through, then restarts it into a diff while
     * transactions are still running on the leader.
     */
    @Test
    public void testResyncBySnapThenDiffAfterFollowerCrashes()
        throws Throwable {
        final Semaphore sem = new Semaphore(0);

        QuorumUtil qu = new QuorumUtil(1);
        qu.startAll();
        CountdownWatcher watcher1 = new CountdownWatcher();
        CountdownWatcher watcher2 = new CountdownWatcher();
        CountdownWatcher watcher3 = new CountdownWatcher();

        int index = 1;
        while (qu.getPeer(index).peer.leader == null) {
            index++;
        }

        Leader leader = qu.getPeer(index).peer.leader;
        assertNotNull(leader);

        // Reuse the index variable to select a follower to connect to.
        index = (index == 1) ? 2 : 1;
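        /*
         * First cycle: take the chosen follower down, create an ephemeral
         * via zk3 (connected to peer 3), then bring the follower back. This
         * leaves it slightly behind before the heavy write load starts, so
         * the later shutdowns exercise the snap-then-diff resync path
         * described in the javadoc above.
         */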
        qu.shutdown(index);
        final ZooKeeper zk3 = new DisconnectableZooKeeper(
            "127.0.0.1:" + qu.getPeer(3).peer.getClientPort(), 1000, watcher3);
        watcher3.waitForConnected(CONNECTION_TIMEOUT);
        zk3.create("/mybar", null, ZooDefs.Ids.OPEN_ACL_UNSAFE,
                   CreateMode.EPHEMERAL_SEQUENTIAL);

        qu.restart(index);
        ZooKeeper zk = new DisconnectableZooKeeper(
            "127.0.0.1:" + qu.getPeer(index).peer.getClientPort(), 1000, watcher1);
        ZooKeeper zk2 = new DisconnectableZooKeeper(
            "127.0.0.1:" + qu.getPeer(index).peer.getClientPort(), 1000, watcher2);

        watcher1.waitForConnected(CONNECTION_TIMEOUT);
        watcher2.waitForConnected(CONNECTION_TIMEOUT);
        zk.create("/first", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE,
                  CreateMode.PERSISTENT);

        Thread t = new Thread(new Runnable() {
            @Override
            public void run() {
                for (int i = 0; i < 1000; i++) {
                    zk3.create("/mytestfoo", null, ZooDefs.Ids.OPEN_ACL_UNSAFE,
                               CreateMode.EPHEMERAL_SEQUENTIAL,
                               new AsyncCallback.StringCallback() {
                        @Override
                        public void processResult(int rc, String path,
                                                  Object ctx, String name) {
                            if (rc != 0) {
                                errors.incrementAndGet();
                            }
                            // About 14,260 asynchronous creates are issued in
                            // total; release once 14,200 have completed.
                            if (counter.incrementAndGet() == 14200) {
                                sem.release();
                            }
                        }
                    }, null);
                    if (i % 10 == 0) {
                        try {
                            Thread.sleep(100);
                        } catch (Exception e) {
                            // Ignored; this only paces the writes.
                        }
                    }
                }
            }
        });

        for (int i = 0; i < 13000; i++) {
            zk3.create("/mybar", null, ZooDefs.Ids.OPEN_ACL_UNSAFE,
                       CreateMode.EPHEMERAL_SEQUENTIAL,
                       new AsyncCallback.StringCallback() {
                @Override
                public void processResult(int rc, String path,
                                          Object ctx, String name) {
                    if (rc != 0) {
                        errors.incrementAndGet();
                    }
                    if (counter.incrementAndGet() == 14200) {
                        sem.release();
                    }
                }
            }, null);

            if (i == 5000) {
                qu.shutdown(index);
                LOG.info("Shutting down s1");
            }
            if (i == 12000) {
                // Restart off of snap, then get some txns for a log,
                // then shut down.
                qu.restart(index);
                Thread.sleep(300);
                qu.shutdown(index);
                t.start();
                Thread.sleep(300);
                qu.restart(index);
                LOG.info("Setting up server: " + index);
            }
            if ((i % 1000) == 0) {
                Thread.sleep(1000);
            }
            if (i % 50 == 0) {
                zk2.create("/newbaz", null, ZooDefs.Ids.OPEN_ACL_UNSAFE,
                           CreateMode.EPHEMERAL_SEQUENTIAL,
                           new AsyncCallback.StringCallback() {
                    @Override
                    public void processResult(int rc, String path,
                                              Object ctx, String name) {
                        if (rc != 0) {
                            errors.incrementAndGet();
                        }
                        if (counter.incrementAndGet() == 14200) {
                            sem.release();
                        }
                    }
                }, null);
            }
        }

        // Wait until all updates return.
        if (!sem.tryAcquire(20000, TimeUnit.MILLISECONDS)) {
            LOG.warn("Did not acquire semaphore fast enough");
        }
        t.join(10000);
        Thread.sleep(1000);

        verifyState(qu, index, leader);
    }
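    /*
     * The anonymous StringCallbacks in these tests differ only in their
     * completion check. Below is a minimal sketch of an equivalent shared
     * factory; it is illustrative only ("target" is a hypothetical
     * parameter), and the tests keep their inline copies. It mirrors the
     * first test's exact-threshold check; the second test instead releases
     * the semaphore on every completion past its threshold.
     */
    private AsyncCallback.StringCallback countingCallback(
            final Semaphore sem, final int target) {
        return new AsyncCallback.StringCallback() {
            @Override
            public void processResult(int rc, String path, Object ctx,
                                      String name) {
                if (rc != 0) {
                    errors.incrementAndGet();
                }
                if (counter.incrementAndGet() == target) {
                    sem.release();
                }
            }
        };
    }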
    /**
     * This test:
     * Starts up 3 ZKs, with the non-leader ZKs writing to the cluster.
     * Shuts down one of the non-leader ZKs, then restarts it after sessions
     * have expired but fewer than 500 txns have taken place (so it gets a
     * diff). Shuts it down again immediately after restarting, starts a
     * separate thread running other transactions, and restarts it into a
     * diff while transactions are running on the leader.
     *
     * Before the fixes for ZOOKEEPER-962, restarting off of a diff could
     * produce an inconsistent view of the data, missing transactions that
     * completed during diff syncing. The follower would also be considered
     * "restarted" before all forwarded transactions were completely
     * processed, so restarting would cause a snap file with a too-high zxid
     * to be written, and transactions would be missed.
     *
     * This test should fairly reliably catch the failure of restarting the
     * server before all diff messages have been processed. However, due to
     * the timing-dependent nature of the system, it may not catch failures
     * caused by concurrent processing of transactions during the leader's
     * diff forwarding.
     */
    @Test
    public void testResyncByDiffAfterFollowerCrashes()
        throws Throwable {
        final Semaphore sem = new Semaphore(0);

        QuorumUtil qu = new QuorumUtil(1);
        qu.startAll();
        CountdownWatcher watcher1 = new CountdownWatcher();
        CountdownWatcher watcher2 = new CountdownWatcher();
        CountdownWatcher watcher3 = new CountdownWatcher();

        int index = 1;
        while (qu.getPeer(index).peer.leader == null) {
            index++;
        }

        Leader leader = qu.getPeer(index).peer.leader;
        assertNotNull(leader);

        // Reuse the index variable to select a follower to connect to.
        index = (index == 1) ? 2 : 1;

        ZooKeeper zk = new DisconnectableZooKeeper(
            "127.0.0.1:" + qu.getPeer(index).peer.getClientPort(), 1000, watcher1);
        ZooKeeper zk2 = new DisconnectableZooKeeper(
            "127.0.0.1:" + qu.getPeer(index).peer.getClientPort(), 1000, watcher2);
        final ZooKeeper zk3 = new DisconnectableZooKeeper(
            "127.0.0.1:" + qu.getPeer(3).peer.getClientPort(), 1000, watcher3);
        watcher1.waitForConnected(CONNECTION_TIMEOUT);
        watcher2.waitForConnected(CONNECTION_TIMEOUT);
        watcher3.waitForConnected(CONNECTION_TIMEOUT);

        zk.create("/first", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE,
                  CreateMode.PERSISTENT);
        zk2.create("/mybar", null, ZooDefs.Ids.OPEN_ACL_UNSAFE,
                   CreateMode.EPHEMERAL_SEQUENTIAL);

        final AtomicBoolean runNow = new AtomicBoolean(false);
        Thread t = new Thread(new Runnable() {
            @Override
            public void run() {
                int inSyncCounter = 0;
                while (inSyncCounter < 400) {
                    if (runNow.get()) {
                        zk3.create("/mytestfoo", null,
                                   ZooDefs.Ids.OPEN_ACL_UNSAFE,
                                   CreateMode.EPHEMERAL_SEQUENTIAL,
                                   new AsyncCallback.StringCallback() {
                            @Override
                            public void processResult(int rc, String path,
                                                      Object ctx, String name) {
                                if (rc != 0) {
                                    errors.incrementAndGet();
                                }
                                if (counter.incrementAndGet() > 7300) {
                                    sem.release();
                                }
                            }
                        }, null);
                        try {
                            Thread.sleep(10);
                        } catch (Exception e) {
                            // Ignored; this only paces the writes.
                        }
                        inSyncCounter++;
                    } else {
                        Thread.yield();
                    }
                }
            }
        });
        t.start();

        for (int i = 0; i < 5000; i++) {
            zk2.create("/mybar", null, ZooDefs.Ids.OPEN_ACL_UNSAFE,
                       CreateMode.EPHEMERAL_SEQUENTIAL,
                       new AsyncCallback.StringCallback() {
                @Override
                public void processResult(int rc, String path,
                                          Object ctx, String name) {
                    if (rc != 0) {
                        errors.incrementAndGet();
                    }
                    if (counter.incrementAndGet() > 7300) {
                        sem.release();
                    }
                }
            }, null);

            if (i == 1000) {
                qu.shutdown(index);
                Thread.sleep(1100);
                LOG.info("Shutting down s1");
            }
            if (i == 1100 || i == 1150 || i == 1200) {
                Thread.sleep(1000);
            }
            if (i == 1200) {
                qu.startThenShutdown(index);
                runNow.set(true);
                qu.restart(index);
                LOG.info("Setting up server: " + index);
            }
            if (i >= 1000 && i % 2 == 0) {
                zk3.create("/newbaz", null, ZooDefs.Ids.OPEN_ACL_UNSAFE,
                           CreateMode.EPHEMERAL_SEQUENTIAL,
                           new AsyncCallback.StringCallback() {
                    @Override
                    public void processResult(int rc, String path,
                                              Object ctx, String name) {
                        if (rc != 0) {
                            errors.incrementAndGet();
                        }
                        if (counter.incrementAndGet() > 7300) {
                            sem.release();
                        }
                    }
                }, null);
            }
            if (i == 1050 || i == 1100 || i == 1150) {
                Thread.sleep(1000);
            }
        }

        // Wait until all updates return.
        if (!sem.tryAcquire(15000, TimeUnit.MILLISECONDS)) {
            LOG.warn("Did not acquire semaphore fast enough");
        }
        t.join(10000);
        Thread.sleep(1000);

        // Verify that the server is following and has the same epoch as
        // the leader.
        verifyState(qu, index, leader);
    }
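    /**
     * Verifies that the restarted peer is following, shares the leader's
     * epoch (the high 32 bits of the zxid), and holds the same sessions
     * and ephemerals as the other quorum members.
     */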
    private void verifyState(QuorumUtil qu, int index, Leader leader) {
        assertTrue("Not following",
                   qu.getPeer(index).peer.follower != null);
        long epochF = (qu.getPeer(index).peer.getActiveServer().getZxid() >> 32L);
        long epochL = (leader.getEpoch() >> 32L);
        assertTrue("Zxid: " + qu.getPeer(index).peer.getActiveServer()
                       .getZKDatabase().getDataTreeLastProcessedZxid()
                   + " current epoch: " + epochF,
                   epochF == epochL);

        int leaderIndex = (index == 1) ? 2 : 1;
        Collection<Long> sessionsRestarted =
            qu.getPeer(index).peer.getActiveServer().getZKDatabase().getSessions();
        Collection<Long> sessionsNotRestarted =
            qu.getPeer(leaderIndex).peer.getActiveServer().getZKDatabase().getSessions();
        assertEquals("Should have same number of sessions",
                     sessionsNotRestarted.size(), sessionsRestarted.size());

        ZKDatabase restarted =
            qu.getPeer(index).peer.getActiveServer().getZKDatabase();
        ZKDatabase clean =
            qu.getPeer(3).peer.getActiveServer().getZKDatabase();
        ZKDatabase lead =
            qu.getPeer(leaderIndex).peer.getActiveServer().getZKDatabase();
        for (Long l : sessionsRestarted) {
            assertTrue("Should have same set of sessions in both servers, "
                       + "did not expect: " + l,
                       sessionsNotRestarted.contains(l));
            HashSet<String> ephemerals = restarted.getEphemerals(l);
            HashSet<String> cleanEphemerals = clean.getEphemerals(l);
            for (String o : cleanEphemerals) {
                if (!ephemerals.contains(o)) {
                    LOG.info("Restarted follower doesn't contain ephemeral " + o);
                }
            }
            HashSet<String> leadEphemerals = lead.getEphemerals(l);
            for (String o : leadEphemerals) {
                if (!cleanEphemerals.contains(o)) {
                    LOG.info("Follower doesn't contain ephemeral from leader " + o);
                }
            }
            assertEquals("Should have same number of ephemerals in both followers",
                         ephemerals.size(), cleanEphemerals.size());
            assertEquals("Leader should equal follower",
                         lead.getEphemerals(l).size(), cleanEphemerals.size());
        }
    }
}