package zx.soft.zookeeper.book; import java.util.List; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.apache.zookeeper.Watcher.Event.KeeperState; import org.apache.zookeeper.ZooDefs.Ids; import org.apache.zookeeper.ZooKeeper; import org.junit.Assert; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import zx.soft.zookeeper.book.recovery.RecoveredAssignments; import zx.soft.zookeeper.book.recovery.RecoveredAssignments.RecoveryCallback; public class TestAssignmentRecovery extends BaseTestCase { private static final Logger logger = LoggerFactory.getLogger(TestTaskAssignment.class); boolean connected = false; boolean recoveryDone = false; int status = RecoveryCallback.FAILED; List<String> recoveredTasks; @Test(timeout = 50000) public void testRecovery() { try { ZooKeeper zk = new ZooKeeper("localhost:" + port, 10000, new Watcher() { @Override public void process(WatchedEvent e) { if (e.getState() == KeeperState.SyncConnected) { connected = true; } logger.info("Event: " + e.toString()); } }); while (!connected) { Thread.sleep(100); } /* * The number of recovered tasks should be 2 because * there is a single active worker, one task has been * assigned to an absent worker, and one task hasn't * been assigned at all. The last two need to be * assigned, and consequently they are part of the * list of recovered tasks. * * Note that recovery here refers to tasks that a new * master needs to reassign when failing over. It is * not related to the recovery of the tasks themselves * as it could happen if a worker crashes before * completing a task. Recovering a task is out of the * scope of this example and it is application specific. * */ zk.create("/tasks", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/workers", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/status", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/tasks/task-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/tasks/task-002", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/tasks/task-003", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/workers/worker-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-001/task-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-002", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-002/task-002", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/status/task-001", "done".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); RecoveredAssignments ra = new RecoveredAssignments(zk); ra.recover(new RecoveryCallback() { @Override public void recoveryComplete(int rc, List<String> tasks) { logger.info("Completed recovery: " + rc); recoveryDone = true; status = rc; recoveredTasks = tasks; } }); while (!recoveryDone) { Thread.sleep(100); } Assert.assertTrue("It hasn't returned ok", status == RecoveryCallback.OK); Assert.assertTrue("List size is incorrect: " + recoveredTasks.size(), recoveredTasks.size() == 2); Assert.assertTrue("List doesn't contain task-002 ", recoveredTasks.contains("task-002")); Assert.assertTrue("List doesn't contain task-003 ", recoveredTasks.contains("task-003")); } catch (Exception e) { logger.warn("Got exception", e); Assert.fail(); } } @Test(timeout = 50000) public void testRecoveryStatus() { try { ZooKeeper zk = new ZooKeeper("localhost:" + port, 10000, new Watcher() { @Override public void process(WatchedEvent e) { if (e.getState() == KeeperState.SyncConnected) { connected = true; } logger.info("Event: " + e.toString()); } }); while (!connected) { Thread.sleep(100); } /* * The two tasks that have been assigned to the absent worker * have completed, so there is no need to reassign them. */ zk.create("/tasks", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/workers", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/status", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/tasks/task-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/tasks/task-002", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/tasks/task-003", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/workers/worker-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-001/task-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-002", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-002/task-002", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-002/task-003", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/status/task-002", "done".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/status/task-003", "done".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); RecoveredAssignments ra = new RecoveredAssignments(zk); ra.recover(new RecoveryCallback() { @Override public void recoveryComplete(int rc, List<String> tasks) { logger.info("Completed recovery: " + rc); recoveryDone = true; status = rc; recoveredTasks = tasks; } }); while (!recoveryDone) { Thread.sleep(100); } Assert.assertTrue("It hasn't returned ok", status == RecoveryCallback.OK); Assert.assertTrue("List size is incorrect: " + recoveredTasks.size(), recoveredTasks.size() == 0); } catch (Exception e) { logger.warn("Got exception", e); Assert.fail(); } } @Test(timeout = 50000) public void testRecoveryNoStatus() { try { ZooKeeper zk = new ZooKeeper("localhost:" + port, 10000, new Watcher() { @Override public void process(WatchedEvent e) { if (e.getState() == KeeperState.SyncConnected) { connected = true; } logger.info("Event: " + e.toString()); } }); while (!connected) { Thread.sleep(100); } /* * There is no status znode, so two tasks need to be assigned, one * needs to be reassigned because it has been assigned to an absent * worker. */ zk.create("/tasks", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/workers", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/status", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/tasks/task-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/tasks/task-002", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/tasks/task-003", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/workers/worker-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-001/task-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-002", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-002/task-002", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); RecoveredAssignments ra = new RecoveredAssignments(zk); ra.recover(new RecoveryCallback() { @Override public void recoveryComplete(int rc, List<String> tasks) { logger.info("Completed recovery: " + rc); recoveryDone = true; status = rc; recoveredTasks = tasks; } }); while (!recoveryDone) { Thread.sleep(100); } Assert.assertTrue("It hasn't returned ok", status == RecoveryCallback.OK); Assert.assertTrue("List size is incorrect: " + recoveredTasks.size(), recoveredTasks.size() == 2); Assert.assertTrue("List doesn't contain task-002 ", recoveredTasks.contains("task-002")); Assert.assertTrue("List doesn't contain task-003 ", recoveredTasks.contains("task-003")); } catch (Exception e) { logger.warn("Got exception", e); Assert.fail(); } } @Test(timeout = 50000) public void testRecoveryMissingTaskFromTasks() { try { ZooKeeper zk = new ZooKeeper("localhost:" + port, 10000, new Watcher() { @Override public void process(WatchedEvent e) { if (e.getState() == KeeperState.SyncConnected) { connected = true; } logger.info("Event: " + e.toString()); } }); while (!connected) { Thread.sleep(100); } /* * Task has been assigned to a worker that has crashed and it is not in the * list of tasks any longer. */ zk.create("/tasks", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/workers", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/status", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/tasks/task-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/tasks/task-003", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/workers/worker-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-001/task-001", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-002", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); zk.create("/assign/worker-002/task-002", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); RecoveredAssignments ra = new RecoveredAssignments(zk); ra.recover(new RecoveryCallback() { @Override public void recoveryComplete(int rc, List<String> tasks) { logger.info("Completed recovery: " + rc); recoveryDone = true; status = rc; recoveredTasks = tasks; } }); while (!recoveryDone) { Thread.sleep(100); } Assert.assertTrue("It hasn't returned ok", status == RecoveryCallback.OK); Assert.assertTrue("List size is incorrect: " + recoveredTasks.size(), recoveredTasks.size() == 2); Assert.assertTrue("List doesn't contain task-002 ", recoveredTasks.contains("task-002")); Assert.assertTrue("List doesn't contain task-003 ", recoveredTasks.contains("task-003")); } catch (Exception e) { logger.warn("Got exception", e); Assert.fail(); } } }