// =================================================================================================
// Copyright 2011 Twitter, Inc.
// -------------------------------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this work except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file, or at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =================================================================================================
package com.twitter.common.zookeeper;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.annotation.concurrent.ThreadSafe;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Ordering;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Stat;
import com.twitter.common.base.MorePreconditions;
/**
* Distributed locking via ZooKeeper. Assuming there are N clients that all try to acquire a lock,
* the algorithm works as follows. Each host creates an ephemeral|sequential node, and requests a
* list of children for the lock node. Due to the nature of sequential, all the ids are increasing
* in order, therefore the client with the least ID according to natural ordering will hold the
* lock. Every other client watches the id immediately preceding its own id and checks for the lock
* in case of notification. The client holding the lock does the work and finally deletes the node,
* thereby triggering the next client in line to acquire the lock. Deadlocks are possible but
* avoided in most cases because if a client drops dead while holding the lock, the ZK session
* should timeout and since the node is ephemeral, it will be removed in such a case. Deadlocks
* could occur if the worker thread on a client hangs but the zk-client thread is still alive.
* There could be an external monitor client that ensures that alerts are triggered if the least-id
* ephemeral node is present past a time-out.
* <p/>
* Note: Locking attempts will fail in case session expires!
*
* @author Florian Leibert
*/
@ThreadSafe
public class DistributedLockImpl implements DistributedLock {
  private static final Logger LOG = Logger.getLogger(DistributedLockImpl.class.getName());

  private final ZooKeeperClient zkClient;
  private final String lockPath;
  private final ImmutableList<ACL> acl;

  /*
   * Threading model
   * ---------------
   * All public methods are synchronized on this instance, and lock()/tryLock() keep the monitor
   * while they wait. The ZooKeeper event thread therefore must never need this monitor: a
   * LockWatcher only records the outcome of its own attempt (own latch, own flags) and the thread
   * blocked in lock()/tryLock() performs every state change and all node cleanup once it wakes up.
   * The fields below are only written while holding the monitor.
   */
  private boolean holdsLock = false;
  private String currentId;
  private String currentNode;

  // Volatile: read by the ZooKeeper event thread (without the monitor) so that a watcher can tell
  // whether the attempt it belongs to has been abandoned.
  private volatile LockWatcher watcher;

  /**
   * Equivalent to {@link #DistributedLockImpl(ZooKeeperClient, String, Iterable)} with a default
   * wide open {@code acl} ({@link ZooDefs.Ids#OPEN_ACL_UNSAFE}).
   */
  public DistributedLockImpl(ZooKeeperClient zkClient, String lockPath) {
    this(zkClient, lockPath, ZooDefs.Ids.OPEN_ACL_UNSAFE);
  }

  /**
   * Creates a distributed lock using the given {@code zkClient} to coordinate locking.
   *
   * @param zkClient The ZooKeeper client to use.
   * @param lockPath The path used to manage the lock under.
   * @param acl The acl to apply to newly created lock nodes.
   */
  public DistributedLockImpl(ZooKeeperClient zkClient, String lockPath, Iterable<ACL> acl) {
    this.zkClient = Preconditions.checkNotNull(zkClient);
    this.lockPath = MorePreconditions.checkNotBlank(lockPath);
    this.acl = ImmutableList.copyOf(acl);
  }

  /**
   * Ensures the lock path exists, creates this client's ephemeral sequential member node and
   * installs a fresh {@link LockWatcher} for the new attempt.
   */
  private synchronized void prepare()
      throws ZooKeeperClient.ZooKeeperConnectionException, InterruptedException, KeeperException {
    ZooKeeperUtils.ensurePath(zkClient, acl, lockPath);
    LOG.log(Level.FINE, "Working with locking path:" + lockPath);

    // Create an EPHEMERAL_SEQUENTIAL node.
    currentNode =
        zkClient.get().create(lockPath + "/member_", null, acl, CreateMode.EPHEMERAL_SEQUENTIAL);

    // We only care about our actual id since we want to compare ourselves to siblings.
    // lastIndexOf yields -1 when there is no slash, in which case the whole name is the id.
    currentId = currentNode.substring(currentNode.lastIndexOf('/') + 1);
    LOG.log(Level.FINE, "Received ID from zk:" + currentId);

    // Created last: the watcher snapshots currentId at construction.
    this.watcher = new LockWatcher();
  }

  /**
   * Blocks until the lock is acquired.
   *
   * @throws LockingException if the lock is already held by this instance, the calling thread is
   *     interrupted (the interrupt flag is restored), or ZooKeeper reports an error. On failure
   *     the member node created for the attempt is removed on a best-effort basis.
   */
  @Override
  public synchronized void lock() throws LockingException {
    if (holdsLock) {
      throw new LockingException("Error, already holding a lock. Call unlock first!");
    }
    try {
      if (!attemptLock(false, 0L, null)) {
        throw new LockingException("Error, couldn't acquire the lock!");
      }
    } catch (InterruptedException e) {
      cancelAttempt();
      // Restore the flag only after cleanup so the ZooKeeper delete is not interrupted as well.
      Thread.currentThread().interrupt();
      throw new LockingException("InterruptedException while trying to acquire lock!", e);
    } catch (KeeperException e) {
      // Only prepare() lets this escape, i.e. it failed before a member node was recorded.
      throw new LockingException("KeeperException while trying to acquire lock!", e);
    } catch (ZooKeeperClient.ZooKeeperConnectionException e) {
      // Only prepare() lets this escape, i.e. it failed before a member node was recorded.
      throw new LockingException("ZooKeeperConnectionException while trying to acquire lock", e);
    }
  }

  /**
   * Tries to acquire the lock, waiting at most the given time.
   *
   * @param timeout Maximum time to wait for the lock.
   * @param unit Unit of {@code timeout}.
   * @return {@code true} if the lock was acquired; {@code false} if the wait timed out or the
   *     calling thread was interrupted (the interrupt flag is restored). In both {@code false}
   *     cases the attempt is withdrawn, so this instance cannot acquire the lock later on
   *     without another call.
   * @throws LockingException if the lock is already held by this instance or ZooKeeper reports
   *     an error.
   */
  @Override
  public synchronized boolean tryLock(long timeout, TimeUnit unit) {
    if (holdsLock) {
      throw new LockingException("Error, already holding a lock. Call unlock first!");
    }
    try {
      return attemptLock(true, timeout, unit);
    } catch (InterruptedException e) {
      cancelAttempt();
      // Restore the flag only after cleanup so the ZooKeeper delete is not interrupted as well.
      Thread.currentThread().interrupt();
      return false;
    } catch (KeeperException e) {
      // Only prepare() lets this escape, i.e. it failed before a member node was recorded.
      throw new LockingException("KeeperException while trying to acquire lock!", e);
    } catch (ZooKeeperClient.ZooKeeperConnectionException e) {
      // Only prepare() lets this escape, i.e. it failed before a member node was recorded.
      throw new LockingException("ZooKeeperConnectionException while trying to acquire lock", e);
    }
  }

  /**
   * Releases the lock by deleting this client's member node. If the lock is not held but a
   * member node from an earlier attempt is still recorded (its removal failed), that node is
   * removed instead.
   *
   * @throws LockingException if there is neither a held lock nor a leftover member node.
   */
  @Override
  public synchronized void unlock() throws LockingException {
    if (currentId == null) {
      throw new LockingException("Error, neither attempting to lock nor holding a lock!");
    }
    if (!holdsLock) {
      LOG.log(Level.INFO, "Not holding lock, aborting acquisition attempt!");
    } else {
      LOG.log(Level.INFO, "Cleaning up this locks ephemeral node.");
    }
    // In both cases the member node has to go; otherwise it would keep its place in the queue.
    cleanup();
  }

  /**
   * Runs a single acquisition attempt: enqueue a member node, then wait for the watcher to
   * report an outcome. Must be called with this instance's monitor held.
   *
   * @param timed Whether the wait is bounded by {@code timeout}/{@code unit}.
   * @return {@code true} if the lock was acquired, {@code false} if the attempt was withdrawn
   *     without a reported failure (i.e. the timed wait elapsed).
   * @throws LockingException if the watcher reported a failure; the member node is removed first.
   */
  private boolean attemptLock(boolean timed, long timeout, TimeUnit unit)
      throws ZooKeeperClient.ZooKeeperConnectionException, InterruptedException, KeeperException {
    if (currentNode != null) {
      // A previous attempt could not delete its node; remove it now instead of orphaning it.
      cleanup();
    }
    prepare();
    LockWatcher attempt = watcher;
    attempt.checkForLock();
    if (timed) {
      // The return value is deliberately ignored: the flags below are the source of truth and
      // also cover an acquisition that lands right after the timeout.
      attempt.outcome.await(timeout, unit);
    } else {
      attempt.outcome.await();
    }
    if (attempt.acquired) {
      holdsLock = true;
      return true;
    }
    LockingException failure = attempt.failure;
    cancelAttempt();
    if (failure != null) {
      throw new LockingException("Error, couldn't acquire the lock!", failure);
    }
    return false;
  }

  /**
   * Withdraws the current attempt: detaches the watcher and removes the member node, if one was
   * created. Node removal is best effort so that the failure which triggered the cancellation is
   * not masked; if it fails the node stays recorded and {@link #unlock()} retries the removal.
   */
  private synchronized void cancelAttempt() {
    LOG.log(Level.INFO, "Cancelling lock attempt!");
    holdsLock = false;
    // Detach first so that late events for this attempt are ignored.
    watcher = null;
    if (currentNode == null) {
      // Cancelled before a member node was created; nothing to delete.
      currentId = null;
      return;
    }
    try {
      cleanup();
    } catch (RuntimeException e) {
      LOG.log(Level.WARNING, "Failed to remove lock node " + currentNode, e);
    }
  }

  /**
   * Deletes this client's member node and resets the attempt state. Must be called with this
   * instance's monitor held.
   *
   * @throws RuntimeException wrapping the underlying failure if the node could not be deleted;
   *     the state is left untouched in that case so the removal can be retried.
   */
  private void cleanup() {
    LOG.info("Cleaning up!");
    Preconditions.checkNotNull(currentId);
    try {
      // Delete directly rather than exists()+delete(): the node may vanish in between.
      zkClient.get().delete(currentNode, ZooKeeperUtils.ANY_VERSION);
    } catch (KeeperException.NoNodeException e) {
      LOG.log(Level.WARNING, "Called cleanup but nothing to cleanup!");
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    holdsLock = false;
    currentId = null;
    currentNode = null;
    watcher = null;
  }

  /**
   * Tracks one acquisition attempt. Every instance owns its latch, outcome flags, member id and
   * watched path, so a watcher that outlives its attempt (ZooKeeper offers no way to unregister
   * a watch) cannot disturb a later attempt. It never touches the enclosing instance's monitor.
   */
  class LockWatcher implements Watcher {
    // Snapshot of the member id this attempt was enqueued with.
    private final String memberId = currentId;
    // Released exactly when the attempt is decided: acquired or failed.
    private final CountDownLatch outcome = new CountDownLatch(1);
    private volatile boolean acquired;
    private volatile LockingException failure;
    // Guarded by this watcher's monitor.
    private String watchedNode;

    /**
     * Inspects the current members of the lock path. Reports acquisition if our node is the
     * lowest one, otherwise leaves a watch on the node immediately preceding ours. Problems are
     * reported to the waiting thread through the outcome latch instead of being thrown, since
     * this method also runs on the ZooKeeper event thread.
     */
    public synchronized void checkForLock() {
      MorePreconditions.checkNotBlank(memberId);
      try {
        // Loop (rather than recurse) while the predecessor disappears between listing the
        // children and setting the watch.
        while (true) {
          List<String> candidates = zkClient.get().getChildren(lockPath, null);
          ImmutableList<String> sortedMembers = Ordering.natural().immutableSortedCopy(candidates);

          // Unexpected behavior if there are no children!
          if (sortedMembers.isEmpty()) {
            fail(new LockingException("Error, member list is empty!"));
            return;
          }

          int memberIndex = sortedMembers.indexOf(memberId);
          if (memberIndex < 0) {
            // Our own node is gone, so there is no predecessor to wait for.
            fail(new LockingException(
                String.format("Error, lock node [%s] no longer exists!", memberId)));
            return;
          }

          // If we hold the lock
          if (memberIndex == 0) {
            acquired = true;
            outcome.countDown();
            return;
          }

          final String nextLowestNode = sortedMembers.get(memberIndex - 1);
          LOG.log(Level.INFO, String.format("Current LockWatcher with ephemeral node [%s], is "
              + "waiting for [%s] to release lock.", memberId, nextLowestNode));

          watchedNode = String.format("%s/%s", lockPath, nextLowestNode);
          Stat stat = zkClient.get().exists(watchedNode, this);
          if (stat != null) {
            // Watch is armed on an existing predecessor; wait for its deletion event.
            return;
          }
        }
      } catch (InterruptedException e) {
        LOG.log(Level.WARNING, String.format("Current LockWatcher with ephemeral node [%s] "
            + "got interrupted. Trying to cancel lock acquisition.", memberId), e);
        fail(new LockingException("InterruptedException while checking for the lock!", e));
        Thread.currentThread().interrupt();
      } catch (KeeperException e) {
        LOG.log(Level.WARNING, String.format("Current LockWatcher with ephemeral node [%s] "
            + "got a KeeperException. Trying to cancel lock acquisition.", memberId), e);
        fail(new LockingException("KeeperException while checking for the lock!", e));
      } catch (ZooKeeperClient.ZooKeeperConnectionException e) {
        LOG.log(Level.WARNING, String.format("Current LockWatcher with ephemeral node [%s] "
            + "got a ConnectionException. Trying to cancel lock acquisition.", memberId), e);
        fail(new LockingException("ZooKeeperConnectionException while checking for the lock!", e));
      }
    }

    /**
     * Handles watch notifications: re-evaluates the lock when the watched predecessor changes
     * and fails the attempt when the session expires.
     */
    @Override
    public synchronized void process(WatchedEvent event) {
      // This handles the case where the attempt was abandoned or already decided but a watch is
      // still registered. This is a workaround since ZK doesn't support unsub.
      if (watcher != this || outcome.getCount() == 0) {
        LOG.log(Level.INFO, "Ignoring call for node:" + watchedNode);
        return;
      }

      // Session-state events carry no node path, so they are handled before any path comparison.
      if (event.getType() == Watcher.Event.EventType.None) {
        switch (event.getState()) {
          case SyncConnected:
            // TODO(Florian Leibert): maybe we should just try to "fail-fast" in this case and
            // abort.
            LOG.info("Reconnected...");
            break;
          case Expired:
            LOG.log(Level.WARNING, String.format("Current ZK session expired![%s]", memberId));
            fail(new LockingException("Error, ZK session expired while waiting for the lock!"));
            break;
          default:
            break;
        }
        return;
      }

      String path = event.getPath();
      if (path == null || !path.equals(watchedNode)) {
        LOG.log(Level.INFO, "Ignoring call for node:" + watchedNode);
        return;
      }

      if (event.getType() != Event.EventType.NodeDeleted) {
        LOG.log(Level.WARNING, String.format("Unexpected ZK event: %s", event.getType().name()));
      }
      // Watches fire once, so even an unexpected event on the watched node has consumed ours;
      // re-evaluating both checks for the lock and re-arms the watch.
      checkForLock();
    }

    /** Records the failure and wakes the waiting thread; takes no monitor of the outer class. */
    private void fail(LockingException cause) {
      failure = cause;
      outcome.countDown();
    }
  }
}