// =================================================================================================
// Copyright 2011 Twitter, Inc.
// -------------------------------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this work except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file, or at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =================================================================================================
package com.twitter.common.zookeeper;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.Watcher.Event.EventType;
import org.apache.zookeeper.common.PathUtils;
import org.apache.zookeeper.data.ACL;
import com.twitter.common.base.Command;
import com.twitter.common.base.Commands;
import com.twitter.common.base.ExceptionalSupplier;
import com.twitter.common.base.MorePreconditions;
import com.twitter.common.util.BackoffHelper;
import com.twitter.common.zookeeper.ZooKeeperClient.ZooKeeperConnectionException;
/**
* This class exposes methods for joining and monitoring distributed groups. The groups this class
* monitors are realized as persistent paths in ZooKeeper with ephemeral sequential child nodes for
* each member of a group.
*
* @author John Sirois
*/
public class Group {
private static final Logger LOG = Logger.getLogger(Group.class.getName());
// Supplier that always yields null; used by the join overloads that take no member data.
private static final Supplier<byte[]> NO_MEMBER_DATA = Suppliers.ofInstance(null);
// Node name prefix used by the constructors that do not accept an explicit prefix.
private static final String DEFAULT_NODE_NAME_PREFIX = "member_";
// Client used for every ZooKeeper interaction made by this group.
private final ZooKeeperClient zkClient;
// Immutable copy of the ACL given at construction; handed to ensurePath and to the naming scheme.
private final ImmutableList<ACL> acl;
// Normalized group path (see normalizePath).
private final String path;
// Validated as non-blank by the constructor; not otherwise read by the code visible in this class.
private final String nodeNamePrefix;
// Strategy that creates member nodes, filters child node names and extracts member ids.
private final NodeNameScheme nodeNameScheme;
// Drives the retry loops used by join, watch, cancel and ensurePersistentGroupPath.
private final BackoffHelper backoffHelper;
/**
 * Canonicalizes a group path. Every run of two or more slashes is collapsed to a single slash,
 * then a trailing slash is removed when at least one character precedes it (so a lone "/" is
 * left untouched). The result is passed through {@link PathUtils#validatePath(String)} before it
 * is returned.
 *
 * @param path the raw path to canonicalize
 * @return the canonical form of {@code path}
 */
@VisibleForTesting static String normalizePath(String path) {
  String collapsed = path.replaceAll("//+", "/");
  String withoutTrailingSlash = collapsed.replaceFirst("(.+)/$", "$1");
  PathUtils.validatePath(withoutTrailingSlash);
  return withoutTrailingSlash;
}
/**
* Creates a group rooted at the given {@code path}. Paths must be absolute and trailing or
* duplicate slashes will be normalized. For example, all the following paths would create a
* group at the normalized path /my/distributed/group:
* <ul>
* <li>/my/distributed/group
* <li>/my/distributed/group/
* <li>/my/distributed//group
* </ul>
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param acl the ACL to use for creating the persistent group path if it does not already exist
* @param path the absolute persistent path that represents this group
* @param nodeNamePrefix Node name prefix that denotes group membership.
* @param nodeNameScheme the naming scheme that defines how nodes are named within the path
*/
public Group(ZooKeeperClient zkClient, Iterable<ACL> acl, String path, String nodeNamePrefix,
NodeNameScheme nodeNameScheme) {
this.zkClient = Preconditions.checkNotNull(zkClient);
this.acl = ImmutableList.copyOf(acl);
this.path = normalizePath(Preconditions.checkNotNull(path));
this.nodeNamePrefix = MorePreconditions.checkNotBlank(nodeNamePrefix);
this.nodeNameScheme = Preconditions.checkNotNull(nodeNameScheme);
backoffHelper = new BackoffHelper();
}
/**
* Equivalent to {@link #Group(ZooKeeperClient, Iterable, String, String, NodeNameScheme)} with a
* default {@code nodeNamePrefix} of 'member_' and a DefaultNamingScheme built from that prefix as
* the {@code nodeNameScheme}.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param acl the ACL to use for creating the persistent group path if it does not already exist
* @param path the absolute persistent path that represents this group
*/
public Group(ZooKeeperClient zkClient, Iterable<ACL> acl, String path) {
this(zkClient, acl, path, DEFAULT_NODE_NAME_PREFIX,
new DefaultNamingScheme(DEFAULT_NODE_NAME_PREFIX));
}
/**
* Equivalent to {@link #Group(ZooKeeperClient, Iterable, String, String, NodeNameScheme)} with a
* DefaultNamingScheme built from {@code nodeNamePrefix} as the {@code nodeNameScheme}.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param acl the ACL to use for creating the persistent group path if it does not already exist
* @param path the absolute persistent path that represents this group
* @param nodeNamePrefix node name prefix that denotes group membership
*/
public Group(ZooKeeperClient zkClient, Iterable<ACL> acl, String path, String nodeNamePrefix) {
this(zkClient, acl, path, nodeNamePrefix, new DefaultNamingScheme(nodeNamePrefix));
}
/**
* Equivalent to {@link #Group(ZooKeeperClient, Iterable, String, String, NodeNameScheme)} with a
* default {@code nodeNamePrefix} of 'member_'.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param acl the ACL to use for creating the persistent group path if it does not already exist
* @param path the absolute persistent path that represents this group
* @param nodeNameScheme the naming scheme that defines how nodes are named within the path
*/
public Group(ZooKeeperClient zkClient, Iterable<ACL> acl, String path,
NodeNameScheme nodeNameScheme) {
this(zkClient, acl, path, DEFAULT_NODE_NAME_PREFIX, nodeNameScheme);
}
/**
 * Builds the full ZooKeeper path of a member node by appending the member id to the group path.
 *
 * @param memberId the id (node name) of the member; must not be blank
 * @return the group path, a slash, and {@code memberId}
 */
public String getMemberPath(String memberId) {
  String checkedId = MorePreconditions.checkNotBlank(memberId);
  return path + "/" + checkedId;
}
/**
 * Extracts the member id from the full path of a node that lives directly under this group.
 *
 * @param nodePath the full path of a member node; must not be blank and must start with this
 *     group's path followed by a slash
 * @return the member id as determined by this group's naming scheme
 */
public String getMemberId(String nodePath) {
  MorePreconditions.checkNotBlank(nodePath);
  String groupPrefix = path + "/";
  boolean underThisGroup = nodePath.startsWith(groupPrefix);
  Preconditions.checkArgument(underThisGroup,
      "Not a member of this group[%s]: %s", path, nodePath);
  return nodeNameScheme.extractMemberId(nodePath);
}
/**
 * Synchronously lists the children of the group path and keeps only the names accepted by the
 * naming scheme's node name filter. No watch is set by this call.
 *
 * @return the ids of all the present members of this group
 * @throws ZooKeeperConnectionException if there was a problem connecting to ZooKeeper
 * @throws KeeperException if there was a problem reading this group's member ids
 * @throws InterruptedException if this thread is interrupted listing the group members
 */
public Iterable<String> getMemberIds()
    throws ZooKeeperConnectionException, KeeperException, InterruptedException {
  List<String> childNames = zkClient.get().getChildren(path, false);
  Predicate<String> isMemberNode = nodeNameScheme.getNodeNameFilter();
  return Iterables.filter(childNames, isMemberNode);
}
/**
 * Synchronously reads the data stored in the node of one of this group's members. No watch is
 * set by this call.
 *
 * @param memberId the id of the member whose data to retrieve
 * @return the data associated with the {@code memberId}
 * @throws ZooKeeperConnectionException if there was a problem connecting to ZooKeeper
 * @throws KeeperException if there was a problem reading this member's data
 * @throws InterruptedException if this thread is interrupted retrieving the member data
 */
public byte[] getMemberData(String memberId)
    throws ZooKeeperConnectionException, KeeperException, InterruptedException {
  final boolean noWatch = false;
  return zkClient.get().getData(getMemberPath(memberId), noWatch, null);
}
/**
* Represents membership in a distributed group. Instances are handed out by the {@code join}
* methods of {@link Group}.
*/
public interface Membership {
/**
* Returns the persistent ZooKeeper path that represents this group.
*/
String getGroupPath();
/**
* Returns the id (ZooKeeper node name) of this group member. May change over time if the
* ZooKeeper session expires.
*/
String getMemberId();
/**
* Returns the full ZooKeeper path to this group member. May change over time if the
* ZooKeeper session expires.
*/
String getMemberPath();
/**
* Updates the membership data synchronously using the {@code Supplier<byte[]>} passed to
* {@link Group#join(Supplier, Command)} (the no-data join overloads use a supplier that yields
* null).
*
* @return the new membership data
* @throws UpdateException if there was a problem updating the membership data
*/
byte[] updateMemberData() throws UpdateException;
/**
* Cancels group membership by deleting the associated ZooKeeper member node.
*
* @throws JoinException if there is a problem deleting the node
*/
void cancel() throws JoinException;
}
/**
* Indicates an error joining a group. Also thrown when ensuring the group path or cancelling a
* membership fails.
*/
public static class JoinException extends Exception {
/**
* @param message a description of what failed
* @param cause the underlying exception
*/
public JoinException(String message, Throwable cause) {
super(message, cause);
}
}
/**
* Indicates an error updating a group member's data.
*/
public static class UpdateException extends Exception {
/**
* @param message a description of what failed
* @param cause the underlying exception
*/
public UpdateException(String message, Throwable cause) {
super(message, cause);
}
}
/**
* Joins this group with no member data and no loss-of-membership callback. Equivalent to calling
* {@code join(NO_MEMBER_DATA, null)}, where {@code NO_MEMBER_DATA} is a supplier that always
* yields null (a null supplier itself would be rejected).
*
* @return a Membership object with the member details
* @throws JoinException if there was a problem joining the group
* @throws InterruptedException if this thread is interrupted awaiting completion of the join
*/
public final Membership join() throws JoinException, InterruptedException {
return join(NO_MEMBER_DATA, null);
}
/**
* Joins this group with the given member data and no loss-of-membership callback. Equivalent to
* calling {@code join(memberData, null)}.
*
* @param memberData a supplier of the data to store in the member node
* @return a Membership object with the member details
* @throws JoinException if there was a problem joining the group
* @throws InterruptedException if this thread is interrupted awaiting completion of the join
*/
public final Membership join(Supplier<byte[]> memberData)
throws JoinException, InterruptedException {
return join(memberData, null);
}
/**
* Joins this group with no member data. Equivalent to calling
* {@code join(NO_MEMBER_DATA, onLoseMembership)}, where {@code NO_MEMBER_DATA} is a supplier that
* always yields null (a null supplier itself would be rejected).
*
* @param onLoseMembership a callback to notify when membership is lost; may be null
* @return a Membership object with the member details
* @throws JoinException if there was a problem joining the group
* @throws InterruptedException if this thread is interrupted awaiting completion of the join
*/
public final Membership join(@Nullable final Command onLoseMembership)
throws JoinException, InterruptedException {
return join(NO_MEMBER_DATA, onLoseMembership);
}
/**
 * Joins this group and hands back the resulting Membership once the join has succeeded. The
 * membership is automatically cancelled when the current jvm process dies; the returned
 * Membership object can be used to cancel it earlier. Unless
 * {@link com.twitter.common.zookeeper.Group.Membership#cancel()} is called the membership is
 * maintained by silently re-establishing it in the background.
 *
 * <p>Whatever {@code memberData} supplies is persisted in the member node in ZooKeeper. When an
 * {@code onLoseMembership} callback is given, it is notified each time this member loses
 * membership in the group.
 *
 * <p>Connection problems and retryable ZooKeeper errors are logged and the join is attempted
 * again; any other ZooKeeper error aborts the join.
 *
 * @param memberData a supplier of the data to store in the member node; must not be null
 * @param onLoseMembership a callback to notify when membership is lost
 * @return a Membership object with the member details
 * @throws JoinException if there was a problem joining the group
 * @throws InterruptedException if this thread is interrupted awaiting completion of the join
 */
public final Membership join(Supplier<byte[]> memberData, @Nullable Command onLoseMembership)
    throws JoinException, InterruptedException {
  Preconditions.checkNotNull(memberData);
  ensurePersistentGroupPath();

  final ActiveMembership membership = new ActiveMembership(memberData, onLoseMembership);
  ExceptionalSupplier<Membership, JoinException> joinAttempt =
      new ExceptionalSupplier<Membership, JoinException>() {
        @Override public Membership get() throws JoinException {
          try {
            return membership.join();
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new JoinException("Interrupted trying to join group at path: " + path, e);
          } catch (ZooKeeperConnectionException e) {
            LOG.log(Level.WARNING, "Temporary error trying to join group at path: " + path, e);
            return null;  // No result yet - ask for another attempt.
          } catch (KeeperException e) {
            if (!zkClient.shouldRetry(e)) {
              throw new JoinException("Problem joining partition group at path: " + path, e);
            }
            LOG.log(Level.WARNING, "Temporary error trying to join group at path: " + path, e);
            return null;  // No result yet - ask for another attempt.
          }
        }
      };
  return backoffHelper.doUntilResult(joinAttempt);
}
/**
 * Makes sure the persistent group path exists, retrying through the backoff helper on
 * connection problems and on ZooKeeper errors the client reports as retryable.
 *
 * @throws JoinException if the path could not be ensured because of a non-retryable ZooKeeper
 *     error or because an attempt was interrupted
 * @throws InterruptedException if this thread is interrupted while waiting between attempts
 */
private void ensurePersistentGroupPath() throws JoinException, InterruptedException {
  ExceptionalSupplier<Boolean, JoinException> ensureAttempt =
      new ExceptionalSupplier<Boolean, JoinException>() {
        @Override public Boolean get() throws JoinException {
          try {
            ZooKeeperUtils.ensurePath(zkClient, acl, path);
            return true;
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new JoinException("Interrupted trying to ensure group at path: " + path, e);
          } catch (ZooKeeperConnectionException e) {
            LOG.log(Level.WARNING, "Problem connecting to ZooKeeper, retrying", e);
            return false;
          } catch (KeeperException e) {
            if (!zkClient.shouldRetry(e)) {
              throw new JoinException("Problem ensuring group at path: " + path, e);
            }
            LOG.log(Level.WARNING, "Temporary error ensuring path: " + path, e);
            return false;
          }
        }
      };
  backoffHelper.doUntilSuccess(ensureAttempt);
}
/**
 * The {@link Membership} implementation returned by the {@code join} methods. It creates the
 * member node through the group's naming scheme, re-joins when the ZooKeeper session expires or
 * the member node is deleted, and stops re-joining once {@link #cancel()} has succeeded.
 */
private class ActiveMembership implements Membership {
  private final Supplier<byte[]> memberData;
  private final Command onLoseMembership;
  private String nodePath;
  private String memberId;
  private volatile boolean cancelled;
  // The data most recently written to the member node.
  private byte[] membershipData;
  // Set by join() once the session expiration handler has been handed to the client, so that a
  // failed first join attempt that is retried does not register a second handler.
  private boolean expirationHandlerRegistered;

  /**
   * @param memberData a supplier of the data to store in the member node
   * @param onLoseMembership a callback run before each re-join attempt; null means no callback
   */
  public ActiveMembership(Supplier<byte[]> memberData, @Nullable Command onLoseMembership) {
    this.memberData = memberData;
    this.onLoseMembership = (onLoseMembership == null) ? Commands.NOOP : onLoseMembership;
  }

  @Override
  public String getGroupPath() {
    return path;
  }

  @Override
  public synchronized String getMemberId() {
    return memberId;
  }

  @Override
  public synchronized String getMemberPath() {
    return nodePath;
  }

  /**
   * Fetches fresh data from the supplier and writes it to the member node, but only when it
   * differs from the data last written.
   *
   * @return the data obtained from the supplier
   * @throws UpdateException if the write failed, the connection could not be established, or
   *     the thread was interrupted (in which case the interrupt flag is restored)
   */
  @Override
  public synchronized byte[] updateMemberData() throws UpdateException {
    byte[] latestData = memberData.get();
    if (!ArrayUtils.isEquals(this.membershipData, latestData)) {
      try {
        zkClient.get().setData(nodePath, latestData, ZooKeeperUtils.ANY_VERSION);
        this.membershipData = latestData;
      } catch (KeeperException e) {
        throw new UpdateException("Problem updating membership data.", e);
      } catch (InterruptedException e) {
        // Restore the flag so callers further up can still observe the interruption, as every
        // other InterruptedException handler in this class does.
        Thread.currentThread().interrupt();
        throw new UpdateException("Interrupted attempting to update membership data.", e);
      } catch (ZooKeeperConnectionException e) {
        throw new UpdateException(
            "Could not connect to the ZooKeeper cluster to update membership data.", e);
      }
    }
    return latestData;
  }

  /**
   * Deletes the member node, retrying on connection problems and retryable ZooKeeper errors. A
   * node that is already gone counts as success. Calling this again after a successful cancel
   * does nothing.
   *
   * @throws JoinException if the node could not be deleted or the thread was interrupted
   */
  @Override
  public synchronized void cancel() throws JoinException {
    if (!cancelled) {
      try {
        backoffHelper.doUntilSuccess(new ExceptionalSupplier<Boolean, JoinException>() {
          @Override public Boolean get() throws JoinException {
            try {
              zkClient.get().delete(nodePath, ZooKeeperUtils.ANY_VERSION);
              return true;
            } catch (InterruptedException e) {
              Thread.currentThread().interrupt();
              throw new JoinException("Interrupted trying to cancel membership: " + nodePath, e);
            } catch (ZooKeeperConnectionException e) {
              LOG.log(Level.WARNING, "Problem connecting to ZooKeeper, retrying", e);
              return false;
            } catch (NoNodeException e) {
              LOG.info("Membership already cancelled, node at path: " + nodePath +
                  " has been deleted");
              return true;
            } catch (KeeperException e) {
              if (zkClient.shouldRetry(e)) {
                LOG.log(Level.WARNING, "Temporary error cancelling membership: " + nodePath, e);
                return false;
              } else {
                throw new JoinException("Problem cancelling membership: " + nodePath, e);
              }
            }
          }
        });
        cancelled = true; // Prevent auto-re-join logic from undoing this cancel.
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new JoinException("Problem cancelling membership: " + nodePath, e);
      }
    }
  }

  /** Thrown by {@link #join()} when the membership has already been cancelled. */
  private class CancelledException extends IllegalStateException { /* marker */ }

  /**
   * Performs a single join attempt: creates the member node with freshly supplied data, records
   * its path and id, and sets a watch that triggers a re-join if the node is deleted.
   *
   * @return this membership
   * @throws CancelledException if the membership has been cancelled
   * @throws ZooKeeperConnectionException if there was a problem connecting to ZooKeeper
   * @throws InterruptedException if this thread is interrupted during the attempt
   * @throws KeeperException if ZooKeeper rejected one of the operations
   */
  synchronized Membership join()
      throws ZooKeeperConnectionException, InterruptedException, KeeperException {
    if (cancelled) {
      throw new CancelledException();
    }

    if (!expirationHandlerRegistered) {
      // Re-join if our ephemeral node goes away due to session expiry - only needs to be
      // registered once. A dedicated flag is used rather than nodePath == null because nodePath
      // stays null when node creation below fails and the join is retried.
      zkClient.registerExpirationHandler(new Command() {
        @Override public void execute() {
          tryJoin();
        }
      });
      expirationHandlerRegistered = true;
    }

    byte[] membershipData = memberData.get();
    nodePath = nodeNameScheme.createNodePath(zkClient, path, membershipData, acl);
    memberId = nodeNameScheme.extractMemberId(nodePath);
    LOG.info("Set group member ID to " + memberId);
    this.membershipData = membershipData;

    // Re-join if our member node is deleted out from under us.
    zkClient.get().exists(nodePath, new Watcher() {
      @Override public void process(WatchedEvent event) {
        if (event.getType() == EventType.NodeDeleted) {
          LOG.info("Member ID deleted. Rejoining. Event: " + event);
          tryJoin();
        }
      }
    });

    return this;
  }

  // One re-join attempt for the backoff helper: true ends the retry loop, false asks for
  // another attempt, and a non-retryable ZooKeeper error aborts with IllegalStateException.
  private final ExceptionalSupplier<Boolean, InterruptedException> tryJoin =
      new ExceptionalSupplier<Boolean, InterruptedException>() {
        @Override public Boolean get() throws InterruptedException {
          try {
            join();
            return true;
          } catch (CancelledException e) {
            // Lost a cancel race - that's ok.
            return true;
          } catch (ZooKeeperConnectionException e) {
            LOG.log(Level.WARNING, "Problem connecting to ZooKeeper, retrying", e);
            return false;
          } catch (KeeperException e) {
            if (zkClient.shouldRetry(e)) {
              LOG.log(Level.WARNING, "Temporary error re-joining group: " + path, e);
              return false;
            } else {
              throw new IllegalStateException("Permanent problem re-joining group: " + path, e);
            }
          }
        }
      };

  /**
   * Notifies the loss-of-membership callback and then re-joins the group, retrying through the
   * backoff helper until an attempt succeeds or the membership turns out to be cancelled.
   */
  private synchronized void tryJoin() {
    onLoseMembership.execute();
    try {
      backoffHelper.doUntilSuccess(tryJoin);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException(
          String.format("Interrupted while trying to re-join group: %s, giving up", path), e);
    }
  }
}
/**
* An interface to an object that listens for changes to a group's membership. Listeners are
* registered through {@code watch}.
*/
public interface GroupChangeListener {
/**
* Called whenever group membership changes with the new list of member ids. The group monitor
* only invokes this when the observed set of member ids differs from the previously recorded
* set.
*
* @param memberIds the current member ids
*/
void onGroupChange(Iterable<String> memberIds);
}
/**
* An interface that dictates the naming scheme to use for storing and filtering nodes on the
* ZooKeeper server.
*/
public interface NodeNameScheme {
/**
* Returns a predicate that filters the names of nodes on the ZooKeeper path, leaving only the
* nodes that are named according to this scheme.
*/
Predicate<String> getNodeNameFilter();
/**
* Creates a node on the given {@code path} according to the naming scheme of the class.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param path the absolute persistent path that represents this group
* @param membershipData the data to store in the member node
* @param acl the ACL handed to the scheme for node creation (the default scheme applies it to
*     the member node it creates)
* @throws InterruptedException if this thread is interrupted awaiting completion of the join
* @throws ZooKeeperConnectionException if there was a problem connecting to ZooKeeper
* @throws KeeperException if ZooKeeper reported a problem creating the node
* @return the path to the newly created node
*/
String createNodePath(ZooKeeperClient zkClient, String path, byte[] membershipData,
ImmutableList<ACL> acl) throws ZooKeeperConnectionException, KeeperException,
InterruptedException;
/**
* Given a path to a node, determines the node's member ID from its name.
*
* @param nodePath the path to the node
* @return the node's member ID
*/
String extractMemberId(String nodePath);
}
/**
* Indicates an error watching a group.
*/
public static class WatchException extends Exception {
/**
* @param message a description of what failed
* @param cause the underlying exception
*/
public WatchException(String message, Throwable cause) {
super(message, cause);
}
}
/**
 * Watches this group for the lifetime of this jvm process. This method will block until the
 * current group members are available, notify the {@code groupChangeListener} and then return.
 * All further changes to the group membership will cause notifications on a background thread.
 *
 * <p>The group path is created first if it does not exist. Connection problems and retryable
 * ZooKeeper errors while setting the initial watch are logged and the attempt is repeated.
 *
 * @param groupChangeListener the listener to notify of group membership change events
 * @throws WatchException if there is a problem generating the 1st group membership list
 * @throws InterruptedException if interrupted waiting to gather the 1st group membership list
 */
public final void watch(final GroupChangeListener groupChangeListener)
    throws WatchException, InterruptedException {
  Preconditions.checkNotNull(groupChangeListener);

  try {
    ensurePersistentGroupPath();
  } catch (JoinException e) {
    throw new WatchException("Failed to create group path: " + path, e);
  }

  final GroupMonitor groupMonitor = new GroupMonitor(groupChangeListener);
  backoffHelper.doUntilSuccess(new ExceptionalSupplier<Boolean, WatchException>() {
    @Override public Boolean get() throws WatchException {
      try {
        groupMonitor.watchGroup();
        return true;
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new WatchException("Interrupted trying to watch group at path: " + path, e);
      } catch (ZooKeeperConnectionException e) {
        LOG.log(Level.WARNING, "Temporary error trying to watch group at path: " + path, e);
        // Signal "not yet successful" with false, like the other doUntilSuccess suppliers in
        // this class, rather than handing the helper a null Boolean.
        return false;
      } catch (KeeperException e) {
        if (zkClient.shouldRetry(e)) {
          LOG.log(Level.WARNING, "Temporary error trying to watch group at path: " + path, e);
          return false;
        } else {
          throw new WatchException("Problem trying to watch group at path: " + path, e);
        }
      }
    }
  });
}
/**
 * Keeps a watch on the group's children and reports membership changes to a single listener.
 */
private class GroupMonitor {
  private final GroupChangeListener groupChangeListener;
  // The member set most recently reported to the listener; null until the first report.
  private Set<String> members;

  GroupMonitor(GroupChangeListener groupChangeListener) {
    this.groupChangeListener = groupChangeListener;
  }

  // Handed to getChildren; re-reads the group whenever its children change.
  private final Watcher groupWatcher = new Watcher() {
    @Override public final void process(WatchedEvent event) {
      if (event.getType() != EventType.NodeChildrenChanged) {
        return;
      }
      tryWatchGroup();
    }
  };

  // One re-watch attempt for the backoff helper: true ends the retry loop, false asks for
  // another attempt, and a non-retryable ZooKeeper error aborts with IllegalStateException.
  private final ExceptionalSupplier<Boolean, InterruptedException> rewatchAttempt =
      new ExceptionalSupplier<Boolean, InterruptedException>() {
        @Override public Boolean get() throws InterruptedException {
          try {
            watchGroup();
            return true;
          } catch (ZooKeeperConnectionException e) {
            LOG.log(Level.WARNING, "Problem connecting to ZooKeeper, retrying", e);
            return false;
          } catch (KeeperException e) {
            if (!zkClient.shouldRetry(e)) {
              throw new IllegalStateException("Permanent problem re-watching group: " + path, e);
            }
            LOG.log(Level.WARNING, "Temporary error re-watching group: " + path, e);
            return false;
          }
        }
      };

  /** Re-establishes the watch on the group, retrying through the backoff helper. */
  private void tryWatchGroup() {
    try {
      backoffHelper.doUntilSuccess(rewatchAttempt);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException(
          String.format("Interrupted while trying to re-watch group: %s, giving up", path), e);
    }
  }

  /**
   * Reads the group's children while (re)setting the children watch, then records the child
   * names that the naming scheme accepts as members.
   */
  private void watchGroup()
      throws ZooKeeperConnectionException, InterruptedException, KeeperException {
    List<String> childNames = zkClient.get().getChildren(path, groupWatcher);
    Predicate<String> isMemberNode = nodeNameScheme.getNodeNameFilter();
    setMembers(Iterables.filter(childNames, isMemberNode));
  }

  /**
   * Records the given member ids and notifies the listener when they differ from the set
   * recorded previously.
   *
   * @param members the member ids currently present in the group
   */
  synchronized void setMembers(Iterable<String> members) {
    boolean nothingRecordedYet = (this.members == null);
    if (nothingRecordedYet) {
      // Reset our watch on the group if session expires - only needs to be registered once.
      zkClient.registerExpirationHandler(new Command() {
        @Override public void execute() {
          tryWatchGroup();
        }
      });
    }

    Set<String> snapshot = ImmutableSet.copyOf(members);
    if (snapshot.equals(this.members)) {
      return;
    }
    groupChangeListener.onGroupChange(members);
    this.members = snapshot;
  }
}
/**
 * The naming scheme used when none is supplied. Member nodes live at [given path] + "/" +
 * [given prefix] + a ZooKeeper-generated sequence suffix. For example, with the path
 * "/discovery/servicename" and the prefix "member_", a member node's full path will look
 * something like {@code /discovery/servicename/member_0000000007}.
 */
private static class DefaultNamingScheme implements Group.NodeNameScheme {
  private final String nodeNamePrefix;
  private final Predicate<String> nodeNameFilter;

  /**
   * Creates a naming scheme based on the given prefix. Node names are accepted when they
   * consist of the prefix followed by one or more decimal digits.
   *
   * @param nodeNamePrefix the prefix for the names of the member nodes; must not be blank
   */
  public DefaultNamingScheme(String nodeNamePrefix) {
    this.nodeNamePrefix = MorePreconditions.checkNotBlank(nodeNamePrefix);
    String memberNameRegex = "^" + Pattern.quote(nodeNamePrefix) + "[0-9]+$";
    final Pattern memberNamePattern = Pattern.compile(memberNameRegex);
    this.nodeNameFilter = new Predicate<String>() {
      @Override public boolean apply(String childNodeName) {
        return memberNamePattern.matcher(childNodeName).matches();
      }
    };
  }

  @Override
  public Predicate<String> getNodeNameFilter() {
    return nodeNameFilter;
  }

  /**
   * Creates an ephemeral sequential node under {@code path} whose name starts with this
   * scheme's prefix.
   */
  @Override
  public String createNodePath(ZooKeeperClient zkClient, String path, byte[] membershipData,
      ImmutableList<ACL> acl)
      throws ZooKeeperConnectionException, InterruptedException, KeeperException {
    String memberNodePrefix = path + "/" + nodeNamePrefix;
    return zkClient.get().create(memberNodePrefix, membershipData, acl,
        CreateMode.EPHEMERAL_SEQUENTIAL);
  }

  /**
   * Takes everything after the last slash of {@code nodePath} as the member id and rejects it
   * when it does not match this scheme's node name pattern.
   */
  @Override
  public String extractMemberId(String nodePath) {
    String nodeName = StringUtils.substringAfterLast(nodePath, "/");
    boolean namedLikeMember = nodeNameFilter.apply(nodeName);
    Preconditions.checkArgument(namedLikeMember, "Not a group member: %s", nodeName);
    return nodeName;
  }
}
/** Returns the text "Group " followed by this group's normalized path. */
@Override
public String toString() {
  StringBuilder description = new StringBuilder("Group ");
  description.append(path);
  return description.toString();
}
}