// =================================================================================================
// Copyright 2011 Twitter, Inc.
// -------------------------------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this work except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file, or at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =================================================================================================
package com.twitter.common.zookeeper;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.Watcher.Event.EventType;
import org.apache.zookeeper.data.ACL;
import com.twitter.common.base.Command;
import com.twitter.common.base.Commands;
import com.twitter.common.base.ExceptionalSupplier;
import com.twitter.common.base.MorePreconditions;
import com.twitter.common.util.BackoffHelper;
import com.twitter.common.zookeeper.ZooKeeperClient.ZooKeeperConnectionException;
/**
* This class exposes methods for joining and monitoring distributed groups. The groups this class
* monitors are realized as persistent paths in ZooKeeper with ephemeral child nodes for
* each member of a group.
*/
public class Group {
// Logger shared by all instances, named after this class.
private static final Logger LOG = Logger.getLogger(Group.class.getName());
// Supplier that always yields null data; used by the join() overloads that take no member data.
private static final Supplier<byte[]> NO_MEMBER_DATA = Suppliers.ofInstance(null);
// Member node name prefix used when the caller supplies neither a prefix nor a NodeScheme.
private static final String DEFAULT_NODE_NAME_PREFIX = "member_";
// Client through which every ZooKeeper call made by this group is issued.
private final ZooKeeperClient zkClient;
// ACL used both when ensuring the group path exists and when creating member nodes.
private final ImmutableList<ACL> acl;
// Normalized form of the group path passed to the constructor.
private final String path;
// Scheme that names new member nodes and recognizes member nodes among the group's children.
private final NodeScheme nodeScheme;
// Predicate wrapper around nodeScheme.isMember, used to filter lists of child node names.
private final Predicate<String> nodeNameFilter;
// Retry helper used by the join, cancel and watch retry loops in this class.
private final BackoffHelper backoffHelper;
/**
 * Builds a group rooted at {@code path}. The path must be absolute; trailing and duplicate
 * slashes are normalized away, so each of the following yields a group at
 * /my/distributed/group:
 * <ul>
 * <li>/my/distributed/group
 * <li>/my/distributed/group/
 * <li>/my/distributed//group
 * </ul>
 *
 * @param zkClient the client to use for interactions with ZooKeeper
 * @param acl the ACL to use for creating the persistent group path if it does not already exist
 * @param path the absolute persistent path that represents this group
 * @param nodeScheme the scheme that defines how nodes are created
 */
public Group(ZooKeeperClient zkClient, Iterable<ACL> acl, String path, NodeScheme nodeScheme) {
  Preconditions.checkNotNull(zkClient);
  this.zkClient = zkClient;

  this.acl = ImmutableList.copyOf(acl);

  Preconditions.checkNotNull(path);
  this.path = ZooKeeperUtils.normalizePath(path);

  Preconditions.checkNotNull(nodeScheme);
  this.nodeScheme = nodeScheme;

  this.backoffHelper = new BackoffHelper();

  // Adapts the node scheme's membership test to a Predicate usable with Iterables.filter.
  this.nodeNameFilter = new Predicate<String>() {
    @Override public boolean apply(String candidateName) {
      return Group.this.nodeScheme.isMember(candidateName);
    }
  };
}
/**
* Creates a group at {@code path} whose member nodes are named with the default prefix.
*
* <p>Equivalent to {@link #Group(ZooKeeperClient, Iterable, String, String)} with a
* {@code namePrefix} of 'member_'.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param acl the ACL to use for creating the persistent group path if it does not already exist
* @param path the absolute persistent path that represents this group
*/
public Group(ZooKeeperClient zkClient, Iterable<ACL> acl, String path) {
this(zkClient, acl, path, DEFAULT_NODE_NAME_PREFIX);
}
/**
* Creates a group at {@code path} whose member nodes are named with the given prefix.
*
* <p>Equivalent to {@link #Group(ZooKeeperClient, Iterable, String, NodeScheme)} with a
* {@link DefaultScheme} using {@code namePrefix}.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param acl the ACL to use for creating the persistent group path if it does not already exist
* @param path the absolute persistent path that represents this group
* @param namePrefix the prefix for the names of the member nodes
*/
public Group(ZooKeeperClient zkClient, Iterable<ACL> acl, String path, String namePrefix) {
this(zkClient, acl, path, new DefaultScheme(namePrefix));
}
/**
 * Builds the full ZooKeeper path of a member node from its id.
 *
 * @param memberId the id (node name) of a group member; must not be blank
 * @return this group's path, a slash, and {@code memberId}
 */
public String getMemberPath(String memberId) {
  String checkedId = MorePreconditions.checkNotBlank(memberId);
  return path + "/" + checkedId;
}
/**
 * Returns the normalized ZooKeeper path that represents this group.
 *
 * @return the group path
 */
public String getPath() {
  return this.path;
}
/**
 * Extracts the member id (the final path segment) from the full path of a member node.
 *
 * @param nodePath the full ZooKeeper path of a node under this group; must not be blank
 * @return the node name portion of {@code nodePath}
 * @throws IllegalArgumentException if {@code nodePath} is not located under this group's path
 *     or its node name is not accepted by this group's node scheme
 */
public String getMemberId(String nodePath) {
  MorePreconditions.checkNotBlank(nodePath);

  String groupPrefix = path + "/";
  Preconditions.checkArgument(nodePath.startsWith(groupPrefix),
      "Not a member of this group[%s]: %s", path, nodePath);

  // The prefix check above guarantees at least one '/' is present.
  int lastSlash = nodePath.lastIndexOf('/');
  String memberId = nodePath.substring(lastSlash + 1);

  Preconditions.checkArgument(nodeScheme.isMember(memberId),
      "Not a group member: %s", memberId);
  return memberId;
}
/**
 * Synchronously asks ZooKeeper for this group's children and returns those that the node scheme
 * recognizes as members.
 *
 * @return the ids of all the present members of this group
 * @throws ZooKeeperConnectionException if there was a problem connecting to ZooKeeper
 * @throws KeeperException if there was a problem reading this group's member ids
 * @throws InterruptedException if this thread is interrupted listing the group members
 */
public Iterable<String> getMemberIds()
    throws ZooKeeperConnectionException, KeeperException, InterruptedException {
  List<String> childNames = zkClient.get().getChildren(path, false);
  return Iterables.filter(childNames, nodeNameFilter);
}
/**
 * Synchronously reads the data stored in one member's node.
 *
 * @param memberId the id of the member whose data to retrieve
 * @return the data associated with the {@code memberId}
 * @throws ZooKeeperConnectionException if there was a problem connecting to ZooKeeper
 * @throws KeeperException if there was a problem reading this member's data
 * @throws InterruptedException if this thread is interrupted retrieving the member data
 */
public byte[] getMemberData(String memberId)
    throws ZooKeeperConnectionException, KeeperException, InterruptedException {
  // A plain read: no watch is left on the member node.
  final boolean setWatch = false;
  return zkClient.get().getData(getMemberPath(memberId), setWatch, null);
}
/**
* Represents membership in a distributed group.
*
* <p>Instances are returned by the {@code join} methods of {@link Group}. The member id and
* member path reported by an instance may change over time, because the membership is
* re-established under a new node when the old one goes away.
*/
public interface Membership {
/**
* Returns the persistent ZooKeeper path that represents this group.
*
* @return the group path
*/
String getGroupPath();
/**
* Returns the id (ZooKeeper node name) of this group member. May change over time if the
* ZooKeeper session expires.
*
* @return the current member id
*/
String getMemberId();
/**
* Returns the full ZooKeeper path to this group member. May change over time if the
* ZooKeeper session expires.
*
* @return the current full path of the member node
*/
String getMemberPath();
/**
* Updates the membership data synchronously using the {@code Supplier<byte[]>} passed to
* {@link Group#join(Supplier, Command)} (or one of its overloads).
*
* @return the new membership data
* @throws UpdateException if there was a problem updating the membership data
*/
byte[] updateMemberData() throws UpdateException;
/**
* Cancels group membership by deleting the associated ZooKeeper member node.
*
* @throws JoinException if there is a problem deleting the node
*/
void cancel() throws JoinException;
}
/**
* Indicates an error joining a group. Also thrown by {@link Membership#cancel()} when the
* member node cannot be deleted.
*/
public static class JoinException extends Exception {
/**
* @param message a description of what failed
* @param cause the underlying failure
*/
public JoinException(String message, Throwable cause) {
super(message, cause);
}
}
/**
* Indicates an error updating a group member's data; thrown by
* {@link Membership#updateMemberData()}.
*/
public static class UpdateException extends Exception {
/**
* @param message a description of what failed
* @param cause the underlying failure
*/
public UpdateException(String message, Throwable cause) {
super(message, cause);
}
}
/**
 * Joins this group storing no member data and with no loss-of-membership callback.
 *
 * <p>Equivalent to calling {@link #join(Supplier, Command)} with a supplier that always yields
 * {@code null} data and a {@code null} callback.
 *
 * @return a Membership object with the member details
 * @throws JoinException if there was a problem joining the group
 * @throws InterruptedException if this thread is interrupted awaiting completion of the join
 */
public final Membership join() throws JoinException, InterruptedException {
  final Command noLossCallback = null;
  return join(NO_MEMBER_DATA, noLossCallback);
}
/**
 * Joins this group storing the supplied member data, with no loss-of-membership callback.
 *
 * <p>Equivalent to calling {@code join(memberData, null)}.
 *
 * @param memberData a supplier of the data to store in the member node
 * @return a Membership object with the member details
 * @throws JoinException if there was a problem joining the group
 * @throws InterruptedException if this thread is interrupted awaiting completion of the join
 */
public final Membership join(Supplier<byte[]> memberData)
    throws JoinException, InterruptedException {
  final Command noLossCallback = null;
  return join(memberData, noLossCallback);
}
/**
 * Joins this group storing no member data, notifying {@code onLoseMembership} whenever
 * membership is lost.
 *
 * <p>Equivalent to calling {@link #join(Supplier, Command)} with a supplier that always yields
 * {@code null} data and the given callback.
 *
 * @param onLoseMembership a callback to notify when membership is lost, or {@code null}
 * @return a Membership object with the member details
 * @throws JoinException if there was a problem joining the group
 * @throws InterruptedException if this thread is interrupted awaiting completion of the join
 */
public final Membership join(@Nullable final Command onLoseMembership)
    throws JoinException, InterruptedException {
  final Supplier<byte[]> noData = NO_MEMBER_DATA;
  return join(noData, onLoseMembership);
}
/**
 * Joins this group and returns the resulting Membership once the join has succeeded. The
 * membership ends automatically when the current jvm process dies; the returned Membership can
 * be used to end it sooner. Unless {@link Membership#cancel()} is called, the membership is
 * maintained by silently re-establishing it in the background.
 *
 * <p>Any {@code memberData} given is persisted in the member node in ZooKeeper. If an
 * {@code onLoseMembership} callback is supplied, it will be notified each time this member loses
 * membership in the group.
 *
 * <p>Connection problems and ZooKeeper errors that the client reports as retryable are logged
 * and retried; any other ZooKeeper error aborts the join.
 *
 * @param memberData a supplier of the data to store in the member node
 * @param onLoseMembership a callback to notify when membership is lost
 * @return a Membership object with the member details
 * @throws JoinException if there was a problem joining the group
 * @throws InterruptedException if this thread is interrupted awaiting completion of the join
 */
public final Membership join(Supplier<byte[]> memberData, @Nullable Command onLoseMembership)
    throws JoinException, InterruptedException {
  Preconditions.checkNotNull(memberData);
  ensurePersistentGroupPath();

  final ActiveMembership membership = new ActiveMembership(memberData, onLoseMembership);

  // A null result tells doUntilResult to back off and try again.
  ExceptionalSupplier<Membership, JoinException> joinAttempt =
      new ExceptionalSupplier<Membership, JoinException>() {
        @Override public Membership get() throws JoinException {
          try {
            return membership.join();
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new JoinException("Interrupted trying to join group at path: " + path, e);
          } catch (ZooKeeperConnectionException e) {
            LOG.log(Level.WARNING, "Temporary error trying to join group at path: " + path, e);
            return null;
          } catch (KeeperException e) {
            if (!zkClient.shouldRetry(e)) {
              throw new JoinException("Problem joining partition group at path: " + path, e);
            }
            LOG.log(Level.WARNING, "Temporary error trying to join group at path: " + path, e);
            return null;
          }
        }
      };

  return backoffHelper.doUntilResult(joinAttempt);
}
/**
 * Makes sure the persistent node for this group exists, retrying with backoff on connection
 * problems and on ZooKeeper errors the client reports as retryable.
 *
 * @throws JoinException if interrupted inside an attempt or on a non-retryable ZooKeeper error
 * @throws InterruptedException if interrupted while waiting between attempts
 */
private void ensurePersistentGroupPath() throws JoinException, InterruptedException {
  ExceptionalSupplier<Boolean, JoinException> ensureAttempt =
      new ExceptionalSupplier<Boolean, JoinException>() {
        @Override public Boolean get() throws JoinException {
          try {
            ZooKeeperUtils.ensurePath(zkClient, acl, path);
            return true;
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new JoinException("Interrupted trying to ensure group at path: " + path, e);
          } catch (ZooKeeperConnectionException e) {
            LOG.log(Level.WARNING, "Problem connecting to ZooKeeper, retrying", e);
            return false;
          } catch (KeeperException e) {
            if (!zkClient.shouldRetry(e)) {
              throw new JoinException("Problem ensuring group at path: " + path, e);
            }
            LOG.log(Level.WARNING, "Temporary error ensuring path: " + path, e);
            return false;
          }
        }
      };

  backoffHelper.doUntilSuccess(ensureAttempt);
}
private class ActiveMembership implements Membership {
private final Supplier<byte[]> memberData;
private final Command onLoseMembership;
private String nodePath;
private String memberId;
private volatile boolean cancelled;
private byte[] membershipData;
public ActiveMembership(Supplier<byte[]> memberData, @Nullable Command onLoseMembership) {
this.memberData = memberData;
this.onLoseMembership = (onLoseMembership == null) ? Commands.NOOP : onLoseMembership;
}
@Override
public String getGroupPath() {
return path;
}
@Override
public synchronized String getMemberId() {
return memberId;
}
@Override
public synchronized String getMemberPath() {
return nodePath;
}
@Override
public synchronized byte[] updateMemberData() throws UpdateException {
byte[] membershipData = memberData.get();
if (!ArrayUtils.isEquals(this.membershipData, membershipData)) {
try {
zkClient.get().setData(nodePath, membershipData, ZooKeeperUtils.ANY_VERSION);
this.membershipData = membershipData;
} catch (KeeperException e) {
throw new UpdateException("Problem updating membership data.", e);
} catch (InterruptedException e) {
throw new UpdateException("Interrupted attempting to update membership data.", e);
} catch (ZooKeeperConnectionException e) {
throw new UpdateException(
"Could not connect to the ZooKeeper cluster to update membership data.", e);
}
}
return membershipData;
}
@Override
public synchronized void cancel() throws JoinException {
if (!cancelled) {
try {
backoffHelper.doUntilSuccess(new ExceptionalSupplier<Boolean, JoinException>() {
@Override public Boolean get() throws JoinException {
try {
zkClient.get().delete(nodePath, ZooKeeperUtils.ANY_VERSION);
return true;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new JoinException("Interrupted trying to cancel membership: " + nodePath, e);
} catch (ZooKeeperConnectionException e) {
LOG.log(Level.WARNING, "Problem connecting to ZooKeeper, retrying", e);
return false;
} catch (NoNodeException e) {
LOG.info("Membership already cancelled, node at path: " + nodePath +
" has been deleted");
return true;
} catch (KeeperException e) {
if (zkClient.shouldRetry(e)) {
LOG.log(Level.WARNING, "Temporary error cancelling membership: " + nodePath, e);
return false;
} else {
throw new JoinException("Problem cancelling membership: " + nodePath, e);
}
}
}
});
cancelled = true; // Prevent auto-re-join logic from undoing this cancel.
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new JoinException("Problem cancelling membership: " + nodePath, e);
}
}
}
private class CancelledException extends IllegalStateException { /* marker */ }
synchronized Membership join()
throws ZooKeeperConnectionException, InterruptedException, KeeperException {
if (cancelled) {
throw new CancelledException();
}
if (nodePath == null) {
// Re-join if our ephemeral node goes away due to session expiry - only needs to be
// registered once.
zkClient.registerExpirationHandler(new Command() {
@Override public void execute() {
tryJoin();
}
});
}
byte[] membershipData = memberData.get();
String nodeName = nodeScheme.createName(membershipData);
CreateMode createMode = nodeScheme.isSequential()
? CreateMode.EPHEMERAL_SEQUENTIAL
: CreateMode.EPHEMERAL;
nodePath = zkClient.get().create(path + "/" + nodeName, membershipData, acl, createMode);
memberId = Group.this.getMemberId(nodePath);
LOG.info("Set group member ID to " + memberId);
this.membershipData = membershipData;
// Re-join if our ephemeral node goes away due to maliciousness.
zkClient.get().exists(nodePath, new Watcher() {
@Override public void process(WatchedEvent event) {
if (event.getType() == EventType.NodeDeleted) {
tryJoin();
}
}
});
return this;
}
private final ExceptionalSupplier<Boolean, InterruptedException> tryJoin =
new ExceptionalSupplier<Boolean, InterruptedException>() {
@Override public Boolean get() throws InterruptedException {
try {
join();
return true;
} catch (CancelledException e) {
// Lost a cancel race - that's ok.
return true;
} catch (ZooKeeperConnectionException e) {
LOG.log(Level.WARNING, "Problem connecting to ZooKeeper, retrying", e);
return false;
} catch (KeeperException e) {
if (zkClient.shouldRetry(e)) {
LOG.log(Level.WARNING, "Temporary error re-joining group: " + path, e);
return false;
} else {
throw new IllegalStateException("Permanent problem re-joining group: " + path, e);
}
}
}
};
private synchronized void tryJoin() {
onLoseMembership.execute();
try {
backoffHelper.doUntilSuccess(tryJoin);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new RuntimeException(
String.format("Interrupted while trying to re-join group: %s, giving up", path), e);
}
}
}
/**
* An interface to an object that listens for changes to a group's membership.
*
* <p>Listeners are registered with {@link Group#watch(GroupChangeListener)}.
*/
public interface GroupChangeListener {
/**
* Called whenever group membership changes with the new list of member ids.
*
* <p>NOTE(review): the monitor in this file invokes this callback from a synchronized method,
* so a slow implementation appears to delay later notifications - confirm before relying on it.
*
* @param memberIds the current member ids
*/
void onGroupChange(Iterable<String> memberIds);
}
/**
* An interface that dictates the scheme to use for storing and filtering nodes that represent
* members of a distributed group.
*
* <p>A group uses its scheme in two places: to pick the name and create mode of the node made
* when joining, and to decide which children of the group path count as members.
*/
public interface NodeScheme {
/**
* Determines if a child node is a member of a group by examining the node's name.
*
* @param nodeName the name of a child node found in a group
* @return {@code true} if {@code nodeName} identifies a group member in this scheme
*/
boolean isMember(String nodeName);
/**
* Generates a node name for the node representing this process in the distributed group.
* The name is appended to the group path to form the path passed to ZooKeeper's create call.
*
* @param membershipData the data that will be stored in this node
* @return the name for the node that will represent this process in the group
*/
String createName(byte[] membershipData);
/**
* Indicates whether this scheme needs ephemeral sequential nodes or just ephemeral nodes.
*
* @return {@code true} if this scheme requires sequential node names; {@code false} otherwise
*/
boolean isSequential();
}
/**
* Indicates an error watching a group; thrown by {@link Group#watch(GroupChangeListener)}.
*/
public static class WatchException extends Exception {
/**
* @param message a description of what failed
* @param cause the underlying failure
*/
public WatchException(String message, Throwable cause) {
super(message, cause);
}
}
/**
 * Watches this group for the lifetime of this jvm process. This method will block until the
 * current group members are available, notify the {@code groupChangeListener} and then return.
 * All further changes to the group membership will cause notifications on a background thread.
 *
 * <p>Connection problems and ZooKeeper errors that the client reports as retryable are logged
 * and retried with backoff; any other ZooKeeper error aborts the watch.
 *
 * @param groupChangeListener the listener to notify of group membership change events
 * @return A command which, when executed, will stop watching the group.
 * @throws WatchException if there is a problem generating the 1st group membership list
 * @throws InterruptedException if interrupted waiting to gather the 1st group membership list
 */
public final Command watch(final GroupChangeListener groupChangeListener)
    throws WatchException, InterruptedException {
  Preconditions.checkNotNull(groupChangeListener);

  try {
    ensurePersistentGroupPath();
  } catch (JoinException e) {
    throw new WatchException("Failed to create group path: " + path, e);
  }

  final GroupMonitor groupMonitor = new GroupMonitor(groupChangeListener);
  backoffHelper.doUntilSuccess(new ExceptionalSupplier<Boolean, WatchException>() {
    @Override public Boolean get() throws WatchException {
      try {
        groupMonitor.watchGroup();
        return true;
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new WatchException("Interrupted trying to watch group at path: " + path, e);
      } catch (ZooKeeperConnectionException e) {
        LOG.log(Level.WARNING, "Temporary error trying to watch group at path: " + path, e);
        // Signal "not yet successful" with false, like every other doUntilSuccess task in this
        // class, instead of a null Boolean.
        return false;
      } catch (KeeperException e) {
        if (zkClient.shouldRetry(e)) {
          LOG.log(Level.WARNING, "Temporary error trying to watch group at path: " + path, e);
          return false;
        } else {
          throw new WatchException("Problem trying to watch group at path: " + path, e);
        }
      }
    }
  });

  return new Command() {
    @Override public void execute() {
      groupMonitor.stopWatching();
    }
  };
}
/**
 * Keeps a watch on this group's children and reports each change in the set of member ids to a
 * {@link GroupChangeListener} until told to stop.
 */
private class GroupMonitor {
  private final GroupChangeListener groupChangeListener;
  private volatile boolean stopped;
  private Set<String> members;

  // Re-reads the children (which also re-arms this watcher) whenever the child list changes.
  private final Watcher groupWatcher = new Watcher() {
    @Override public final void process(WatchedEvent event) {
      if (event.getType() == EventType.NodeChildrenChanged) {
        tryWatchGroup();
      }
    }
  };

  // One re-watch attempt in the form expected by BackoffHelper.doUntilSuccess: true ends the
  // retry loop, false requests another attempt.
  private final ExceptionalSupplier<Boolean, InterruptedException> tryWatchGroup =
      new ExceptionalSupplier<Boolean, InterruptedException>() {
        @Override public Boolean get() throws InterruptedException {
          try {
            watchGroup();
            return true;
          } catch (ZooKeeperConnectionException e) {
            LOG.log(Level.WARNING, "Problem connecting to ZooKeeper, retrying", e);
            return false;
          } catch (KeeperException e) {
            if (!zkClient.shouldRetry(e)) {
              throw new IllegalStateException("Permanent problem re-watching group: " + path, e);
            }
            LOG.log(Level.WARNING, "Temporary error re-watching group: " + path, e);
            return false;
          }
        }
      };

  GroupMonitor(GroupChangeListener groupChangeListener) {
    this.groupChangeListener = groupChangeListener;
  }

  /** Re-establishes the watch, retrying with backoff; a no-op once stopped. */
  private void tryWatchGroup() {
    if (!stopped) {
      try {
        backoffHelper.doUntilSuccess(tryWatchGroup);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new RuntimeException(
            String.format("Interrupted while trying to re-watch group: %s, giving up", path), e);
      }
    }
  }

  /** Reads the group's children with the watcher attached and publishes the member ids. */
  private void watchGroup()
      throws ZooKeeperConnectionException, InterruptedException, KeeperException {
    if (!stopped) {
      List<String> childNames = zkClient.get().getChildren(path, groupWatcher);
      setMembers(Iterables.filter(childNames, nodeNameFilter));
    }
  }

  /** Suppresses all further notifications; the ZooKeeper watch itself is left in place. */
  private void stopWatching() {
    // TODO(William Farner): Cancel the watch when
    // https://issues.apache.org/jira/browse/ZOOKEEPER-442 is resolved.
    LOG.info("Stopping watch on " + this);
    stopped = true;
  }

  /**
   * Records the latest member ids and notifies the listener if they differ from the previously
   * recorded set.
   */
  synchronized void setMembers(Iterable<String> members) {
    if (stopped) {
      LOG.info("Suppressing membership update, no longer watching " + this);
      return;
    }

    boolean noMembersRecordedYet = (this.members == null);
    if (noMembersRecordedYet) {
      // Reset our watch on the group if session expires - only needs to be registered once.
      zkClient.registerExpirationHandler(new Command() {
        @Override public void execute() {
          tryWatchGroup();
        }
      });
    }

    Set<String> snapshot = ImmutableSet.copyOf(members);
    if (snapshot.equals(this.members)) {
      return;
    }
    groupChangeListener.onGroupChange(members);
    this.members = snapshot;
  }
}
/**
 * The default naming scheme. Member nodes live at [given path] + "/" + [given prefix] +
 * ZooKeeper-generated member ID. For example, with a path of "/discovery/servicename" and a
 * prefix of "member_", a member node's full path will look something like
 * {@code /discovery/servicename/member_0000000007}.
 */
public static class DefaultScheme implements NodeScheme {
  private final String namePrefix;
  private final Pattern namePattern;

  /**
   * Creates a sequential node scheme based on the given node name prefix.
   *
   * @param namePrefix the prefix for the names of the member nodes
   */
  public DefaultScheme(String namePrefix) {
    this.namePrefix = MorePreconditions.checkNotBlank(namePrefix);

    // The literal prefix followed by an optionally negative decimal number.
    String memberNameRegex = "^" + Pattern.quote(namePrefix) + "-?[0-9]+$";
    this.namePattern = Pattern.compile(memberNameRegex);
  }

  /** Always {@code true}: this scheme uses sequential member nodes. */
  @Override
  public boolean isSequential() {
    return true;
  }

  /** Returns the name prefix; the membership data is not consulted. */
  @Override
  public String createName(byte[] membershipData) {
    return this.namePrefix;
  }

  /** Accepts node names made of the prefix followed by a (possibly negative) number. */
  @Override
  public boolean isMember(String nodeName) {
    return this.namePattern.matcher(nodeName).matches();
  }
}
/** Returns {@code "Group "} followed by this group's path. */
@Override
public String toString() {
  return new StringBuilder("Group ").append(path).toString();
}
}