// =================================================================================================
// Copyright 2011 Twitter, Inc.
// -------------------------------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this work except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file, or at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =================================================================================================
package com.twitter.common.zookeeper;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.annotation.Nullable;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.base.Throwables;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.MapDifference;
import com.google.common.collect.MapDifference.ValueDifference;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.collect.Sets.SetView;
import com.google.common.util.concurrent.UncheckedExecutionException;
import com.google.gson.GsonBuilder;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.Watcher.Event.KeeperState;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.data.ACL;
import com.twitter.common.args.Arg;
import com.twitter.common.args.CmdLine;
import com.twitter.common.base.Command;
import com.twitter.common.base.Function;
import com.twitter.common.base.Supplier;
import com.twitter.common.io.Codec;
import com.twitter.common.io.CompatibilityCodec;
import com.twitter.common.io.JsonCodec;
import com.twitter.common.io.ThriftCodec;
import com.twitter.common.util.BackoffHelper;
import com.twitter.common.zookeeper.Group.GroupChangeListener;
import com.twitter.common.zookeeper.Group.JoinException;
import com.twitter.common.zookeeper.Group.Membership;
import com.twitter.common.zookeeper.Group.WatchException;
import com.twitter.common.zookeeper.ZooKeeperClient.ZooKeeperConnectionException;
import com.twitter.thrift.Endpoint;
import com.twitter.thrift.ServiceInstance;
import com.twitter.thrift.Status;
/**
* Implementation of {@link ServerSet}.
*
* @author John Sirois
*/
public class ServerSetImpl implements ServerSet {
private static final Logger LOG = Logger.getLogger(ServerSetImpl.class.getName());
@CmdLine(name = "serverset_encode_json",
help = "If true, use JSON for encoding server set information. Defaults to false (use Thrift).")
private static final Arg<Boolean> ENCODE_JSON = Arg.create(false);
private final ZooKeeperClient zkClient;
private final Group group;
private final Codec<ServiceInstance> codec;
private final BackoffHelper backoffHelper;
/**
* Creates a new ServerSet using open ZooKeeper node ACLs.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param path the name-service path of the service to connect to
*/
public ServerSetImpl(ZooKeeperClient zkClient, String path) {
this(zkClient, ZooDefs.Ids.OPEN_ACL_UNSAFE, path);
}
/**
* Creates a new ServerSet for the given service {@code path}.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param acl the ACL to use for creating the persistent group path if it does not already exist
* @param path the name-service path of the service to connect to
*/
public ServerSetImpl(ZooKeeperClient zkClient, Iterable<ACL> acl, String path) {
this(zkClient, new Group(zkClient, acl, path), createDefaultCodec());
}
/**
* Creates a new ServerSet using the given service {@code group}.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param group the server group
*/
public ServerSetImpl(ZooKeeperClient zkClient, Group group) {
this(zkClient, group, createDefaultCodec());
}
/**
* Creates a new ServerSet using the given service {@code group} and a custom {@code codec}.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param group the server group
* @param codec a codec to use for serializing and de-serializing the ServiceInstance data to and
* from a byte array
*/
public ServerSetImpl(ZooKeeperClient zkClient, Group group, Codec<ServiceInstance> codec) {
this.zkClient = Preconditions.checkNotNull(zkClient);
this.group = Preconditions.checkNotNull(group);
this.codec = Preconditions.checkNotNull(codec);
// TODO(John Sirois): Inject the helper so that backoff strategy can be configurable.
backoffHelper = new BackoffHelper();
}
@VisibleForTesting
ZooKeeperClient getZkClient() {
return zkClient;
}
@Override
public EndpointStatus join(InetSocketAddress endpoint,
Map<String, InetSocketAddress> additionalEndpoints, Status status)
throws JoinException, InterruptedException {
Preconditions.checkNotNull(endpoint);
Preconditions.checkNotNull(additionalEndpoints);
Preconditions.checkNotNull(status);
final MemberStatus memberStatus = new MemberStatus(endpoint, additionalEndpoints, status);
Supplier<byte[]> serviceInstanceSupplier = new Supplier<byte[]>() {
@Override public byte[] get() {
return memberStatus.serializeServiceInstance();
}
};
final Membership membership = group.join(serviceInstanceSupplier);
return new EndpointStatus() {
@Override public void update(Status status) throws UpdateException {
Preconditions.checkNotNull(status);
memberStatus.updateStatus(membership, status);
}
};
}
@Override
public void monitor(final HostChangeMonitor<ServiceInstance> monitor) throws MonitorException {
ServerSetWatcher serverSetWatcher = new ServerSetWatcher(zkClient, monitor);
try {
serverSetWatcher.watch();
} catch (WatchException e) {
throw new MonitorException("ZooKeeper watch failed.", e);
} catch (InterruptedException e) {
throw new MonitorException("Interrupted while watching ZooKeeper.", e);
}
}
private class MemberStatus {
private final InetSocketAddress endpoint;
private final Map<String, InetSocketAddress> additionalEndpoints;
private volatile Status status;
private MemberStatus(InetSocketAddress endpoint,
Map<String, InetSocketAddress> additionalEndpoints, Status status) {
this.endpoint = endpoint;
this.additionalEndpoints = additionalEndpoints;
this.status = status;
}
synchronized void updateStatus(Membership membership, Status status) throws UpdateException {
if (this.status != status) {
this.status = status;
if (Status.DEAD == status) {
try {
membership.cancel();
} catch (JoinException e) {
throw new UpdateException(
"Failed to auto-cancel group membership on transition to DEAD status", e);
}
} else {
try {
membership.updateMemberData();
} catch (Group.UpdateException e) {
throw new UpdateException(
"Failed to update service data for: " + membership.getMemberPath(), e);
}
}
}
}
byte[] serializeServiceInstance() {
ServiceInstance serviceInstance =
new ServiceInstance(toEndpoint(endpoint),
Maps.transformValues(additionalEndpoints, TO_ENDPOINT), status);
LOG.info("updating endpoint data to:\n\t" + serviceInstance);
ByteArrayOutputStream output = new ByteArrayOutputStream();
try {
codec.serialize(serviceInstance, output);
} catch (IOException e) {
throw new IllegalStateException("Unexpected problem serializing thrift struct: " +
serviceInstance + " to a byte[]", e);
}
return output.toByteArray();
}
}
private static final Function<InetSocketAddress, Endpoint> TO_ENDPOINT =
new Function<InetSocketAddress, Endpoint>() {
@Override public Endpoint apply(InetSocketAddress address) {
return toEndpoint(address);
}
};
private static Endpoint toEndpoint(InetSocketAddress address) {
return new Endpoint(address.getHostName(), address.getPort());
}
private static class ServiceInstanceFetchException extends RuntimeException {
ServiceInstanceFetchException(String message, Throwable cause) {
super(message, cause);
}
}
private static class ServiceInstanceDeletedException extends RuntimeException {
ServiceInstanceDeletedException(String path) {
super(path);
}
}
private static final Function<ServiceInstance, Endpoint> GET_PRIMARY_ENDPOINT =
new Function<ServiceInstance, Endpoint>() {
@Override public Endpoint apply(ServiceInstance serviceInstance) {
return serviceInstance.getServiceEndpoint();
}
};
private class ServerSetWatcher {
private final ZooKeeperClient zkClient;
private final HostChangeMonitor<ServiceInstance> monitor;
@Nullable private ImmutableSet<ServiceInstance> serverSet;
ServerSetWatcher(ZooKeeperClient zkClient, HostChangeMonitor<ServiceInstance> monitor) {
this.zkClient = zkClient;
this.monitor = monitor;
}
public void watch() throws WatchException, InterruptedException {
zkClient.registerExpirationHandler(new Command() {
@Override public void execute() {
// Servers may have changed Status while we were disconnected from ZooKeeper, check and
// re-register our node watches.
rebuildServerSet();
}
});
group.watch(new GroupChangeListener() {
@Override public void onGroupChange(Iterable<String> memberIds) {
notifyGroupChange(memberIds);
}
});
}
private Watcher serviceInstanceWatcher = new Watcher() {
@Override public void process(WatchedEvent event) {
if (event.getState() == KeeperState.SyncConnected) {
switch (event.getType()) {
case None:
// Ignore re-connects that happen while we're watching
break;
case NodeDeleted:
// Ignore deletes since these trigger a group change through the group node watch.
break;
case NodeDataChanged:
notifyNodeChange(event.getPath());
break;
case NodeCreated:
// This watcher is only applied to ephemeral sequential server set member nodes we
// already know the path of (ie: the ephemeral sequential exists and we're told about
// this by reading children). Its not clear how we can get a NodeCreated event for a
// node we already know about - but this appears to occur in the wild. Firing a
// change here is safe even if the event path does not represent a server set member.
// The node de-serializer will throw ServiceInstanceFetchException in this case and
// these exceptions are logged and filtered out of member sets.
notifyNodeChange(event.getPath());
// TODO(John Sirois): inject a Statsprovider and track these events in a stat
LOG.warning("Unexpected NodeCreated event while watching service node: " +
event.getPath());
break;
default:
LOG.severe("Unexpected event watching service node: " + event);
}
}
}
};
private ServiceInstance getServiceInstance(final String nodePath) {
try {
return backoffHelper.doUntilResult(new Supplier<ServiceInstance>() {
@Override public ServiceInstance get() {
try {
byte[] data = zkClient.get().getData(nodePath, serviceInstanceWatcher, null);
return codec.deserialize(new ByteArrayInputStream(data));
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new ServiceInstanceFetchException(
"Interrupted updating service data for: " + nodePath, e);
} catch (ZooKeeperConnectionException e) {
LOG.log(Level.WARNING,
"Temporary error trying to updating service data for: " + nodePath, e);
return null;
} catch (NoNodeException e) {
invalidateNodePath(nodePath);
throw new ServiceInstanceDeletedException(nodePath);
} catch (KeeperException e) {
if (zkClient.shouldRetry(e)) {
LOG.log(Level.WARNING,
"Temporary error trying to update service data for: " + nodePath, e);
return null;
} else {
throw new ServiceInstanceFetchException(
"Failed to update service data for: " + nodePath, e);
}
} catch (IOException e) {
throw new ServiceInstanceFetchException(
"Failed to deserialize the ServiceInstance data for: " + nodePath, e);
}
}
});
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new ServiceInstanceFetchException(
"Interrupted trying to update service data for: " + nodePath, e);
}
}
private final LoadingCache<String, ServiceInstance> servicesByMemberId =
CacheBuilder.newBuilder().build(new CacheLoader<String, ServiceInstance>() {
@Override public ServiceInstance load(String memberId) {
return getServiceInstance(group.getMemberPath(memberId));
}
});
private void rebuildServerSet() {
Set<String> memberIds = ImmutableSet.copyOf(servicesByMemberId.asMap().keySet());
servicesByMemberId.invalidateAll();
notifyGroupChange(memberIds);
}
private void notifyNodeChange(String changedPath) {
// Invalidate the associated ServiceInstance to trigger a fetch on group notify.
String memberId = invalidateNodePath(changedPath);
notifyGroupChange(
Iterables.concat(servicesByMemberId.asMap().keySet(), ImmutableList.of(memberId)));
}
private String invalidateNodePath(String deletedPath) {
String memberId = group.getMemberId(deletedPath);
servicesByMemberId.invalidate(memberId);
return memberId;
}
private final Function<String, ServiceInstance> MAYBE_FETCH_NODE =
new Function<String, ServiceInstance>() {
@Override public ServiceInstance apply(String memberId) {
// This get will trigger a fetch
try {
return servicesByMemberId.getUnchecked(memberId);
} catch (UncheckedExecutionException e) {
Throwable cause = e.getCause();
if (!(cause instanceof ServiceInstanceDeletedException)) {
Throwables.propagateIfInstanceOf(cause, ServiceInstanceFetchException.class);
throw new IllegalStateException(
"Unexpected error fetching member data for: " + memberId, e);
}
return null;
}
}
};
private synchronized void notifyGroupChange(Iterable<String> memberIds) {
ImmutableSet<String> newMemberIds = ImmutableSortedSet.copyOf(memberIds);
Set<String> existingMemberIds = servicesByMemberId.asMap().keySet();
// Ignore no-op state changes except for the 1st when we've seen no group yet.
if ((serverSet == null) || !newMemberIds.equals(existingMemberIds)) {
SetView<String> deletedMemberIds = Sets.difference(existingMemberIds, newMemberIds);
// Implicit removal from servicesByMemberId.
existingMemberIds.removeAll(ImmutableSet.copyOf(deletedMemberIds));
Iterable<ServiceInstance> serviceInstances = Iterables.filter(
Iterables.transform(newMemberIds, MAYBE_FETCH_NODE), Predicates.notNull());
notifyServerSetChange(ImmutableSet.copyOf(serviceInstances));
}
}
private void notifyServerSetChange(ImmutableSet<ServiceInstance> currentServerSet) {
// ZK nodes may have changed if there was a session expiry for a server in the server set, but
// if the server's status has not changed, we can skip any onChange updates.
if (!currentServerSet.equals(serverSet)) {
if (currentServerSet.isEmpty()) {
LOG.warning("server set empty!");
} else {
if (LOG.isLoggable(Level.INFO)) {
if (serverSet == null) {
LOG.info("received initial membership " + currentServerSet);
} else {
logChange(Level.INFO, currentServerSet);
}
}
}
serverSet = currentServerSet;
monitor.onChange(serverSet);
}
}
private void logChange(Level level, ImmutableSet<ServiceInstance> newServerSet) {
StringBuilder message = new StringBuilder("server set change: ");
if (serverSet.size() != newServerSet.size()) {
message.append("from ").append(serverSet.size())
.append(" members to ").append(newServerSet.size());
}
MapDifference<Endpoint, ServiceInstance> changes =
Maps.difference(
Maps.uniqueIndex(serverSet, GET_PRIMARY_ENDPOINT),
Maps.uniqueIndex(newServerSet, GET_PRIMARY_ENDPOINT));
Joiner joiner = Joiner.on("\n\t\t");
Map<Endpoint, ServiceInstance> left = changes.entriesOnlyOnLeft();
if (!left.isEmpty()) {
message.append("\n\tleft:\n\t\t").append(joiner.join(left.values()));
}
Map<Endpoint, ServiceInstance> joined = changes.entriesOnlyOnRight();
if (!joined.isEmpty()) {
message.append("\n\tjoined:\n\t\t").append(joiner.join(joined.values()));
}
Map<Endpoint, ValueDifference<ServiceInstance>> differing = changes.entriesDiffering();
if (!differing.isEmpty()) {
message.append("\n\tstatus changed:\n\t\t").append(joiner.join(differing.values()));
}
LOG.log(level, message.toString());
}
}
private static Codec<ServiceInstance> createCodec(final boolean useJsonEncoding) {
final Codec<ServiceInstance> json = JsonCodec.create(ServiceInstance.class, new GsonBuilder()
.setExclusionStrategies(JsonCodec.getThriftExclusionStrategy()).create());
final Codec<ServiceInstance> thrift = ThriftCodec.create(ServiceInstance.class,
ThriftCodec.BINARY_PROTOCOL);
final Predicate<byte[]> recognizer = new Predicate<byte[]>() {
public boolean apply(byte[] input) {
return (input.length > 1 && input[0] == '{' && input[1] == '\"') == useJsonEncoding;
}
};
if (useJsonEncoding) {
return CompatibilityCodec.create(json, thrift, 2, recognizer);
}
return CompatibilityCodec.create(thrift, json, 2, recognizer);
}
/**
* Creates a codec for {@link ServiceInstance} objects that uses Thrift binary encoding, and can
* decode both Thrift and JSON encodings.
*
* @return a new codec instance.
*/
public static Codec<ServiceInstance> createThriftCodec() {
return createCodec(false);
}
/**
* Creates a codec for {@link ServiceInstance} objects that uses JSON encoding, and can decode
* both Thrift and JSON encodings.
*
* @return a new codec instance.
*/
public static Codec<ServiceInstance> createJsonCodec() {
return createCodec(true);
}
/**
* Returns a codec for {@link ServiceInstance} objects that uses either the Thrift or the JSON
* encoding, depending on whether the command line argument <tt>serverset_json_encofing</tt> is
* set to <tt>true</tt>, and can decode both Thrift and JSON encodings.
*
* @return a new codec instance.
*/
public static Codec<ServiceInstance> createDefaultCodec() {
return createCodec(ENCODE_JSON.get());
}
}