/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.aurora.common.zookeeper;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Predicates;
import com.google.common.base.Supplier;
import com.google.common.base.Throwables;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.collect.Sets.SetView;
import com.google.common.util.concurrent.UncheckedExecutionException;
import org.apache.aurora.common.base.Command;
import org.apache.aurora.common.io.Codec;
import org.apache.aurora.common.net.pool.DynamicHostSet;
import org.apache.aurora.common.thrift.ServiceInstance;
import org.apache.aurora.common.thrift.Status;
import org.apache.aurora.common.util.BackoffHelper;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.data.ACL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.google.common.base.Preconditions.checkNotNull;
/**
* ZooKeeper-backed implementation of {@link ServerSet} and {@link DynamicHostSet}.
*/
public class ServerSetImpl implements ServerSet, DynamicHostSet<ServiceInstance> {
/** Logger used by this class and its nested helper classes. */
private static final Logger LOG = LoggerFactory.getLogger(ServerSetImpl.class);
/** Client used for interactions with ZooKeeper. */
private final ZooKeeperClient zkClient;
/** The server group that members join and that watchers observe. */
private final Group group;
/** Codec for serializing and de-serializing {@link ServiceInstance} data to and from bytes. */
private final Codec<ServiceInstance> codec;
/** Helper whose {@code doUntilResult} is used when fetching member data from ZooKeeper. */
private final BackoffHelper backoffHelper;
/**
* Creates a new ServerSet using open ZooKeeper node ACLs
* ({@code ZooDefs.Ids.OPEN_ACL_UNSAFE}).
*
* <p>Delegates to the constructor that accepts an explicit ACL.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param path the name-service path of the service to connect to
*/
public ServerSetImpl(ZooKeeperClient zkClient, String path) {
this(zkClient, ZooDefs.Ids.OPEN_ACL_UNSAFE, path);
}
/**
* Creates a new ServerSet for the given service {@code path}.
*
* <p>A new {@link Group} is built from the arguments and {@code JSON_CODEC} is used to
* serialize member data.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param acl the ACL to use for creating the persistent group path if it does not already exist
* @param path the name-service path of the service to connect to
*/
public ServerSetImpl(ZooKeeperClient zkClient, Iterable<ACL> acl, String path) {
this(zkClient, new Group(zkClient, acl, path), JSON_CODEC);
}
/**
* Creates a new ServerSet using the given service {@code group}, with {@code JSON_CODEC} used
* to serialize member data.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param group the server group
*/
public ServerSetImpl(ZooKeeperClient zkClient, Group group) {
this(zkClient, group, JSON_CODEC);
}
/**
 * Creates a new ServerSet backed by the given {@code group}, encoding member data with the
 * supplied {@code codec}. All other constructors end up here.
 *
 * @param zkClient the client to use for interactions with ZooKeeper
 * @param group the server group
 * @param codec a codec to use for serializing and de-serializing the ServiceInstance data to and
 *     from a byte array
 * @throws NullPointerException if any argument is {@code null}
 */
public ServerSetImpl(ZooKeeperClient zkClient, Group group, Codec<ServiceInstance> codec) {
  // Validate every argument up front, in declaration order, before touching any field.
  checkNotNull(zkClient);
  checkNotNull(group);
  checkNotNull(codec);

  this.zkClient = zkClient;
  this.group = group;
  this.codec = codec;

  // TODO(John Sirois): Inject the helper so that backoff strategy can be configurable.
  this.backoffHelper = new BackoffHelper();
}
/**
* Returns the ZooKeeper client this server set was constructed with; exposed for tests.
*
* @return the ZooKeeper client held by this instance
*/
@VisibleForTesting
ZooKeeperClient getZkClient() {
return zkClient;
}
/**
 * Joins the server group, advertising {@code endpoint} as the primary endpoint together with
 * the given {@code additionalEndpoints}. The member is published with {@code ALIVE} status.
 *
 * @param endpoint the primary service endpoint; must not be {@code null}
 * @param additionalEndpoints additional endpoints of the service; must not be {@code null}
 * @return a status handle that, when invoked, cancels the membership obtained by this call
 * @throws Group.JoinException if joining the group fails
 * @throws InterruptedException if interrupted while joining the group
 */
@Override
public EndpointStatus join(
    InetSocketAddress endpoint,
    Map<String, InetSocketAddress> additionalEndpoints)
    throws Group.JoinException, InterruptedException {
  // Arguments are null-checked in declaration order before anything else happens.
  final MemberStatus status =
      new MemberStatus(checkNotNull(endpoint), checkNotNull(additionalEndpoints));
  // The group asks this supplier for the bytes to store in the member node.
  final Supplier<byte[]> memberData = status::serializeServiceInstance;
  final Group.Membership joined = group.join(memberData);
  return () -> status.leave(joined);
}
/**
 * Registers {@code monitor} to be notified of changes to the membership of this server set.
 *
 * @param monitor the monitor to notify of server set changes
 * @return the command produced by the underlying group watch (presumably stops the watch when
 *     executed; confirm against the {@code DynamicHostSet} contract)
 * @throws MonitorException if the ZooKeeper watch cannot be established, or if the calling
 *     thread is interrupted while establishing it; in the latter case the thread's interrupt
 *     status is restored before this exception is thrown
 */
@Override
public Command watch(HostChangeMonitor<ServiceInstance> monitor) throws MonitorException {
  ServerSetWatcher serverSetWatcher = new ServerSetWatcher(zkClient, monitor);
  try {
    return serverSetWatcher.watch();
  } catch (Group.WatchException e) {
    throw new MonitorException("ZooKeeper watch failed.", e);
  } catch (InterruptedException e) {
    // The InterruptedException is translated into a MonitorException, so re-assert the interrupt
    // flag; otherwise callers further up the stack cannot tell the thread was interrupted.
    Thread.currentThread().interrupt();
    throw new MonitorException("Interrupted while watching ZooKeeper.", e);
  }
}
/**
 * Holds the endpoints advertised by one group member, produces the serialized
 * {@link ServiceInstance} stored for that member, and cancels the membership on request.
 */
private class MemberStatus {
  private final InetSocketAddress endpoint;
  private final Map<String, InetSocketAddress> additionalEndpoints;

  private MemberStatus(
      InetSocketAddress endpoint,
      Map<String, InetSocketAddress> additionalEndpoints) {
    this.endpoint = endpoint;
    this.additionalEndpoints = additionalEndpoints;
  }

  /**
   * Cancels the given group membership.
   *
   * @param membership the membership to cancel
   * @throws UpdateException if cancelling the membership fails
   */
  synchronized void leave(Group.Membership membership) throws UpdateException {
    try {
      membership.cancel();
    } catch (Group.JoinException e) {
      throw new UpdateException(
          "Failed to auto-cancel group membership on transition to DEAD status", e);
    }
  }

  /**
   * Builds a {@link ServiceInstance} with {@code ALIVE} status from this member's endpoints and
   * serializes it with the enclosing server set's codec.
   *
   * @return the serialized service instance
   * @throws IllegalStateException if serialization fails with an {@link IOException}
   */
  byte[] serializeServiceInstance() {
    ServiceInstance serviceInstance = new ServiceInstance(
        ServerSets.toEndpoint(endpoint),
        Maps.transformValues(additionalEndpoints, ServerSets.TO_ENDPOINT),
        Status.ALIVE);
    // Parameterized so the struct is only rendered when debug logging is enabled.
    LOG.debug("updating endpoint data to:\n\t{}", serviceInstance);
    try {
      return ServerSets.serializeServiceInstance(serviceInstance, codec);
    } catch (IOException e) {
      // Note the leading space before "to": the struct text must not run into the next word.
      throw new IllegalStateException("Unexpected problem serializing thrift struct "
          + serviceInstance + " to a byte[]", e);
    }
  }
}
/**
* Unchecked exception raised when the {@link ServiceInstance} data of a member cannot be fetched
* or deserialized.
*/
private static class ServiceInstanceFetchException extends RuntimeException {
ServiceInstanceFetchException(String message, Throwable cause) {
super(message, cause);
}
}
/**
* Unchecked exception raised when the ZooKeeper node of a member no longer exists at fetch time.
* The exception message is the path of the missing node.
*/
private static class ServiceInstanceDeletedException extends RuntimeException {
ServiceInstanceDeletedException(String path) {
super(path);
}
}
private class ServerSetWatcher {
// Client used to read member nodes and to (un)register the session expiration handler.
// Shadows the field of the same name on the enclosing ServerSetImpl.
private final ZooKeeperClient zkClient;
// Receives the new server set whenever a change in membership is detected.
private final HostChangeMonitor<ServiceInstance> monitor;
// Last server set delivered to the monitor; null until the first notification has been sent.
@Nullable private ImmutableSet<ServiceInstance> serverSet;
/**
* Creates a watcher that reports server set changes to {@code monitor}.
*
* @param zkClient the client to use for interactions with ZooKeeper
* @param monitor the monitor to notify of server set changes
*/
ServerSetWatcher(ZooKeeperClient zkClient, HostChangeMonitor<ServiceInstance> monitor) {
this.zkClient = zkClient;
this.monitor = monitor;
}
/**
 * Starts watching the group for membership changes and registers a session-expiration handler
 * that rebuilds the server set.
 *
 * <p>The expiration handler is unregistered both when the group watch cannot be established and
 * when the returned command is executed, so it does not outlive the watch.
 *
 * @return a command that executes the command returned by the group watch and then unregisters
 *     the expiration handler
 * @throws Group.WatchException if the group watch cannot be established
 * @throws InterruptedException if interrupted while establishing the group watch
 */
public Command watch() throws Group.WatchException, InterruptedException {
  final Watcher onExpirationWatcher = zkClient.registerExpirationHandler(this::rebuildServerSet);

  final Command stopGroupWatch;
  try {
    stopGroupWatch = group.watch(this::notifyGroupChange);
  } catch (Group.WatchException | InterruptedException e) {
    // The watch never started; do not leave the expiration handler behind.
    zkClient.unregister(onExpirationWatcher);
    throw e;
  }

  return () -> {
    try {
      stopGroupWatch.execute();
    } finally {
      // Without this the handler would keep rebuilding the server set (and notifying the
      // monitor) on every session expiry after the caller stopped watching.
      zkClient.unregister(onExpirationWatcher);
    }
  };
}
/**
 * Reads the data stored at {@code nodePath} (without setting a watch) and deserializes it into a
 * {@link ServiceInstance}, running the attempt through {@code backoffHelper.doUntilResult}.
 *
 * <p>Connection problems and retryable ZooKeeper errors make the attempt yield {@code null};
 * presumably this tells the backoff helper to try again (confirm against
 * {@code BackoffHelper}).
 *
 * @param nodePath the ZooKeeper path of the member node to read
 * @return the deserialized service instance
 * @throws ServiceInstanceDeletedException if the node no longer exists; the corresponding cache
 *     entry is invalidated first
 * @throws ServiceInstanceFetchException if interrupted (the interrupt status is restored), if a
 *     non-retryable ZooKeeper error occurs, or if the data cannot be deserialized
 */
private ServiceInstance getServiceInstance(final String nodePath) {
  try {
    return backoffHelper.doUntilResult(() -> {
      try {
        byte[] data = zkClient.get().getData(nodePath, false, null);
        return ServerSets.deserializeServiceInstance(data, codec);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new ServiceInstanceFetchException(
            "Interrupted updating service data for: " + nodePath, e);
      } catch (ZooKeeperClient.ZooKeeperConnectionException e) {
        // A trailing Throwable argument is logged as the exception, not as a placeholder value.
        LOG.warn("Temporary error trying to update service data for: {}", nodePath, e);
        return null;
      } catch (NoNodeException e) {
        // Must come before the generic KeeperException handler: a vanished node is not an error.
        invalidateNodePath(nodePath);
        throw new ServiceInstanceDeletedException(nodePath);
      } catch (KeeperException e) {
        if (zkClient.shouldRetry(e)) {
          LOG.warn("Temporary error trying to update service data for: {}", nodePath, e);
          return null;
        } else {
          throw new ServiceInstanceFetchException(
              "Failed to update service data for: " + nodePath, e);
        }
      } catch (IOException e) {
        throw new ServiceInstanceFetchException(
            "Failed to deserialize the ServiceInstance data for: " + nodePath, e);
      }
    });
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
    throw new ServiceInstanceFetchException(
        "Interrupted trying to update service data for: " + nodePath, e);
  }
}
// Cache of member data keyed by member id. A miss loads the ServiceInstance from the member's
// ZooKeeper node via getServiceInstance. The builder configures no size or expiry bounds; within
// this class entries are dropped by explicit invalidation or by removal through the asMap() view.
private final LoadingCache<String, ServiceInstance> servicesByMemberId =
CacheBuilder.newBuilder().build(new CacheLoader<String, ServiceInstance>() {
@Override public ServiceInstance load(String memberId) {
return getServiceInstance(group.getMemberPath(memberId));
}
});
/**
 * Discards all cached member data and re-runs change notification for the member ids that were
 * cached, forcing each of them to be fetched again.
 */
private void rebuildServerSet() {
  // Snapshot the ids first: the key set is a live view that is emptied by invalidateAll().
  final Set<String> knownMemberIds = ImmutableSet.copyOf(servicesByMemberId.asMap().keySet());
  servicesByMemberId.invalidateAll();
  notifyGroupChange(knownMemberIds);
}
/**
 * Evicts the cached member data that corresponds to the given node path.
 *
 * @param deletedPath the ZooKeeper path of the member node
 * @return the member id the group derives from {@code deletedPath}
 */
private String invalidateNodePath(String deletedPath) {
  final String evictedMemberId = group.getMemberId(deletedPath);
  servicesByMemberId.invalidate(evictedMemberId);
  return evictedMemberId;
}
/**
 * Maps a member id to its {@link ServiceInstance}, loading it through the cache when absent.
 * Yields {@code null} when the member's node has been deleted; rethrows
 * {@code ServiceInstanceFetchException} as-is and wraps any other load failure in an
 * {@link IllegalStateException}.
 */
private final Function<String, ServiceInstance> MAYBE_FETCH_NODE =
    memberId -> {
      try {
        // A cache miss here triggers the fetch from ZooKeeper.
        return servicesByMemberId.getUnchecked(memberId);
      } catch (UncheckedExecutionException e) {
        final Throwable loadFailure = e.getCause();
        if (loadFailure instanceof ServiceInstanceDeletedException) {
          // The member disappeared between listing and fetching; treat it as absent.
          return null;
        }
        Throwables.propagateIfInstanceOf(loadFailure, ServiceInstanceFetchException.class);
        throw new IllegalStateException(
            "Unexpected error fetching member data for: " + memberId, e);
      }
    };
/**
 * Reconciles the cache with the given group membership and forwards the resulting server set.
 *
 * <p>Nothing happens when a server set has already been published and the member ids match the
 * cached ids. Otherwise cached entries for members that are no longer present are dropped, data
 * for the current members is obtained (fetching where not cached), members whose node has been
 * deleted are skipped, and the result is handed to {@code notifyServerSetChange}.
 *
 * @param memberIds the ids of the current group members
 */
private synchronized void notifyGroupChange(Iterable<String> memberIds) {
  final ImmutableSet<String> incomingIds = ImmutableSortedSet.copyOf(memberIds);
  // Live view onto the cache keys: removing from it evicts the cache entries.
  final Set<String> cachedIds = servicesByMemberId.asMap().keySet();

  // Ignore no-op state changes except for the 1st when we've seen no group yet.
  if ((serverSet != null) && incomingIds.equals(cachedIds)) {
    return;
  }

  // Copy the difference before removing, since it is a view backed by cachedIds itself.
  cachedIds.removeAll(ImmutableSet.copyOf(Sets.difference(cachedIds, incomingIds)));

  final Iterable<ServiceInstance> fetched = Iterables.transform(incomingIds, MAYBE_FETCH_NODE);
  final Iterable<ServiceInstance> present = Iterables.filter(fetched, Predicates.notNull());
  notifyServerSetChange(ImmutableSet.copyOf(present));
}
/**
 * Publishes {@code currentServerSet} to the monitor if it differs from the last published set,
 * logging the transition first.
 *
 * @param currentServerSet the server set computed from the current group membership
 */
private void notifyServerSetChange(ImmutableSet<ServiceInstance> currentServerSet) {
  // ZK nodes may have changed if there was a session expiry for a server in the server set, but
  // if the server's status has not changed, we can skip any onChange updates.
  if (currentServerSet.equals(serverSet)) {
    return;
  }

  if (currentServerSet.isEmpty()) {
    LOG.warn("server set empty for path " + group.getPath());
  } else if (serverSet == null) {
    LOG.info("received initial membership {}", currentServerSet);
  } else {
    logChange(currentServerSet);
  }

  serverSet = currentServerSet;
  monitor.onChange(serverSet);
}
/**
 * Logs, at info level, how {@code newServerSet} differs from the currently published server
 * set: the size change (if any) followed by the members that left and the members that joined.
 *
 * @param newServerSet the server set about to replace the current one
 */
private void logChange(ImmutableSet<ServiceInstance> newServerSet) {
  final StringBuilder summary =
      new StringBuilder("server set ").append(group.getPath()).append(" change: ");

  final int previousSize = serverSet.size();
  final int currentSize = newServerSet.size();
  if (previousSize != currentSize) {
    summary.append("from ").append(previousSize).append(" members to ").append(currentSize);
  }

  final Joiner memberJoiner = Joiner.on("\n\t\t");

  final Set<ServiceInstance> departed = Sets.difference(serverSet, newServerSet);
  if (!departed.isEmpty()) {
    summary.append("\n\tleft:\n\t\t").append(memberJoiner.join(departed));
  }

  final Set<ServiceInstance> arrived = Sets.difference(newServerSet, serverSet);
  if (!arrived.isEmpty()) {
    summary.append("\n\tjoined:\n\t\t").append(memberJoiner.join(arrived));
  }

  LOG.info(summary.toString());
}
}
}