/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.aurora.scheduler.discovery;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import com.google.common.collect.Maps;
import com.google.common.io.Closer;
import org.apache.aurora.common.base.MorePreconditions;
import org.apache.aurora.common.io.Codec;
import org.apache.aurora.common.thrift.Endpoint;
import org.apache.aurora.common.thrift.ServiceInstance;
import org.apache.aurora.common.thrift.Status;
import org.apache.aurora.common.zookeeper.SingletonService;
import org.apache.aurora.scheduler.base.AsyncUtil;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.recipes.leader.CancelLeadershipException;
import org.apache.curator.framework.recipes.leader.LeaderSelector;
import org.apache.curator.framework.recipes.leader.LeaderSelectorListener;
import org.apache.curator.framework.recipes.nodes.PersistentNode;
import org.apache.curator.framework.state.ConnectionState;
import org.apache.curator.utils.PathUtils;
import org.apache.curator.utils.ZKPaths;
import org.apache.zookeeper.CreateMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.util.Objects.requireNonNull;
/**
 * A {@link SingletonService} implemented on top of Curator: leadership of the group is contended
 * for with a {@link LeaderSelector}, and the elected leader can advertise itself by creating a
 * member node under the group path.
 */
class CuratorSingletonService implements SingletonService {
  private static final Logger LOG = LoggerFactory.getLogger(CuratorSingletonService.class);

  // This is the complement of the CuratorServiceGroupMonitor, it allows advertisement of a leader
  // in a service group.
  private static class Advertiser {
    private final String groupPath;
    private final String memberToken;
    private final CuratorFramework client;
    private final Codec<ServiceInstance> codec;

    /**
     * @param client The client used to create the advertisement node; must not be null.
     * @param groupPath The path the member node is created under; validated as a ZooKeeper path.
     * @param memberToken The node name prefix for the member node; must not be blank.
     * @param codec Serializes the advertised {@link ServiceInstance}; must not be null.
     */
    Advertiser(
        CuratorFramework client,
        String groupPath,
        String memberToken,
        Codec<ServiceInstance> codec) {

      this.client = requireNonNull(client);
      this.groupPath = PathUtils.validatePath(groupPath);
      this.memberToken = MorePreconditions.checkNotBlank(memberToken);
      this.codec = requireNonNull(codec);
    }

    /**
     * Publishes the given endpoints as an ephemeral sequential member node and blocks until the
     * node has been created for the first time.
     *
     * @param closer Receives the created node so that it is closed together with the other
     *     leadership resources.
     * @param endpoint The primary endpoint to advertise.
     * @param additionalEndpoints Named auxiliary endpoints to advertise alongside the primary one.
     * @throws AdvertiseException If the advertisement cannot be serialized or the wait for the
     *     initial node creation reports a timeout.
     * @throws InterruptedException If interrupted while waiting for the initial node creation.
     */
    void advertise(
        Closer closer,
        InetSocketAddress endpoint,
        Map<String, InetSocketAddress> additionalEndpoints)
        throws AdvertiseException, InterruptedException {

      byte[] nodeData = serializeAdvertisement(endpoint, additionalEndpoints);
      PersistentNode persistentNode =
          new PersistentNode(
              client,
              CreateMode.EPHEMERAL_SEQUENTIAL,

              // TODO(John Sirois): Enable GUID protection once clients are updated to support
              // its effects on group member node naming. We get nodes like:
              //   4f5f98c4-8e71-41e3-8c8d-1c9a1f5f5df9-member_000000001
              // Clients expect member_ is the prefix and are not prepared for the GUID.
              false /* GUID protection */,

              ZKPaths.makePath(groupPath, memberToken),
              nodeData);
      persistentNode.start();
      closer.register(persistentNode);

      // NB: This blocks on initial server set node population to emulate legacy
      // SingletonService.LeaderControl.advertise (Group.join) behavior. Asynchronous
      // population is an option though, we simply need to remove this wait.
      if (!persistentNode.waitForInitialCreate(Long.MAX_VALUE, TimeUnit.DAYS)) {
        throw new AdvertiseException("Timed out waiting for leader advertisement.");
      }
    }

    /**
     * Builds an {@code ALIVE} {@link ServiceInstance} for the endpoints and serializes it with the
     * configured codec.
     *
     * @throws AdvertiseException If the codec fails with an {@link IOException}.
     */
    private byte[] serializeAdvertisement(
        InetSocketAddress endpoint,
        Map<String, InetSocketAddress> additionalEndpoints)
        throws AdvertiseException {

      ServiceInstance serviceInstance =
          new ServiceInstance(
              asEndpoint(endpoint),
              Maps.transformValues(additionalEndpoints, Advertiser::asEndpoint),
              Status.ALIVE);

      ByteArrayOutputStream sink = new ByteArrayOutputStream();
      try {
        codec.serialize(serviceInstance, sink);
      } catch (IOException e) {
        throw new AdvertiseException(
            "Problem serializing service instance data for " + serviceInstance, e);
      }
      return sink.toByteArray();
    }

    /** Converts a socket address to the thrift {@link Endpoint} (host name and port). */
    private static Endpoint asEndpoint(InetSocketAddress endpoint) {
      return new Endpoint(endpoint.getHostName(), endpoint.getPort());
    }
  }

  private final Advertiser advertiser;
  private final CuratorFramework client;
  private final String groupPath;

  /**
   * Creates a {@code SingletonService} backed by Curator.
   *
   * @param client A client to interact with a ZooKeeper ensemble.
   * @param groupPath The root ZooKeeper path service members advertise their presence under.
   * @param memberToken A token used to form service member node names.
   * @param codec A codec that can be used to serialize group member {@link ServiceInstance} data.
   */
  CuratorSingletonService(
      CuratorFramework client,
      String groupPath,
      String memberToken,
      Codec<ServiceInstance> codec) {

    // The Advertiser constructor null-checks the client and validates the remaining arguments.
    advertiser = new Advertiser(client, groupPath, memberToken, codec);
    this.client = client;
    this.groupPath = PathUtils.validatePath(groupPath);
  }

  /**
   * Starts contending for leadership of the group.
   *
   * <p>When leadership is acquired, {@code listener.onLeading} is invoked with a
   * {@code LeaderControl} that can advertise the given endpoints and later abdicate via
   * {@code leave()}. When the connection state becomes {@link ConnectionState#LOST},
   * {@code listener.onDefeated} is invoked.
   *
   * @param endpoint The primary endpoint to advertise once leading; its host name is also used as
   *     the leader selector id.
   * @param additionalEndpoints Named auxiliary endpoints to advertise once leading.
   * @param listener Receives leadership change notifications.
   * @throws LeadException If the leader selector could not be started.
   * @throws InterruptedException Declared by the signature. NOTE(review): nothing in this method
   *     body appears to throw it directly.
   */
  @Override
  public synchronized void lead(
      InetSocketAddress endpoint,
      Map<String, InetSocketAddress> additionalEndpoints,
      LeadershipListener listener)
      throws LeadException, InterruptedException {

    requireNonNull(endpoint);
    requireNonNull(additionalEndpoints);
    requireNonNull(listener);

    Closer closer = Closer.create();

    CountDownLatch giveUpLeadership = new CountDownLatch(1);

    // We do not use the suggested `LeaderSelectorListenerAdapter` or the LeaderLatch class
    // because we want to have precise control over state changes. By default the listener and the
    // latch class treat `SUSPENDED` (connection loss) as fatal and a reason to lose leadership.
    // To make the scheduler resilient to connection blips and long GC pauses, we only treat
    // `LOST` (session loss) as fatal.

    ExecutorService executor =
        AsyncUtil.singleThreadLoggingScheduledExecutor("LeaderSelector-%d", LOG);

    LeaderSelectorListener leaderSelectorListener = new LeaderSelectorListener() {
      @Override
      public void takeLeadership(CuratorFramework curatorFramework) throws Exception {
        listener.onLeading(new LeaderControl() {
          @Override
          public void advertise() throws AdvertiseException, InterruptedException {
            advertiser.advertise(closer, endpoint, additionalEndpoints);
          }

          @Override
          public void leave() throws LeaveException {
            try {
              // Release the blocked takeLeadership call first, then tear down everything that
              // was registered while leading (the selector and any advertisement node).
              giveUpLeadership.countDown();
              closer.close();
            } catch (IOException e) {
              throw new LeaveException("Failed to abdicate leadership of group at " + groupPath, e);
            }
          }
        });

        // The contract is to block as long as we want leadership. The leader never gives up
        // leadership voluntarily, only when asked to shutdown so we block until our shutdown
        // callback has been executed or we have lost our ZK connection.
        giveUpLeadership.await();
      }

      @Override
      public void stateChanged(CuratorFramework curatorFramework, ConnectionState newState) {
        if (newState == ConnectionState.LOST) {
          giveUpLeadership.countDown();
          listener.onDefeated();
          throw new CancelLeadershipException();
        }
      }
    };

    LeaderSelector leaderSelector =
        new LeaderSelector(client, groupPath, executor, leaderSelectorListener);

    leaderSelector.setId(endpoint.getHostName());

    // Register the selector BEFORE starting it. Once started, takeLeadership may run on the
    // executor thread and the listener may call leave() right away; if the selector were only
    // registered after start() returned, that close could miss it (leaving the selector running)
    // and would race with the registration on the closer. The closer is only ever closed through
    // a LeaderControl, which is only handed out from takeLeadership, so a selector that failed to
    // start is never closed through it.
    closer.register(leaderSelector);

    try {
      leaderSelector.start();
    } catch (Exception e) {
      // NB: We failed to lead; so we never could have advertised and there is no need to close the
      // closer. The executor, however, was created solely for this selector, so shut it down
      // rather than leaking it.
      executor.shutdownNow();
      throw new LeadException("Failed to begin awaiting leadership of group " + groupPath, e);
    }
  }
}