/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.common.zookeeper.coordination;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.common.zookeeper.ZKExtOperations;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeMultimap;
import com.google.common.util.concurrent.AbstractService;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import org.apache.twill.common.Cancellable;
import org.apache.twill.common.Threads;
import org.apache.twill.discovery.Discoverable;
import org.apache.twill.discovery.DiscoveryServiceClient;
import org.apache.twill.discovery.ServiceDiscovered;
import org.apache.twill.zookeeper.NodeChildren;
import org.apache.twill.zookeeper.NodeData;
import org.apache.twill.zookeeper.ZKClient;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
* Allocates resources to registered handlers. It is expected to have a single instance of this class
* running per ZK namespace (determined by the ZKClient passed to the constructor). Users can use the
* {@link org.apache.twill.internal.zookeeper.LeaderElection} class to help achieve this requirement.
*
* ZK structure.
*
* <pre>
* {@code
* /requirements
* /[resource_name] - Contains ResourceRequirement encoded in json
* /assignments
* /[resource_name] - Contains ResourceAssignment encoded in json
* }
* </pre>
*
*/
public final class ResourceCoordinator extends AbstractService {
private static final Logger LOG = LoggerFactory.getLogger(ResourceCoordinator.class);
private final ZKClient zkClient;
private final DiscoveryServiceClient discoveryService;
private final AssignmentStrategy assignmentStrategy;
private final Map<String, ResourceRequirement> requirements;
private final Map<String, ResourceAssignment> assignments;
private final Map<String, CancellableServiceDiscovered> serviceDiscovered;
private final DiscoverableChangeListener discoverableListener;
// A single thread executor to process resource requests and perform allocation.
private ExecutorService executor;
/**
 * Creates a coordinator. No ZK or discovery call is made here; the executor is created in {@link #doStart()}.
 *
 * @param zkClient ZK client used to read resource requirements and to write or delete resource assignments
 * @param discoveryService client used to discover the handlers of a resource, looked up by the resource name
 * @param assignmentStrategy strategy invoked to assign partition replicas to the discovered handlers
 */
public ResourceCoordinator(ZKClient zkClient,
                           DiscoveryServiceClient discoveryService,
                           AssignmentStrategy assignmentStrategy) {
  // Local caches, all keyed by resource name.
  this.requirements = Maps.newHashMap();
  this.assignments = Maps.newHashMap();
  this.serviceDiscovered = Maps.newHashMap();

  // One listener instance is shared by all discovery subscriptions.
  this.discoverableListener = new DiscoverableChangeListener();

  // Collaborators supplied by the caller.
  this.assignmentStrategy = assignmentStrategy;
  this.discoveryService = discoveryService;
  this.zkClient = zkClient;
}
/**
 * Creates the single thread executor, begins watching the requirements node in ZK and
 * then reports this service as started.
 */
@Override
protected void doStart() {
  executor = Executors.newSingleThreadExecutor(Threads.createDaemonThreadFactory("resource-coordinator"));

  // The wrapped watcher drops events once the service is no longer starting or running.
  Watcher requirementsWatcher = wrapWatcher(new ResourceWatcher());
  beginWatch(requirementsWatcher);

  notifyStarted();
}
/**
 * Submits a shutdown task (with no failure cause) to the executor and then shuts the executor down.
 * The executor is shut down even if the submission throws.
 */
@Override
protected void doStop() {
  final Runnable shutdownTask = createShutdownTask(null);
  try {
    executor.execute(shutdownTask);
  } finally {
    executor.shutdown();
  }
}
/**
 * Reports a failure of this service and terminates it. A shutdown task carrying the given cause
 * is submitted to the executor, and the executor is shut down afterwards even if the submission throws.
 *
 * @param cause Reason for failure.
 */
private void doNotifyFailed(Throwable cause) {
  final Runnable failureTask = createShutdownTask(cause);
  try {
    executor.execute(failureTask);
  } finally {
    executor.shutdown();
  }
}
/**
 * Creates the task that performs the actual shutdown work: it cancels every service discovery
 * subscription and then notifies the service state.
 *
 * @param failureCause the failure to report through {@code notifyFailed}, or {@code null} to report
 *                     a normal stop through {@code notifyStopped}
 * @return a {@link Runnable} performing the shutdown
 */
private Runnable createShutdownTask(final Throwable failureCause) {
  return new Runnable() {
    @Override
    public void run() {
      cancelDiscoverySubscriptions();
      if (failureCause != null) {
        notifyFailed(failureCause);
        return;
      }
      notifyStopped();
    }

    // Cancels all subscriptions; a failure in one does not prevent cancelling the rest.
    private void cancelDiscoverySubscriptions() {
      for (Cancellable subscription : serviceDiscovered.values()) {
        try {
          subscription.cancel();
        } catch (Throwable t) {
          LOG.warn("Exception when cancelling service discovery listener.", t);
        }
      }
    }
  };
}
/**
 * Starts watching for changes in resource requirements by issuing an exists call (with the given watcher)
 * on the requirements node. If the node exists, all resources are fetched and processed. If the exists
 * call itself fails, this service is failed.
 *
 * @param watcher the watcher to set on the requirements node
 */
private void beginWatch(final Watcher watcher) {
  FutureCallback<Stat> existsCallback = new FutureCallback<Stat>() {
    @Override
    public void onSuccess(Stat result) {
      if (result == null) {
        // The node doesn't exist yet. That's ok, the watcher would handle it once it's created.
        return;
      }
      fetchAndProcessAllResources(watcher);
    }

    @Override
    public void onFailure(Throwable t) {
      // A failing exists call is not expected, hence terminate the service.
      LOG.error("Failed to call exists on ZK node {}{}",
                zkClient.getConnectString(), CoordinationConstants.REQUIREMENTS_PATH, t);
      doNotifyFailed(t);
    }
  };

  Futures.addCallback(zkClient.exists(CoordinationConstants.REQUIREMENTS_PATH, watcher),
                      wrapCallback(existsCallback), executor);
}
/**
 * Fetches all {@link ResourceRequirement} and performs assignment for the ones that changed. Also, it
 * removes assignments for the resource requirements that are removed.
 *
 * <p>If the requirements node does not exist (anymore), this falls back to {@link #beginWatch(Watcher)}
 * to wait for its creation. Any other failure of the getChildren call fails this service.
 *
 * @param watcher the watcher to set on the requirements node for children changes
 */
private void fetchAndProcessAllResources(final Watcher watcher) {
  Futures.addCallback(
    zkClient.getChildren(CoordinationConstants.REQUIREMENTS_PATH, watcher),
    wrapCallback(new FutureCallback<NodeChildren>() {
      @Override
      public void onSuccess(NodeChildren result) {
        Set<String> children = ImmutableSet.copyOf(result.getChildren());

        // Handle new resources. Every child is (re)fetched; unchanged requirements are detected and
        // skipped inside fetchAndProcessRequirement.
        for (String child : children) {
          String path = CoordinationConstants.REQUIREMENTS_PATH + "/" + child;
          Watcher requirementWatcher = wrapWatcher(new ResourceRequirementWatcher(path));
          fetchAndProcessRequirement(path, requirementWatcher);
        }

        // Handle removed resources. The difference is copied first because the loop mutates "requirements".
        // NOTE(review): the discovery subscription kept in "serviceDiscovered" for a removed resource is
        // not cancelled here; confirm whether that is intended.
        for (String removed : ImmutableSet.copyOf(Sets.difference(requirements.keySet(), children))) {
          ResourceRequirement requirement = requirements.remove(removed);
          LOG.info("Requirement deleted {}", requirement);
          // Delete the assignment node.
          removeAssignment(removed);
        }
      }

      @Override
      public void onFailure(Throwable t) {
        // If the resource path node doesn't exist, resort to watch for exists. This is a recoverable
        // situation, so it must not fall through to the failure handling below.
        if (t instanceof KeeperException.NoNodeException) {
          beginWatch(watcher);
          return;
        }

        // Otherwise, it's an unexpected failure.
        LOG.error("Failed to getChildren on ZK node {}{}",
                  zkClient.getConnectString(), CoordinationConstants.REQUIREMENTS_PATH, t);
        doNotifyFailed(t);
      }
    }), executor);
}
/**
 * Reads the data of a resource node, decodes it into a {@link ResourceRequirement} and, if the requirement
 * differs from the cached one, caches it and triggers resource assignment. Failures are only logged.
 *
 * @param path ZK path of the resource requirement node
 * @param watcher the watcher to set on the node for data changes
 */
private void fetchAndProcessRequirement(final String path, Watcher watcher) {
  FutureCallback<NodeData> dataCallback = new FutureCallback<NodeData>() {
    @Override
    public void onSuccess(NodeData result) {
      byte[] nodeData = result.getData();
      if (nodeData == null) {
        LOG.warn("Ignore empty data in ZK node {}{}", zkClient.getConnectString(), path);
        return;
      }

      try {
        ResourceRequirement requirement = CoordinationConstants.RESOURCE_REQUIREMENT_CODEC.decode(nodeData);
        LOG.info("Get requirement {}", requirement);
        String resourceName = requirement.getName();

        // Nothing to do if the requirement is the same as the cached one.
        ResourceRequirement cached = requirements.get(resourceName);
        if (requirement.equals(cached)) {
          LOG.info("Requirement for {} is not changed. No assignment is needed. {} = {}",
                   resourceName, cached, requirement);
          return;
        }

        // The requirement changed, remember the new one.
        requirements.put(resourceName, requirement);

        CancellableServiceDiscovered subscription = serviceDiscovered.get(resourceName);
        if (subscription != null) {
          // Already subscribed to service discovery, so assignment has to be triggered explicitly.
          performAssignment(requirement, subscription.serviceDiscovered);
          return;
        }

        // First time seeing this resource: subscribe to service discovery. No need to trigger the
        // assignment logic here as the first call to listener.onChange would do so.
        serviceDiscovered.put(resourceName,
                              new CancellableServiceDiscovered(discoveryService.discover(resourceName),
                                                               discoverableListener, executor));
      } catch (Exception e) {
        LOG.warn("Failed to process requirement ZK node {}{}: {}",
                 zkClient.getConnectString(), path, Bytes.toStringBinary(nodeData), e);
      }
    }

    @Override
    public void onFailure(Throwable t) {
      // Just log
      LOG.error("Failed to getData on ZK node {}{}", zkClient.getConnectString(), path, t);
    }
  };

  Futures.addCallback(zkClient.getData(path, watcher), wrapCallback(dataCallback), executor);
}
/**
 * Computes and saves a new assignment for the given resource requirement.
 * This method should only be called from the single thread executor owned by this class.
 *
 * <p>If no assignment is cached for the resource, the existing one is first fetched from ZK through
 * {@link #fetchAndPerformAssignment(ResourceRequirement, ServiceDiscovered)} and this method returns
 * without assigning.
 *
 * @param requirement The resource requirement that needs to be fulfilled.
 * @param serviceDiscovered The set of handlers available.
 */
private void performAssignment(ResourceRequirement requirement, ServiceDiscovered serviceDiscovered) {
  final String resourceName = requirement.getName();

  ResourceAssignment currentAssignment = assignments.get(resourceName);
  if (currentAssignment == null) {
    // Not cached yet, try to fetch it from ZK. This is the penalty to pay for the first time this
    // coordinator instance sees a given requirement.
    fetchAndPerformAssignment(requirement, serviceDiscovered);
    return;
  }

  Set<Discoverable> handlers = ImmutableSortedSet.copyOf(DiscoverableComparator.COMPARATOR, serviceDiscovered);
  LOG.info("Perform assign for requirement {}. Number of handlers is {}", requirement, handlers.size());

  // Partition name -> number of replicas required.
  Map<String, Integer> replicasByPartition = Maps.newHashMap();
  for (ResourceRequirement.Partition partition : requirement.getPartitions()) {
    replicasByPartition.put(partition.getName(), partition.getReplicas());
  }

  // Carry over the existing assignments that are still valid and drop the rest.
  Multimap<Discoverable, PartitionReplica> retained =
    TreeMultimap.create(DiscoverableComparator.COMPARATOR, PartitionReplica.COMPARATOR);
  for (Map.Entry<Discoverable, PartitionReplica> entry : currentAssignment.getAssignments().entries()) {
    Discoverable handler = entry.getKey();
    PartitionReplica replica = entry.getValue();

    Integer requiredReplicas = replicasByPartition.get(replica.getName());
    if (requiredReplicas == null) {
      // The partition is no longer part of the requirement.
      continue;
    }
    if (replica.getReplicaId() >= requiredReplicas) {
      // The replica id is beyond the required number of replicas.
      continue;
    }
    if (!handlers.contains(handler)) {
      // The handler being assigned no longer exists.
      continue;
    }
    retained.put(handler, replica);
  }

  ResourceAssigner<Discoverable> assigner = DefaultResourceAssigner.create(retained);

  // The strategy is only consulted when there are handlers and the requirement is not empty.
  // Otherwise the result is just what the assigner holds from the retained assignments.
  boolean nothingToAssign = handlers.isEmpty() || replicasByPartition.isEmpty();
  if (!nothingToAssign) {
    assignmentStrategy.assign(requirement, handlers, assigner);
  }

  // Save the new assignment
  saveAssignment(new ResourceAssignment(resourceName, assigner.get()));
}
/**
 * Fetches the {@link ResourceAssignment} from ZK and then performs the resource assignment logic. This is
 * done with best effort to let the {@link AssignmentStrategy} have access to the existing assignment. If it
 * fails to get the existing {@link ResourceAssignment}, or if it simply does not exist, assignment is still
 * triggered as if there is no existing assignment.
 *
 * <p>Several fetches for the same resource can be in flight at the same time. Only the first one to
 * complete (successfully or not) populates the local cache and triggers the assignment; later ones are
 * ignored. The assignment is performed against the requirement cached at completion time, so that a
 * requirement updated while fetching is not lost, and it is skipped if the requirement has been removed.
 *
 * @param requirement The resource requirement that needs to be fulfilled.
 * @param serviceDiscovered The set of handlers available.
 */
private void fetchAndPerformAssignment(final ResourceRequirement requirement,
                                       final ServiceDiscovered serviceDiscovered) {
  final String name = requirement.getName();
  String zkPath = CoordinationConstants.ASSIGNMENTS_PATH + "/" + name;

  // The callback is executed on the single thread executor, so it can safely access the local caches.
  Futures.addCallback(zkClient.getData(zkPath), new FutureCallback<NodeData>() {
    @Override
    public void onSuccess(NodeData result) {
      if (assignments.get(name) != null) {
        // Assignment has been performed while this one was fetching. So, ignore this.
        return;
      }
      byte[] data = result.getData();
      ResourceAssignment resourceAssignment = new ResourceAssignment(name);
      try {
        if (data != null) {
          resourceAssignment = CoordinationConstants.RESOURCE_ASSIGNMENT_CODEC.decode(data);
        }
      } catch (Throwable t) {
        LOG.warn("Failed to decode resource assignment. Perform assignment as if no assignment existed.", t);
      }
      assignFrom(resourceAssignment);
    }

    @Override
    public void onFailure(Throwable t) {
      if (assignments.get(name) != null) {
        // Same guard as in onSuccess: never overwrite an assignment that is already cached.
        return;
      }
      if (!(t instanceof KeeperException.NoNodeException)) {
        // If failure is not because node doesn't exists, log a warning
        LOG.warn("Failed to fetch current assignment. Perform assignment as if no assignment existed.", t);
      }
      assignFrom(new ResourceAssignment(name));
    }

    // Caches the given existing assignment and performs assignment for the latest known requirement.
    private void assignFrom(ResourceAssignment existing) {
      ResourceRequirement latest = requirements.get(name);
      if (latest == null) {
        // The requirement was removed while fetching; don't write an assignment for it.
        return;
      }
      assignments.put(name, existing);
      performAssignment(latest, serviceDiscovered);
    }
  }, executor);
}
/**
 * Stores a {@link ResourceAssignment} in the local cache and writes it to ZK. If the asynchronous
 * write to ZK fails, this service is failed. An exception raised while encoding or submitting the
 * write is only logged.
 *
 * @param assignment The assignment to be persisted.
 */
private void saveAssignment(ResourceAssignment assignment) {
  final String resourceName = assignment.getName();
  assignments.put(resourceName, assignment);

  try {
    final byte[] encoded = CoordinationConstants.RESOURCE_ASSIGNMENT_CODEC.encode(assignment);

    FutureCallback<ResourceAssignment> persistCallback = new FutureCallback<ResourceAssignment>() {
      @Override
      public void onSuccess(ResourceAssignment result) {
        // Nothing more to do. Log it for debugging purposes.
        LOG.info("Resource assignment updated for {}. {}", result.getName(), Bytes.toString(encoded));
      }

      @Override
      public void onFailure(Throwable t) {
        LOG.error("Failed to save assignment {}", Bytes.toStringBinary(encoded), t);
        doNotifyFailed(t);
      }
    };

    String zkPath = CoordinationConstants.ASSIGNMENTS_PATH + "/" + resourceName;
    Futures.addCallback(
      ZKExtOperations.setOrCreate(zkClient, zkPath, encoded, assignment, CoordinationConstants.MAX_ZK_FAILURE_RETRY),
      persistCallback, executor);
  } catch (Exception e) {
    // Something very wrong
    LOG.error("Failed to save assignment: {}", resourceName, e);
  }
}
/**
 * Drops the {@link ResourceAssignment} of the given resource from the local cache and deletes its ZK node.
 *
 * @param name Name of the resource.
 */
private void removeAssignment(String name) {
  assignments.remove(name);

  // Fire-and-forget delete of the assignment node; the result is intentionally not inspected.
  // Even if the delete fails and leaves a stale assignment behind, the next time an assignment
  // action is triggered, it'll correct it.
  zkClient.delete(CoordinationConstants.ASSIGNMENTS_PATH + "/" + name);
}
/**
 * Tells whether events received should be processed, which is the case only while this service
 * is in the {@code STARTING} or {@code RUNNING} state.
 *
 * @return {@code true} if this service is starting or running, {@code false} otherwise
 */
private boolean shouldProcess() {
  State current = state();
  if (State.RUNNING == current) {
    return true;
  }
  return State.STARTING == current;
}
/**
 * Decorates a callback so that both its success and failure notifications are silently dropped
 * unless {@link #shouldProcess()} returns true at the time of the notification.
 *
 * @param callback the callback to delegate to
 * @param <T> type of the future result
 * @return a callback that conditionally delegates to the given one
 */
private <T> FutureCallback<T> wrapCallback(final FutureCallback<T> callback) {
  return new FutureCallback<T>() {
    @Override
    public void onSuccess(T result) {
      if (!shouldProcess()) {
        return;
      }
      callback.onSuccess(result);
    }

    @Override
    public void onFailure(Throwable t) {
      if (!shouldProcess()) {
        return;
      }
      callback.onFailure(t);
    }
  };
}
/**
 * Decorates a Watcher so that events are silently dropped unless {@link #shouldProcess()} returns true
 * at the time the event is received.
 *
 * @param watcher the watcher to delegate to
 * @return a watcher that conditionally delegates to the given one
 */
private Watcher wrapWatcher(final Watcher watcher) {
  return new Watcher() {
    @Override
    public void process(WatchedEvent event) {
      if (shouldProcess()) {
        watcher.process(event);
      }
    }
  };
}
/**
* Watcher to handle children nodes changes in the resource requirement. Child node will be added / removed
* when requirement is added or removed.
*/
private final class ResourceWatcher implements Watcher {
@Override
public void process(WatchedEvent event) {
if (!shouldProcess()) {
return;
}
switch (event.getType()) {
case NodeCreated:
case NodeChildrenChanged:
fetchAndProcessAllResources(this);
break;
case NodeDeleted:
beginWatch(this);
break;
default:
// No-op
}
}
}
/**
 * Watcher on a single resource requirement node. A data change re-fetches and processes the requirement
 * (which also re-registers this watcher); all other event types are ignored here.
 */
private final class ResourceRequirementWatcher implements Watcher {

  // ZK path of the requirement node being watched.
  private final String path;

  private ResourceRequirementWatcher(String path) {
    this.path = path;
  }

  @Override
  public void process(WatchedEvent event) {
    // Only interested in data change events while the service is up.
    // Other types of events are handled by the watcher on the parent node.
    boolean dataChanged = shouldProcess() && event.getType() == Event.EventType.NodeDataChanged;
    if (dataChanged) {
      fetchAndProcessRequirement(path, this);
    }
  }
}
/**
 * Listener for changes in discovered handlers. When the handlers of a service change, assignment is
 * performed for the cached requirement having the same name as the service, if there is one.
 */
private final class DiscoverableChangeListener implements ServiceDiscovered.ChangeListener {

  @Override
  public void onChange(ServiceDiscovered serviceDiscovered) {
    ResourceRequirement requirement = requirements.get(serviceDiscovered.getName());
    if (requirement == null) {
      // No requirement is known for this service, hence nothing to assign.
      return;
    }
    performAssignment(requirement, serviceDiscovered);
  }
}
/**
 * Holds a {@link ServiceDiscovered} together with the {@link Cancellable} obtained from registering a
 * change listener on it, so that the registration can be cancelled later.
 */
private static final class CancellableServiceDiscovered implements Cancellable {

  // Handle returned by watchChanges; cancelling it is what cancel() delegates to.
  private final Cancellable cancellable;
  private final ServiceDiscovered serviceDiscovered;

  /**
   * Registers the listener on the given {@link ServiceDiscovered}.
   *
   * @param serviceDiscovered the service discovery result to watch
   * @param listener the listener to register for changes
   * @param executor the executor passed along to {@code watchChanges} together with the listener
   */
  private CancellableServiceDiscovered(ServiceDiscovered serviceDiscovered,
                                       ServiceDiscovered.ChangeListener listener, Executor executor) {
    this.serviceDiscovered = serviceDiscovered;
    this.cancellable = serviceDiscovered.watchChanges(listener, executor);
  }

  @Override
  public void cancel() {
    cancellable.cancel();
  }
}
}