/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data.stream.service;
import co.cask.cdap.api.data.stream.StreamSpecification;
import co.cask.cdap.api.metrics.MetricStore;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.common.stream.notification.StreamSizeNotification;
import co.cask.cdap.common.zookeeper.coordination.BalancedAssignmentStrategy;
import co.cask.cdap.common.zookeeper.coordination.PartitionReplica;
import co.cask.cdap.common.zookeeper.coordination.ResourceCoordinator;
import co.cask.cdap.common.zookeeper.coordination.ResourceCoordinatorClient;
import co.cask.cdap.common.zookeeper.coordination.ResourceHandler;
import co.cask.cdap.common.zookeeper.coordination.ResourceModifier;
import co.cask.cdap.common.zookeeper.coordination.ResourceRequirement;
import co.cask.cdap.data.stream.StreamCoordinatorClient;
import co.cask.cdap.data.stream.StreamLeaderListener;
import co.cask.cdap.data.stream.StreamPropertyListener;
import co.cask.cdap.data.stream.service.heartbeat.HeartbeatPublisher;
import co.cask.cdap.data.stream.service.heartbeat.StreamWriterHeartbeat;
import co.cask.cdap.data2.transaction.stream.StreamAdmin;
import co.cask.cdap.notifications.feeds.NotificationFeedException;
import co.cask.cdap.notifications.feeds.NotificationFeedManager;
import co.cask.cdap.notifications.feeds.NotificationFeedNotFoundException;
import co.cask.cdap.notifications.service.NotificationContext;
import co.cask.cdap.notifications.service.NotificationHandler;
import co.cask.cdap.notifications.service.NotificationService;
import co.cask.cdap.proto.Id;
import com.google.common.base.Function;
import com.google.common.base.Supplier;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.Uninterruptibles;
import com.google.inject.Inject;
import org.apache.twill.api.ElectionHandler;
import org.apache.twill.api.TwillRunnable;
import org.apache.twill.common.Cancellable;
import org.apache.twill.common.Threads;
import org.apache.twill.discovery.Discoverable;
import org.apache.twill.discovery.DiscoveryServiceClient;
import org.apache.twill.internal.zookeeper.LeaderElection;
import org.apache.twill.zookeeper.ZKClient;
import org.apache.twill.zookeeper.ZKClients;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.reflect.Type;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import javax.annotation.Nullable;
/**
* Stream service running in a {@link TwillRunnable}. It is responsible for sending {@link StreamWriterHeartbeat}s
* at a fixed rate, describing the sizes of the stream files on which this service writes data, for each stream.
*/
public class DistributedStreamService extends AbstractStreamService {
// Logger shared by this service and its inner classes.
private static final Logger LOG = LoggerFactory.getLogger(DistributedStreamService.class);
// Last segment of the ZooKeeper path used for the leader election ("/election/" + STREAMS_COORDINATOR).
private static final String STREAMS_COORDINATOR = "streams.coordinator";
// ZooKeeper client used for the leader election and, once namespaced, for resource coordination.
private final ZKClient zkClient;
// Used to check stream existence and to read the notification threshold when aggregation starts.
private final StreamAdmin streamAdmin;
// Handed to the ResourceCoordinator that is created when this instance wins the election.
private final DiscoveryServiceClient discoveryServiceClient;
// Source of the per-stream sizes that are sent in every heartbeat.
private final StreamWriterSizeCollector streamWriterSizeCollector;
// Sends the heartbeats built in runOneIteration().
private final HeartbeatPublisher heartbeatPublisher;
// Lists the streams that become partitions of the stream resource requirement.
private final StreamMetaStore streamMetaStore;
// Client used to subscribe to, and to modify, the stream resource requirement.
private final ResourceCoordinatorClient resourceCoordinatorClient;
// Used to subscribe to the heartbeats feed and to publish stream size notifications.
private final NotificationService notificationService;
// Used to look up and, if needed, create the heartbeats feed.
private final NotificationFeedManager feedManager;
// Listeners invoked with the set of streams this instance leads; every access synchronizes on this service.
private final Set<StreamLeaderListener> leaderListeners;
// Instance id read from the configuration; stamped on every heartbeat sent by this service.
private final int instanceId;
// Cancels the leader listener registered in initialize(); null until then.
private Cancellable leaderListenerCancellable;
// Size aggregators currently running, keyed by stream.
private final ConcurrentMap<Id.Stream, StreamSizeAggregator> aggregators;
// Cancels the subscription to the heartbeats feed; null until initialize() subscribes.
private Cancellable heartbeatsSubscription;
// Supplies the Discoverable that identifies this handler to the resource coordinator.
private Supplier<Discoverable> discoverableSupplier;
// Leader election among stream services; created in performLeaderElection().
private LeaderElection leaderElection;
// Created each time this instance becomes leader; null if it has never been leader.
private ResourceCoordinator resourceCoordinator;
// Cancels the subscription to resource assignment changes; null until initialize() subscribes.
private Cancellable coordinationSubscription;
// Executor passed to the heartbeats feed subscription; created in initialize().
private ExecutorService heartbeatsSubscriptionExecutor;
/**
 * Creates the service from its injected collaborators. Nothing is started here: the constructor only
 * stores the references, builds the {@link ResourceCoordinatorClient} on top of the namespaced ZooKeeper
 * client and reads this container's instance id from the configuration.
 *
 * @param cConf configuration from which {@code Constants.Stream.CONTAINER_INSTANCE_ID} is read
 * @param streamAdmin admin used to query stream existence and configuration
 * @param streamCoordinatorClient forwarded to the parent class
 * @param janitorService forwarded to the parent class
 * @param zkClient ZooKeeper client used for election and coordination
 * @param discoveryServiceClient discovery client given to the resource coordinator
 * @param streamMetaStore store used to list the existing streams
 * @param discoverableSupplier supplier of the {@link Discoverable} representing this handler
 * @param streamWriterSizeCollector collector of the sizes written by this instance
 * @param heartbeatPublisher publisher used to send heartbeats
 * @param feedManager manager of notification feeds
 * @param notificationService service used to publish and subscribe to notifications
 * @param metricStore forwarded to the parent class
 */
@Inject
public DistributedStreamService(CConfiguration cConf,
                                StreamAdmin streamAdmin,
                                StreamCoordinatorClient streamCoordinatorClient,
                                StreamFileJanitorService janitorService,
                                ZKClient zkClient,
                                DiscoveryServiceClient discoveryServiceClient,
                                StreamMetaStore streamMetaStore,
                                Supplier<Discoverable> discoverableSupplier,
                                StreamWriterSizeCollector streamWriterSizeCollector,
                                HeartbeatPublisher heartbeatPublisher,
                                NotificationFeedManager feedManager,
                                NotificationService notificationService,
                                MetricStore metricStore) {
  super(streamCoordinatorClient, janitorService, streamWriterSizeCollector, metricStore);

  // ZooKeeper and discovery
  this.zkClient = zkClient;
  this.discoveryServiceClient = discoveryServiceClient;
  this.discoverableSupplier = discoverableSupplier;

  // Stream metadata and sizes
  this.streamAdmin = streamAdmin;
  this.streamMetaStore = streamMetaStore;
  this.streamWriterSizeCollector = streamWriterSizeCollector;

  // Notifications and heartbeats
  this.feedManager = feedManager;
  this.notificationService = notificationService;
  this.heartbeatPublisher = heartbeatPublisher;

  // Derived state; the coordinator client needs zkClient to be assigned already.
  this.resourceCoordinatorClient = new ResourceCoordinatorClient(getCoordinatorZKClient());
  this.leaderListeners = Sets.newHashSet();
  this.instanceId = cConf.getInt(Constants.Stream.CONTAINER_INSTANCE_ID);
  this.aggregators = Maps.newConcurrentMap();
}
/**
 * Starts the distributed parts of this service, in order: ensures the heartbeats feed exists, starts the
 * heartbeat publisher and the resource coordinator client, subscribes to resource assignment changes,
 * subscribes to the heartbeats feed, registers the listener that (re)starts size aggregation, and finally
 * joins the leader election.
 *
 * @throws Exception if any of the start-up steps fails
 */
@Override
protected void initialize() throws Exception {
LOG.info("Initializing DistributedStreamService.");
// Blocks, retrying, until the feed lookup succeeds or the feed has been created.
createHeartbeatsFeed();
heartbeatPublisher.startAndWait();
resourceCoordinatorClient.startAndWait();
// Assignment changes for this handler (identified by its discoverable name) reach StreamsLeaderHandler.
coordinationSubscription = resourceCoordinatorClient.subscribe(discoverableSupplier.get().getName(),
new StreamsLeaderHandler());
// Single daemon thread handed to the heartbeats subscription below.
heartbeatsSubscriptionExecutor = Executors.newSingleThreadExecutor(
Threads.createDaemonThreadFactory("heartbeats-subscription-executor"));
heartbeatsSubscription = subscribeToHeartbeatsFeed();
// Whenever the set of streams led by this instance changes, adjust the running aggregators.
leaderListenerCancellable = addLeaderListener(new StreamLeaderListener() {
@Override
public void leaderOf(Set<Id.Stream> streamIds) {
aggregate(streamIds);
}
});
performLeaderElection();
LOG.info("DistributedStreamService initialized.");
}
/**
 * Releases everything acquired in {@link #initialize()}: cancels the aggregators and subscriptions, stops
 * the heartbeat machinery, leaves the leader election and stops the resource coordinator client.
 * Fields that are only assigned during initialization are null-checked, so this method tolerates a
 * partially completed start-up.
 *
 * @throws Exception if one of the steps fails; the steps after the failing one are then not executed
 */
@Override
protected void doShutdown() throws Exception {
// Aggregators are cancelled but intentionally left in the map; the service is going away.
for (StreamSizeAggregator aggregator : aggregators.values()) {
aggregator.cancel();
}
if (leaderListenerCancellable != null) {
leaderListenerCancellable.cancel();
}
if (heartbeatsSubscription != null) {
heartbeatsSubscription.cancel();
}
if (heartbeatsSubscriptionExecutor != null) {
heartbeatsSubscriptionExecutor.shutdownNow();
}
heartbeatPublisher.stopAndWait();
if (leaderElection != null) {
// Wait at most 5 seconds for the election to stop; a timeout surfaces as an exception.
Uninterruptibles.getUninterruptibly(leaderElection.stop(), 5, TimeUnit.SECONDS);
}
if (coordinationSubscription != null) {
coordinationSubscription.cancel();
}
// NOTE(review): resourceCoordinatorClient is a final field assigned in the constructor, so this
// check always passes.
if (resourceCoordinatorClient != null) {
resourceCoordinatorClient.stopAndWait();
}
}
/**
 * Publishes one heartbeat carrying a snapshot of the sizes currently reported by the
 * {@link StreamWriterSizeCollector} for every stream, stamped with the current time and this
 * service's instance id.
 *
 * @throws Exception if the heartbeat cannot be built or sent
 */
@Override
protected void runOneIteration() throws Exception {
  LOG.trace("Performing heartbeat publishing in Stream service instance {}", instanceId);

  // Freeze the live counters into an immutable snapshot.
  ImmutableMap.Builder<Id.Stream, Long> snapshot = ImmutableMap.builder();
  for (Map.Entry<Id.Stream, AtomicLong> counter : streamWriterSizeCollector.getStreamSizes().entrySet()) {
    Id.Stream stream = counter.getKey();
    long currentSize = counter.getValue().get();
    snapshot.put(stream, currentSize);
  }

  long timestamp = System.currentTimeMillis();
  StreamWriterHeartbeat heartbeat = new StreamWriterHeartbeat(timestamp, instanceId, snapshot.build());
  LOG.trace("Publishing heartbeat {}", heartbeat);
  heartbeatPublisher.sendHeartbeat(heartbeat);
}
/**
 * Perform aggregation on the Streams described by the {@code streamIds}, and no other Streams.
 * Streams that already have an aggregator keep it untouched; streams that are new get an aggregator
 * created (unless the stream no longer exists); aggregators of streams that are not part of
 * {@code streamIds} are cancelled and removed.
 *
 * <p>Setting up an aggregator is retried once per second until it succeeds. The cause of each failed
 * attempt is logged at debug level so that a persistent failure can be diagnosed.
 *
 * @param streamIds Ids of the streams to perform data sizes aggregation on
 * @throws RuntimeException wrapping an {@link InterruptedException} if the calling thread is interrupted
 *                          while waiting before a retry; the interrupt flag is restored first
 */
private void aggregate(Set<Id.Stream> streamIds) {
  // Starts as "everything we aggregate today"; whatever is left after the loop is outdated.
  Set<Id.Stream> existingAggregators = Sets.newHashSet(aggregators.keySet());
  for (Id.Stream streamId : streamIds) {
    if (existingAggregators.remove(streamId)) {
      // Already aggregating this stream, keep the running aggregator.
      continue;
    }
    while (true) {
      try {
        if (!streamAdmin.exists(streamId)) {
          break;
        }
        int threshold = streamAdmin.getConfig(streamId).getNotificationThresholdMB();
        long eventsSize = getStreamEventsSize(streamId);
        createSizeAggregator(streamId, eventsSize, threshold);
        LOG.debug("Size of the events ingested in stream {}: {}", streamId, eventsSize);
        break;
      } catch (Exception e) {
        LOG.info("Could not compute sizes of files for stream {}. Retrying in 1 sec.", streamId);
        // Keep the cause available without flooding the info log on every retry.
        LOG.debug("Failed to set up size aggregation for stream {}.", streamId, e);
        try {
          TimeUnit.SECONDS.sleep(1);
        } catch (InterruptedException ie) {
          Thread.currentThread().interrupt();
          throw Throwables.propagate(ie);
        }
      }
    }
  }
  // Stop aggregating the heartbeats we used to listen to before the call to that method,
  // but don't anymore
  for (Id.Stream outdatedStream : existingAggregators) {
    // We need to first cancel the aggregator and then remove it from the map of aggregators,
    // to avoid race conditions in createSizeAggregator
    StreamSizeAggregator aggregator = aggregators.get(outdatedStream);
    if (aggregator != null) {
      aggregator.cancel();
    }
    aggregators.remove(outdatedStream);
  }
}
/**
 * Builds a {@link StreamSizeAggregator} for {@code streamId}, initializes it and registers it in the
 * {@code aggregators} map. A listener is also registered so that later threshold changes of the stream
 * are forwarded to the aggregator. An aggregator previously registered for the same stream is replaced
 * in the map but is not cancelled by this method.
 *
 * @param streamId stream Id to create a new aggregator for
 * @param baseCount stream size from which to start aggregating
 * @param threshold notification threshold after which to publish a notification - in MB
 * @return the created {@link StreamSizeAggregator}
 */
private StreamSizeAggregator createSizeAggregator(Id.Stream streamId, long baseCount, int threshold) {
  LOG.debug("Creating size aggregator for stream {} with baseCount {} and threshold {}",
            streamId, baseCount, threshold);

  // Forwards threshold changes to whichever aggregator is registered for the stream.
  StreamPropertyListener thresholdListener = new StreamPropertyListener() {
    @Override
    public void thresholdChanged(Id.Stream changedStream, int newThreshold) {
      // The listener is registered before the aggregator is put in the map, so spin until it shows up.
      StreamSizeAggregator target;
      while ((target = aggregators.get(changedStream)) == null) {
        Thread.yield();
      }
      target.setStreamThresholdMB(newThreshold);
    }
  };
  Cancellable thresholdSubscription = getStreamCoordinatorClient().addListener(streamId, thresholdListener);

  StreamSizeAggregator created = new StreamSizeAggregator(streamId, baseCount, threshold, thresholdSubscription);
  created.init();
  aggregators.put(streamId, created);
  return created;
}
/**
 * Returns a view of the ZooKeeper client restricted to the stream coordination namespace.
 *
 * @return the namespaced {@link ZKClient}
 */
private ZKClient getCoordinatorZKClient() {
  String coordinationNamespace = Constants.Stream.STREAM_ZK_COORDINATION_NAMESPACE;
  return ZKClients.namespace(zkClient, coordinationNamespace);
}
/**
 * Subscribes to the stream heartbeats feed, retrying every second until the subscription succeeds.
 * Each heartbeat may carry sizes for many streams; only the entries for which this service currently
 * holds an aggregator are taken into account, the others are skipped.
 *
 * @return a {@link Cancellable} to cancel the subscription
 * @throws NotificationFeedNotFoundException if the heartbeat feed does not exist
 */
private Cancellable subscribeToHeartbeatsFeed() throws NotificationFeedNotFoundException {
  LOG.debug("Subscribing to stream heartbeats notification feed");
  Id.NotificationFeed heartbeatsFeed = new Id.NotificationFeed.Builder()
    .setNamespaceId(Id.Namespace.SYSTEM.getId())
    .setCategory(Constants.Notification.Stream.STREAM_INTERNAL_FEED_CATEGORY)
    .setName(Constants.Notification.Stream.STREAM_HEARTBEAT_FEED_NAME)
    .build();

  boolean alreadyFailedOnce = false;
  while (true) {
    // A fresh handler is built for every attempt.
    NotificationHandler<StreamWriterHeartbeat> heartbeatHandler = new NotificationHandler<StreamWriterHeartbeat>() {
      @Override
      public Type getNotificationType() {
        return StreamWriterHeartbeat.class;
      }

      @Override
      public void received(StreamWriterHeartbeat heartbeat, NotificationContext notificationContext) {
        LOG.trace("Received heartbeat {}", heartbeat);
        for (Map.Entry<Id.Stream, Long> reported : heartbeat.getStreamsSizes().entrySet()) {
          StreamSizeAggregator target = aggregators.get(reported.getKey());
          if (target != null) {
            target.bytesReceived(heartbeat.getInstanceId(), reported.getValue());
          } else {
            LOG.trace("Aggregator for stream {} is null", reported.getKey());
          }
        }
      }
    };

    try {
      return notificationService.subscribe(heartbeatsFeed, heartbeatHandler, heartbeatsSubscriptionExecutor);
    } catch (NotificationFeedException e) {
      // Only the first failure is loud; subsequent ones go to the debug log.
      if (alreadyFailedOnce) {
        LOG.debug("Unable to subscribe to HeartbeatsFeed. Will retry until successfully subscribed. ", e);
      } else {
        LOG.warn("Unable to subscribe to HeartbeatsFeed. Will retry until successfully subscribed. " +
                   "Retry failures will be logged at debug level.", e);
      }
      alreadyFailedOnce = true;
      waitBeforeRetryHeartbeatsFeedOperation();
    }
  }
}
/**
 * Registers a listener that is invoked every time the set of streams led by the Stream handler running
 * this {@link DistributedStreamService} changes. The set passed to the listener may contain streams
 * the handler was already leading before the call.
 *
 * @param listener {@link StreamLeaderListener} called when this Stream handler becomes leader
 *                 of a collection of streams
 * @return A {@link Cancellable} to cancel the watch
 */
private Cancellable addLeaderListener(final StreamLeaderListener listener) {
  Cancellable registration = new Cancellable() {
    @Override
    public void cancel() {
      // Same monitor as the one guarding the registration below.
      synchronized (DistributedStreamService.this) {
        leaderListeners.remove(listener);
      }
    }
  };
  synchronized (this) {
    leaderListeners.add(listener);
  }
  return registration;
}
/**
 * Ensures that the notification feed used for stream heartbeats exists, creating it if the lookup
 * reports it as missing.
 *
 * <p>Both the lookup and the creation are retried once per second until one of them succeeds. The
 * creation runs inside its own inner {@code try} so that a {@link NotificationFeedException} raised by
 * {@code createFeed} is handled by the retry logic as well; an exception thrown from within a
 * {@code catch} clause would otherwise bypass the sibling handler and abort the whole loop.
 *
 * @throws NotificationFeedException kept in the signature for compatibility with existing callers;
 *                                   feed manager failures are retried rather than propagated
 */
private void createHeartbeatsFeed() throws NotificationFeedException {
  Id.NotificationFeed streamHeartbeatsFeed = new Id.NotificationFeed.Builder()
    .setNamespaceId(Id.Namespace.SYSTEM.getId())
    .setCategory(Constants.Notification.Stream.STREAM_INTERNAL_FEED_CATEGORY)
    .setName(Constants.Notification.Stream.STREAM_HEARTBEAT_FEED_NAME)
    .setDescription("Stream heartbeats feed.")
    .build();
  LOG.debug("Ensuring Stream HeartbeatsFeed exists.");
  boolean isRetry = false;
  while (true) {
    try {
      try {
        feedManager.getFeed(streamHeartbeatsFeed);
        LOG.debug("Stream HeartbeatsFeed exists.");
        return;
      } catch (NotificationFeedNotFoundException notFoundException) {
        if (!isRetry) {
          LOG.debug("Creating Stream HeartbeatsFeed.");
        }
        // A failure here falls through to the outer handler and is retried.
        feedManager.createFeed(streamHeartbeatsFeed);
        LOG.info("Stream HeartbeatsFeed created.");
        return;
      }
    } catch (NotificationFeedException e) {
      // Only the first failure is logged as a warning; the following ones go to the debug log.
      if (!isRetry) {
        LOG.warn("Could not ensure existence of HeartbeatsFeed. Will retry until successful. " +
                   "Retry failures will be logged at debug level.", e);
      } else {
        LOG.debug("Could not ensure existence of HeartbeatsFeed. Will retry until successful.", e);
      }
      isRetry = true;
      waitBeforeRetryHeartbeatsFeedOperation();
    }
  }
}
/**
 * Pauses the calling thread for one second before a heartbeats feed operation is attempted again.
 * If the thread is interrupted meanwhile, the interrupt flag is restored and the interruption is
 * rethrown wrapped in an unchecked exception.
 */
private void waitBeforeRetryHeartbeatsFeedOperation() {
  // Most probably, the dataset service is not up. We retry
  long pauseMillis = TimeUnit.SECONDS.toMillis(1);
  try {
    Thread.sleep(pauseMillis);
  } catch (InterruptedException interrupted) {
    Thread.currentThread().interrupt();
    throw Throwables.propagate(interrupted);
  }
}
/**
 * Joins the election that picks one leader among the {@link DistributedStreamService}s running in
 * different Twill runnables. The elected instance starts a {@link ResourceCoordinator} that maps
 * Streams to Stream handlers and publishes the stream resource requirement; an instance that becomes
 * follower stops its coordinator if it had one.
 */
private void performLeaderElection() {
  ElectionHandler electionHandler = new ElectionHandler() {
    @Override
    public void leader() {
      LOG.info("Became Stream handler leader. Starting resource coordinator.");
      resourceCoordinator = new ResourceCoordinator(getCoordinatorZKClient(), discoveryServiceClient,
                                                    new BalancedAssignmentStrategy());
      resourceCoordinator.startAndWait();
      updateRequirement();
    }

    @Override
    public void follower() {
      LOG.info("Became Stream handler follower.");
      if (resourceCoordinator == null) {
        // Never been leader, nothing to stop.
        return;
      }
      resourceCoordinator.stopAndWait();
    }
  };

  // TODO: Should unify this leader election with DistributedStreamFileJanitorService
  String electionPath = "/election/" + STREAMS_COORDINATOR;
  leaderElection = new LeaderElection(zkClient, electionPath, electionHandler);
  leaderElection.start();
}
/**
 * Submits the stream resource requirement to the resource coordinator. When the update fails and this
 * service is still running, the same update is submitted again after a two-second pause, from a
 * dedicated daemon thread so that the callback thread is never put to sleep. Retries continue until
 * the update succeeds or the service stops running.
 */
private void updateRequirement() {
  final ResourceModifier modifier = createRequirementModifier();

  FutureCallback<ResourceRequirement> retryingCallback = new FutureCallback<ResourceRequirement>() {
    @Override
    public void onSuccess(ResourceRequirement result) {
      LOG.info("Stream resource requirement updated to {}", result);
    }

    @Override
    public void onFailure(Throwable t) {
      LOG.warn("Failed to update stream resource requirement: {}", t.getMessage());
      LOG.debug("Failed to update stream resource requirement.", t);
      if (!isRunning()) {
        return;
      }

      // The retry re-registers this very callback, so failures keep being retried.
      final FutureCallback<ResourceRequirement> self = this;
      Thread retryThread = new Thread("stream-resource-update") {
        @Override
        public void run() {
          try {
            TimeUnit.SECONDS.sleep(2);
          } catch (InterruptedException e) {
            LOG.warn("Stream resource retry thread interrupted", e);
            return;
          }
          LOG.info("Retrying update stream resource requirement");
          Futures.addCallback(resourceCoordinatorClient.modifyRequirement(Constants.Service.STREAMS, modifier),
                              self);
        }
      };
      retryThread.setDaemon(true);
      retryThread.start();
    }
  };

  Futures.addCallback(resourceCoordinatorClient.modifyRequirement(Constants.Service.STREAMS, modifier),
                      retryingCallback);
}
/**
 * Creates a {@link ResourceModifier} that rebuilds the stream resource requirement from the streams
 * currently listed by the stream meta store. The existing requirement is ignored; the new one contains
 * one partition, with a single replica, per stream.
 *
 * @return the modifier; its {@code apply} method logs and rethrows any failure
 */
private ResourceModifier createRequirementModifier() {
  return new ResourceModifier() {
    @Nullable
    @Override
    public ResourceRequirement apply(@Nullable ResourceRequirement existingRequirement) {
      try {
        // A single requirement covers all the streams; each stream is one partition of it.
        ResourceRequirement.Builder requirement = ResourceRequirement.builder(Constants.Service.STREAMS);
        for (Map.Entry<Id.Namespace, StreamSpecification> stream : streamMetaStore.listStreams().entries()) {
          Id.Namespace namespace = stream.getKey();
          StreamSpecification spec = stream.getValue();
          Id.Stream streamId = Id.Stream.from(namespace, spec.getName());
          LOG.debug("Adding {} stream as a resource to the coordinator to manager streams leaders.", streamId);
          ResourceRequirement.Partition partition = new ResourceRequirement.Partition(streamId.toString(), 1);
          requirement.addPartition(partition);
        }
        return requirement.build();
      } catch (Throwable e) {
        LOG.warn("Could not create requirement for coordinator in Stream handler leader: " + e.getMessage());
        LOG.debug("Could not create requirement for coordinator in Stream handler leader", e);
        throw Throwables.propagate(e);
      }
    }
  };
}
/**
 * Notifies every registered {@link StreamLeaderListener} that this Stream writer is the leader of the
 * given Streams. The listeners are copied while holding the service monitor and invoked afterwards, so
 * no listener code runs under the lock.
 *
 * @param streamIds set of Streams that this coordinator is the leader of
 */
private void invokeLeaderListeners(Set<Id.Stream> streamIds) {
  LOG.debug("Stream writer is the leader of streams: {}", streamIds);
  for (StreamLeaderListener listener : snapshotLeaderListeners()) {
    listener.leaderOf(streamIds);
  }
}

/**
 * Returns an immutable copy of the registered leader listeners, taken under the service monitor.
 */
private synchronized Set<StreamLeaderListener> snapshotLeaderListeners() {
  return ImmutableSet.copyOf(leaderListeners);
}
/**
 * Resource handler through which this service learns which Streams it has been assigned. Every
 * assignment change is translated into a set of stream ids and forwarded to the leader listeners.
 */
private final class StreamsLeaderHandler extends ResourceHandler {

  protected StreamsLeaderHandler() {
    super(discoverableSupplier.get());
  }

  @Override
  public void onChange(Collection<PartitionReplica> partitionReplicas) {
    LOG.info("Stream leader requirement has changed to {}", partitionReplicas);

    // Each partition name is the string form of a stream id.
    Function<PartitionReplica, Id.Stream> toStreamId = new Function<PartitionReplica, Id.Stream>() {
      @Nullable
      @Override
      public Id.Stream apply(@Nullable PartitionReplica input) {
        if (input == null) {
          return null;
        }
        return Id.Stream.fromString(input.getName(), Id.Stream.class);
      }
    };
    Set<Id.Stream> ledStreams = ImmutableSet.copyOf(Iterables.transform(partitionReplicas, toStreamId));
    invokeLeaderListeners(ledStreams);
  }

  @Override
  public void finished(Throwable failureCause) {
    if (failureCause == null) {
      return;
    }
    LOG.error("Finished with failure for Stream handler instance {}", discoverableSupplier.get().getName(),
              failureCause);
  }
}
/**
 * Aggregate the sizes of all stream writers for one stream. A notification is published on the stream's
 * size feed when the aggregator is initialized, and then every time the aggregated size has grown by
 * more than the configured threshold since the last notification.
 */
private final class StreamSizeAggregator implements Cancellable {
  // Latest size reported by each stream writer, keyed by the writer's instance id.
  private final Map<Integer, Long> streamWriterSizes;
  // Feed on which the size notifications of this stream are published.
  private final Id.NotificationFeed streamFeed;
  // Aggregated size at the time of the last threshold notification attempt.
  private final AtomicLong streamBaseCount;
  // Size of the stream when this aggregator was created; added to the writers' sizes.
  private final long streamInitSize;
  // Notification threshold, in megabytes.
  private final AtomicInteger streamThresholdMB;
  // Cancelled together with this aggregator.
  private final Cancellable cancellable;
  private final Id.Stream streamId;

  /**
   * @param streamId stream this aggregator is linked to
   * @param baseCount size from which to start aggregating
   * @param streamThresholdMB notification threshold, in megabytes
   * @param cancellable resource to cancel when this aggregator is cancelled
   */
  protected StreamSizeAggregator(Id.Stream streamId, long baseCount, int streamThresholdMB, Cancellable cancellable) {
    this.streamWriterSizes = Maps.newHashMap();
    this.streamBaseCount = new AtomicLong(baseCount);
    this.streamInitSize = baseCount;
    this.streamThresholdMB = new AtomicInteger(streamThresholdMB);
    this.cancellable = cancellable;
    this.streamId = streamId;
    this.streamFeed = new Id.NotificationFeed.Builder()
      .setNamespaceId(streamId.getNamespaceId())
      .setCategory(Constants.Notification.Stream.STREAM_FEED_CATEGORY)
      .setName(String.format("%sSize", streamId.getId()))
      .build();
  }

  /**
   * Initialize this {@link StreamSizeAggregator}.
   */
  public void init() {
    // Publish an initialization notification
    publishNotification(streamInitSize);
  }

  @Override
  public void cancel() {
    cancellable.cancel();
  }

  /**
   * Set the notification threshold for the stream that this {@link StreamSizeAggregator} is linked to.
   *
   * @param newThreshold new notification threshold, in megabytes
   */
  public void setStreamThresholdMB(int newThreshold) {
    LOG.debug("Updating threshold of size aggregator for stream {}: {}MB", streamId, newThreshold);
    streamThresholdMB.set(newThreshold);
  }

  /**
   * Record the size reported by the stream writer with instance {@code instanceId}, replacing the size
   * it reported previously, and publish a notification if the threshold has been crossed.
   *
   * @param instanceId id of the stream writer from which we received some bytes
   * @param nbBytes size, in bytes, reported by that stream writer
   */
  public void bytesReceived(int instanceId, long nbBytes) {
    LOG.trace("Bytes received from instanceId {}: {}B", instanceId, nbBytes);
    streamWriterSizes.put(instanceId, nbBytes);
    checkSendNotification();
  }

  /**
   * Check if the current size of data is enough to trigger a notification.
   */
  private void checkSendNotification() {
    long sum = streamInitSize;
    for (Long size : streamWriterSizes.values()) {
      sum += size;
    }
    LOG.trace("Check notification publishing: sum is {}, baseCount is {}", sum, streamBaseCount);
    if (sum - streamBaseCount.get() > toBytes(streamThresholdMB.get())) {
      try {
        publishNotification(sum);
      } finally {
        // Move the base forward whatever happened, so the same growth is not notified twice.
        streamBaseCount.set(sum);
      }
    }
  }

  private long toBytes(int mb) {
    return ((long) mb) * 1024 * 1024;
  }

  /**
   * Publish a size notification and wait for the publication to complete. Failures are logged and
   * never propagated. If the waiting thread is interrupted, its interrupt flag is restored so that
   * callers further up the stack can still observe the interruption.
   *
   * @param absoluteSize size to report in the notification
   */
  private void publishNotification(long absoluteSize) {
    try {
      notificationService.publish(streamFeed, new StreamSizeNotification(System.currentTimeMillis(), absoluteSize))
        .get();
    } catch (NotificationFeedException e) {
      LOG.warn("Error with notification feed {}", streamFeed, e);
    } catch (InterruptedException e) {
      // Do not swallow the interruption: restore the flag for whoever owns this thread.
      Thread.currentThread().interrupt();
      LOG.warn("Interrupted while publishing notification on feed {}", streamFeed.getFeedId(), e);
    } catch (Throwable t) {
      LOG.warn("Could not publish notification on feed {}", streamFeed.getFeedId(), t);
    }
  }
}
}