/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.operators;

import java.util.concurrent.ScheduledFuture;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.apache.flink.streaming.runtime.streamstatus.StreamStatus;
import org.apache.flink.streaming.runtime.streamstatus.StreamStatusMaintainer;
import org.apache.flink.streaming.runtime.tasks.ProcessingTimeCallback;
import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService;
import org.apache.flink.util.Preconditions;

/**
 * Source contexts for various stream time characteristics.
 */
public class StreamSourceContexts {

	/**
	 * Depending on the {@link TimeCharacteristic}, this method will return the adequate
	 * {@link org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext}. That is:
	 * <ul>
	 *     <li>{@link TimeCharacteristic#IngestionTime} = {@code AutomaticWatermarkContext}</li>
	 *     <li>{@link TimeCharacteristic#ProcessingTime} = {@code NonTimestampContext}</li>
	 *     <li>{@link TimeCharacteristic#EventTime} = {@code ManualWatermarkContext}</li>
	 * </ul>
	 *
	 * @param timeCharacteristic the time characteristic that selects the context implementation
	 * @param processingTimeService the time service handed to the watermark-aware contexts
	 *                              (not used for {@link TimeCharacteristic#ProcessingTime})
	 * @param checkpointLock the lock under which records and watermarks are emitted
	 * @param streamStatusMaintainer the maintainer handed to the watermark-aware contexts
	 *                               (not used for {@link TimeCharacteristic#ProcessingTime})
	 * @param output the output that records and watermarks are forwarded to
	 * @param watermarkInterval the automatic watermark interval in milliseconds; only used for
	 *                          {@link TimeCharacteristic#IngestionTime}, where it must be at least 1
	 * @param idleTimeout the idle timeout in milliseconds, or {@code -1} to disable idleness
	 *                    detection; only used by the watermark-aware contexts
	 * @param <OUT> the type of the emitted elements
	 * @return the source context matching the given time characteristic
	 * @throws IllegalArgumentException if the time characteristic is not one of the three above
	 */
	public static <OUT> SourceFunction.SourceContext<OUT> getSourceContext(
			TimeCharacteristic timeCharacteristic,
			ProcessingTimeService processingTimeService,
			Object checkpointLock,
			StreamStatusMaintainer streamStatusMaintainer,
			Output<StreamRecord<OUT>> output,
			long watermarkInterval,
			long idleTimeout) {

		switch (timeCharacteristic) {
			case EventTime:
				return new ManualWatermarkContext<>(
					output,
					processingTimeService,
					checkpointLock,
					streamStatusMaintainer,
					idleTimeout);
			case IngestionTime:
				return new AutomaticWatermarkContext<>(
					output,
					watermarkInterval,
					processingTimeService,
					checkpointLock,
					streamStatusMaintainer,
					idleTimeout);
			case ProcessingTime:
				return new NonTimestampContext<>(checkpointLock, output);
			default:
				throw new IllegalArgumentException(String.valueOf(timeCharacteristic));
		}
	}

	/**
	 * A source context that ignores any timestamp supplied by the source and that
	 * does not forward watermarks.
	 */
	private static class NonTimestampContext<T> implements SourceFunction.SourceContext<T> {

		private final Object lock;
		private final Output<StreamRecord<T>> output;

		/** Record instance that is re-used for every emitted element. */
		private final StreamRecord<T> reuse;

		private NonTimestampContext(Object checkpointLock, Output<StreamRecord<T>> output) {
			this.lock = Preconditions.checkNotNull(checkpointLock, "The checkpoint lock cannot be null.");
			this.output = Preconditions.checkNotNull(output, "The output cannot be null.");
			this.reuse = new StreamRecord<>(null);
		}

		@Override
		public void collect(T element) {
			synchronized (lock) {
				output.collect(reuse.replace(element));
			}
		}

		@Override
		public void collectWithTimestamp(T element, long timestamp) {
			// ignore the timestamp
			collect(element);
		}

		@Override
		public void emitWatermark(Watermark mark) {
			// do nothing
		}

		@Override
		public void markAsTemporarilyIdle() {
			// do nothing
		}

		@Override
		public Object getCheckpointLock() {
			return lock;
		}

		@Override
		public void close() {}
	}

	/**
	 * {@link SourceFunction.SourceContext} to be used for sources with automatic timestamps
	 * and watermark emission.
	 *
	 * <p>Every record is stamped with the current processing time. Watermarks are emitted
	 * both from the record path and from a periodically re-registered timer task.
	 */
	private static class AutomaticWatermarkContext<T> extends WatermarkContext<T> {

		/** Value of {@code lastRecordTime} while no record has been collected yet. */
		private static final long NO_RECORD_SEEN = Long.MIN_VALUE;

		private final Output<StreamRecord<T>> output;

		/** Record instance that is re-used for every emitted element. */
		private final StreamRecord<T> reuse;

		private final long watermarkInterval;

		private volatile ScheduledFuture<?> nextWatermarkTimer;
		private volatile long nextWatermarkTime;

		/** Processing time of the most recently collected record, or {@link #NO_RECORD_SEEN}. */
		private long lastRecordTime;

		private AutomaticWatermarkContext(
				final Output<StreamRecord<T>> output,
				final long watermarkInterval,
				final ProcessingTimeService timeService,
				final Object checkpointLock,
				final StreamStatusMaintainer streamStatusMaintainer,
				final long idleTimeout) {

			super(timeService, checkpointLock, streamStatusMaintainer, idleTimeout);

			this.output = Preconditions.checkNotNull(output, "The output cannot be null.");

			Preconditions.checkArgument(watermarkInterval >= 1L, "The watermark interval cannot be smaller than 1 ms.");
			this.watermarkInterval = watermarkInterval;

			this.reuse = new StreamRecord<>(null);

			this.lastRecordTime = NO_RECORD_SEEN;

			// kick off the periodic watermark emission; each task re-registers its successor
			long now = this.timeService.getCurrentProcessingTime();
			this.nextWatermarkTimer = this.timeService.registerTimer(now + watermarkInterval,
				new WatermarkEmittingTask(this.timeService, checkpointLock, output));
		}

		@Override
		protected void processAndCollect(T element) {
			lastRecordTime = this.timeService.getCurrentProcessingTime();
			output.collect(reuse.replace(element, lastRecordTime));

			// this is to avoid lock contention in the lockingObject by
			// sending the watermark before the firing of the watermark
			// emission task.
			if (lastRecordTime > nextWatermarkTime) {
				// in case we jumped some watermarks, recompute the next watermark time
				final long watermarkTime = lastRecordTime - (lastRecordTime % watermarkInterval);
				nextWatermarkTime = watermarkTime + watermarkInterval;
				output.emitWatermark(new Watermark(watermarkTime));

				// we do not need to register another timer here
				// because the emitting task will do so.
			}
		}

		@Override
		protected void processAndCollectWithTimestamp(T element, long timestamp) {
			// the supplied timestamp is dropped; the record gets the current processing time
			processAndCollect(element);
		}

		@Override
		protected boolean allowWatermark(Watermark mark) {
			// allow Long.MAX_VALUE since this is the special end-watermark that for example the Kafka source emits
			return mark.getTimestamp() == Long.MAX_VALUE && nextWatermarkTime != Long.MAX_VALUE;
		}

		/** This will only be called if allowWatermark returned {@code true}. */
		@Override
		protected void processAndEmitWatermark(Watermark mark) {
			nextWatermarkTime = Long.MAX_VALUE;
			output.emitWatermark(mark);

			// we can shutdown the watermark timer now, no watermarks will be needed any more.
			// Note that this procedure actually doesn't need to be synchronized with the lock,
			// but since it's only a one-time thing, doesn't hurt either
			cancelWatermarkTimer();
		}

		@Override
		public void close() {
			super.close();
			cancelWatermarkTimer();
		}

		/** Cancels the currently registered watermark emission timer, if there is one. */
		private void cancelWatermarkTimer() {
			// read the volatile field once so that the null check and the cancel see the same timer
			final ScheduledFuture<?> timer = this.nextWatermarkTimer;
			if (timer != null) {
				timer.cancel(true);
			}
		}

		/**
		 * Timer callback that emits an interval-aligned watermark while the stream status is
		 * active, performs a piggy-backed idleness check, and registers the next callback.
		 */
		private class WatermarkEmittingTask implements ProcessingTimeCallback {

			private final ProcessingTimeService timeService;
			private final Object lock;
			private final Output<StreamRecord<T>> output;

			private WatermarkEmittingTask(
					ProcessingTimeService timeService,
					Object checkpointLock,
					Output<StreamRecord<T>> output) {
				this.timeService = timeService;
				this.lock = checkpointLock;
				this.output = output;
			}

			@Override
			public void onProcessingTime(long timestamp) {
				final long currentTime = timeService.getCurrentProcessingTime();

				synchronized (lock) {
					// we should continue to automatically emit watermarks if we are active
					if (streamStatusMaintainer.getStreamStatus().isActive()) {
						// Before the first record, lastRecordTime is NO_RECORD_SEEN (Long.MIN_VALUE);
						// subtracting it would overflow, so that case is excluded explicitly instead of
						// relying on the wrapped-around result of the subtraction.
						final boolean idleTimeoutExceeded = idleTimeout != -1
							&& lastRecordTime != NO_RECORD_SEEN
							&& currentTime - lastRecordTime > idleTimeout;

						if (idleTimeoutExceeded) {
							// if we are configured to detect idleness, piggy-back the idle detection check on the
							// watermark interval, so that we may possibly discover idle sources faster before waiting
							// for the next idle check to fire
							markAsTemporarilyIdle();

							// no need to finish the next check, as we are now idle.
							cancelNextIdleDetectionTask();
						} else if (currentTime > nextWatermarkTime) {
							// align the watermarks across all machines. this will ensure that we
							// don't have watermarks that creep along at different intervals because
							// the machine clocks are out of sync
							final long watermarkTime = currentTime - (currentTime % watermarkInterval);

							output.emitWatermark(new Watermark(watermarkTime));
							nextWatermarkTime = watermarkTime + watermarkInterval;
						}
					}
				}

				// always re-register, independent of the stream status, so emission resumes once active again
				long nextWatermark = currentTime + watermarkInterval;
				nextWatermarkTimer = this.timeService.registerTimer(
					nextWatermark, new WatermarkEmittingTask(this.timeService, lock, output));
			}
		}
	}

	/**
	 * A SourceContext for event time. Sources may directly attach timestamps and generate
	 * watermarks, but if records are emitted without timestamps, no timestamps are automatically
	 * generated and attached. The records will simply have no timestamp in that case.
	 *
	 * <p>Streaming topologies can use timestamp assigner functions to override the timestamps
	 * assigned here.
	 */
	private static class ManualWatermarkContext<T> extends WatermarkContext<T> {

		private final Output<StreamRecord<T>> output;

		/** Record instance that is re-used for every emitted element. */
		private final StreamRecord<T> reuse;

		private ManualWatermarkContext(
				final Output<StreamRecord<T>> output,
				final ProcessingTimeService timeService,
				final Object checkpointLock,
				final StreamStatusMaintainer streamStatusMaintainer,
				final long idleTimeout) {

			super(timeService, checkpointLock, streamStatusMaintainer, idleTimeout);

			this.output = Preconditions.checkNotNull(output, "The output cannot be null.");
			this.reuse = new StreamRecord<>(null);
		}

		@Override
		protected void processAndCollect(T element) {
			output.collect(reuse.replace(element));
		}

		@Override
		protected void processAndCollectWithTimestamp(T element, long timestamp) {
			output.collect(reuse.replace(element, timestamp));
		}

		@Override
		protected void processAndEmitWatermark(Watermark mark) {
			output.emitWatermark(mark);
		}

		@Override
		protected boolean allowWatermark(Watermark mark) {
			// every watermark supplied by the source is forwarded
			return true;
		}
	}

	/**
	 * An abstract {@link SourceFunction.SourceContext} that should be used as the base for
	 * stream source contexts that are relevant with {@link Watermark}s.
	 *
	 * <p>Stream source contexts that are relevant with watermarks are responsible for manipulating
	 * the current {@link StreamStatus}, so that stream status can be correctly propagated
	 * downstream. Please refer to the class-level documentation of {@link StreamStatus} for
	 * information on how stream status affects watermark advancement at downstream tasks.
	 *
	 * <p>This class implements the logic of idleness detection. It fires idleness detection
	 * tasks at a given interval; if no records or watermarks were collected by the source context
	 * between 2 consecutive checks, it determines the source to be IDLE and correspondingly
	 * toggles the status. ACTIVE status resumes as soon as some record or watermark is collected
	 * again.
	 */
	private abstract static class WatermarkContext<T> implements SourceFunction.SourceContext<T> {

		protected final ProcessingTimeService timeService;
		protected final Object checkpointLock;
		protected final StreamStatusMaintainer streamStatusMaintainer;
		protected final long idleTimeout;

		/** Handle of the pending idleness check; {@code null} once a check has fired. */
		private ScheduledFuture<?> nextCheck;

		/**
		 * This flag will be reset to {@code true} every time the next check is scheduled.
		 * Whenever a record or watermark is collected, the flag will be set to {@code false}.
		 *
		 * <p>When the scheduled check is fired, if the flag remains {@code true}, the check
		 * will fail, and our current status will be determined to be IDLE.
		 */
		private volatile boolean failOnNextCheck;

		/**
		 * Create a watermark context.
		 *
		 * @param timeService the time service to schedule idleness detection tasks
		 * @param checkpointLock the checkpoint lock
		 * @param streamStatusMaintainer the stream status maintainer to toggle and retrieve current status
		 * @param idleTimeout (-1 if idleness checking is disabled)
		 */
		public WatermarkContext(
				final ProcessingTimeService timeService,
				final Object checkpointLock,
				final StreamStatusMaintainer streamStatusMaintainer,
				final long idleTimeout) {

			this.timeService = Preconditions.checkNotNull(timeService, "Time Service cannot be null.");
			this.checkpointLock = Preconditions.checkNotNull(checkpointLock, "Checkpoint Lock cannot be null.");
			this.streamStatusMaintainer = Preconditions.checkNotNull(streamStatusMaintainer, "Stream Status Maintainer cannot be null.");

			if (idleTimeout != -1) {
				Preconditions.checkArgument(idleTimeout >= 1, "The idle timeout cannot be smaller than 1 ms.");
			}
			this.idleTimeout = idleTimeout;

			scheduleNextIdleDetectionTask();
		}

		@Override
		public void collect(T element) {
			synchronized (checkpointLock) {
				markActiveAndRefreshIdleCheck();
				processAndCollect(element);
			}
		}

		@Override
		public void collectWithTimestamp(T element, long timestamp) {
			synchronized (checkpointLock) {
				markActiveAndRefreshIdleCheck();
				processAndCollectWithTimestamp(element, timestamp);
			}
		}

		@Override
		public void emitWatermark(Watermark mark) {
			// the admission check is done before taking the lock; rejected watermarks are dropped
			if (!allowWatermark(mark)) {
				return;
			}

			synchronized (checkpointLock) {
				markActiveAndRefreshIdleCheck();
				processAndEmitWatermark(mark);
			}
		}

		@Override
		public void markAsTemporarilyIdle() {
			synchronized (checkpointLock) {
				streamStatusMaintainer.toggleStreamStatus(StreamStatus.IDLE);
			}
		}

		@Override
		public Object getCheckpointLock() {
			return checkpointLock;
		}

		@Override
		public void close() {
			cancelNextIdleDetectionTask();
		}

		/**
		 * Toggles the stream status to ACTIVE and records the activity for idleness detection:
		 * a pending check is told not to fail, otherwise a new check is scheduled.
		 *
		 * <p>All callers in this class invoke this while holding the checkpoint lock.
		 */
		private void markActiveAndRefreshIdleCheck() {
			streamStatusMaintainer.toggleStreamStatus(StreamStatus.ACTIVE);

			if (nextCheck != null) {
				this.failOnNextCheck = false;
			} else {
				scheduleNextIdleDetectionTask();
			}
		}

		/**
		 * Timer callback that marks the source as idle if nothing was collected since the
		 * check was scheduled, and otherwise schedules the following check.
		 */
		private class IdlenessDetectionTask implements ProcessingTimeCallback {
			@Override
			public void onProcessingTime(long timestamp) throws Exception {
				synchronized (checkpointLock) {
					// set this to null now;
					// the next idleness detection will be scheduled again
					// depending on the below failOnNextCheck condition
					nextCheck = null;

					if (failOnNextCheck) {
						markAsTemporarilyIdle();
					} else {
						scheduleNextIdleDetectionTask();
					}
				}
			}
		}

		/** Registers the next idleness check, unless idleness detection is disabled. */
		private void scheduleNextIdleDetectionTask() {
			if (idleTimeout != -1) {
				// reset flag; if it remains true when task fires, we have detected idleness
				failOnNextCheck = true;
				nextCheck = this.timeService.registerTimer(
					this.timeService.getCurrentProcessingTime() + idleTimeout,
					new IdlenessDetectionTask());
			}
		}

		/** Cancels the pending idleness check, if there is one. */
		protected void cancelNextIdleDetectionTask() {
			final ScheduledFuture<?> pendingCheck = this.nextCheck;
			if (pendingCheck != null) {
				pendingCheck.cancel(true);
			}
		}

		// ------------------------------------------------------------------------
		//	Abstract methods for concrete subclasses to implement.
		//  These methods are guaranteed to be synchronized on the checkpoint lock,
		//  so implementations don't need to do so.
		// ------------------------------------------------------------------------

		/** Process and collect record. */
		protected abstract void processAndCollect(T element);

		/** Process and collect record with timestamp. */
		protected abstract void processAndCollectWithTimestamp(T element, long timestamp);

		/**
		 * Whether or not a watermark should be allowed.
		 *
		 * <p>Unlike the other abstract methods, this one is invoked by
		 * {@link #emitWatermark(Watermark)} before the checkpoint lock is acquired.
		 */
		protected abstract boolean allowWatermark(Watermark mark);

		/**
		 * Process and emit watermark. Only called if
		 * {@link WatermarkContext#allowWatermark(Watermark)} returns {@code true}.
		 */
		protected abstract void processAndEmitWatermark(Watermark mark);

	}
}