/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.runners.direct;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.auto.value.AutoValue;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.MoreObjects;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Ordering;
import com.google.common.collect.SortedMultiset;
import com.google.common.collect.Table;
import com.google.common.collect.TreeMultiset;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import javax.annotation.Nullable;
import javax.annotation.concurrent.GuardedBy;
import org.apache.beam.runners.core.StateNamespace;
import org.apache.beam.runners.core.TimerInternals;
import org.apache.beam.runners.core.TimerInternals.TimerData;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.runners.AppliedPTransform;
import org.apache.beam.sdk.state.TimeDomain;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PValue;
import org.apache.beam.sdk.values.TupleTag;
import org.joda.time.Instant;
/**
* Manages watermarks of {@link PCollection PCollections} and input and output watermarks of
* {@link AppliedPTransform AppliedPTransforms} to provide event-time and completion tracking for
* in-memory execution. {@link WatermarkManager} is designed to update and return a
* consistent view of watermarks in the presence of concurrent updates.
*
* <p>A {@link WatermarkManager} is provided with the collection of root
* {@link AppliedPTransform AppliedPTransforms} and a map of {@link PCollection PCollections} to
* all the {@link AppliedPTransform AppliedPTransforms} that consume them at construction time.
*
* <p>Whenever a root {@link AppliedPTransform transform} produces elements, the
* {@link WatermarkManager} is provided with the produced elements and the output watermark
* of the producing {@link AppliedPTransform transform}. The
* {@link WatermarkManager watermark manager} is responsible for computing the watermarks
* of all {@link AppliedPTransform transforms} that consume one or more
* {@link PCollection PCollections}.
*
* <p>Whenever a non-root {@link AppliedPTransform} finishes processing one or more in-flight
* elements (referred to as the input {@link CommittedBundle bundle}), the following occurs
* atomically:
* <ul>
* <li>All of the in-flight elements are removed from the collection of pending elements for the
* {@link AppliedPTransform}.</li>
* <li>All of the elements produced by the {@link AppliedPTransform} are added to the collection
* of pending elements for each {@link AppliedPTransform} that consumes them.</li>
* <li>The input watermark for the {@link AppliedPTransform} becomes the maximum value of
* <ul>
* <li>the previous input watermark</li>
* <li>the minimum of
* <ul>
* <li>the timestamps of all currently pending elements</li>
* <li>all input {@link PCollection} watermarks</li>
* </ul>
* </li>
* </ul>
* </li>
* <li>The output watermark for the {@link AppliedPTransform} becomes the maximum of
* <ul>
* <li>the previous output watermark</li>
* <li>the minimum of
* <ul>
* <li>the current input watermark</li>
* <li>the current watermark holds</li>
* </ul>
* </li>
* </ul>
* </li>
* <li>The watermark of the output {@link PCollection} can be advanced to the output watermark of
* the {@link AppliedPTransform}</li>
* <li>The watermark of all downstream {@link AppliedPTransform AppliedPTransforms} can be
* advanced.</li>
* </ul>
*
* <p>The watermark of a {@link PCollection} is equal to the output watermark of the
* {@link AppliedPTransform} that produces it.
*
* <p>The watermarks for a {@link PTransform} are updated as follows when output is committed:<pre>
* Watermark_In' = MAX(Watermark_In, MIN(U(TS_Pending), U(Watermark_InputPCollection)))
* Watermark_Out' = MAX(Watermark_Out, MIN(Watermark_In', U(StateHold)))
* Watermark_PCollection = Watermark_Out_ProducingPTransform
* </pre>
*/
class WatermarkManager {
// The maximum number of queued updates applied by a single call to #tryApplyPendingUpdates.
// Bounding the batch keeps the time a caller of #updateWatermarks spends applying updates small.
private static final int MAX_INCREMENTAL_UPDATES = 10;
/**
* The watermark of some {@link Pipeline} element, usually a {@link PTransform} or a
* {@link PCollection}.
*
* <p>A watermark is a monotonically increasing value, which represents the point up to which the
* system believes it has received all of the data. Data that arrives with a timestamp that is
* before the watermark is considered late. {@link BoundedWindow#TIMESTAMP_MAX_VALUE} is a special
* timestamp which indicates we have received all of the data and there will be no more on-time or
* late data. This value is represented by {@link WatermarkManager#THE_END_OF_TIME}.
*/
private interface Watermark {
/**
* Returns the current value of this watermark.
*/
Instant get();
/**
* Refreshes the value of this watermark from its input watermarks and watermark holds.
*
* @return {@link WatermarkUpdate#ADVANCED} if the value of the watermark is later than it was
* before the refresh (and thus dependent watermarks must also be updated), otherwise
* {@link WatermarkUpdate#NO_CHANGE}
*/
WatermarkUpdate refresh();
}
/**
 * Describes whether recomputing a {@link Watermark} moved its value forward.
 */
private enum WatermarkUpdate {
  /** The recomputed value is strictly later than the value from the previous computation. */
  ADVANCED(true),
  /** The recomputed value is not later than the value from the previous computation. */
  NO_CHANGE(false);

  private final boolean advanced;

  WatermarkUpdate(boolean advanced) {
    this.advanced = advanced;
  }

  /** Returns {@code true} only for {@link #ADVANCED}. */
  public boolean isAdvanced() {
    return advanced;
  }

  /**
   * Combines this update with another one.
   *
   * <p>The result is advanced if either operand is advanced: an advanced receiver wins,
   * otherwise the argument is returned as-is.
   */
  public WatermarkUpdate union(WatermarkUpdate that) {
    return advanced ? this : that;
  }

  /**
   * Classifies the transition from {@code oldTime} to {@code currentTime}: {@link #ADVANCED}
   * when {@code currentTime} is strictly after {@code oldTime}, {@link #NO_CHANGE} otherwise.
   */
  public static WatermarkUpdate fromTimestamps(Instant oldTime, Instant currentTime) {
    return currentTime.isAfter(oldTime) ? ADVANCED : NO_CHANGE;
  }
}
/**
 * The input {@link Watermark} of an {@link AppliedPTransform}.
 *
 * <p>At any point, the value of an {@link AppliedPTransformInputWatermark} is equal to the
 * minimum watermark across all of its input {@link Watermark Watermarks}, and the minimum
 * timestamp of all of the pending elements, restricted to be monotonically increasing.
 *
 * <p>This class also tracks the event-time timers of the transform: which timers are still
 * waiting to fire (per key) and the timestamps of all timers that have been set but not yet
 * reported as completed.
 *
 * <p>See {@link #refresh()} for more information.
 */
private static class AppliedPTransformInputWatermark implements Watermark {
  private final Collection<? extends Watermark> inputWatermarks;
  private final SortedMultiset<CommittedBundle<?>> pendingElements;

  // This tracks only the quantity of timers at each timestamp, for quickly getting the cross-key
  // minimum. A timestamp is added when an event-time timer is set and removed when that timer is
  // replaced, deleted, or reported as completed.
  private final SortedMultiset<Instant> pendingTimers;

  // Entries in this table represent the authoritative timestamp for which
  // a per-key-and-StateNamespace timer is set.
  private final Map<StructuralKey<?>, Table<StateNamespace, String, TimerData>> existingTimers;

  // This per-key sorted set allows quick retrieval of timers that should fire for a key. A timer
  // leaves its set when it is extracted as fired, replaced, or deleted.
  private final Map<StructuralKey<?>, NavigableSet<TimerData>> objectTimers;

  // Never reassigned; only the referenced value changes. get() reads it without synchronization.
  private final AtomicReference<Instant> currentWatermark;

  public AppliedPTransformInputWatermark(Collection<? extends Watermark> inputWatermarks) {
    this.inputWatermarks = inputWatermarks;
    // The ordering must order elements by timestamp, and must not compare two distinct elements
    // as equal. This is built on the assumption that any element added as a pending element will
    // be consumed without modifications.
    //
    // The same logic is applied for pending timers
    Ordering<CommittedBundle<?>> pendingBundleComparator =
        new BundleByElementTimestampComparator().compound(Ordering.arbitrary());
    this.pendingElements =
        TreeMultiset.create(pendingBundleComparator);
    this.pendingTimers = TreeMultiset.create();
    this.objectTimers = new HashMap<>();
    this.existingTimers = new HashMap<>();
    currentWatermark = new AtomicReference<>(BoundedWindow.TIMESTAMP_MIN_VALUE);
  }

  @Override
  public Instant get() {
    return currentWatermark.get();
  }

  /**
   * {@inheritDoc}.
   *
   * <p>When refresh is called, the value of the {@link AppliedPTransformInputWatermark} becomes
   * equal to the maximum value of
   * <ul>
   *   <li>the previous input watermark</li>
   *   <li>the minimum of
   *     <ul>
   *       <li>the timestamps of all currently pending elements</li>
   *       <li>all input {@link PCollection} watermarks</li>
   *     </ul>
   *   </li>
   * </ul>
   */
  @Override
  public synchronized WatermarkUpdate refresh() {
    Instant oldWatermark = currentWatermark.get();
    Instant minInputWatermark = BoundedWindow.TIMESTAMP_MAX_VALUE;
    for (Watermark inputWatermark : inputWatermarks) {
      minInputWatermark = INSTANT_ORDERING.min(minInputWatermark, inputWatermark.get());
    }
    if (!pendingElements.isEmpty()) {
      // pendingElements is sorted by minimum element timestamp, so the first entry is the earliest.
      minInputWatermark =
          INSTANT_ORDERING.min(
              minInputWatermark, pendingElements.firstEntry().getElement().getMinTimestamp());
    }
    // Taking the max with the previous value keeps the watermark from moving backwards.
    Instant newWatermark = INSTANT_ORDERING.max(oldWatermark, minInputWatermark);
    currentWatermark.set(newWatermark);
    return WatermarkUpdate.fromTimestamps(oldWatermark, newWatermark);
  }

  private synchronized void addPending(CommittedBundle<?> newPending) {
    pendingElements.add(newPending);
  }

  private synchronized void removePending(CommittedBundle<?> completed) {
    pendingElements.remove(completed);
  }

  /**
   * Returns the earliest timestamp among event-time timers that have been set and not yet
   * completed, or {@link BoundedWindow#TIMESTAMP_MAX_VALUE} if there are none.
   */
  private synchronized Instant getEarliestTimerTimestamp() {
    if (pendingTimers.isEmpty()) {
      return BoundedWindow.TIMESTAMP_MAX_VALUE;
    } else {
      return pendingTimers.firstEntry().getElement();
    }
  }

  /**
   * Applies the set, deleted and completed timers of {@code update} for the update's key. Only
   * timers in the {@link TimeDomain#EVENT_TIME} domain are considered; all others are ignored.
   */
  private synchronized void updateTimers(TimerUpdate update) {
    NavigableSet<TimerData> keyTimers = objectTimers.get(update.key);
    if (keyTimers == null) {
      keyTimers = new TreeSet<>();
      objectTimers.put(update.key, keyTimers);
    }
    Table<StateNamespace, String, TimerData> existingTimersForKey =
        existingTimers.get(update.key);
    if (existingTimersForKey == null) {
      existingTimersForKey = HashBasedTable.create();
      existingTimers.put(update.key, existingTimersForKey);
    }

    for (TimerData timer : update.getSetTimers()) {
      if (TimeDomain.EVENT_TIME.equals(timer.getDomain())) {
        @Nullable
        TimerData existingTimer =
            existingTimersForKey.get(timer.getNamespace(), timer.getTimerId());
        if (existingTimer == null) {
          pendingTimers.add(timer.getTimestamp());
          keyTimers.add(timer);
        } else if (!existingTimer.equals(timer)) {
          // The timer is being reset. If the previous incarnation is still queued it has not
          // fired, so no completion will ever release its timestamp: release it here. If it is
          // no longer queued it has already been extracted as fired, and its timestamp is
          // released by the completed-timers loop below (now or in a later update).
          if (keyTimers.remove(existingTimer)) {
            pendingTimers.remove(existingTimer.getTimestamp());
          }
          // The replacement must hold the output watermark until it completes.
          pendingTimers.add(timer.getTimestamp());
          keyTimers.add(timer);
        } // else the timer is already set identically, so noop
        existingTimersForKey.put(timer.getNamespace(), timer.getTimerId(), timer);
      }
    }

    for (TimerData timer : update.getDeletedTimers()) {
      if (TimeDomain.EVENT_TIME.equals(timer.getDomain())) {
        @Nullable
        TimerData existingTimer =
            existingTimersForKey.get(timer.getNamespace(), timer.getTimerId());
        if (existingTimer != null) {
          // Delete whatever is authoritatively set for this namespace and id, which is not
          // necessarily equal to the TimerData carried by the update.
          pendingTimers.remove(existingTimer.getTimestamp());
          keyTimers.remove(existingTimer);
          existingTimersForKey.remove(existingTimer.getNamespace(), existingTimer.getTimerId());
        }
      }
    }

    for (TimerData timer : update.getCompletedTimers()) {
      if (TimeDomain.EVENT_TIME.equals(timer.getDomain())) {
        // Completed timers are not removed from keyTimers here; firing extracts them from there.
        pendingTimers.remove(timer.getTimestamp());
      }
    }
  }

  /**
   * Removes and returns, grouped by key, the event-time timers whose timestamp is before the
   * current value of this watermark.
   */
  private synchronized Map<StructuralKey<?>, List<TimerData>> extractFiredEventTimeTimers() {
    return extractFiredTimers(currentWatermark.get(), objectTimers);
  }

  @Override
  public synchronized String toString() {
    return MoreObjects.toStringHelper(AppliedPTransformInputWatermark.class)
        .add("pendingElements", pendingElements)
        .add("currentWatermark", currentWatermark)
        .toString();
  }
}
/**
 * The output {@link Watermark} of an {@link AppliedPTransform}.
 *
 * <p>The value of an {@link AppliedPTransformOutputWatermark} is equal to the minimum of the
 * current watermark hold, the earliest pending event-time timer, and the
 * {@link AppliedPTransformInputWatermark} for the same {@link AppliedPTransform}, restricted to
 * be monotonically increasing. See {@link #refresh()} for more information.
 */
private static class AppliedPTransformOutputWatermark implements Watermark {
  private final AppliedPTransformInputWatermark inputWatermark;
  private final PerKeyHolds holds;
  // Never reassigned; only the referenced value changes. get() reads it without synchronization.
  private final AtomicReference<Instant> currentWatermark;

  public AppliedPTransformOutputWatermark(
      AppliedPTransformInputWatermark inputWatermark) {
    this.inputWatermark = inputWatermark;
    holds = new PerKeyHolds();
    currentWatermark = new AtomicReference<>(BoundedWindow.TIMESTAMP_MIN_VALUE);
  }

  /**
   * Sets the watermark hold for {@code key} to {@code newHold}, or removes the hold for the key
   * when {@code newHold} is {@code null}.
   */
  public synchronized void updateHold(Object key, Instant newHold) {
    if (newHold == null) {
      holds.removeHold(key);
    } else {
      holds.updateHold(key, newHold);
    }
  }

  @Override
  public Instant get() {
    return currentWatermark.get();
  }

  /**
   * {@inheritDoc}.
   *
   * <p>When refresh is called, the value of the {@link AppliedPTransformOutputWatermark} becomes
   * equal to the maximum value of:
   * <ul>
   *   <li>the previous output watermark</li>
   *   <li>the minimum of
   *     <ul>
   *       <li>the current input watermark</li>
   *       <li>the earliest pending event-time timer of the input watermark</li>
   *       <li>the current watermark holds</li>
   *     </ul>
   *   </li>
   * </ul>
   */
  @Override
  public synchronized WatermarkUpdate refresh() {
    Instant oldWatermark = currentWatermark.get();
    Instant newWatermark = INSTANT_ORDERING.min(
        inputWatermark.get(),
        inputWatermark.getEarliestTimerTimestamp(),
        holds.getMinHold());
    // Taking the max with the previous value keeps the watermark from moving backwards.
    newWatermark = INSTANT_ORDERING.max(oldWatermark, newWatermark);
    currentWatermark.set(newWatermark);
    return WatermarkUpdate.fromTimestamps(oldWatermark, newWatermark);
  }

  @Override
  public synchronized String toString() {
    return MoreObjects.toStringHelper(AppliedPTransformOutputWatermark.class)
        .add("holds", holds)
        .add("currentWatermark", currentWatermark)
        .toString();
  }
}
/**
 * The input {@link TimeDomain#SYNCHRONIZED_PROCESSING_TIME} hold for an
 * {@link AppliedPTransform}.
 *
 * <p>At any point, the hold value of a {@link SynchronizedProcessingTimeInputWatermark} is equal
 * to the minimum across all pending bundles at the {@link AppliedPTransform} and all upstream
 * {@link TimeDomain#SYNCHRONIZED_PROCESSING_TIME} watermarks. The value of the input
 * synchronized processing time at any step is equal to the maximum of:
 * <ul>
 *   <li>The most recently returned synchronized processing input time
 *   <li>The minimum of
 *     <ul>
 *       <li>The current processing time
 *       <li>The current synchronized processing time input hold
 *     </ul>
 * </ul>
 */
private static class SynchronizedProcessingTimeInputWatermark implements Watermark {
  private final Collection<? extends Watermark> inputWms;
  private final Collection<CommittedBundle<?>> pendingBundles;
  // Per-key timers that have been set and have not yet been extracted as fired.
  private final Map<StructuralKey<?>, NavigableSet<TimerData>> processingTimers;
  private final Map<StructuralKey<?>, NavigableSet<TimerData>> synchronizedProcessingTimers;
  // Timers that have been extracted as fired but have not yet been reported as completed.
  private final NavigableSet<TimerData> pendingTimers;
  // Never reassigned; only the referenced value changes. get() reads it without synchronization.
  private final AtomicReference<Instant> earliestHold;

  public SynchronizedProcessingTimeInputWatermark(Collection<? extends Watermark> inputWms) {
    this.inputWms = inputWms;
    this.pendingBundles = new HashSet<>();
    this.processingTimers = new HashMap<>();
    this.synchronizedProcessingTimers = new HashMap<>();
    this.pendingTimers = new TreeSet<>();
    // Start at the minimum of the upstream watermarks as they are at construction time.
    Instant initialHold = BoundedWindow.TIMESTAMP_MAX_VALUE;
    for (Watermark wm : inputWms) {
      initialHold = INSTANT_ORDERING.min(initialHold, wm.get());
    }
    earliestHold = new AtomicReference<>(initialHold);
  }

  @Override
  public Instant get() {
    return earliestHold.get();
  }

  /**
   * {@inheritDoc}.
   *
   * <p>When refresh is called, the value of the {@link SynchronizedProcessingTimeInputWatermark}
   * becomes equal to the minimum value of
   * <ul>
   *   <li>the timestamps of all currently pending bundles</li>
   *   <li>all input {@link PCollection} synchronized processing time watermarks</li>
   * </ul>
   *
   * <p>Note that this value is not monotonic, but the returned value for the synchronized
   * processing time must be.
   */
  @Override
  public synchronized WatermarkUpdate refresh() {
    Instant oldHold = earliestHold.get();
    Instant minTime = THE_END_OF_TIME.get();
    for (Watermark input : inputWms) {
      minTime = INSTANT_ORDERING.min(minTime, input.get());
    }
    for (CommittedBundle<?> bundle : pendingBundles) {
      // TODO: Track elements in the bundle by the processing time they were output instead of
      // entire bundles. Required to support arbitrarily splitting and merging bundles between
      // steps
      minTime = INSTANT_ORDERING.min(minTime, bundle.getSynchronizedProcessingOutputWatermark());
    }
    // Unlike the event-time watermarks, the hold is stored as computed and may move backwards.
    earliestHold.set(minTime);
    return WatermarkUpdate.fromTimestamps(oldHold, minTime);
  }

  public synchronized void addPending(CommittedBundle<?> bundle) {
    pendingBundles.add(bundle);
  }

  public synchronized void removePending(CommittedBundle<?> bundle) {
    pendingBundles.remove(bundle);
  }

  /**
   * Return the timestamp of the earliest timer that has not been completed. This is either the
   * earliest timestamp across timers that have been set but not yet fired, or the earliest
   * timestamp across timers that have been fired but have not been completed. Returns the value
   * of {@code THE_END_OF_TIME} if there are no such timers.
   */
  public synchronized Instant getEarliestTimerTimestamp() {
    Instant earliest = THE_END_OF_TIME.get();
    for (NavigableSet<TimerData> timers : processingTimers.values()) {
      if (!timers.isEmpty()) {
        earliest = INSTANT_ORDERING.min(timers.first().getTimestamp(), earliest);
      }
    }
    for (NavigableSet<TimerData> timers : synchronizedProcessingTimers.values()) {
      if (!timers.isEmpty()) {
        earliest = INSTANT_ORDERING.min(timers.first().getTimestamp(), earliest);
      }
    }
    if (!pendingTimers.isEmpty()) {
      earliest = INSTANT_ORDERING.min(pendingTimers.first().getTimestamp(), earliest);
    }
    return earliest;
  }

  /**
   * Applies the set, completed and deleted timers of {@code update} for the update's key. Timers
   * whose domain is neither {@link TimeDomain#PROCESSING_TIME} nor
   * {@link TimeDomain#SYNCHRONIZED_PROCESSING_TIME} are ignored when setting and deleting.
   */
  private synchronized void updateTimers(TimerUpdate update) {
    Map<TimeDomain, NavigableSet<TimerData>> timerMap = timerMap(update.key);
    for (TimerData addedTimer : update.setTimers) {
      NavigableSet<TimerData> timerQueue = timerMap.get(addedTimer.getDomain());
      if (timerQueue != null) {
        timerQueue.add(addedTimer);
      }
    }

    for (TimerData completedTimer : update.completedTimers) {
      pendingTimers.remove(completedTimer);
    }

    for (TimerData deletedTimer : update.deletedTimers) {
      NavigableSet<TimerData> timerQueue = timerMap.get(deletedTimer.getDomain());
      if (timerQueue != null) {
        timerQueue.remove(deletedTimer);
      }
    }
  }

  /**
   * Removes and returns, grouped by key, the timers of {@code domain} that are before
   * {@code firingTime}. Synchronized processing time timers are additionally bounded by the
   * current hold of this watermark. Every returned timer is remembered as pending until it is
   * reported as completed through {@link #updateTimers(TimerUpdate)}.
   *
   * @throws IllegalArgumentException if {@code domain} is not a processing time domain
   */
  private synchronized Map<StructuralKey<?>, List<TimerData>> extractFiredDomainTimers(
      TimeDomain domain, Instant firingTime) {
    Map<StructuralKey<?>, List<TimerData>> firedTimers;
    switch (domain) {
      case PROCESSING_TIME:
        firedTimers = extractFiredTimers(firingTime, processingTimers);
        break;
      case SYNCHRONIZED_PROCESSING_TIME:
        firedTimers =
            extractFiredTimers(
                INSTANT_ORDERING.min(firingTime, earliestHold.get()),
                synchronizedProcessingTimers);
        break;
      default:
        throw new IllegalArgumentException(
            "Called getFiredTimers on a Synchronized Processing Time watermark"
                + " and gave a non-processing time domain "
                + domain);
    }
    for (Map.Entry<StructuralKey<?>, ? extends Collection<TimerData>> firedTimer :
        firedTimers.entrySet()) {
      pendingTimers.addAll(firedTimer.getValue());
    }
    return firedTimers;
  }

  /**
   * Returns the timer queues of {@code key} indexed by time domain, creating empty queues for
   * the key if it has none yet. The result has entries only for the two processing time domains.
   */
  private Map<TimeDomain, NavigableSet<TimerData>> timerMap(StructuralKey<?> key) {
    NavigableSet<TimerData> processingQueue = processingTimers.get(key);
    if (processingQueue == null) {
      processingQueue = new TreeSet<>();
      processingTimers.put(key, processingQueue);
    }
    NavigableSet<TimerData> synchronizedProcessingQueue =
        synchronizedProcessingTimers.get(key);
    if (synchronizedProcessingQueue == null) {
      synchronizedProcessingQueue = new TreeSet<>();
      synchronizedProcessingTimers.put(key, synchronizedProcessingQueue);
    }
    EnumMap<TimeDomain, NavigableSet<TimerData>> result = new EnumMap<>(TimeDomain.class);
    result.put(TimeDomain.PROCESSING_TIME, processingQueue);
    result.put(TimeDomain.SYNCHRONIZED_PROCESSING_TIME, synchronizedProcessingQueue);
    return result;
  }

  @Override
  public synchronized String toString() {
    return MoreObjects.toStringHelper(SynchronizedProcessingTimeInputWatermark.class)
        .add("earliestHold", earliestHold)
        .toString();
  }
}
/**
 * The output {@link TimeDomain#SYNCHRONIZED_PROCESSING_TIME} hold for an
 * {@link AppliedPTransform}.
 *
 * <p>At any point, the hold value of a {@link SynchronizedProcessingTimeOutputWatermark} is
 * equal to the minimum across all incomplete timers at the {@link AppliedPTransform} and all
 * upstream {@link TimeDomain#SYNCHRONIZED_PROCESSING_TIME} watermarks. The value of the output
 * synchronized processing time at any step is equal to the maximum of:
 * <ul>
 *   <li>The most recently returned synchronized processing output time
 *   <li>The minimum of
 *     <ul>
 *       <li>The current processing time
 *       <li>The current synchronized processing time output hold
 *     </ul>
 * </ul>
 */
private static class SynchronizedProcessingTimeOutputWatermark implements Watermark {
  private final SynchronizedProcessingTimeInputWatermark inputWm;
  // Never reassigned; only the referenced value changes. get() reads it without synchronization.
  private final AtomicReference<Instant> latestRefresh;

  public SynchronizedProcessingTimeOutputWatermark(
      SynchronizedProcessingTimeInputWatermark inputWm) {
    this.inputWm = inputWm;
    this.latestRefresh = new AtomicReference<>(BoundedWindow.TIMESTAMP_MIN_VALUE);
  }

  @Override
  public Instant get() {
    return latestRefresh.get();
  }

  /**
   * {@inheritDoc}.
   *
   * <p>When refresh is called, the value of the {@link SynchronizedProcessingTimeOutputWatermark}
   * becomes equal to the minimum value of:
   * <ul>
   *   <li>the current input watermark.
   *   <li>all {@link TimeDomain#SYNCHRONIZED_PROCESSING_TIME} timers that are based on the input
   *       watermark.
   *   <li>all {@link TimeDomain#PROCESSING_TIME} timers that are based on the input watermark.
   * </ul>
   *
   * <p>Note that this value is not monotonic, but the returned value for the synchronized
   * processing time must be.
   */
  @Override
  public synchronized WatermarkUpdate refresh() {
    // Hold the output synchronized processing time to the input watermark, which takes into
    // account buffered bundles, and the earliest pending timer, which determines what to hold
    // downstream timers to.
    Instant oldRefresh = latestRefresh.get();
    Instant newTimestamp =
        INSTANT_ORDERING.min(inputWm.get(), inputWm.getEarliestTimerTimestamp());
    // The value is stored as computed (no max with the old value), so it may move backwards.
    latestRefresh.set(newTimestamp);
    return WatermarkUpdate.fromTimestamps(oldRefresh, newTimestamp);
  }

  @Override
  public synchronized String toString() {
    return MoreObjects.toStringHelper(SynchronizedProcessingTimeOutputWatermark.class)
        .add("latestRefresh", latestRefresh)
        .toString();
  }
}
/**
 * A {@code Watermark} permanently fixed at {@link BoundedWindow#TIMESTAMP_MAX_VALUE}, the latest
 * time it is possible to represent in the global window. This distinguished value represents a
 * complete {@link PTransform}.
 */
private static final Watermark THE_END_OF_TIME = new Watermark() {
  @Override
  public Instant get() {
    return BoundedWindow.TIMESTAMP_MAX_VALUE;
  }

  @Override
  public WatermarkUpdate refresh() {
    // The value is constant, so refreshing can never advance it.
    return WatermarkUpdate.NO_CHANGE;
  }
};
// Natural (chronological) ordering of Instants, used for the min/max computations on watermarks.
private static final Ordering<Instant> INSTANT_ORDERING = Ordering.natural();
/**
 * For each (key, NavigableSet) pair in the provided map, removes every timer whose timestamp is
 * before the {@code latestTime} argument and puts it in the result under the same key, then
 * removes from the provided map all of the keys which have no more pending timers.
 *
 * <p>Keys for which no timer fired do not appear in the result. The per-key result lists retain
 * the ordering of the timers (from earliest to latest).
 */
private static Map<StructuralKey<?>, List<TimerData>> extractFiredTimers(
    Instant latestTime, Map<StructuralKey<?>, NavigableSet<TimerData>> objectTimers) {
  Map<StructuralKey<?>, List<TimerData>> firedByKey = new HashMap<>();
  Set<StructuralKey<?>> drainedKeys = new HashSet<>();
  for (Map.Entry<StructuralKey<?>, NavigableSet<TimerData>> entry : objectTimers.entrySet()) {
    StructuralKey<?> key = entry.getKey();
    NavigableSet<TimerData> queue = entry.getValue();
    // Created lazily so that only keys with at least one fired timer appear in the result.
    List<TimerData> fired = null;
    while (!queue.isEmpty() && queue.first().getTimestamp().isBefore(latestTime)) {
      if (fired == null) {
        fired = new ArrayList<>();
        firedByKey.put(key, fired);
      }
      fired.add(queue.pollFirst());
    }
    if (queue.isEmpty()) {
      drainedKeys.add(key);
    }
  }
  // Removal is deferred until after the loop to avoid modifying the map while iterating over it.
  objectTimers.keySet().removeAll(drainedKeys);
  return firedByKey;
}
////////////////////////////////////////////////////////////////////////////////////////////////
/**
* The {@link Clock} providing the current time in the {@link TimeDomain#PROCESSING_TIME} domain.
*/
private final Clock clock;
/**
* The {@link DirectGraph} representing the {@link Pipeline} this {@link WatermarkManager} tracks
* watermarks for. It is consulted to find the producer of each input of a transform.
*/
private final DirectGraph graph;
/**
* The input and output watermark of each {@link AppliedPTransform}. Entries are created for the
* root and primitive transforms of the graph when this {@link WatermarkManager} is constructed.
*/
private final Map<AppliedPTransform<?, ?, ?>, TransformWatermarks> transformToWatermarks;
/**
* A queue of pending updates to the state of this {@link WatermarkManager}. Updates are enqueued
* by {@link #updateWatermarks} and dequeued when they are applied.
*/
private final ConcurrentLinkedQueue<PendingWatermarkUpdate> pendingUpdates;
/**
* A lock used to control concurrency for updating pending values.
*/
private final Lock refreshLock;
/**
* The set of {@link AppliedPTransform AppliedPTransforms} that have potentially stale data. A
* transform is added when its initial bundles are registered and whenever a pending update for
* it is applied.
*/
@GuardedBy("refreshLock")
private final Set<AppliedPTransform<?, ?, ?>> pendingRefreshes;
/**
 * Static factory for {@link WatermarkManager}. Every watermark in the returned manager starts
 * at {@link BoundedWindow#TIMESTAMP_MIN_VALUE}, the minimum watermark, and there are no
 * watermark holds or pending elements.
 *
 * @param clock the clock to use to determine processing time
 * @param graph the graph representing this pipeline
 * @return a newly constructed manager for the provided graph
 */
public static WatermarkManager create(Clock clock, DirectGraph graph) {
  WatermarkManager manager = new WatermarkManager(clock, graph);
  return manager;
}
/**
 * Initializes the bookkeeping structures and eagerly creates the watermarks of every root and
 * primitive transform in {@code graph}.
 */
private WatermarkManager(Clock clock, DirectGraph graph) {
  this.clock = clock;
  this.graph = graph;
  this.refreshLock = new ReentrantLock();
  this.pendingUpdates = new ConcurrentLinkedQueue<>();
  this.pendingRefreshes = new HashSet<>();
  this.transformToWatermarks = new HashMap<>();

  // Roots first, then all primitives; the lookup creates the watermarks as a side effect and is
  // a no-op for transforms that already have them.
  for (AppliedPTransform<?, ?, ?> root : graph.getRootTransforms()) {
    getTransformWatermark(root);
  }
  for (AppliedPTransform<?, ?, ?> primitive : graph.getPrimitiveTransforms()) {
    getTransformWatermark(primitive);
  }
}
/**
 * Returns the {@link TransformWatermarks} of {@code transform}, creating and registering them
 * on first use. Creation recursively obtains the watermarks of the producers of every input of
 * the transform.
 */
private TransformWatermarks getTransformWatermark(AppliedPTransform<?, ?, ?> transform) {
  TransformWatermarks known = transformToWatermarks.get(transform);
  if (known != null) {
    return known;
  }

  // Event-time watermarks: the output watermark is derived from the input watermark.
  AppliedPTransformInputWatermark eventTimeInput =
      new AppliedPTransformInputWatermark(getInputWatermarks(transform));
  AppliedPTransformOutputWatermark eventTimeOutput =
      new AppliedPTransformOutputWatermark(eventTimeInput);

  // Synchronized processing time watermarks, wired the same way.
  SynchronizedProcessingTimeInputWatermark processingInput =
      new SynchronizedProcessingTimeInputWatermark(getInputProcessingWatermarks(transform));
  SynchronizedProcessingTimeOutputWatermark processingOutput =
      new SynchronizedProcessingTimeOutputWatermark(processingInput);

  TransformWatermarks created =
      new TransformWatermarks(
          transform, eventTimeInput, eventTimeOutput, processingInput, processingOutput);
  transformToWatermarks.put(transform, created);
  return created;
}
/**
 * Collects the synchronized processing time output watermark of the producer of each input of
 * {@code transform}. A transform without inputs is given {@code THE_END_OF_TIME} as its only
 * input watermark.
 */
private Collection<Watermark> getInputProcessingWatermarks(AppliedPTransform<?, ?, ?> transform) {
  Map<TupleTag<?>, PValue> inputs = transform.getInputs();
  if (inputs.isEmpty()) {
    return ImmutableList.of(THE_END_OF_TIME);
  }
  ImmutableList.Builder<Watermark> upstream = ImmutableList.builder();
  for (PValue input : inputs.values()) {
    TransformWatermarks producerWatermarks = getTransformWatermark(graph.getProducer(input));
    upstream.add(producerWatermarks.synchronizedProcessingOutputWatermark);
  }
  return upstream.build();
}
/**
 * Collects the event-time output watermark of the producer of each input of {@code transform}.
 * A transform without inputs is given {@code THE_END_OF_TIME} as its only input watermark.
 */
private List<Watermark> getInputWatermarks(AppliedPTransform<?, ?, ?> transform) {
  Map<TupleTag<?>, PValue> inputs = transform.getInputs();
  if (inputs.isEmpty()) {
    return ImmutableList.of(THE_END_OF_TIME);
  }
  ImmutableList.Builder<Watermark> upstream = ImmutableList.builder();
  for (PValue input : inputs.values()) {
    TransformWatermarks producerWatermarks = getTransformWatermark(graph.getProducer(input));
    upstream.add(producerWatermarks.outputWatermark);
  }
  return upstream.build();
}
////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Gets the input and output watermarks for an {@link AppliedPTransform}. The watermarks of a
 * transform that has not processed any elements are still at their initial values.
 *
 * @return the {@link TransformWatermarks} registered for the provided transform, or
 *     {@code null} if this manager has none for it
 */
public TransformWatermarks getWatermarks(AppliedPTransform<?, ?, ?> transform) {
  TransformWatermarks watermarks = transformToWatermarks.get(transform);
  return watermarks;
}
/**
 * Registers the initial bundles of each transform in {@code initialBundles} as pending input of
 * that transform and marks the transform as needing a refresh. The whole registration happens
 * while holding the refresh lock.
 *
 * @param initialBundles the bundles to register, keyed by the transform that will consume them
 */
public void initialize(
    Map<AppliedPTransform<?, ?, ?>, ? extends Iterable<CommittedBundle<?>>> initialBundles) {
  refreshLock.lock();
  try {
    for (Map.Entry<AppliedPTransform<?, ?, ?>, ? extends Iterable<CommittedBundle<?>>> entry :
        initialBundles.entrySet()) {
      AppliedPTransform<?, ?, ?> root = entry.getKey();
      TransformWatermarks watermarks = transformToWatermarks.get(root);
      for (CommittedBundle<?> bundle : entry.getValue()) {
        watermarks.addPending(bundle);
      }
      pendingRefreshes.add(root);
    }
  } finally {
    refreshLock.unlock();
  }
}
/**
 * Records the completion of some input of a transform and makes a bounded attempt to apply
 * queued updates.
 *
 * <p>The update is only enqueued here; it is applied by whichever thread next holds the refresh
 * lock, which may or may not be the calling thread.
 *
 * <p>Each transform has two monotonically increasing watermarks: the input watermark, which
 * can, at any time, be updated to equal:
 * <pre>
 * MAX(CurrentInputWatermark, MIN(PendingElements, InputPCollectionWatermarks))
 * </pre>
 * and the output watermark, which can, at any time, be updated to equal:
 * <pre>
 * MAX(CurrentOutputWatermark, MIN(InputWatermark, WatermarkHolds))
 * </pre>.
 *
 * @param completed the input that has completed
 * @param timerUpdate the timers that were added, removed, and completed as part of producing
 *     this update
 * @param result the result that was produced by processing the input
 * @param earliestHold the earliest watermark hold in the transform's state. {@code null} if
 *     there is no hold
 */
public void updateWatermarks(
    @Nullable CommittedBundle<?> completed,
    TimerUpdate timerUpdate,
    CommittedResult result,
    Instant earliestHold) {
  PendingWatermarkUpdate update =
      PendingWatermarkUpdate.create(completed, timerUpdate, result, earliestHold);
  pendingUpdates.offer(update);
  tryApplyPendingUpdates();
}
/**
 * Applies up to {@code MAX_INCREMENTAL_UPDATES} queued updates if the refresh lock can be
 * acquired without waiting; otherwise does nothing.
 */
private void tryApplyPendingUpdates() {
  if (!refreshLock.tryLock()) {
    // Another thread holds the lock; leave the queued updates to be applied later.
    return;
  }
  try {
    applyNUpdates(MAX_INCREMENTAL_UPDATES);
  } finally {
    refreshLock.unlock();
  }
}
/**
 * Waits for the refresh lock and then applies every queued update to this
 * {@link WatermarkManager}, causing the pending state of all {@link TransformWatermarks} to be
 * advanced as far as possible.
 */
private void applyAllPendingUpdates() {
  // A non-positive count asks applyNUpdates to apply everything that is queued.
  final int everything = -1;
  refreshLock.lock();
  try {
    applyNUpdates(everything);
  } finally {
    refreshLock.unlock();
  }
}
/**
 * Applies up to {@code numUpdates} queued updates, or every available update if
 * {@code numUpdates} is non-positive. The transform of each applied update is added to
 * {@code pendingRefreshes}.
 *
 * @param numUpdates the maximum number of updates to apply; zero or negative means no limit
 */
@GuardedBy("refreshLock")
private void applyNUpdates(int numUpdates) {
  final boolean unbounded = numUpdates <= 0;
  int applied = 0;
  while (!pendingUpdates.isEmpty() && (unbounded || applied < numUpdates)) {
    PendingWatermarkUpdate next = pendingUpdates.poll();
    applyPendingUpdate(next);
    pendingRefreshes.add(next.getTransform());
    applied++;
  }
}
/**
 * Applies a single queued update: first the pending elements and timers are adjusted, then the
 * event-time hold of the producing transform is set for the key of the input bundle.
 *
 * @param pending the queued update to apply
 */
private void applyPendingUpdate(PendingWatermarkUpdate pending) {
  CommittedBundle<?> consumedInput = pending.getInputBundle();
  CommittedResult committed = pending.getResult();
  updatePending(consumedInput, pending.getTimerUpdate(), committed);

  // Without an input bundle there is no key, so the hold is recorded under the null key.
  Object holdKey = null;
  if (consumedInput != null) {
    holdKey = consumedInput.getKey();
  }
  TransformWatermarks producerWatermarks = transformToWatermarks.get(committed.getTransform());
  producerWatermarks.setEventTimeHold(holdKey, pending.getEarliestHold());
}
/**
 * Updates the pending elements and timers affected by a completed result, in this order:
 * <ol>
 *   <li>every produced bundle is added as pending for each of its consumers;
 *   <li>if there was an input, the unprocessed inputs of the result are added back as pending
 *       for the producing transform;
 *   <li>the timer update is applied to the producing transform;
 *   <li>if there was an input, it is removed from the pending elements of the producing
 *       transform.
 * </ol>
 *
 * <p>Newly pending elements must be added before the completed element is removed. Otherwise a
 * watermark could be observed in a state in which the upstream (completed) element no longer
 * holds the watermark but the element it produced is not yet pending, which can cause the
 * watermark to advance erroneously.
 */
private void updatePending(
    CommittedBundle<?> input,
    TimerUpdate timerUpdate,
    CommittedResult result) {
  // Additions come first: the add and remove steps do not share a mutex within this call, so
  // they can be interleaved with external calls to refresh.
  for (CommittedBundle<?> produced : result.getOutputs()) {
    for (AppliedPTransform<?, ?, ?> downstream :
        graph.getPrimitiveConsumers(produced.getPCollection())) {
      transformToWatermarks.get(downstream).addPending(produced);
    }
  }

  final boolean hadInput = input != null;
  TransformWatermarks producer = transformToWatermarks.get(result.getTransform());
  if (hadInput) {
    // Whatever part of the input was left unprocessed becomes pending again.
    producer.addPending(result.getUnprocessedInputs());
  }
  producer.updateTimers(timerUpdate);
  if (hadInput) {
    producer.removePending(input);
  }
}
/**
 * Refresh the watermarks contained within this {@link WatermarkManager}, causing all
 * watermarks to be advanced as far as possible.
 *
 * <p>All queued updates are applied first. The transforms in {@code pendingRefreshes} are then
 * refreshed, followed by whatever further transforms each round of refreshing reports, until a
 * round reports none. Finally {@code pendingRefreshes} is emptied so that the transforms
 * handled here are not refreshed again by the next call unless a new update re-queues them.
 */
synchronized void refreshAll() {
  refreshLock.lock();
  try {
    applyAllPendingUpdates();
    Set<AppliedPTransform<?, ?, ?>> toRefresh = pendingRefreshes;
    while (!toRefresh.isEmpty()) {
      toRefresh = refreshAllOf(toRefresh);
    }
    // Every queued transform has now been refreshed. Without draining the set it would only
    // ever grow, and each later call would redo the work for every transform ever queued.
    // Done under refreshLock, the same lock held when initialize and applyNUpdates add to it.
    pendingRefreshes.clear();
  } finally {
    refreshLock.unlock();
  }
}
/**
 * Refreshes the watermarks of every transform in {@code toRefresh}.
 *
 * @param toRefresh the transforms whose watermarks should be refreshed
 * @return the union of the follow-up transforms reported by each individual refresh
 */
private Set<AppliedPTransform<?, ?, ?>> refreshAllOf(Set<AppliedPTransform<?, ?, ?>> toRefresh) {
  Set<AppliedPTransform<?, ?, ?>> followUps = new HashSet<>();
  for (AppliedPTransform<?, ?, ?> current : toRefresh) {
    Set<AppliedPTransform<?, ?, ?>> reported = refreshWatermarks(current);
    followUps.addAll(reported);
  }
  return followUps;
}
/**
 * Refreshes the watermarks of a single transform.
 *
 * @param toRefresh the transform whose watermarks should be refreshed
 * @return the primitive consumers of every output of {@code toRefresh} if the refresh reported
 *     an advance, otherwise an empty set
 */
private Set<AppliedPTransform<?, ?, ?>> refreshWatermarks(AppliedPTransform<?, ?, ?> toRefresh) {
  WatermarkUpdate outcome = transformToWatermarks.get(toRefresh).refresh();
  if (!outcome.isAdvanced()) {
    // Nothing moved, so nothing downstream needs to be looked at again.
    return Collections.emptySet();
  }
  Set<AppliedPTransform<?, ?, ?>> downstream = new HashSet<>();
  for (PValue output : toRefresh.getOutputs().values()) {
    downstream.addAll(graph.getPrimitiveConsumers(output));
  }
  return downstream;
}
/**
 * Returns the fired timers of every transform known to this {@link WatermarkManager}, gathered
 * into a single collection while holding {@code refreshLock}.
 *
 * <p>The timers are obtained through each transform's extraction methods; presumably they are
 * no longer pending afterwards (confirm against the watermark implementations).
 *
 * @return a new collection containing the {@link FiredTimers} of all transforms; empty if no
 *     timers fired
 */
public Collection<FiredTimers> extractFiredTimers() {
  Collection<FiredTimers> fired = new ArrayList<>();
  refreshLock.lock();
  try {
    for (TransformWatermarks watermarks : transformToWatermarks.values()) {
      fired.addAll(watermarks.extractFiredTimers());
    }
  } finally {
    refreshLock.unlock();
  }
  return fired;
}
/**
 * A (key, Instant) pair that holds the watermark. Holds are per-key, but the watermark is global,
 * so the watermark manager must track holds and the release of holds on a per-key basis.
 *
 * <p>Note that {@link #compareTo(KeyedHold)} is not consistent with {@link #equals(Object)}:
 * ties on the timestamp are broken by an arbitrary ordering of the keys, whereas equality
 * compares the keys with {@code equals}.
 */
private static final class KeyedHold implements Comparable<KeyedHold> {
  /** Tie-breaker for holds with the same timestamp; {@code null} keys sort last. */
  private static final Ordering<Object> KEY_ORDERING = Ordering.arbitrary().nullsLast();

  private final Object key;
  private final Instant timestamp;

  private KeyedHold(Object key, Instant timestamp) {
    this.key = key;
    this.timestamp = timestamp;
  }

  /**
   * Creates a new KeyedHold for the given key. A {@code null} timestamp is replaced by the
   * current value of {@code THE_END_OF_TIME}.
   */
  public static KeyedHold of(Object key, Instant timestamp) {
    Instant effectiveTimestamp = MoreObjects.firstNonNull(timestamp, THE_END_OF_TIME.get());
    return new KeyedHold(key, effectiveTimestamp);
  }

  /**
   * Returns the timestamp of this {@link KeyedHold}.
   */
  public Instant getTimestamp() {
    return timestamp;
  }

  /** Orders by timestamp first, then by {@code KEY_ORDERING} on the key. */
  @Override
  public int compareTo(KeyedHold that) {
    ComparisonChain chain = ComparisonChain.start();
    chain = chain.compare(this.timestamp, that.timestamp);
    chain = chain.compare(this.key, that.key, KEY_ORDERING);
    return chain.result();
  }

  @Override
  public boolean equals(Object other) {
    // instanceof is false for null, so no separate null check is needed.
    if (!(other instanceof KeyedHold)) {
      return false;
    }
    KeyedHold that = (KeyedHold) other;
    boolean sameTimestamp = Objects.equals(this.timestamp, that.timestamp);
    return sameTimestamp && Objects.equals(this.key, that.key);
  }

  @Override
  public int hashCode() {
    return Objects.hash(timestamp, key);
  }

  @Override
  public String toString() {
    return MoreObjects.toStringHelper(KeyedHold.class)
        .add("key", key)
        .add("hold", timestamp)
        .toString();
  }
}
/**
 * Tracks at most one {@link KeyedHold} per key, together with a sorted set of all current holds
 * from which the minimum hold is read.
 */
private static class PerKeyHolds {
  /** The current hold of each key. */
  private final Map<Object, KeyedHold> keyedHolds = new HashMap<>();
  /** Every hold in {@code keyedHolds}, in {@link KeyedHold#compareTo} order. */
  private final NavigableSet<KeyedHold> allHolds = new TreeSet<>();

  private PerKeyHolds() {}

  /**
   * Gets the minimum hold across all keys in this {@link PerKeyHolds}, or THE_END_OF_TIME if
   * there are no holds within this {@link PerKeyHolds}.
   */
  public Instant getMinHold() {
    if (allHolds.isEmpty()) {
      return THE_END_OF_TIME.get();
    }
    return allHolds.first().getTimestamp();
  }

  /**
   * Sets the hold of the provided key to the provided value, replacing any hold previously
   * registered for the same key.
   */
  public void updateHold(@Nullable Object key, Instant newHold) {
    KeyedHold replacement = KeyedHold.of(key, newHold);
    // put hands back the hold being replaced, if any, so it can be dropped from the sorted set.
    KeyedHold previous = keyedHolds.put(key, replacement);
    if (previous != null) {
      allHolds.remove(previous);
    }
    allHolds.add(replacement);
  }

  /**
   * Removes the hold of the provided key, if there is one.
   */
  public void removeHold(Object key) {
    KeyedHold removed = keyedHolds.remove(key);
    if (removed == null) {
      return;
    }
    allHolds.remove(removed);
  }
}
/**
 * A reference to the input and output watermarks of an {@link AppliedPTransform}, for both event
 * time and synchronized processing time.
 */
public class TransformWatermarks {
  private final AppliedPTransform<?, ?, ?> transform;
  private final AppliedPTransformInputWatermark inputWatermark;
  private final AppliedPTransformOutputWatermark outputWatermark;
  private final SynchronizedProcessingTimeInputWatermark synchronizedProcessingInputWatermark;
  private final SynchronizedProcessingTimeOutputWatermark synchronizedProcessingOutputWatermark;

  // The values most recently returned by the synchronized processing time getters; they are
  // remembered so that those getters never return an earlier instant than before.
  private Instant latestSynchronizedInputWm;
  private Instant latestSynchronizedOutputWm;

  private TransformWatermarks(
      AppliedPTransform<?, ?, ?> transform,
      AppliedPTransformInputWatermark inputWatermark,
      AppliedPTransformOutputWatermark outputWatermark,
      SynchronizedProcessingTimeInputWatermark inputSynchProcessingWatermark,
      SynchronizedProcessingTimeOutputWatermark outputSynchProcessingWatermark) {
    this.transform = transform;
    this.inputWatermark = inputWatermark;
    this.outputWatermark = outputWatermark;
    this.synchronizedProcessingInputWatermark = inputSynchProcessingWatermark;
    this.synchronizedProcessingOutputWatermark = outputSynchProcessingWatermark;
    this.latestSynchronizedInputWm = BoundedWindow.TIMESTAMP_MIN_VALUE;
    this.latestSynchronizedOutputWm = BoundedWindow.TIMESTAMP_MIN_VALUE;
  }

  /**
   * Returns the input watermark of the {@link AppliedPTransform}; fails with a
   * {@link NullPointerException} if the underlying watermark yields {@code null}.
   */
  public Instant getInputWatermark() {
    Instant current = inputWatermark.get();
    return checkNotNull(current);
  }

  /**
   * Returns the output watermark of the {@link AppliedPTransform}.
   */
  public Instant getOutputWatermark() {
    return outputWatermark.get();
  }

  /**
   * Returns the synchronized processing input time of the {@link AppliedPTransform}.
   *
   * <p>The result is the later of the previously returned value and the earlier of the current
   * clock time and the synchronized processing input watermark, so it never decreases between
   * calls.
   */
  public synchronized Instant getSynchronizedProcessingInputTime() {
    Instant now = clock.now();
    Instant candidate = INSTANT_ORDERING.min(now, synchronizedProcessingInputWatermark.get());
    latestSynchronizedInputWm = INSTANT_ORDERING.max(latestSynchronizedInputWm, candidate);
    return latestSynchronizedInputWm;
  }

  /**
   * Returns the synchronized processing output time of the {@link AppliedPTransform}.
   *
   * <p>The result is the later of the previously returned value and the earlier of the current
   * clock time and the synchronized processing output watermark, so it never decreases between
   * calls.
   */
  public synchronized Instant getSynchronizedProcessingOutputTime() {
    Instant now = clock.now();
    Instant candidate = INSTANT_ORDERING.min(now, synchronizedProcessingOutputWatermark.get());
    latestSynchronizedOutputWm = INSTANT_ORDERING.max(latestSynchronizedOutputWm, candidate);
    return latestSynchronizedOutputWm;
  }

  /**
   * Refreshes all four watermarks, inputs before outputs, and returns the union of the two
   * output watermark updates.
   */
  private WatermarkUpdate refresh() {
    inputWatermark.refresh();
    synchronizedProcessingInputWatermark.refresh();
    WatermarkUpdate eventTimeOutput = outputWatermark.refresh();
    WatermarkUpdate synchronizedOutput = synchronizedProcessingOutputWatermark.refresh();
    return eventTimeOutput.union(synchronizedOutput);
  }

  /** Forwards the hold for {@code key} to the output watermark. */
  private void setEventTimeHold(Object key, Instant newHold) {
    outputWatermark.updateHold(key, newHold);
  }

  /** Removes {@code bundle} from the pending elements of both input watermarks. */
  private void removePending(CommittedBundle<?> bundle) {
    inputWatermark.removePending(bundle);
    synchronizedProcessingInputWatermark.removePending(bundle);
  }

  /** Adds {@code bundle} to the pending elements of both input watermarks. */
  private void addPending(CommittedBundle<?> bundle) {
    inputWatermark.addPending(bundle);
    synchronizedProcessingInputWatermark.addPending(bundle);
  }

  /**
   * Extracts the fired event time, processing time and synchronized processing time timers, in
   * that order, and returns one {@link FiredTimers} per key that has at least one entry.
   */
  private Collection<FiredTimers> extractFiredTimers() {
    Map<StructuralKey<?>, List<TimerData>> eventTime =
        inputWatermark.extractFiredEventTimeTimers();
    Map<StructuralKey<?>, List<TimerData>> processingTime =
        synchronizedProcessingInputWatermark.extractFiredDomainTimers(
            TimeDomain.PROCESSING_TIME, clock.now());
    Map<StructuralKey<?>, List<TimerData>> synchronizedTime =
        synchronizedProcessingInputWatermark.extractFiredDomainTimers(
            TimeDomain.SYNCHRONIZED_PROCESSING_TIME, getSynchronizedProcessingInputTime());

    Map<StructuralKey<?>, List<TimerData>> byKey =
        groupFiredTimers(eventTime, processingTime, synchronizedTime);
    Collection<FiredTimers> fired = new ArrayList<>(byKey.size());
    for (Map.Entry<StructuralKey<?>, List<TimerData>> keyAndTimers : byKey.entrySet()) {
      fired.add(new FiredTimers(transform, keyAndTimers.getKey(), keyAndTimers.getValue()));
    }
    return fired;
  }

  /**
   * Merges the provided per-key timer maps into a single map. The timers of a key are
   * concatenated in the order in which the maps were passed.
   */
  @SafeVarargs
  private final Map<StructuralKey<?>, List<TimerData>> groupFiredTimers(
      Map<StructuralKey<?>, List<TimerData>>... timersToGroup) {
    Map<StructuralKey<?>, List<TimerData>> merged = new HashMap<>();
    for (Map<StructuralKey<?>, List<TimerData>> source : timersToGroup) {
      for (Map.Entry<StructuralKey<?>, List<TimerData>> perKey : source.entrySet()) {
        StructuralKey<?> key = perKey.getKey();
        if (!merged.containsKey(key)) {
          merged.put(key, new ArrayList<TimerData>());
        }
        merged.get(key).addAll(perKey.getValue());
      }
    }
    return merged;
  }

  /** Applies the timer update to both input watermarks. */
  private void updateTimers(TimerUpdate update) {
    inputWatermark.updateTimers(update);
    synchronizedProcessingInputWatermark.updateTimers(update);
  }

  @Override
  public String toString() {
    return MoreObjects.toStringHelper(TransformWatermarks.class)
        .add("inputWatermark", inputWatermark)
        .add("outputWatermark", outputWatermark)
        .add("inputProcessingTime", synchronizedProcessingInputWatermark)
        .add("outputProcessingTime", synchronizedProcessingOutputWatermark)
        .toString();
  }
}
/**
 * A collection of newly set, deleted, and completed timers for a single key.
 *
 * <p>setTimers and deletedTimers are collections of {@link TimerData} that have been added to the
 * {@link TimerInternals} of an executed step. completedTimers are timers that were delivered as
 * the input to the executed step.
 */
public static class TimerUpdate {
  private final StructuralKey<?> key;
  private final Iterable<? extends TimerData> completedTimers;
  private final Iterable<? extends TimerData> setTimers;
  private final Iterable<? extends TimerData> deletedTimers;

  private TimerUpdate(
      StructuralKey<?> key,
      Iterable<? extends TimerData> completedTimers,
      Iterable<? extends TimerData> setTimers,
      Iterable<? extends TimerData> deletedTimers) {
    this.key = key;
    this.completedTimers = completedTimers;
    this.setTimers = setTimers;
    this.deletedTimers = deletedTimers;
  }

  /**
   * Returns a TimerUpdate for a null key with no timers.
   */
  public static TimerUpdate empty() {
    List<TimerData> none = Collections.emptyList();
    return new TimerUpdate(null, none, none, none);
  }

  /**
   * Creates a new {@link TimerUpdateBuilder} for the provided key. The builder starts with no
   * completed, set, or deleted timers.
   */
  public static TimerUpdateBuilder builder(StructuralKey<?> key) {
    return new TimerUpdateBuilder(key);
  }

  /**
   * A builder of {@link TimerUpdate} that accumulates completed, set, and deleted timers.
   */
  public static final class TimerUpdateBuilder {
    private final StructuralKey<?> key;
    private final Collection<TimerData> completedTimers = new HashSet<>();
    private final Collection<TimerData> setTimers = new HashSet<>();
    private final Collection<TimerData> deletedTimers = new HashSet<>();

    private TimerUpdateBuilder(StructuralKey<?> key) {
      this.key = key;
    }

    /**
     * Adds all of the provided timers to the collection of completed timers, and returns this
     * {@link TimerUpdateBuilder}.
     */
    public TimerUpdateBuilder withCompletedTimers(Iterable<TimerData> completedTimers) {
      Iterables.addAll(this.completedTimers, completedTimers);
      return this;
    }

    /**
     * Adds the provided timer to the collection of set timers, removing it from deleted timers if
     * it has previously been deleted. Returns this {@link TimerUpdateBuilder}.
     *
     * @throws IllegalArgumentException if the timestamp of the timer is not before
     *     {@link BoundedWindow#TIMESTAMP_MAX_VALUE}
     */
    public TimerUpdateBuilder setTimer(TimerData setTimer) {
      boolean beforeEndOfTime =
          setTimer.getTimestamp().isBefore(BoundedWindow.TIMESTAMP_MAX_VALUE);
      checkArgument(
          beforeEndOfTime,
          "Got a timer for after the end of time (%s), got %s",
          BoundedWindow.TIMESTAMP_MAX_VALUE,
          setTimer.getTimestamp());
      deletedTimers.remove(setTimer);
      setTimers.add(setTimer);
      return this;
    }

    /**
     * Adds the provided timer to the collection of deleted timers, removing it from set timers if
     * it has previously been set. Returns this {@link TimerUpdateBuilder}.
     */
    public TimerUpdateBuilder deletedTimer(TimerData deletedTimer) {
      deletedTimers.add(deletedTimer);
      setTimers.remove(deletedTimer);
      return this;
    }

    /**
     * Returns a new {@link TimerUpdate} holding immutable snapshots of the completed, set, and
     * deleted timers accumulated so far.
     */
    public TimerUpdate build() {
      ImmutableSet<TimerData> completedSnapshot = ImmutableSet.copyOf(completedTimers);
      ImmutableSet<TimerData> setSnapshot = ImmutableSet.copyOf(setTimers);
      ImmutableSet<TimerData> deletedSnapshot = ImmutableSet.copyOf(deletedTimers);
      return new TimerUpdate(key, completedSnapshot, setSnapshot, deletedSnapshot);
    }
  }

  @VisibleForTesting
  StructuralKey<?> getKey() {
    return key;
  }

  @VisibleForTesting
  Iterable<? extends TimerData> getCompletedTimers() {
    return completedTimers;
  }

  @VisibleForTesting
  Iterable<? extends TimerData> getSetTimers() {
    return setTimers;
  }

  @VisibleForTesting
  Iterable<? extends TimerData> getDeletedTimers() {
    return deletedTimers;
  }

  /**
   * Returns a {@link TimerUpdate} that is like this one, but with the specified completed timers.
   */
  public TimerUpdate withCompletedTimers(Iterable<TimerData> completedTimers) {
    return new TimerUpdate(this.key, completedTimers, this.setTimers, this.deletedTimers);
  }

  @Override
  public boolean equals(Object other) {
    // instanceof is false for null, so no separate null check is needed.
    if (!(other instanceof TimerUpdate)) {
      return false;
    }
    TimerUpdate that = (TimerUpdate) other;
    if (!Objects.equals(this.key, that.key)) {
      return false;
    }
    if (!Objects.equals(this.completedTimers, that.completedTimers)) {
      return false;
    }
    if (!Objects.equals(this.setTimers, that.setTimers)) {
      return false;
    }
    return Objects.equals(this.deletedTimers, that.deletedTimers);
  }

  @Override
  public int hashCode() {
    return Objects.hash(key, completedTimers, setTimers, deletedTimers);
  }
}
/**
* A pair of {@link TimerData} and key which can be delivered to the appropriate
* {@link AppliedPTransform}. A timer fires at the transform that set it with a specific key when
* the time domain in which it lives progresses past a specified time, as determined by the
* {@link WatermarkManager}.
*/
public static class FiredTimers {
/** The transform the timers were set at and will be delivered to. */
private final AppliedPTransform<?, ?, ?> transform;
/** The key the timers were set for and will be delivered to. */
private final StructuralKey<?> key;
private final Collection<TimerData> timers;
private FiredTimers(
AppliedPTransform<?, ?, ?> transform, StructuralKey<?> key, Collection<TimerData> timers) {
this.transform = transform;
this.key = key;
this.timers = timers;
}
public AppliedPTransform<?, ?, ?> getTransform() {
return transform;
}
public StructuralKey<?> getKey() {
return key;
}
/**
* Gets all of the timers that have fired within the provided {@link TimeDomain}. If no timers
* fired within the provided domain, return an empty collection.
*
* <p>Timers within a {@link TimeDomain} are guaranteed to be in order of increasing timestamp.
*/
public Collection<TimerData> getTimers() {
return timers;
}
@Override
public String toString() {
return MoreObjects.toStringHelper(FiredTimers.class).add("timers", timers).toString();
}
}
/**
 * Orders bundles by their minimum timestamp alone; two bundles with the same minimum timestamp
 * compare as equal regardless of their other contents.
 */
private static class BundleByElementTimestampComparator extends Ordering<CommittedBundle<?>>
    implements Serializable {
  @Override
  public int compare(CommittedBundle<?> left, CommittedBundle<?> right) {
    ComparisonChain byMinTimestamp =
        ComparisonChain.start().compare(left.getMinTimestamp(), right.getMinTimestamp());
    return byMinTimestamp.result();
  }
}
/**
 * Returns the transforms whose output watermark equals the current value of
 * {@code THE_END_OF_TIME}.
 *
 * @return a new, mutable set of those transforms; empty if there are none
 */
public Set<AppliedPTransform<?, ?, ?>> getCompletedTransforms() {
  Set<AppliedPTransform<?, ?, ?>> completed = new HashSet<>();
  for (Map.Entry<AppliedPTransform<?, ?, ?>, TransformWatermarks> entry :
      transformToWatermarks.entrySet()) {
    Instant outputWatermark = entry.getValue().getOutputWatermark();
    if (!outputWatermark.equals(THE_END_OF_TIME.get())) {
      continue;
    }
    completed.add(entry.getKey());
  }
  return completed;
}
/**
 * A watermark update that has been queued but not yet applied: the input bundle that was
 * consumed (if any), the timer changes, the committed result, and the earliest watermark hold.
 */
@AutoValue
abstract static class PendingWatermarkUpdate {
  /** The input bundle that was consumed to produce the result, or {@code null} if none. */
  @Nullable
  public abstract CommittedBundle<?> getInputBundle();

  /** The timers that were set, deleted, and completed while producing the result. */
  public abstract TimerUpdate getTimerUpdate();

  /** The result that was produced. */
  public abstract CommittedResult getResult();

  /**
   * The earliest watermark hold to record, or {@code null} if there is no hold.
   *
   * <p>This is {@code @Nullable} because the value class generated by AutoValue rejects
   * {@code null} for any property that is not annotated as such, while a {@code null} hold is
   * tolerated where holds are stored: {@code KeyedHold.of} substitutes a default for a
   * {@code null} timestamp.
   */
  @Nullable
  public abstract Instant getEarliestHold();

  /**
   * Gets the {@link AppliedPTransform} that generated this result.
   */
  public AppliedPTransform<?, ?, ?> getTransform() {
    return getResult().getTransform();
  }

  /**
   * Creates a new {@link PendingWatermarkUpdate}.
   *
   * @param inputBundle the input bundle that was consumed, or {@code null} if there was none
   * @param timerUpdate the timer changes that accompany the result
   * @param result the result that was produced
   * @param earliestHold the earliest watermark hold, or {@code null} if there is no hold
   */
  public static PendingWatermarkUpdate create(
      @Nullable CommittedBundle<?> inputBundle,
      TimerUpdate timerUpdate,
      CommittedResult result,
      @Nullable Instant earliestHold) {
    return new AutoValue_WatermarkManager_PendingWatermarkUpdate(inputBundle,
        timerUpdate,
        result,
        earliestHold);
  }
}
}