/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.beam.sdk.testing; import static com.google.common.base.Preconditions.checkArgument; import static org.hamcrest.Matchers.greaterThan; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import com.google.common.collect.Iterables; import com.google.common.collect.Ordering; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import javax.annotation.Nullable; import org.apache.beam.sdk.transforms.windowing.BoundedWindow; import org.apache.beam.sdk.transforms.windowing.GlobalWindow; import org.apache.beam.sdk.transforms.windowing.IntervalWindow; import org.apache.beam.sdk.transforms.windowing.TimestampCombiner; import org.apache.beam.sdk.transforms.windowing.WindowFn; import org.joda.time.Instant; import org.joda.time.ReadableInstant; /** * A utility class for testing {@link WindowFn}s. */ public class WindowFnTestUtils { /** * Creates a Set of elements to be used as expected output in * {@link #runWindowFn}. */ public static Set<String> set(long... timestamps) { Set<String> result = new HashSet<>(); for (long timestamp : timestamps) { result.add(timestampValue(timestamp)); } return result; } /** * Runs the {@link WindowFn} over the provided input, returning a map * of windows to the timestamps in those windows. */ public static <T, W extends BoundedWindow> Map<W, Set<String>> runWindowFn( WindowFn<T, W> windowFn, List<Long> timestamps) throws Exception { final TestWindowSet<W, String> windowSet = new TestWindowSet<W, String>(); for (final Long timestamp : timestamps) { for (W window : windowFn.assignWindows( new TestAssignContext<T, W>(new Instant(timestamp), windowFn))) { windowSet.put(window, timestampValue(timestamp)); } windowFn.mergeWindows(new TestMergeContext<T, W>(windowSet, windowFn)); } Map<W, Set<String>> actual = new HashMap<>(); for (W window : windowSet.windows()) { actual.put(window, windowSet.get(window)); } return actual; } public static <T, W extends BoundedWindow> Collection<W> assignedWindows( WindowFn<T, W> windowFn, long timestamp) throws Exception { return windowFn.assignWindows(new TestAssignContext<T, W>(new Instant(timestamp), windowFn)); } private static String timestampValue(long timestamp) { return "T" + new Instant(timestamp); } /** * Test implementation of AssignContext. */ private static class TestAssignContext<T, W extends BoundedWindow> extends WindowFn<T, W>.AssignContext { private Instant timestamp; public TestAssignContext(Instant timestamp, WindowFn<T, W> windowFn) { windowFn.super(); this.timestamp = timestamp; } @Override public T element() { return null; } @Override public Instant timestamp() { return timestamp; } @Override public BoundedWindow window() { return GlobalWindow.INSTANCE; } } /** * Test implementation of MergeContext. */ private static class TestMergeContext<T, W extends BoundedWindow> extends WindowFn<T, W>.MergeContext { private TestWindowSet<W, ?> windowSet; public TestMergeContext( TestWindowSet<W, ?> windowSet, WindowFn<T, W> windowFn) { windowFn.super(); this.windowSet = windowSet; } @Override public Collection<W> windows() { return windowSet.windows(); } @Override public void merge(Collection<W> toBeMerged, W mergeResult) { windowSet.merge(toBeMerged, mergeResult); } } /** * A WindowSet useful for testing WindowFns that simply * collects the placed elements into multisets. */ private static class TestWindowSet<W extends BoundedWindow, V> { private Map<W, Set<V>> elements = new HashMap<>(); public void put(W window, V value) { Set<V> all = elements.get(window); if (all == null) { all = new HashSet<>(); elements.put(window, all); } all.add(value); } public void merge(Collection<W> otherWindows, W window) { if (otherWindows.isEmpty()) { return; } Set<V> merged = new HashSet<>(); if (elements.containsKey(window) && !otherWindows.contains(window)) { merged.addAll(elements.get(window)); } for (W w : otherWindows) { if (!elements.containsKey(w)) { throw new IllegalArgumentException("Tried to merge a non-existent window:" + w); } merged.addAll(elements.get(w)); elements.remove(w); } elements.put(window, merged); } public Collection<W> windows() { return elements.keySet(); } // For testing. public Set<V> get(W window) { return elements.get(window); } } /** * Assigns the given {@code timestamp} to windows using the specified {@code windowFn}, and * verifies that result of {@code windowFn.getOutputTimestamp} for each window is within the * proper bound. */ public static <T, W extends BoundedWindow> void validateNonInterferingOutputTimes( WindowFn<T, W> windowFn, long timestamp) throws Exception { Collection<W> windows = WindowFnTestUtils.<T, W>assignedWindows(windowFn, timestamp); Instant instant = new Instant(timestamp); for (W window : windows) { Instant outputTimestamp = windowFn.getOutputTime(instant, window); assertFalse("getOutputTime must be greater than or equal to input timestamp", outputTimestamp.isBefore(instant)); assertFalse("getOutputTime must be less than or equal to the max timestamp", outputTimestamp.isAfter(window.maxTimestamp())); } } /** * Assigns the given {@code timestamp} to windows using the specified {@code windowFn}, and * verifies that result of {@link WindowFn#getOutputTime windowFn.getOutputTime} for later windows * (as defined by {@code maxTimestamp} won't prevent the watermark from passing the end of earlier * windows. * * <p>This verifies that overlapping windows don't interfere at all. Depending on the * {@code windowFn} this may be stricter than desired. */ public static <T, W extends BoundedWindow> void validateGetOutputTimestamp( WindowFn<T, W> windowFn, long timestamp) throws Exception { Collection<W> windows = WindowFnTestUtils.<T, W>assignedWindows(windowFn, timestamp); List<W> sortedWindows = new ArrayList<>(windows); Collections.sort(sortedWindows, new Comparator<BoundedWindow>() { @Override public int compare(BoundedWindow o1, BoundedWindow o2) { return o1.maxTimestamp().compareTo(o2.maxTimestamp()); } }); Instant instant = new Instant(timestamp); Instant endOfPrevious = null; for (W window : sortedWindows) { Instant outputTimestamp = windowFn.getOutputTime(instant, window); if (endOfPrevious == null) { // If this is the first window, the output timestamp can be anything, as long as it is in // the valid range. assertFalse("getOutputTime must be greater than or equal to input timestamp", outputTimestamp.isBefore(instant)); assertFalse("getOutputTime must be less than or equal to the max timestamp", outputTimestamp.isAfter(window.maxTimestamp())); } else { // If this is a later window, the output timestamp must be after the end of the previous // window assertTrue("getOutputTime must be greater than the end of the previous window", outputTimestamp.isAfter(endOfPrevious)); assertFalse("getOutputTime must be less than or equal to the max timestamp", outputTimestamp.isAfter(window.maxTimestamp())); } endOfPrevious = window.maxTimestamp(); } } /** * Verifies that later-ending merged windows from any of the timestamps hold up output of * earlier-ending windows, using the provided {@link WindowFn} and {@link TimestampCombiner}. * * <p>Given a list of lists of timestamps, where each list is expected to merge into a single * window with end times in ascending order, assigns and merges windows for each list (as though * each were a separate key/user session). Then combines each timestamp in the list according to * the provided {@link TimestampCombiner}. * * <p>Verifies that a overlapping windows do not hold each other up via the watermark. */ public static <T, W extends IntervalWindow> void validateGetOutputTimestamps( WindowFn<T, W> windowFn, TimestampCombiner timestampCombiner, List<List<Long>> timestampsPerWindow) throws Exception { // Assign windows to each timestamp, then merge them, storing the merged windows in // a list in corresponding order to timestampsPerWindow final List<W> windows = new ArrayList<>(); for (List<Long> timestampsForWindow : timestampsPerWindow) { final Set<W> windowsToMerge = new HashSet<>(); for (long timestamp : timestampsForWindow) { windowsToMerge.addAll( WindowFnTestUtils.<T, W>assignedWindows(windowFn, timestamp)); } windowFn.mergeWindows(windowFn.new MergeContext() { @Override public Collection<W> windows() { return windowsToMerge; } @Override public void merge(Collection<W> toBeMerged, W mergeResult) throws Exception { windows.add(mergeResult); } }); } // Map every list of input timestamps to an output timestamp final List<Instant> combinedOutputTimestamps = new ArrayList<>(); for (int i = 0; i < timestampsPerWindow.size(); ++i) { List<Long> timestampsForWindow = timestampsPerWindow.get(i); W window = windows.get(i); List<Instant> outputInstants = new ArrayList<>(); for (long inputTimestamp : timestampsForWindow) { outputInstants.add( assignOutputTime(timestampCombiner, new Instant(inputTimestamp), window)); } combinedOutputTimestamps.add(combineOutputTimes(timestampCombiner, outputInstants)); } // Consider windows in increasing order of max timestamp; ensure the output timestamp is after // the max timestamp of the previous @Nullable W earlierEndingWindow = null; for (int i = 0; i < windows.size(); ++i) { W window = windows.get(i); ReadableInstant outputTimestamp = combinedOutputTimestamps.get(i); if (earlierEndingWindow != null) { assertThat(outputTimestamp, greaterThan((ReadableInstant) earlierEndingWindow.maxTimestamp())); } earlierEndingWindow = window; } } private static Instant assignOutputTime( TimestampCombiner timestampCombiner, Instant inputTimestamp, BoundedWindow window) { switch (timestampCombiner) { case EARLIEST: case LATEST: return inputTimestamp; case END_OF_WINDOW: return window.maxTimestamp(); default: throw new IllegalArgumentException( String.format("Unknown %s: %s", TimestampCombiner.class, timestampCombiner)); } } private static Instant combineOutputTimes( TimestampCombiner timestampCombiner, Iterable<Instant> outputInstants) { checkArgument( !Iterables.isEmpty(outputInstants), "Cannot combine zero instants with %s", timestampCombiner); switch(timestampCombiner) { case EARLIEST: return Ordering.natural().min(outputInstants); case LATEST: return Ordering.natural().max(outputInstants); case END_OF_WINDOW: return outputInstants.iterator().next(); default: throw new IllegalArgumentException( String.format("Unknown %s: %s", TimestampCombiner.class, timestampCombiner)); } } }