/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.beam.sdk.transforms; import static com.google.common.base.Preconditions.checkArgument; import static org.apache.beam.sdk.values.KV.of; import static org.hamcrest.Matchers.isA; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import com.google.common.collect.ImmutableList; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.NoSuchElementException; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.Pipeline.PipelineExecutionException; import org.apache.beam.sdk.coders.AtomicCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderException; import org.apache.beam.sdk.coders.KvCoder; import org.apache.beam.sdk.coders.NullableCoder; import org.apache.beam.sdk.coders.StringUtf8Coder; import org.apache.beam.sdk.coders.VarIntCoder; import org.apache.beam.sdk.coders.VoidCoder; import 
org.apache.beam.sdk.testing.NeedsRunner; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.testing.ValidatesRunner; import org.apache.beam.sdk.transforms.windowing.FixedWindows; import org.apache.beam.sdk.transforms.windowing.GlobalWindows; import org.apache.beam.sdk.transforms.windowing.InvalidWindows; import org.apache.beam.sdk.transforms.windowing.Window; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PBegin; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionView; import org.apache.beam.sdk.values.TimestampedValue; import org.apache.beam.sdk.values.TypeDescriptor; import org.apache.beam.sdk.values.WindowingStrategy; import org.hamcrest.Matchers; import org.joda.time.Duration; import org.joda.time.Instant; import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.internal.matchers.ThrowableMessageMatcher; import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; /** * Tests for {@link View}. See also {@link ParDoTest}, which * provides additional coverage since views can only be * observed via {@link ParDo}. */ @RunWith(JUnit4.class) public class ViewTest implements Serializable { // This test is Serializable, just so that it's easy to have // anonymous inner classes inside the non-static test methods. 
@Rule
public final transient TestPipeline pipeline = TestPipeline.create();

@Rule
public transient ExpectedException thrown = ExpectedException.none();

/**
 * Reads a one-element singleton view from each of three main-input elements and expects the
 * side value 47 to be emitted once per element.
 */
@Test
@Category(ValidatesRunner.class)
public void testSingletonSideInput() {
  PCollection<Integer> sideSource = pipeline.apply("Create47", Create.of(47));
  final PCollectionView<Integer> singletonView = sideSource.apply(View.<Integer>asSingleton());

  PCollection<Integer> mainInput = pipeline.apply("Create123", Create.of(1, 2, 3));
  PCollection<Integer> emitted =
      mainInput.apply(
          "OutputSideInputs",
          ParDo.of(
                  new DoFn<Integer, Integer>() {
                    @ProcessElement
                    public void processElement(ProcessContext ctx) {
                      ctx.output(ctx.sideInput(singletonView));
                    }
                  })
              .withSideInputs(singletonView));

  PAssert.that(emitted).containsInAnyOrder(47, 47, 47);
  pipeline.run();
}

/**
 * Same as the singleton test, but both the side input and the main input are timestamped and
 * placed into 10 ms fixed windows. The expected output is two 47s and one 48.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedSingletonSideInput() {
  PCollection<Integer> timestampedSide =
      pipeline.apply(
          "Create47",
          Create.timestamped(
              TimestampedValue.of(47, new Instant(1)),
              TimestampedValue.of(48, new Instant(11))));
  final PCollectionView<Integer> windowedView =
      timestampedSide
          .apply("SideWindowInto", Window.<Integer>into(FixedWindows.of(Duration.millis(10))))
          .apply(View.<Integer>asSingleton());

  PCollection<Integer> timestampedMain =
      pipeline.apply(
          "Create123",
          Create.timestamped(
              TimestampedValue.of(1, new Instant(4)),
              TimestampedValue.of(2, new Instant(8)),
              TimestampedValue.of(3, new Instant(12))));
  PCollection<Integer> emitted =
      timestampedMain
          .apply("MainWindowInto", Window.<Integer>into(FixedWindows.of(Duration.millis(10))))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                      new DoFn<Integer, Integer>() {
                        @ProcessElement
                        public void processElement(ProcessContext ctx) {
                          ctx.output(ctx.sideInput(windowedView));
                        }
                      })
                  .withSideInputs(windowedView));

  PAssert.that(emitted).containsInAnyOrder(47, 47, 48);
  pipeline.run();
}

/** Builds a singleton view over an empty collection and reads it from a DoFn. */
@Test
@Category(NeedsRunner.class)
public void testEmptySingletonSideInput() throws Exception {
  final PCollectionView<Integer> view =
      pipeline
          .apply("CreateEmptyIntegers", Create.empty(VarIntCoder.of()))
          .apply(View.<Integer>asSingleton());

  pipeline
      .apply("Create123", Create.of(1, 2, 3))
      .apply(
          "OutputSideInputs",
          ParDo.of(
              new DoFn<Integer, Integer>() {
                @ProcessElement
                public void
processElement(ProcessContext c) { c.output(c.sideInput(view)); } }).withSideInputs(view)); thrown.expect(PipelineExecutionException.class); thrown.expectCause(isA(NoSuchElementException.class)); thrown.expectMessage("Empty"); thrown.expectMessage("PCollection"); thrown.expectMessage("singleton"); pipeline.run(); } @Test @Category(NeedsRunner.class) public void testNonSingletonSideInput() throws Exception { PCollection<Integer> oneTwoThree = pipeline.apply(Create.<Integer>of(1, 2, 3)); final PCollectionView<Integer> view = oneTwoThree.apply(View.<Integer>asSingleton()); oneTwoThree.apply( "OutputSideInputs", ParDo.of(new DoFn<Integer, Integer>() { @ProcessElement public void processElement(ProcessContext c) { c.output(c.sideInput(view)); } }).withSideInputs(view)); thrown.expect(PipelineExecutionException.class); thrown.expectCause(isA(IllegalArgumentException.class)); thrown.expectMessage("PCollection"); thrown.expectMessage("more than one"); thrown.expectMessage("singleton"); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testListSideInput() { final PCollectionView<List<Integer>> view = pipeline.apply("CreateSideInput", Create.of(11, 13, 17, 23)).apply(View.<Integer>asList()); PCollection<Integer> output = pipeline.apply("CreateMainInput", Create.of(29, 31)) .apply("OutputSideInputs", ParDo.of(new DoFn<Integer, Integer>() { @ProcessElement public void processElement(ProcessContext c) { checkArgument(c.sideInput(view).size() == 4); checkArgument(c.sideInput(view).get(0) == c.sideInput(view).get(0)); for (Integer i : c.sideInput(view)) { c.output(i); } } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder(11, 13, 17, 23, 11, 13, 17, 23); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testWindowedListSideInput() { final PCollectionView<List<Integer>> view = pipeline.apply("CreateSideInput", Create.timestamped( TimestampedValue.of(11, new Instant(1)), TimestampedValue.of(13, new Instant(1)), 
TimestampedValue.of(17, new Instant(1)), TimestampedValue.of(23, new Instant(1)), TimestampedValue.of(31, new Instant(11)), TimestampedValue.of(33, new Instant(11)), TimestampedValue.of(37, new Instant(11)), TimestampedValue.of(43, new Instant(11)))) .apply("SideWindowInto", Window.<Integer>into(FixedWindows.of(Duration.millis(10)))) .apply(View.<Integer>asList()); PCollection<Integer> output = pipeline.apply("CreateMainInput", Create.timestamped( TimestampedValue.of(29, new Instant(1)), TimestampedValue.of(35, new Instant(11)))) .apply("MainWindowInto", Window.<Integer>into(FixedWindows.of(Duration.millis(10)))) .apply( "OutputSideInputs", ParDo.of(new DoFn<Integer, Integer>() { @ProcessElement public void processElement(ProcessContext c) { checkArgument(c.sideInput(view).size() == 4); checkArgument(c.sideInput(view).get(0) == c.sideInput(view).get(0)); for (Integer i : c.sideInput(view)) { c.output(i); } } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder(11, 13, 17, 23, 31, 33, 37, 43); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testEmptyListSideInput() throws Exception { final PCollectionView<List<Integer>> view = pipeline.apply("CreateEmptyView", Create.empty(VarIntCoder.of())) .apply(View.<Integer>asList()); PCollection<Integer> results = pipeline.apply("Create1", Create.of(1)) .apply("OutputSideInputs", ParDo.of(new DoFn<Integer, Integer>() { @ProcessElement public void processElement(ProcessContext c) { assertTrue(c.sideInput(view).isEmpty()); assertFalse(c.sideInput(view).iterator().hasNext()); c.output(1); } }).withSideInputs(view)); // Pass at least one value through to guarantee that DoFn executes. 
PAssert.that(results).containsInAnyOrder(1);
  pipeline.run();
}

/**
 * Checks that the list handed to a DoFn by a list view rejects mutation: clear, add, addAll
 * and remove must each throw UnsupportedOperationException.
 */
@Test
@Category(ValidatesRunner.class)
public void testListSideInputIsImmutable() {
  final PCollectionView<List<Integer>> view =
      pipeline.apply("CreateSideInput", Create.of(11)).apply(View.<Integer>asList());

  PCollection<Integer> output =
      pipeline
          .apply("CreateMainInput", Create.of(29))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                      new DoFn<Integer, Integer>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          // Each mutator must throw; reaching fail() means the call succeeded.
                          // The empty catch blocks are deliberate: the exception is the
                          // expected outcome.
                          try {
                            c.sideInput(view).clear();
                            fail("Expected UnsupportedOperationException on clear()");
                          } catch (UnsupportedOperationException expected) {
                          }
                          try {
                            c.sideInput(view).add(4);
                            fail("Expected UnsupportedOperationException on add()");
                          } catch (UnsupportedOperationException expected) {
                          }
                          try {
                            c.sideInput(view).addAll(new ArrayList<Integer>());
                            fail("Expected UnsupportedOperationException on addAll()");
                          } catch (UnsupportedOperationException expected) {
                          }
                          try {
                            c.sideInput(view).remove(0);
                            fail("Expected UnsupportedOperationException on remove()");
                          } catch (UnsupportedOperationException expected) {
                          }
                          // Re-emit the side-input contents so the test has output to assert on.
                          for (Integer i : c.sideInput(view)) {
                            c.output(i);
                          }
                        }
                      })
                  .withSideInputs(view));

  // Pass at least one value through to guarantee that DoFn executes.
PAssert.that(output).containsInAnyOrder(11); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testIterableSideInput() { final PCollectionView<Iterable<Integer>> view = pipeline.apply("CreateSideInput", Create.of(11, 13, 17, 23)) .apply(View.<Integer>asIterable()); PCollection<Integer> output = pipeline.apply("CreateMainInput", Create.of(29, 31)) .apply("OutputSideInputs", ParDo.of(new DoFn<Integer, Integer>() { @ProcessElement public void processElement(ProcessContext c) { for (Integer i : c.sideInput(view)) { c.output(i); } } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder(11, 13, 17, 23, 11, 13, 17, 23); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testWindowedIterableSideInput() { final PCollectionView<Iterable<Integer>> view = pipeline.apply("CreateSideInput", Create.timestamped( TimestampedValue.of(11, new Instant(1)), TimestampedValue.of(13, new Instant(1)), TimestampedValue.of(17, new Instant(1)), TimestampedValue.of(23, new Instant(1)), TimestampedValue.of(31, new Instant(11)), TimestampedValue.of(33, new Instant(11)), TimestampedValue.of(37, new Instant(11)), TimestampedValue.of(43, new Instant(11)))) .apply("SideWindowInto", Window.<Integer>into(FixedWindows.of(Duration.millis(10)))) .apply(View.<Integer>asIterable()); PCollection<Integer> output = pipeline .apply("CreateMainInput", Create.timestamped( TimestampedValue.of(29, new Instant(1)), TimestampedValue.of(35, new Instant(11)))) .apply("MainWindowInto", Window.<Integer>into(FixedWindows.of(Duration.millis(10)))) .apply("OutputSideInputs", ParDo.of(new DoFn<Integer, Integer>() { @ProcessElement public void processElement(ProcessContext c) { for (Integer i : c.sideInput(view)) { c.output(i); } } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder(11, 13, 17, 23, 31, 33, 37, 43); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testEmptyIterableSideInput() throws Exception { final 
PCollectionView<Iterable<Integer>> view = pipeline.apply("CreateEmptyView", Create.empty(VarIntCoder.of())) .apply(View.<Integer>asIterable()); PCollection<Integer> results = pipeline.apply("Create1", Create.of(1)) .apply("OutputSideInputs", ParDo.of(new DoFn<Integer, Integer>() { @ProcessElement public void processElement(ProcessContext c) { assertFalse(c.sideInput(view).iterator().hasNext()); c.output(1); } }).withSideInputs(view)); // Pass at least one value through to guarantee that DoFn executes. PAssert.that(results).containsInAnyOrder(1); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testIterableSideInputIsImmutable() { final PCollectionView<Iterable<Integer>> view = pipeline.apply("CreateSideInput", Create.of(11)).apply(View.<Integer>asIterable()); PCollection<Integer> output = pipeline.apply("CreateMainInput", Create.of(29)) .apply("OutputSideInputs", ParDo.of(new DoFn<Integer, Integer>() { @ProcessElement public void processElement(ProcessContext c) { Iterator<Integer> iterator = c.sideInput(view).iterator(); while (iterator.hasNext()) { try { iterator.remove(); fail("Expected UnsupportedOperationException on remove()"); } catch (UnsupportedOperationException expected) { } c.output(iterator.next()); } } }).withSideInputs(view)); // Pass at least one value through to guarantee that DoFn executes. 
PAssert.that(output).containsInAnyOrder(11); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testMultimapSideInput() { final PCollectionView<Map<String, Iterable<Integer>>> view = pipeline.apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("a", 2), KV.of("b", 3))) .apply(View.<String, Integer>asMultimap()); PCollection<KV<String, Integer>> output = pipeline.apply("CreateMainInput", Create.of("apple", "banana", "blackberry")) .apply( "OutputSideInputs", ParDo.of(new DoFn<String, KV<String, Integer>>() { @ProcessElement public void processElement(ProcessContext c) { for (Integer v : c.sideInput(view).get(c.element().substring(0, 1))) { c.output(of(c.element(), v)); } } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder( KV.of("apple", 1), KV.of("apple", 2), KV.of("banana", 3), KV.of("blackberry", 3)); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testMultimapAsEntrySetSideInput() { final PCollectionView<Map<String, Iterable<Integer>>> view = pipeline.apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("a", 2), KV.of("b", 3))) .apply(View.<String, Integer>asMultimap()); PCollection<KV<String, Integer>> output = pipeline.apply("CreateMainInput", Create.of(2 /* size */)) .apply( "OutputSideInputs", ParDo.of(new DoFn<Integer, KV<String, Integer>>() { @ProcessElement public void processElement(ProcessContext c) { assertEquals((int) c.element(), c.sideInput(view).size()); assertEquals((int) c.element(), c.sideInput(view).entrySet().size()); for (Entry<String, Iterable<Integer>> entry : c.sideInput(view).entrySet()) { for (Integer value : entry.getValue()) { c.output(KV.of(entry.getKey(), value)); } } } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder( KV.of("a", 1), KV.of("a", 2), KV.of("b", 3)); pipeline.run(); } private static class NonDeterministicStringCoder extends AtomicCoder<String> { @Override public void encode(String value, OutputStream outStream) throws CoderException, 
IOException { encode(value, outStream, Coder.Context.NESTED); } @Override public void encode(String value, OutputStream outStream, Coder.Context context) throws CoderException, IOException { StringUtf8Coder.of().encode(value, outStream, context); } @Override public String decode(InputStream inStream) throws CoderException, IOException { return decode(inStream, Coder.Context.NESTED); } @Override public String decode(InputStream inStream, Coder.Context context) throws CoderException, IOException { return StringUtf8Coder.of().decode(inStream, context); } @Override public void verifyDeterministic() throws org.apache.beam.sdk.coders.Coder.NonDeterministicException { throw new NonDeterministicException(this, "Test coder is not deterministic on purpose."); } } @Test @Category(ValidatesRunner.class) public void testMultimapSideInputWithNonDeterministicKeyCoder() { final PCollectionView<Map<String, Iterable<Integer>>> view = pipeline.apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("a", 2), KV.of("b", 3)) .withCoder(KvCoder.of(new NonDeterministicStringCoder(), VarIntCoder.of()))) .apply(View.<String, Integer>asMultimap()); PCollection<KV<String, Integer>> output = pipeline.apply("CreateMainInput", Create.of("apple", "banana", "blackberry")) .apply( "OutputSideInputs", ParDo.of(new DoFn<String, KV<String, Integer>>() { @ProcessElement public void processElement(ProcessContext c) { for (Integer v : c.sideInput(view).get(c.element().substring(0, 1))) { c.output(of(c.element(), v)); } } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder( KV.of("apple", 1), KV.of("apple", 2), KV.of("banana", 3), KV.of("blackberry", 3)); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testWindowedMultimapSideInput() { final PCollectionView<Map<String, Iterable<Integer>>> view = pipeline.apply("CreateSideInput", Create.timestamped( TimestampedValue.of(KV.of("a", 1), new Instant(1)), TimestampedValue.of(KV.of("a", 2), new Instant(7)), 
TimestampedValue.of(KV.of("b", 3), new Instant(14))))
          .apply(
              "SideWindowInto",
              Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))))
          .apply(View.<String, Integer>asMultimap());

  // Main-input words are timestamped and windowed the same way as the side input; each word
  // is looked up in the multimap by its first letter.
  PCollection<KV<String, Integer>> output =
      pipeline
          .apply(
              "CreateMainInput",
              Create.timestamped(
                  TimestampedValue.of("apple", new Instant(5)),
                  TimestampedValue.of("banana", new Instant(13)),
                  TimestampedValue.of("blackberry", new Instant(16))))
          .apply("MainWindowInto", Window.<String>into(FixedWindows.of(Duration.millis(10))))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                      new DoFn<String, KV<String, Integer>>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          for (Integer v : c.sideInput(view).get(c.element().substring(0, 1))) {
                            c.output(of(c.element(), v));
                          }
                        }
                      })
                  .withSideInputs(view));

  PAssert.that(output)
      .containsInAnyOrder(
          KV.of("apple", 1), KV.of("apple", 2), KV.of("banana", 3), KV.of("blackberry", 3));
  pipeline.run();
}

/**
 * Windowed variant of the multimap entry-set test. Each main-input element carries the number
 * of keys the DoFn expects to find in the multimap it observes.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedMultimapAsEntrySetSideInput() {
  final PCollectionView<Map<String, Iterable<Integer>>> view =
      pipeline
          .apply(
              "CreateSideInput",
              Create.timestamped(
                  TimestampedValue.of(KV.of("a", 1), new Instant(1)),
                  TimestampedValue.of(KV.of("a", 2), new Instant(7)),
                  TimestampedValue.of(KV.of("b", 3), new Instant(14))))
          .apply(
              "SideWindowInto",
              Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))))
          .apply(View.<String, Integer>asMultimap());

  PCollection<KV<String, Integer>> output =
      pipeline
          .apply(
              "CreateMainInput",
              Create.timestamped(
                  TimestampedValue.of(1 /* size */, new Instant(5)),
                  TimestampedValue.of(1 /* size */, new Instant(16))))
          .apply("MainWindowInto", Window.<Integer>into(FixedWindows.of(Duration.millis(10))))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                  new DoFn<Integer, KV<String, Integer>>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      // The element is the expected number of keys.
                      assertEquals((int) c.element(), c.sideInput(view).size());
                      assertEquals((int) c.element(),
c.sideInput(view).entrySet().size()); for (Entry<String, Iterable<Integer>> entry : c.sideInput(view).entrySet()) { for (Integer value : entry.getValue()) { c.output(KV.of(entry.getKey(), value)); } } } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder( KV.of("a", 1), KV.of("a", 2), KV.of("b", 3)); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testWindowedMultimapSideInputWithNonDeterministicKeyCoder() { final PCollectionView<Map<String, Iterable<Integer>>> view = pipeline.apply("CreateSideInput", Create.timestamped( TimestampedValue.of(KV.of("a", 1), new Instant(1)), TimestampedValue.of(KV.of("a", 2), new Instant(7)), TimestampedValue.of(KV.of("b", 3), new Instant(14))) .withCoder(KvCoder.of(new NonDeterministicStringCoder(), VarIntCoder.of()))) .apply("SideWindowInto", Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10)))) .apply(View.<String, Integer>asMultimap()); PCollection<KV<String, Integer>> output = pipeline.apply("CreateMainInput", Create.timestamped( TimestampedValue.of("apple", new Instant(5)), TimestampedValue.of("banana", new Instant(13)), TimestampedValue.of("blackberry", new Instant(16)))) .apply("MainWindowInto", Window.<String>into(FixedWindows.of(Duration.millis(10)))) .apply("OutputSideInputs", ParDo.of( new DoFn<String, KV<String, Integer>>() { @ProcessElement public void processElement(ProcessContext c) { for (Integer v : c.sideInput(view) .get(c.element().substring(0, 1))) { c.output(of(c.element(), v)); } } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder( KV.of("apple", 1), KV.of("apple", 2), KV.of("banana", 3), KV.of("blackberry", 3)); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testEmptyMultimapSideInput() throws Exception { final PCollectionView<Map<String, Iterable<Integer>>> view = pipeline.apply("CreateEmptyView", Create.empty( KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))) .apply(View.<String, Integer>asMultimap()); 
PCollection<Integer> results = pipeline.apply("Create1", Create.of(1)) .apply("OutputSideInputs", ParDo.of(new DoFn<Integer, Integer>() { @ProcessElement public void processElement(ProcessContext c) { assertTrue(c.sideInput(view).isEmpty()); assertTrue(c.sideInput(view).entrySet().isEmpty()); assertFalse(c.sideInput(view).entrySet().iterator().hasNext()); c.output(c.element()); } }).withSideInputs(view)); // Pass at least one value through to guarantee that DoFn executes. PAssert.that(results).containsInAnyOrder(1); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testEmptyMultimapSideInputWithNonDeterministicKeyCoder() throws Exception { final PCollectionView<Map<String, Iterable<Integer>>> view = pipeline .apply( "CreateEmptyView", Create.empty(KvCoder.of(new NonDeterministicStringCoder(), VarIntCoder.of()))) .apply(View.<String, Integer>asMultimap()); PCollection<Integer> results = pipeline.apply("Create1", Create.of(1)) .apply("OutputSideInputs", ParDo.of(new DoFn<Integer, Integer>() { @ProcessElement public void processElement(ProcessContext c) { assertTrue(c.sideInput(view).isEmpty()); assertTrue(c.sideInput(view).entrySet().isEmpty()); assertFalse(c.sideInput(view).entrySet().iterator().hasNext()); c.output(c.element()); } }).withSideInputs(view)); // Pass at least one value through to guarantee that DoFn executes. 
PAssert.that(results).containsInAnyOrder(1);
  pipeline.run();
}

/**
 * Checks that the map handed to a DoFn by a multimap view rejects mutation: clear, put,
 * remove and putAll must each throw UnsupportedOperationException.
 */
@Test
@Category(ValidatesRunner.class)
public void testMultimapSideInputIsImmutable() {
  final PCollectionView<Map<String, Iterable<Integer>>> view =
      pipeline
          .apply("CreateSideInput", Create.of(KV.of("a", 1)))
          .apply(View.<String, Integer>asMultimap());

  PCollection<KV<String, Integer>> output =
      pipeline
          .apply("CreateMainInput", Create.of("apple"))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                      new DoFn<String, KV<String, Integer>>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          // Each mutator must throw; reaching fail() means the call succeeded.
                          // The empty catch blocks are deliberate: the exception is the
                          // expected outcome.
                          try {
                            c.sideInput(view).clear();
                            fail("Expected UnsupportedOperationException on clear()");
                          } catch (UnsupportedOperationException expected) {
                          }
                          try {
                            c.sideInput(view).put("c", ImmutableList.of(3));
                            fail("Expected UnsupportedOperationException on put()");
                          } catch (UnsupportedOperationException expected) {
                          }
                          try {
                            c.sideInput(view).remove("c");
                            fail("Expected UnsupportedOperationException on remove()");
                          } catch (UnsupportedOperationException expected) {
                          }
                          try {
                            c.sideInput(view).putAll(new HashMap<String, Iterable<Integer>>());
                            fail("Expected UnsupportedOperationException on putAll()");
                          } catch (UnsupportedOperationException expected) {
                          }
                          // Emit the values stored under the element's first letter so the
                          // test has output to assert on.
                          for (Integer v : c.sideInput(view).get(c.element().substring(0, 1))) {
                            c.output(KV.of(c.element(), v));
                          }
                        }
                      })
                  .withSideInputs(view));

  // Pass at least one value through to guarantee that DoFn executes.
PAssert.that(output).containsInAnyOrder(KV.of("apple", 1)); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testMapSideInput() { final PCollectionView<Map<String, Integer>> view = pipeline.apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("b", 3))) .apply(View.<String, Integer>asMap()); PCollection<KV<String, Integer>> output = pipeline.apply("CreateMainInput", Create.of("apple", "banana", "blackberry")) .apply( "OutputSideInputs", ParDo.of(new DoFn<String, KV<String, Integer>>() { @ProcessElement public void processElement(ProcessContext c) { c.output( of(c.element(), c.sideInput(view).get(c.element().substring(0, 1)))); } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder( KV.of("apple", 1), KV.of("banana", 3), KV.of("blackberry", 3)); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testMapAsEntrySetSideInput() { final PCollectionView<Map<String, Integer>> view = pipeline.apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("b", 3))) .apply(View.<String, Integer>asMap()); PCollection<KV<String, Integer>> output = pipeline.apply("CreateMainInput", Create.of(2 /* size */)) .apply( "OutputSideInputs", ParDo.of(new DoFn<Integer, KV<String, Integer>>() { @ProcessElement public void processElement(ProcessContext c) { assertEquals((int) c.element(), c.sideInput(view).size()); assertEquals((int) c.element(), c.sideInput(view).entrySet().size()); for (Entry<String, Integer> entry : c.sideInput(view).entrySet()) { c.output(KV.of(entry.getKey(), entry.getValue())); } } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder( KV.of("a", 1), KV.of("b", 3)); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testMapSideInputWithNonDeterministicKeyCoder() { final PCollectionView<Map<String, Integer>> view = pipeline.apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("b", 3)) .withCoder(KvCoder.of(new NonDeterministicStringCoder(), VarIntCoder.of()))) .apply(View.<String, 
Integer>asMap()); PCollection<KV<String, Integer>> output = pipeline.apply("CreateMainInput", Create.of("apple", "banana", "blackberry")) .apply( "OutputSideInputs", ParDo.of(new DoFn<String, KV<String, Integer>>() { @ProcessElement public void processElement(ProcessContext c) { c.output( of(c.element(), c.sideInput(view).get(c.element().substring(0, 1)))); } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder( KV.of("apple", 1), KV.of("banana", 3), KV.of("blackberry", 3)); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testWindowedMapSideInput() { final PCollectionView<Map<String, Integer>> view = pipeline.apply("CreateSideInput", Create.timestamped( TimestampedValue.of(KV.of("a", 1), new Instant(1)), TimestampedValue.of(KV.of("b", 2), new Instant(4)), TimestampedValue.of(KV.of("b", 3), new Instant(18)))) .apply( "SideWindowInto", Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10)))) .apply(View.<String, Integer>asMap()); PCollection<KV<String, Integer>> output = pipeline.apply("CreateMainInput", Create.timestamped( TimestampedValue.of("apple", new Instant(5)), TimestampedValue.of("banana", new Instant(4)), TimestampedValue.of("blackberry", new Instant(16)))) .apply("MainWindowInto", Window.<String>into(FixedWindows.of(Duration.millis(10)))) .apply("OutputSideInputs", ParDo.of( new DoFn<String, KV<String, Integer>>() { @ProcessElement public void processElement(ProcessContext c) { c.output(KV.of( c.element(), c.sideInput(view).get( c.element().substring(0, 1)))); } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder( KV.of("apple", 1), KV.of("banana", 2), KV.of("blackberry", 3)); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testWindowedMapAsEntrySetSideInput() { final PCollectionView<Map<String, Integer>> view = pipeline.apply("CreateSideInput", Create.timestamped( TimestampedValue.of(KV.of("a", 1), new Instant(1)), TimestampedValue.of(KV.of("b", 2), new Instant(4)), 
TimestampedValue.of(KV.of("b", 3), new Instant(18))))
          .apply(
              "SideWindowInto",
              Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))))
          .apply(View.<String, Integer>asMap());

  // Each main-input element carries the number of entries the DoFn expects to find in the map
  // it observes.
  PCollection<KV<String, Integer>> output =
      pipeline
          .apply(
              "CreateMainInput",
              Create.timestamped(
                  TimestampedValue.of(2 /* size */, new Instant(5)),
                  TimestampedValue.of(1 /* size */, new Instant(16))))
          .apply("MainWindowInto", Window.<Integer>into(FixedWindows.of(Duration.millis(10))))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                      new DoFn<Integer, KV<String, Integer>>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          assertEquals((int) c.element(), c.sideInput(view).size());
                          assertEquals((int) c.element(), c.sideInput(view).entrySet().size());
                          for (Entry<String, Integer> entry : c.sideInput(view).entrySet()) {
                            c.output(KV.of(entry.getKey(), entry.getValue()));
                          }
                        }
                      })
                  .withSideInputs(view));

  PAssert.that(output).containsInAnyOrder(KV.of("a", 1), KV.of("b", 2), KV.of("b", 3));
  pipeline.run();
}

/**
 * Windowed map lookup test whose side-input keys are encoded with
 * NonDeterministicStringCoder; side and main inputs use 10 ms fixed windows.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedMapSideInputWithNonDeterministicKeyCoder() {
  final PCollectionView<Map<String, Integer>> view =
      pipeline
          .apply(
              "CreateSideInput",
              Create.timestamped(
                      TimestampedValue.of(KV.of("a", 1), new Instant(1)),
                      TimestampedValue.of(KV.of("b", 2), new Instant(4)),
                      TimestampedValue.of(KV.of("b", 3), new Instant(18)))
                  .withCoder(KvCoder.of(new NonDeterministicStringCoder(), VarIntCoder.of())))
          .apply(
              "SideWindowInto",
              Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))))
          .apply(View.<String, Integer>asMap());

  PCollection<KV<String, Integer>> output =
      pipeline
          .apply(
              "CreateMainInput",
              Create.timestamped(
                  TimestampedValue.of("apple", new Instant(5)),
                  TimestampedValue.of("banana", new Instant(4)),
                  TimestampedValue.of("blackberry", new Instant(16))))
          .apply("MainWindowInto", Window.<String>into(FixedWindows.of(Duration.millis(10))))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                  new DoFn<String, KV<String, Integer>>() {
@ProcessElement public void processElement(ProcessContext c) { c.output(of( c.element(), c.sideInput(view).get( c.element().substring(0, 1)))); } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder( KV.of("apple", 1), KV.of("banana", 2), KV.of("blackberry", 3)); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testEmptyMapSideInput() throws Exception { final PCollectionView<Map<String, Integer>> view = pipeline .apply( "CreateEmptyView", Create.empty(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))) .apply(View.<String, Integer>asMap()); PCollection<Integer> results = pipeline.apply("Create1", Create.of(1)) .apply("OutputSideInputs", ParDo.of(new DoFn<Integer, Integer>() { @ProcessElement public void processElement(ProcessContext c) { assertTrue(c.sideInput(view).isEmpty()); assertTrue(c.sideInput(view).entrySet().isEmpty()); assertFalse(c.sideInput(view).entrySet().iterator().hasNext()); c.output(c.element()); } }).withSideInputs(view)); // Pass at least one value through to guarantee that DoFn executes. PAssert.that(results).containsInAnyOrder(1); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testEmptyMapSideInputWithNonDeterministicKeyCoder() throws Exception { final PCollectionView<Map<String, Integer>> view = pipeline.apply("CreateEmptyView", Create.empty( KvCoder.of(new NonDeterministicStringCoder(), VarIntCoder.of()))) .apply(View.<String, Integer>asMap()); PCollection<Integer> results = pipeline.apply("Create1", Create.of(1)) .apply("OutputSideInputs", ParDo.of(new DoFn<Integer, Integer>() { @ProcessElement public void processElement(ProcessContext c) { assertTrue(c.sideInput(view).isEmpty()); assertTrue(c.sideInput(view).entrySet().isEmpty()); assertFalse(c.sideInput(view).entrySet().iterator().hasNext()); c.output(c.element()); } }).withSideInputs(view)); // Pass at least one value through to guarantee that DoFn executes. 
PAssert.that(results).containsInAnyOrder(1); pipeline.run(); } @Test @Category(NeedsRunner.class) public void testMapSideInputWithNullValuesCatchesDuplicates() { final PCollectionView<Map<String, Integer>> view = pipeline .apply( "CreateSideInput", Create.of(KV.of("a", (Integer) null), KV.of("a", (Integer) null)) .withCoder( KvCoder.of(StringUtf8Coder.of(), NullableCoder.of(VarIntCoder.of())))) .apply(View.<String, Integer>asMap()); PCollection<KV<String, Integer>> output = pipeline.apply("CreateMainInput", Create.of("apple", "banana", "blackberry")) .apply( "OutputSideInputs", ParDo.of(new DoFn<String, KV<String, Integer>>() { @ProcessElement public void processElement(ProcessContext c) { c.output( KV.of(c.element(), c.sideInput(view).get(c.element().substring(0, 1)))); } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder( KV.of("apple", 1), KV.of("banana", 3), KV.of("blackberry", 3)); // PipelineExecutionException is thrown with cause having a message stating that a // duplicate is not allowed. 
thrown.expectCause( ThrowableMessageMatcher.hasMessage(Matchers.containsString("Duplicate values for a"))); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testMapSideInputIsImmutable() { final PCollectionView<Map<String, Integer>> view = pipeline.apply("CreateSideInput", Create.of(KV.of("a", 1))) .apply(View.<String, Integer>asMap()); PCollection<KV<String, Integer>> output = pipeline.apply("CreateMainInput", Create.of("apple")) .apply( "OutputSideInputs", ParDo.of(new DoFn<String, KV<String, Integer>>() { @ProcessElement public void processElement(ProcessContext c) { try { c.sideInput(view).clear(); fail("Expected UnsupportedOperationException on clear()"); } catch (UnsupportedOperationException expected) { } try { c.sideInput(view).put("c", 3); fail("Expected UnsupportedOperationException on put()"); } catch (UnsupportedOperationException expected) { } try { c.sideInput(view).remove("c"); fail("Expected UnsupportedOperationException on remove()"); } catch (UnsupportedOperationException expected) { } try { c.sideInput(view).putAll(new HashMap<String, Integer>()); fail("Expected UnsupportedOperationException on putAll()"); } catch (UnsupportedOperationException expected) { } c.output( KV.of(c.element(), c.sideInput(view).get(c.element().substring(0, 1)))); } }).withSideInputs(view)); // Pass at least one value through to guarantee that DoFn executes. 
PAssert.that(output).containsInAnyOrder(KV.of("apple", 1)); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testCombinedMapSideInput() { final PCollectionView<Map<String, Integer>> view = pipeline.apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("a", 20), KV.of("b", 3))) .apply("SumIntegers", Combine.<String, Integer, Integer>perKey(Sum.ofIntegers())) .apply(View.<String, Integer>asMap()); PCollection<KV<String, Integer>> output = pipeline.apply("CreateMainInput", Create.of("apple", "banana", "blackberry")) .apply("Output", ParDo.of(new DoFn<String, KV<String, Integer>>() { @ProcessElement public void processElement(ProcessContext c) { c.output(KV .of(c.element(), c.sideInput(view).get(c.element().substring(0, 1)))); } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder( KV.of("apple", 21), KV.of("banana", 3), KV.of("blackberry", 3)); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testWindowedSideInputFixedToFixed() { final PCollectionView<Integer> view = pipeline.apply( "CreateSideInput", Create.timestamped(TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(11)), TimestampedValue.of(3, new Instant(13)))) .apply("WindowSideInput", Window.<Integer>into(FixedWindows.of(Duration.millis(10)))) .apply(Sum.integersGlobally().withoutDefaults()) .apply(View.<Integer>asSingleton()); PCollection<String> output = pipeline.apply("CreateMainInput", Create.timestamped( TimestampedValue.of("A", new Instant(4)), TimestampedValue.of("B", new Instant(15)), TimestampedValue.of("C", new Instant(7)))) .apply("WindowMainInput", Window.<String>into(FixedWindows.of(Duration.millis(10)))) .apply("OutputMainAndSideInputs", ParDo.of( new DoFn<String, String>() { @ProcessElement public void processElement(ProcessContext c) { c.output(c.element() + c.sideInput(view)); } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder("A1", "B5", "C1"); pipeline.run(); } @Test 
@Category(ValidatesRunner.class) public void testWindowedSideInputFixedToGlobal() { final PCollectionView<Integer> view = pipeline.apply( "CreateSideInput", Create.timestamped(TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(11)), TimestampedValue.of(3, new Instant(13)))) .apply("WindowSideInput", Window.<Integer>into(new GlobalWindows())) .apply(Sum.integersGlobally()) .apply(View.<Integer>asSingleton()); PCollection<String> output = pipeline.apply("CreateMainInput", Create.timestamped( TimestampedValue.of("A", new Instant(4)), TimestampedValue.of("B", new Instant(15)), TimestampedValue.of("C", new Instant(7)))) .apply("WindowMainInput", Window.<String>into(FixedWindows.of(Duration.millis(10)))) .apply("OutputMainAndSideInputs", ParDo.of( new DoFn<String, String>() { @ProcessElement public void processElement(ProcessContext c) { c.output(c.element() + c.sideInput(view)); } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder("A6", "B6", "C6"); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testWindowedSideInputFixedToFixedWithDefault() { final PCollectionView<Integer> view = pipeline.apply("CreateSideInput", Create.timestamped( TimestampedValue.of(2, new Instant(11)), TimestampedValue.of(3, new Instant(13)))) .apply("WindowSideInput", Window.<Integer>into(FixedWindows.of(Duration.millis(10)))) .apply(Sum.integersGlobally().asSingletonView()); PCollection<String> output = pipeline.apply("CreateMainInput", Create.timestamped( TimestampedValue.of("A", new Instant(4)), TimestampedValue.of("B", new Instant(15)), TimestampedValue.of("C", new Instant(7)))) .apply("WindowMainInput", Window.<String>into(FixedWindows.of(Duration.millis(10)))) .apply("OutputMainAndSideInputs", ParDo.of( new DoFn<String, String>() { @ProcessElement public void processElement(ProcessContext c) { c.output(c.element() + c.sideInput(view)); } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder("A0", "B5", "C0"); 
pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testSideInputWithNullDefault() { final PCollectionView<Void> view = pipeline.apply("CreateSideInput", Create.of((Void) null).withCoder(VoidCoder.of())) .apply(Combine.globally(new SerializableFunction<Iterable<Void>, Void>() { @Override public Void apply(Iterable<Void> input) { return null; } }).asSingletonView()); PCollection<String> output = pipeline.apply("CreateMainInput", Create.of("")) .apply( "OutputMainAndSideInputs", ParDo.of(new DoFn<String, String>() { @ProcessElement public void processElement(ProcessContext c) { c.output(c.element() + c.sideInput(view)); } }).withSideInputs(view)); PAssert.that(output).containsInAnyOrder("null"); pipeline.run(); } @Test @Category(ValidatesRunner.class) public void testSideInputWithNestedIterables() { final PCollectionView<Iterable<Integer>> view1 = pipeline.apply("CreateVoid1", Create.of((Void) null).withCoder(VoidCoder.of())) .apply("OutputOneInteger", ParDo.of(new DoFn<Void, Integer>() { @ProcessElement public void processElement(ProcessContext c) { c.output(17); } })) .apply("View1", View.<Integer>asIterable()); final PCollectionView<Iterable<Iterable<Integer>>> view2 = pipeline.apply("CreateVoid2", Create.of((Void) null).withCoder(VoidCoder.of())) .apply( "OutputSideInput", ParDo.of(new DoFn<Void, Iterable<Integer>>() { @ProcessElement public void processElement(ProcessContext c) { c.output(c.sideInput(view1)); } }).withSideInputs(view1)) .apply("View2", View.<Iterable<Integer>>asIterable()); PCollection<Integer> output = pipeline.apply("CreateVoid3", Create.of((Void) null).withCoder(VoidCoder.of())) .apply("ReadIterableSideInput", ParDo.of(new DoFn<Void, Integer>() { @ProcessElement public void processElement(ProcessContext c) { for (Iterable<Integer> input : c.sideInput(view2)) { for (Integer i : input) { c.output(i); } } } }).withSideInputs(view2)); PAssert.that(output).containsInAnyOrder(17); pipeline.run(); } @Test public void 
testViewGetName() { assertEquals("View.AsSingleton", View.<Integer>asSingleton().getName()); assertEquals("View.AsIterable", View.<Integer>asIterable().getName()); assertEquals("View.AsMap", View.<String, Integer>asMap().getName()); assertEquals("View.AsMultimap", View.<String, Integer>asMultimap().getName()); } private void testViewUnbounded( Pipeline pipeline, PTransform<PCollection<KV<String, Integer>>, ? extends PCollectionView<?>> view) { thrown.expect(IllegalStateException.class); thrown.expectMessage("Unable to create a side-input view from input"); thrown.expectCause( ThrowableMessageMatcher.hasMessage(Matchers.containsString("non-bounded PCollection"))); pipeline .apply( new PTransform<PBegin, PCollection<KV<String, Integer>>>() { @Override public PCollection<KV<String, Integer>> expand(PBegin input) { return PCollection.<KV<String, Integer>>createPrimitiveOutputInternal( input.getPipeline(), WindowingStrategy.globalDefault(), PCollection.IsBounded.UNBOUNDED) .setTypeDescriptor(new TypeDescriptor<KV<String, Integer>>() {}); } }) .apply(view); } private void testViewNonmerging( Pipeline pipeline, PTransform<PCollection<KV<String, Integer>>, ? 
extends PCollectionView<?>> view) { thrown.expect(IllegalStateException.class); thrown.expectMessage("Unable to create a side-input view from input"); thrown.expectCause( ThrowableMessageMatcher.hasMessage(Matchers.containsString("Consumed by GroupByKey"))); pipeline.apply(Create.<KV<String, Integer>>of(KV.of("hello", 5))) .apply(Window.<KV<String, Integer>>into(new InvalidWindows<>( "Consumed by GroupByKey", FixedWindows.of(Duration.standardHours(1))))) .apply(view); } @Test public void testViewUnboundedAsSingletonDirect() { testViewUnbounded(pipeline, View.<KV<String, Integer>>asSingleton()); } @Test public void testViewUnboundedAsIterableDirect() { testViewUnbounded(pipeline, View.<KV<String, Integer>>asIterable()); } @Test public void testViewUnboundedAsListDirect() { testViewUnbounded(pipeline, View.<KV<String, Integer>>asList()); } @Test public void testViewUnboundedAsMapDirect() { testViewUnbounded(pipeline, View.<String, Integer>asMap()); } @Test public void testViewUnboundedAsMultimapDirect() { testViewUnbounded(pipeline, View.<String, Integer>asMultimap()); } @Test public void testViewNonmergingAsSingletonDirect() { testViewNonmerging(pipeline, View.<KV<String, Integer>>asSingleton()); } @Test public void testViewNonmergingAsIterableDirect() { testViewNonmerging(pipeline, View.<KV<String, Integer>>asIterable()); } @Test public void testViewNonmergingAsListDirect() { testViewNonmerging(pipeline, View.<KV<String, Integer>>asList()); } @Test public void testViewNonmergingAsMapDirect() { testViewNonmerging(pipeline, View.<String, Integer>asMap()); } @Test public void testViewNonmergingAsMultimapDirect() { testViewNonmerging(pipeline, View.<String, Integer>asMultimap()); } }