/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.beam.sdk.runners; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; import static org.junit.Assert.assertThat; import com.google.common.collect.Iterables; import java.io.Serializable; import java.util.Collections; import java.util.HashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import org.apache.beam.sdk.Pipeline.PipelineVisitor; import org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults; import org.apache.beam.sdk.io.CountingSource; import org.apache.beam.sdk.io.GenerateSequence; import org.apache.beam.sdk.io.Read; import org.apache.beam.sdk.runners.PTransformOverrideFactory.ReplacementOutput; import org.apache.beam.sdk.runners.TransformHierarchy.Node; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.ParDo.MultiOutput; import org.apache.beam.sdk.transforms.ParDo.SingleOutput; import org.apache.beam.sdk.values.PBegin; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollection.IsBounded; import org.apache.beam.sdk.values.PCollectionList; import org.apache.beam.sdk.values.PCollectionTuple; import org.apache.beam.sdk.values.PDone; import org.apache.beam.sdk.values.PInput; import org.apache.beam.sdk.values.POutput; import org.apache.beam.sdk.values.PValue; import org.apache.beam.sdk.values.TaggedPValue; import org.apache.beam.sdk.values.TupleTag; import org.apache.beam.sdk.values.TupleTagList; import org.apache.beam.sdk.values.WindowingStrategy; import org.hamcrest.Matchers; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; /** * Tests for {@link TransformHierarchy}. */ @RunWith(JUnit4.class) public class TransformHierarchyTest implements Serializable { @Rule public final transient TestPipeline pipeline = TestPipeline.create().enableAbandonedNodeEnforcement(false); @Rule public transient ExpectedException thrown = ExpectedException.none(); private transient TransformHierarchy hierarchy; @Before public void setup() { hierarchy = new TransformHierarchy(pipeline); } @Test public void getCurrentNoPushReturnsRoot() { assertThat(hierarchy.getCurrent().isRootNode(), is(true)); } @Test public void pushWithoutPushFails() { thrown.expect(IllegalStateException.class); hierarchy.popNode(); } @Test public void pushThenPopSucceeds() { TransformHierarchy.Node root = hierarchy.getCurrent(); TransformHierarchy.Node node = hierarchy.pushNode("Create", PBegin.in(pipeline), Create.of(1)); assertThat(hierarchy.getCurrent(), equalTo(node)); hierarchy.popNode(); assertThat(node.finishedSpecifying, is(true)); assertThat(hierarchy.getCurrent(), equalTo(root)); } @Test public void emptyCompositeSucceeds() { PCollection<Long> created = PCollection.createPrimitiveOutputInternal( pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED); TransformHierarchy.Node node = hierarchy.pushNode("Create", PBegin.in(pipeline), Create.of(1)); hierarchy.setOutput(created); hierarchy.popNode(); PCollectionList<Long> pcList = PCollectionList.of(created); TransformHierarchy.Node emptyTransform = hierarchy.pushNode( "Extract", pcList, new PTransform<PCollectionList<Long>, PCollection<Long>>() { @Override public PCollection<Long> expand(PCollectionList<Long> input) { return input.get(0); } }); hierarchy.setOutput(created); hierarchy.popNode(); assertThat(hierarchy.getProducer(created), equalTo(node)); assertThat( "A Transform that produces non-primitive output should be composite", emptyTransform.isCompositeNode(), is(true)); } @Test public void producingOwnAndOthersOutputsFails() { PCollection<Long> created = PCollection.createPrimitiveOutputInternal( pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED); hierarchy.pushNode("Create", PBegin.in(pipeline), Create.of(1)); hierarchy.setOutput(created); hierarchy.popNode(); PCollectionList<Long> pcList = PCollectionList.of(created); final PCollectionList<Long> appended = pcList.and( PCollection.<Long>createPrimitiveOutputInternal( pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED) .setName("prim")); hierarchy.pushNode( "AddPc", pcList, new PTransform<PCollectionList<Long>, PCollectionList<Long>>() { @Override public PCollectionList<Long> expand(PCollectionList<Long> input) { return appended; } }); thrown.expect(IllegalArgumentException.class); thrown.expectMessage("contains a primitive POutput produced by it"); thrown.expectMessage("AddPc"); thrown.expectMessage("Create"); thrown.expectMessage(appended.expand().toString()); hierarchy.setOutput(appended); } @Test public void producingOwnOutputWithCompositeFails() { final PCollection<Long> comp = PCollection.createPrimitiveOutputInternal( pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED); PTransform<PBegin, PCollection<Long>> root = new PTransform<PBegin, PCollection<Long>>() { @Override public PCollection<Long> expand(PBegin input) { return comp; } }; hierarchy.pushNode("Composite", PBegin.in(pipeline), root); Create.Values<Integer> create = Create.of(1); hierarchy.pushNode("Create", PBegin.in(pipeline), create); hierarchy.setOutput(pipeline.apply(create)); hierarchy.popNode(); thrown.expect(IllegalArgumentException.class); thrown.expectMessage("contains a primitive POutput produced by it"); thrown.expectMessage("primitive transforms are permitted to produce"); thrown.expectMessage("Composite"); hierarchy.setOutput(comp); } @Test public void replaceSucceeds() { PTransform<?, ?> enclosingPT = new PTransform<PInput, POutput>() { @Override public POutput expand(PInput input) { return PDone.in(input.getPipeline()); } }; TransformHierarchy.Node enclosing = hierarchy.pushNode("Enclosing", PBegin.in(pipeline), enclosingPT); Create.Values<Long> originalTransform = Create.of(1L); TransformHierarchy.Node original = hierarchy.pushNode("Create", PBegin.in(pipeline), originalTransform); assertThat(hierarchy.getCurrent(), equalTo(original)); PCollection<Long> originalOutput = pipeline.apply(originalTransform); hierarchy.setOutput(originalOutput); hierarchy.popNode(); assertThat(original.finishedSpecifying, is(true)); hierarchy.setOutput(PDone.in(pipeline)); hierarchy.popNode(); assertThat(hierarchy.getCurrent(), not(equalTo(enclosing))); Read.Bounded<Long> replacementTransform = Read.from(CountingSource.upTo(1L)); PCollection<Long> replacementOutput = pipeline.apply(replacementTransform); Node replacement = hierarchy.replaceNode(original, PBegin.in(pipeline), replacementTransform); assertThat(hierarchy.getCurrent(), equalTo(replacement)); hierarchy.setOutput(replacementOutput); TaggedPValue taggedReplacement = TaggedPValue.ofExpandedValue(replacementOutput); Map<PValue, ReplacementOutput> replacementOutputs = Collections.<PValue, ReplacementOutput>singletonMap( replacementOutput, ReplacementOutput.of( TaggedPValue.ofExpandedValue(originalOutput), taggedReplacement)); hierarchy.replaceOutputs(replacementOutputs); assertThat(replacement.getInputs(), equalTo(original.getInputs())); assertThat(replacement.getEnclosingNode(), equalTo(original.getEnclosingNode())); assertThat(replacement.getEnclosingNode(), equalTo(enclosing)); assertThat( replacement.getTransform(), Matchers.<PTransform<?, ?>>equalTo(replacementTransform)); // THe tags of the replacement transform are matched to the appropriate PValues of the original assertThat( replacement.getOutputs().keySet(), Matchers.<TupleTag<?>>contains(taggedReplacement.getTag())); assertThat(replacement.getOutputs().values(), Matchers.<PValue>contains(originalOutput)); hierarchy.popNode(); } @Test public void replaceWithCompositeSucceeds() { final SingleOutput<Long, Long> originalParDo = ParDo.of( new DoFn<Long, Long>() { @ProcessElement public void processElement(ProcessContext ctxt) { ctxt.output(ctxt.element() + 1L); } }); GenerateSequence genUpstream = GenerateSequence.from(0); PCollection<Long> upstream = pipeline.apply(genUpstream); PCollection<Long> output = upstream.apply("Original", originalParDo); hierarchy.pushNode("Upstream", pipeline.begin(), genUpstream); hierarchy.finishSpecifyingInput(); hierarchy.setOutput(upstream); hierarchy.popNode(); TransformHierarchy.Node original = hierarchy.pushNode("Original", upstream, originalParDo); hierarchy.finishSpecifyingInput(); hierarchy.setOutput(output); hierarchy.popNode(); final TupleTag<Long> longs = new TupleTag<>(); final MultiOutput<Long, Long> replacementParDo = ParDo.of( new DoFn<Long, Long>() { @ProcessElement public void processElement(ProcessContext ctxt) { ctxt.output(ctxt.element() + 1L); } }) .withOutputTags(longs, TupleTagList.empty()); PTransform<PCollection<Long>, PCollection<Long>> replacementComposite = new PTransform<PCollection<Long>, PCollection<Long>>() { @Override public PCollection<Long> expand(PCollection<Long> input) { return input.apply("Contained", replacementParDo).get(longs); } }; PCollectionTuple replacementOutput = upstream.apply("Contained", replacementParDo); Node compositeNode = hierarchy.replaceNode(original, upstream, replacementComposite); Node replacementParNode = hierarchy.pushNode("Original/Contained", upstream, replacementParDo); hierarchy.finishSpecifyingInput(); hierarchy.setOutput(replacementOutput); hierarchy.popNode(); hierarchy.setOutput(replacementOutput.get(longs)); Entry<TupleTag<?>, PValue> replacementLongs = Iterables.getOnlyElement(replacementOutput.expand().entrySet()); hierarchy.replaceOutputs( Collections.<PValue, ReplacementOutput>singletonMap( replacementOutput.get(longs), ReplacementOutput.of( TaggedPValue.ofExpandedValue(output), TaggedPValue.of(replacementLongs.getKey(), replacementLongs.getValue())))); assertThat( replacementParNode.getOutputs().keySet(), Matchers.<TupleTag<?>>contains(replacementLongs.getKey())); assertThat(replacementParNode.getOutputs().values(), Matchers.<PValue>contains(output)); assertThat( compositeNode.getOutputs().keySet(), equalTo(replacementOutput.get(longs).expand().keySet())); assertThat(compositeNode.getOutputs().values(), Matchers.<PValue>contains(output)); hierarchy.popNode(); } @Test public void visitVisitsAllPushed() { TransformHierarchy.Node root = hierarchy.getCurrent(); PBegin begin = PBegin.in(pipeline); Create.Values<Long> create = Create.of(1L); Read.Bounded<Long> read = Read.from(CountingSource.upTo(1L)); PCollection<Long> created = PCollection.createPrimitiveOutputInternal( pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED); SingleOutput<Long, Long> pardo = ParDo.of( new DoFn<Long, Long>() { @ProcessElement public void processElement(ProcessContext ctxt) { ctxt.output(ctxt.element()); } }); PCollection<Long> mapped = PCollection.createPrimitiveOutputInternal( pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED); TransformHierarchy.Node compositeNode = hierarchy.pushNode("Create", begin, create); hierarchy.finishSpecifyingInput(); assertThat(hierarchy.getCurrent(), equalTo(compositeNode)); assertThat(compositeNode.getInputs().entrySet(), Matchers.empty()); assertThat(compositeNode.getTransform(), Matchers.<PTransform<?, ?>>equalTo(create)); // Not yet set assertThat(compositeNode.getOutputs().entrySet(), Matchers.emptyIterable()); assertThat(compositeNode.getEnclosingNode().isRootNode(), is(true)); TransformHierarchy.Node primitiveNode = hierarchy.pushNode("Create/Read", begin, read); assertThat(hierarchy.getCurrent(), equalTo(primitiveNode)); hierarchy.finishSpecifyingInput(); hierarchy.setOutput(created); hierarchy.popNode(); assertThat(primitiveNode.getOutputs().values(), Matchers.<PValue>containsInAnyOrder(created)); assertThat(primitiveNode.getInputs().entrySet(), Matchers.emptyIterable()); assertThat(primitiveNode.getTransform(), Matchers.<PTransform<?, ?>>equalTo(read)); assertThat(primitiveNode.getEnclosingNode(), equalTo(compositeNode)); hierarchy.setOutput(created); // The composite is listed as outputting a PValue created by the contained primitive assertThat(compositeNode.getOutputs().values(), Matchers.<PValue>containsInAnyOrder(created)); // The producer of that PValue is still the primitive in which it is first output assertThat(hierarchy.getProducer(created), equalTo(primitiveNode)); hierarchy.popNode(); TransformHierarchy.Node otherPrimitive = hierarchy.pushNode("ParDo", created, pardo); hierarchy.finishSpecifyingInput(); hierarchy.setOutput(mapped); hierarchy.popNode(); final Set<TransformHierarchy.Node> visitedCompositeNodes = new HashSet<>(); final Set<TransformHierarchy.Node> visitedPrimitiveNodes = new HashSet<>(); final Set<PValue> visitedValuesInVisitor = new HashSet<>(); Set<PValue> visitedValues = hierarchy.visit( new PipelineVisitor.Defaults() { @Override public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) { visitedCompositeNodes.add(node); return CompositeBehavior.ENTER_TRANSFORM; } @Override public void visitPrimitiveTransform(TransformHierarchy.Node node) { visitedPrimitiveNodes.add(node); } @Override public void visitValue(PValue value, TransformHierarchy.Node producer) { visitedValuesInVisitor.add(value); } }); assertThat(visitedCompositeNodes, containsInAnyOrder(root, compositeNode)); assertThat(visitedPrimitiveNodes, containsInAnyOrder(primitiveNode, otherPrimitive)); assertThat(visitedValuesInVisitor, Matchers.<PValue>containsInAnyOrder(created, mapped)); assertThat(visitedValuesInVisitor, equalTo(visitedValues)); } /** * Tests that visiting the {@link TransformHierarchy} after replacing nodes does not visit any * of the original nodes or inaccessible values but does visit all of the replacement nodes, * new inaccessible replacement values, and the original output values. */ @Test public void visitAfterReplace() { Node root = hierarchy.getCurrent(); final SingleOutput<Long, Long> originalParDo = ParDo.of( new DoFn<Long, Long>() { @ProcessElement public void processElement(ProcessContext ctxt) { ctxt.output(ctxt.element() + 1L); } }); GenerateSequence genUpstream = GenerateSequence.from(0); PCollection<Long> upstream = pipeline.apply(genUpstream); PCollection<Long> output = upstream.apply("Original", originalParDo); Node upstreamNode = hierarchy.pushNode("Upstream", pipeline.begin(), genUpstream); hierarchy.finishSpecifyingInput(); hierarchy.setOutput(upstream); hierarchy.popNode(); Node original = hierarchy.pushNode("Original", upstream, originalParDo); hierarchy.finishSpecifyingInput(); hierarchy.setOutput(output); hierarchy.popNode(); final TupleTag<Long> longs = new TupleTag<>(); final MultiOutput<Long, Long> replacementParDo = ParDo.of( new DoFn<Long, Long>() { @ProcessElement public void processElement(ProcessContext ctxt) { ctxt.output(ctxt.element() + 1L); } }) .withOutputTags(longs, TupleTagList.empty()); PTransform<PCollection<Long>, PCollection<Long>> replacementComposite = new PTransform<PCollection<Long>, PCollection<Long>>() { @Override public PCollection<Long> expand(PCollection<Long> input) { return input.apply("Contained", replacementParDo).get(longs); } }; PCollectionTuple replacementOutput = upstream.apply("Contained", replacementParDo); Node compositeNode = hierarchy.replaceNode(original, upstream, replacementComposite); Node replacementParNode = hierarchy.pushNode("Original/Contained", upstream, replacementParDo); hierarchy.finishSpecifyingInput(); hierarchy.setOutput(replacementOutput); hierarchy.popNode(); hierarchy.setOutput(replacementOutput.get(longs)); Entry<TupleTag<?>, PValue> replacementLongs = Iterables.getOnlyElement(replacementOutput.expand().entrySet()); hierarchy.replaceOutputs( Collections.<PValue, ReplacementOutput>singletonMap( replacementOutput.get(longs), ReplacementOutput.of( TaggedPValue.ofExpandedValue(output), TaggedPValue.of(replacementLongs.getKey(), replacementLongs.getValue())))); hierarchy.popNode(); final Set<Node> visitedCompositeNodes = new HashSet<>(); final Set<Node> visitedPrimitiveNodes = new HashSet<>(); Set<PValue> visitedValues = hierarchy.visit( new Defaults() { @Override public CompositeBehavior enterCompositeTransform(Node node) { visitedCompositeNodes.add(node); return CompositeBehavior.ENTER_TRANSFORM; } @Override public void visitPrimitiveTransform(Node node) { visitedPrimitiveNodes.add(node); } }); /* Final Graph: Upstream -> Upstream.out -> Composite -> (ReplacementParDo -> OriginalParDo.out) */ assertThat(visitedCompositeNodes, containsInAnyOrder(root, compositeNode)); assertThat(visitedPrimitiveNodes, containsInAnyOrder(upstreamNode, replacementParNode)); assertThat(visitedValues, Matchers.<PValue>containsInAnyOrder(upstream, output)); } }