/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.runners.core.construction;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;
import static org.junit.Assert.assertThat;
import com.google.common.base.MoreObjects;
import com.google.common.collect.ImmutableMap;
import java.io.Serializable;
import java.util.Collections;
import org.apache.beam.sdk.coders.VarIntCoder;
import org.apache.beam.sdk.coders.VoidCoder;
import org.apache.beam.sdk.io.DefaultFilenamePolicy;
import org.apache.beam.sdk.io.FileBasedSink;
import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
import org.apache.beam.sdk.io.LocalResources;
import org.apache.beam.sdk.io.WriteFiles;
import org.apache.beam.sdk.io.fs.ResourceId;
import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
import org.apache.beam.sdk.runners.AppliedPTransform;
import org.apache.beam.sdk.runners.PTransformMatcher;
import org.apache.beam.sdk.state.StateSpec;
import org.apache.beam.sdk.state.StateSpecs;
import org.apache.beam.sdk.state.TimeDomain;
import org.apache.beam.sdk.state.Timer;
import org.apache.beam.sdk.state.TimerSpec;
import org.apache.beam.sdk.state.TimerSpecs;
import org.apache.beam.sdk.state.ValueState;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.Flatten;
import org.apache.beam.sdk.transforms.Materialization;
import org.apache.beam.sdk.transforms.Materializations;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.Sum;
import org.apache.beam.sdk.transforms.View;
import org.apache.beam.sdk.transforms.View.CreatePCollectionView;
import org.apache.beam.sdk.transforms.ViewFn;
import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollection.IsBounded;
import org.apache.beam.sdk.values.PCollectionList;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.PCollectionViews;
import org.apache.beam.sdk.values.PDone;
import org.apache.beam.sdk.values.PValue;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TupleTagList;
import org.apache.beam.sdk.values.WindowingStrategy;
import org.hamcrest.Matchers;
import org.joda.time.Duration;
import org.junit.Rule;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/**
* Tests for {@link PTransformMatcher}.
*/
@RunWith(JUnit4.class)
public class PTransformMatchersTest implements Serializable {
@Rule
public transient TestPipeline p = TestPipeline.create().enableAbandonedNodeEnforcement(false);
/**
* Gets the {@link AppliedPTransform} that has a created {@code PCollection<KV<String, Integer>>}
* as input.
*/
private AppliedPTransform<?, ?, ?> getAppliedTransform(PTransform pardo) {
PCollection<KV<String, Integer>> input =
PCollection.createPrimitiveOutputInternal(
p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED);
PCollection<Integer> output =
PCollection.createPrimitiveOutputInternal(
p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED);
return AppliedPTransform.of("pardo", input.expand(), output.expand(), pardo, p);
}
@Test
public void classEqualToMatchesSameClass() {
PTransformMatcher matcher = PTransformMatchers.classEqualTo(ParDo.SingleOutput.class);
AppliedPTransform<?, ?, ?> application =
getAppliedTransform(
ParDo.of(
new DoFn<KV<String, Integer>, Integer>() {
@ProcessElement
public void doStuff(ProcessContext ctxt) {}
}));
assertThat(matcher.matches(application), is(true));
}
@Test
public void classEqualToDoesNotMatchSubclass() {
class MyPTransform extends PTransform<PCollection<KV<String, Integer>>, PCollection<Integer>> {
@Override
public PCollection<Integer> expand(PCollection<KV<String, Integer>> input) {
return PCollection.createPrimitiveOutputInternal(
input.getPipeline(), input.getWindowingStrategy(), input.isBounded());
}
}
PTransformMatcher matcher = PTransformMatchers.classEqualTo(MyPTransform.class);
MyPTransform subclass = new MyPTransform() {};
assertThat(subclass.getClass(), not(Matchers.<Class<?>>equalTo(MyPTransform.class)));
assertThat(subclass, instanceOf(MyPTransform.class));
AppliedPTransform<?, ?, ?> application =
getAppliedTransform(subclass);
assertThat(matcher.matches(application), is(false));
}
@Test
public void classEqualToDoesNotMatchUnrelatedClass() {
PTransformMatcher matcher = PTransformMatchers.classEqualTo(ParDo.SingleOutput.class);
AppliedPTransform<?, ?, ?> application =
getAppliedTransform(Window.<KV<String, Integer>>into(new GlobalWindows()));
assertThat(matcher.matches(application), is(false));
}
private DoFn<KV<String, Integer>, Integer> doFn =
new DoFn<KV<String, Integer>, Integer>() {
@ProcessElement
public void simpleProcess(ProcessContext ctxt) {
ctxt.output(ctxt.element().getValue() + 1);
}
};
private abstract static class SomeTracker implements RestrictionTracker<Void> {}
private DoFn<KV<String, Integer>, Integer> splittableDoFn =
new DoFn<KV<String, Integer>, Integer>() {
@ProcessElement
public void processElement(ProcessContext context, SomeTracker tracker) {}
@GetInitialRestriction
public Void getInitialRestriction(KV<String, Integer> element) {
return null;
}
@NewTracker
public SomeTracker newTracker(Void restriction) {
return null;
}
};
private DoFn<KV<String, Integer>, Integer> doFnWithState =
new DoFn<KV<String, Integer>, Integer>() {
private final String stateId = "mystate";
@StateId(stateId)
private final StateSpec<ValueState<Integer>> intState =
StateSpecs.value(VarIntCoder.of());
@ProcessElement
public void processElement(ProcessContext c, @StateId(stateId) ValueState<Integer> state) {
Integer currentValue = MoreObjects.firstNonNull(state.read(), 0);
c.output(currentValue);
state.write(currentValue + 1);
}
};
private DoFn<KV<String, Integer>, Integer> doFnWithTimers =
new DoFn<KV<String, Integer>, Integer>() {
private final String timerId = "myTimer";
@TimerId(timerId)
private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@ProcessElement
public void processElement(ProcessContext context, @TimerId(timerId) Timer timer) {
timer.offset(Duration.standardSeconds(1)).setRelative();
context.output(3);
}
@OnTimer(timerId)
public void onTimer(OnTimerContext context) {
context.output(42);
}
};
/**
* Demonstrates that a {@link ParDo.SingleOutput} does not match any ParDo matcher.
*/
@Test
public void parDoSingle() {
AppliedPTransform<?, ?, ?> parDoApplication = getAppliedTransform(ParDo.of(doFn));
assertThat(PTransformMatchers.splittableParDoMulti().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.splittableParDoSingle().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(parDoApplication), is(false));
}
@Test
public void parDoSingleSplittable() {
AppliedPTransform<?, ?, ?> parDoApplication = getAppliedTransform(ParDo.of(splittableDoFn));
assertThat(PTransformMatchers.splittableParDoSingle().matches(parDoApplication), is(true));
assertThat(PTransformMatchers.splittableParDoMulti().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(parDoApplication), is(false));
}
@Test
public void parDoSingleWithState() {
AppliedPTransform<?, ?, ?> parDoApplication = getAppliedTransform(ParDo.of(doFnWithState));
assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(parDoApplication), is(true));
assertThat(PTransformMatchers.splittableParDoMulti().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.splittableParDoSingle().matches(parDoApplication), is(false));
}
@Test
public void parDoSingleWithTimers() {
AppliedPTransform<?, ?, ?> parDoApplication =
getAppliedTransform(ParDo.of(doFnWithTimers));
assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(parDoApplication), is(true));
assertThat(PTransformMatchers.splittableParDoMulti().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.splittableParDoSingle().matches(parDoApplication), is(false));
}
@Test
public void parDoMulti() {
AppliedPTransform<?, ?, ?> parDoApplication =
getAppliedTransform(
ParDo.of(doFn).withOutputTags(new TupleTag<Integer>(), TupleTagList.empty()));
assertThat(PTransformMatchers.splittableParDoMulti().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.splittableParDoSingle().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(parDoApplication), is(false));
}
@Test
public void parDoMultiSplittable() {
AppliedPTransform<?, ?, ?> parDoApplication =
getAppliedTransform(
ParDo.of(splittableDoFn).withOutputTags(new TupleTag<Integer>(), TupleTagList.empty()));
assertThat(PTransformMatchers.splittableParDoMulti().matches(parDoApplication), is(true));
assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.splittableParDoSingle().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(parDoApplication), is(false));
}
@Test
public void parDoMultiWithState() {
AppliedPTransform<?, ?, ?> parDoApplication =
getAppliedTransform(
ParDo.of(doFnWithState).withOutputTags(new TupleTag<Integer>(), TupleTagList.empty()));
assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(parDoApplication), is(true));
assertThat(PTransformMatchers.splittableParDoMulti().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.splittableParDoSingle().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(parDoApplication), is(false));
}
@Test
public void parDoMultiWithTimers() {
AppliedPTransform<?, ?, ?> parDoApplication =
getAppliedTransform(
ParDo.of(doFnWithTimers).withOutputTags(new TupleTag<Integer>(), TupleTagList.empty()));
assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(parDoApplication), is(true));
assertThat(PTransformMatchers.splittableParDoMulti().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.splittableParDoSingle().matches(parDoApplication), is(false));
assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(parDoApplication), is(false));
}
@Test
public void parDoWithFnTypeWithMatchingType() {
DoFn<Object, Object> fn = new DoFn<Object, Object>() {
@ProcessElement
public void process(ProcessContext ctxt) {
}
};
AppliedPTransform<?, ?, ?> parDoSingle = getAppliedTransform(ParDo.of(fn));
AppliedPTransform<?, ?, ?> parDoMulti =
getAppliedTransform(
ParDo.of(fn).withOutputTags(new TupleTag<Object>(), TupleTagList.empty()));
PTransformMatcher matcher = PTransformMatchers.parDoWithFnType(fn.getClass());
assertThat(matcher.matches(parDoSingle), is(true));
assertThat(matcher.matches(parDoMulti), is(true));
}
@Test
public void parDoWithFnTypeWithNoMatch() {
DoFn<Object, Object> fn = new DoFn<Object, Object>() {
@ProcessElement
public void process(ProcessContext ctxt) {
}
};
AppliedPTransform<?, ?, ?> parDoSingle = getAppliedTransform(ParDo.of(fn));
AppliedPTransform<?, ?, ?> parDoMulti =
getAppliedTransform(
ParDo.of(fn).withOutputTags(new TupleTag<Object>(), TupleTagList.empty()));
PTransformMatcher matcher = PTransformMatchers.parDoWithFnType(doFnWithState.getClass());
assertThat(matcher.matches(parDoSingle), is(false));
assertThat(matcher.matches(parDoMulti), is(false));
}
@Test
public void parDoWithFnTypeNotParDo() {
AppliedPTransform<?, ?, ?> notParDo = getAppliedTransform(Create.empty(VoidCoder.of()));
PTransformMatcher matcher = PTransformMatchers.parDoWithFnType(doFnWithState.getClass());
assertThat(matcher.matches(notParDo), is(false));
}
@Test
public void createViewWithViewFn() {
PCollection<Integer> input = p.apply(Create.of(1));
PCollectionView<Iterable<Integer>> view =
PCollectionViews.iterableView(input, input.getWindowingStrategy(), input.getCoder());
ViewFn<Iterable<WindowedValue<?>>, Iterable<Integer>> viewFn = view.getViewFn();
CreatePCollectionView<?, ?> createView = CreatePCollectionView.of(view);
PTransformMatcher matcher = PTransformMatchers.createViewWithViewFn(viewFn.getClass());
assertThat(matcher.matches(getAppliedTransform(createView)), is(true));
}
@Test
public void createViewWithViewFnDifferentViewFn() {
PCollection<Integer> input = p.apply(Create.of(1));
PCollectionView<Iterable<Integer>> view =
PCollectionViews.iterableView(input, input.getWindowingStrategy(), input.getCoder());
ViewFn<Iterable<WindowedValue<?>>, Iterable<Integer>> viewFn =
new ViewFn<Iterable<WindowedValue<?>>, Iterable<Integer>>() {
@Override
public Materialization<Iterable<WindowedValue<?>>> getMaterialization() {
@SuppressWarnings({"rawtypes", "unchecked"})
Materialization<Iterable<WindowedValue<?>>> materialization =
(Materialization) Materializations.iterable();
return materialization;
}
@Override
public Iterable<Integer> apply(Iterable<WindowedValue<?>> contents) {
return Collections.emptyList();
}
};
CreatePCollectionView<?, ?> createView = CreatePCollectionView.of(view);
PTransformMatcher matcher = PTransformMatchers.createViewWithViewFn(viewFn.getClass());
assertThat(matcher.matches(getAppliedTransform(createView)), is(false));
}
@Test
public void createViewWithViewFnNotCreatePCollectionView() {
PCollection<Integer> input = p.apply(Create.of(1));
PCollectionView<Iterable<Integer>> view =
PCollectionViews.iterableView(input, input.getWindowingStrategy(), input.getCoder());
PTransformMatcher matcher =
PTransformMatchers.createViewWithViewFn(view.getViewFn().getClass());
assertThat(matcher.matches(getAppliedTransform(View.asIterable())), is(false));
}
@Test
public void emptyFlattenWithEmptyFlatten() {
AppliedPTransform application =
AppliedPTransform
.<PCollectionList<Object>, PCollection<Object>, Flatten.PCollections<Object>>of(
"EmptyFlatten",
Collections.<TupleTag<?>, PValue>emptyMap(),
Collections.<TupleTag<?>, PValue>singletonMap(
new TupleTag<Object>(),
PCollection.createPrimitiveOutputInternal(
p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED)),
Flatten.pCollections(),
p);
assertThat(PTransformMatchers.emptyFlatten().matches(application), is(true));
}
@Test
public void emptyFlattenWithNonEmptyFlatten() {
AppliedPTransform application =
AppliedPTransform
.<PCollectionList<Object>, PCollection<Object>, Flatten.PCollections<Object>>of(
"Flatten",
Collections.<TupleTag<?>, PValue>singletonMap(
new TupleTag<Object>(),
PCollection.createPrimitiveOutputInternal(
p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED)),
Collections.<TupleTag<?>, PValue>singletonMap(
new TupleTag<Object>(),
PCollection.createPrimitiveOutputInternal(
p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED)),
Flatten.pCollections(),
p);
assertThat(PTransformMatchers.emptyFlatten().matches(application), is(false));
}
@Test
public void emptyFlattenWithNonFlatten() {
AppliedPTransform application =
AppliedPTransform
.<PCollection<Iterable<Object>>, PCollection<Object>, Flatten.Iterables<Object>>of(
"EmptyFlatten",
Collections.<TupleTag<?>, PValue>emptyMap(),
Collections.<TupleTag<?>, PValue>singletonMap(
new TupleTag<Object>(),
PCollection.createPrimitiveOutputInternal(
p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED)),
Flatten.iterables() /* This isn't actually possible to construct,
* but for the sake of example */,
p);
assertThat(PTransformMatchers.emptyFlatten().matches(application), is(false));
}
@Test
public void flattenWithDuplicateInputsWithoutDuplicates() {
AppliedPTransform application =
AppliedPTransform
.<PCollectionList<Object>, PCollection<Object>, Flatten.PCollections<Object>>of(
"Flatten",
Collections.<TupleTag<?>, PValue>singletonMap(
new TupleTag<Object>(),
PCollection.createPrimitiveOutputInternal(
p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED)),
Collections.<TupleTag<?>, PValue>singletonMap(
new TupleTag<Object>(),
PCollection.createPrimitiveOutputInternal(
p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED)),
Flatten.pCollections(),
p);
assertThat(PTransformMatchers.flattenWithDuplicateInputs().matches(application), is(false));
}
@Test
public void flattenWithDuplicateInputsWithDuplicates() {
PCollection<Object> duplicate =
PCollection.createPrimitiveOutputInternal(
p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED);
AppliedPTransform application =
AppliedPTransform
.<PCollectionList<Object>, PCollection<Object>, Flatten.PCollections<Object>>of(
"Flatten",
ImmutableMap.<TupleTag<?>, PValue>builder()
.put(new TupleTag<Object>(), duplicate)
.put(new TupleTag<Object>(), duplicate)
.build(),
Collections.<TupleTag<?>, PValue>singletonMap(
new TupleTag<Object>(),
PCollection.createPrimitiveOutputInternal(
p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED)),
Flatten.pCollections(),
p);
assertThat(PTransformMatchers.flattenWithDuplicateInputs().matches(application), is(true));
}
@Test
public void flattenWithDuplicateInputsNonFlatten() {
AppliedPTransform application =
AppliedPTransform
.<PCollection<Iterable<Object>>, PCollection<Object>, Flatten.Iterables<Object>>of(
"EmptyFlatten",
Collections.<TupleTag<?>, PValue>emptyMap(),
Collections.<TupleTag<?>, PValue>singletonMap(
new TupleTag<Object>(),
PCollection.createPrimitiveOutputInternal(
p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED)),
Flatten.iterables() /* This isn't actually possible to construct,
* but for the sake of example */,
p);
assertThat(PTransformMatchers.flattenWithDuplicateInputs().matches(application), is(false));
}
@Test
public void writeWithRunnerDeterminedSharding() {
ResourceId outputDirectory = LocalResources.fromString("/foo/bar", true /* isDirectory */);
FilenamePolicy policy = DefaultFilenamePolicy.constructUsingStandardParameters(
StaticValueProvider.of(outputDirectory), DefaultFilenamePolicy.DEFAULT_SHARD_TEMPLATE, "");
WriteFiles<Integer> write =
WriteFiles.to(
new FileBasedSink<Integer>(StaticValueProvider.of(outputDirectory), policy) {
@Override
public WriteOperation<Integer> createWriteOperation() {
return null;
}
});
assertThat(
PTransformMatchers.writeWithRunnerDeterminedSharding().matches(appliedWrite(write)),
is(true));
WriteFiles<Integer> withStaticSharding = write.withNumShards(3);
assertThat(
PTransformMatchers.writeWithRunnerDeterminedSharding()
.matches(appliedWrite(withStaticSharding)),
is(false));
WriteFiles<Integer> withCustomSharding =
write.withSharding(Sum.integersGlobally().asSingletonView());
assertThat(
PTransformMatchers.writeWithRunnerDeterminedSharding()
.matches(appliedWrite(withCustomSharding)),
is(false));
}
private AppliedPTransform<?, ?, ?> appliedWrite(WriteFiles<Integer> write) {
return AppliedPTransform.<PCollection<Integer>, PDone, WriteFiles<Integer>>of(
"WriteFiles",
Collections.<TupleTag<?>, PValue>emptyMap(),
Collections.<TupleTag<?>, PValue>emptyMap(),
write,
p);
}
}