/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.beam.sdk.transforms; import static com.google.common.base.Preconditions.checkArgument; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.io.Serializable; import java.lang.reflect.ParameterizedType; import java.lang.reflect.Type; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineRunner; import org.apache.beam.sdk.coders.CannotProvideCoderException; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderRegistry; import org.apache.beam.sdk.state.StateSpec; import org.apache.beam.sdk.transforms.DoFn.WindowedContext; import org.apache.beam.sdk.transforms.display.DisplayData; import org.apache.beam.sdk.transforms.display.DisplayData.Builder; import org.apache.beam.sdk.transforms.display.DisplayData.ItemSpec; import org.apache.beam.sdk.transforms.display.HasDisplayData; import org.apache.beam.sdk.transforms.reflect.DoFnSignature; import org.apache.beam.sdk.transforms.reflect.DoFnSignature.MethodWithExtraParameters; import 
org.apache.beam.sdk.transforms.reflect.DoFnSignature.OnTimerMethod; import org.apache.beam.sdk.transforms.reflect.DoFnSignatures; import org.apache.beam.sdk.transforms.windowing.BoundedWindow; import org.apache.beam.sdk.transforms.windowing.WindowFn; import org.apache.beam.sdk.util.NameUtils; import org.apache.beam.sdk.util.SerializableUtils; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionTuple; import org.apache.beam.sdk.values.PCollectionView; import org.apache.beam.sdk.values.PValue; import org.apache.beam.sdk.values.TupleTag; import org.apache.beam.sdk.values.TupleTagList; import org.apache.beam.sdk.values.TypeDescriptor; /** * {@link ParDo} is the core element-wise transform in Apache Beam, invoking a user-specified * function on each of the elements of the input {@link PCollection} to produce zero or more output * elements, all of which are collected into the output {@link PCollection}. * * <p>Elements are processed independently, and possibly in parallel across * distributed cloud resources. * * <p>The {@link ParDo} processing style is similar to what happens inside * the "Mapper" or "Reducer" class of a MapReduce-style algorithm. * * <h2>{@link DoFn DoFns}</h2> * * <p>The function to use to process each element is specified by a * {@link DoFn DoFn<InputT, OutputT>}, primarily via its * {@link DoFn.ProcessElement ProcessElement} method. The {@link DoFn} may also * provide a {@link DoFn.StartBundle StartBundle} and {@link DoFn.FinishBundle finishBundle} method. * * <p>Conceptually, when a {@link ParDo} transform is executed, the * elements of the input {@link PCollection} are first divided up * into some number of "bundles". These are farmed off to distributed * worker machines (or run locally, if using the {@code DirectRunner}). 
* For each bundle of input elements processing proceeds as follows:
 *
 * <ol>
 * <li>If required, a fresh instance of the argument {@link DoFn} is created
 * on a worker, and the {@link DoFn.Setup} method is called on this instance. This may be
 * through deserialization or other means. A {@link PipelineRunner} may reuse {@link DoFn}
 * instances for multiple bundles. A {@link DoFn} that has terminated abnormally (by throwing an
 * {@link Exception}) will never be reused.</li>
 * <li>The {@link DoFn DoFn's} {@link DoFn.StartBundle} method, if provided, is called to
 * initialize it.</li>
 * <li>The {@link DoFn DoFn's} {@link DoFn.ProcessElement} method
 * is called on each of the input elements in the bundle.</li>
 * <li>The {@link DoFn DoFn's} {@link DoFn.FinishBundle} method, if provided, is called
 * to complete its work. After {@link DoFn.FinishBundle} is called, the
 * framework will not again invoke {@link DoFn.ProcessElement} or
 * {@link DoFn.FinishBundle}
 * until a new call to {@link DoFn.StartBundle} has occurred.</li>
 * <li>If any of {@link DoFn.Setup}, {@link DoFn.StartBundle}, {@link DoFn.ProcessElement} or
 * {@link DoFn.FinishBundle} methods throw an exception, the {@link DoFn.Teardown} method, if
 * provided, will be called on the {@link DoFn} instance.</li>
 * <li>If a runner will no longer use a {@link DoFn}, the {@link DoFn.Teardown} method, if
 * provided, will be called on the discarded instance.</li>
 * </ol>
 *
 * <p>Each of the calls to any of the {@link DoFn DoFn's} processing
 * methods can produce zero or more output elements. All of the
 * output elements from all of the {@link DoFn} instances
 * are included in an output {@link PCollection}.
* * <p>For example: * * <pre>{@code * PCollection<String> lines = ...; * PCollection<String> words = * lines.apply(ParDo.of(new DoFn<String, String>() { * {@literal @}ProcessElement * public void processElement(ProcessContext c) { * String line = c.element(); * for (String word : line.split("[^a-zA-Z']+")) { * c.output(word); * } * }})); * PCollection<Integer> wordLengths = * words.apply(ParDo.of(new DoFn<String, Integer>() { * {@literal @}ProcessElement * public void processElement(ProcessContext c) { * String word = c.element(); * Integer length = word.length(); * c.output(length); * }})); * }</pre> * * <p>Each output element has the same timestamp and is in the same windows * as its corresponding input element, and the output {@code PCollection} * has the same {@link WindowFn} associated with it as the input. * * <h2>Naming {@link ParDo ParDo} transforms</h2> * * <p>The name of a transform is used to provide a name for any node in the * {@link Pipeline} graph resulting from application of the transform. * It is best practice to provide a name at the time of application, * via {@link PCollection#apply(String, PTransform)}. Otherwise, * a unique name - which may not be stable across pipeline revision - * will be generated, based on the transform name. * * <p>For example: * * <pre> {@code * PCollection<String> words = * lines.apply("ExtractWords", ParDo.of(new DoFn<String, String>() { ... })); * PCollection<Integer> wordLengths = * words.apply("ComputeWordLengths", ParDo.of(new DoFn<String, Integer>() { ... })); * } </pre> * * <h2>Side Inputs</h2> * * <p>While a {@link ParDo} processes elements from a single "main input" * {@link PCollection}, it can take additional "side input" * {@link PCollectionView PCollectionViews}. 
These side input * {@link PCollectionView PCollectionViews} express styles of accessing * {@link PCollection PCollections} computed by earlier pipeline operations, * passed in to the {@link ParDo} transform using * {@link SingleOutput#withSideInputs}, and their contents accessible to each of * the {@link DoFn} operations via {@link DoFn.ProcessContext#sideInput sideInput}. * For example: * * <pre>{@code * PCollection<String> words = ...; * PCollection<Integer> maxWordLengthCutOff = ...; // Singleton PCollection * final PCollectionView<Integer> maxWordLengthCutOffView = * maxWordLengthCutOff.apply(View.<Integer>asSingleton()); * PCollection<String> wordsBelowCutOff = * words.apply(ParDo.of(new DoFn<String, String>() { * {@literal @}ProcessElement * public void processElement(ProcessContext c) { * String word = c.element(); * int lengthCutOff = c.sideInput(maxWordLengthCutOffView); * if (word.length() <= lengthCutOff) { * c.output(word); * } * }}).withSideInputs(maxWordLengthCutOffView)); * }</pre> * * <h2>Additional Outputs</h2> * * <p>Optionally, a {@link ParDo} transform can produce multiple * output {@link PCollection PCollections}, both a "main output" * {@code PCollection<OutputT>} plus any number of additional output * {@link PCollection PCollections}, each keyed by a distinct {@link TupleTag}, * and bundled in a {@link PCollectionTuple}. The {@link TupleTag TupleTags} * to be used for the output {@link PCollectionTuple} are specified by * invoking {@link SingleOutput#withOutputTags}. Unconsumed outputs do not * necessarily need to be explicitly specified, even if the {@link DoFn} * generates them. Within the {@link DoFn}, an element is added to the * main output {@link PCollection} as normal, using * {@link WindowedContext#output(Object)}, while an element is added to any additional output * {@link PCollection} using {@link WindowedContext#output(TupleTag, Object)}. 
For example: * * <pre>{@code * PCollection<String> words = ...; * // Select words whose length is below a cut off, * // plus the lengths of words that are above the cut off. * // Also select words starting with "MARKER". * final int wordLengthCutOff = 10; * // Create tags to use for the main and additional outputs. * final TupleTag<String> wordsBelowCutOffTag = * new TupleTag<String>(){}; * final TupleTag<Integer> wordLengthsAboveCutOffTag = * new TupleTag<Integer>(){}; * final TupleTag<String> markedWordsTag = * new TupleTag<String>(){}; * PCollectionTuple results = * words.apply( * ParDo * .of(new DoFn<String, String>() { * // Create a tag for the unconsumed output. * final TupleTag<String> specialWordsTag = * new TupleTag<String>(){}; * {@literal @}ProcessElement * public void processElement(ProcessContext c) { * String word = c.element(); * if (word.length() <= wordLengthCutOff) { * // Emit this short word to the main output. * c.output(word); * } else { * // Emit this long word's length to a specified output. * c.output(wordLengthsAboveCutOffTag, word.length()); * } * if (word.startsWith("MARKER")) { * // Emit this word to a different specified output. * c.output(markedWordsTag, word); * } * if (word.startsWith("SPECIAL")) { * // Emit this word to the unconsumed output. * c.output(specialWordsTag, word); * } * }}) * // Specify the main and consumed output tags of the * // PCollectionTuple result: * .withOutputTags(wordsBelowCutOffTag, * TupleTagList.of(wordLengthsAboveCutOffTag) * .and(markedWordsTag))); * // Extract the PCollection results, by tag. 
* PCollection<String> wordsBelowCutOff = * results.get(wordsBelowCutOffTag); * PCollection<Integer> wordLengthsAboveCutOff = * results.get(wordLengthsAboveCutOffTag); * PCollection<String> markedWords = * results.get(markedWordsTag); * }</pre> * * <h2>Output Coders</h2> * * <p>By default, the {@link Coder Coder<OutputT>} for the * elements of the main output {@link PCollection PCollection<OutputT>} is * inferred from the concrete type of the {@link DoFn DoFn<InputT, OutputT>}. * * <p>By default, the {@link Coder Coder<AdditionalOutputT>} for the elements of * an output {@link PCollection PCollection<AdditionalOutputT>} is inferred * from the concrete type of the corresponding {@link TupleTag TupleTag<AdditionalOutputT>}. * To be successful, the {@link TupleTag} should be created as an instance * of a trivial anonymous subclass, with {@code {}} suffixed to the * constructor call. Such uses block Java's generic type parameter * inference, so the {@code <X>} argument must be provided explicitly. * For example: * <pre> {@code * // A TupleTag to use for a side input can be written concisely: * final TupleTag<Integer> sideInputag = new TupleTag<>(); * // A TupleTag to use for an output should be written with "{}", * // and explicit generic parameter type: * final TupleTag<String> additionalOutputTag = new TupleTag<String>(){}; * } </pre> * This style of {@code TupleTag} instantiation is used in the example of * {@link ParDo ParDos} that produce multiple outputs, above. * * <h2>Serializability of {@link DoFn DoFns}</h2> * * <p>A {@link DoFn} passed to a {@link ParDo} transform must be * {@link Serializable}. This allows the {@link DoFn} instance * created in this "main program" to be sent (in serialized form) to * remote worker machines and reconstituted for bundles of elements * of the input {@link PCollection} being processed. 
A {@link DoFn} * can have instance variable state, and non-transient instance * variable state will be serialized in the main program and then * deserialized on remote worker machines for some number of bundles * of elements to process. * * <p>{@link DoFn DoFns} expressed as anonymous inner classes can be * convenient, but due to a quirk in Java's rules for serializability, * non-static inner or nested classes (including anonymous inner * classes) automatically capture their enclosing class's instance in * their serialized state. This can lead to including much more than * intended in the serialized state of a {@link DoFn}, or even things * that aren't {@link Serializable}. * * <p>There are two ways to avoid unintended serialized state in a * {@link DoFn}: * * <ul> * * <li>Define the {@link DoFn} as a named, static class. * * <li>Define the {@link DoFn} as an anonymous inner class inside of * a static method. * * </ul> * * <p>Both of these approaches ensure that there is no implicit enclosing * instance serialized along with the {@link DoFn} instance. * * <p>Prior to Java 8, any local variables of the enclosing * method referenced from within an anonymous inner class need to be * marked as {@code final}. If defining the {@link DoFn} as a named * static class, such variables would be passed as explicit * constructor arguments and stored in explicit instance variables. * * <p>There are three main ways to initialize the state of a * {@link DoFn} instance processing a bundle: * * <ul> * * <li>Define instance variable state (including implicit instance * variables holding final variables captured by an anonymous inner * class), initialized by the {@link DoFn}'s constructor (which is * implicit for an anonymous inner class). This state will be * automatically serialized and then deserialized in the {@link DoFn} * instances created for bundles. This method is good for state * known when the original {@link DoFn} is created in the main * program, if it's not overly large. 
This is not suitable for any * state which must only be used for a single bundle, as {@link DoFn DoFn's} * may be used to process multiple bundles. * * <li>Compute the state as a singleton {@link PCollection} and pass it * in as a side input to the {@link DoFn}. This is good if the state * needs to be computed by the pipeline, or if the state is very large * and so is best read from file(s) rather than sent as part of the * {@link DoFn DoFn's} serialized state. * * <li>Initialize the state in each {@link DoFn} instance, in a * {@link DoFn.StartBundle} method. This is good if the initialization * doesn't depend on any information known only by the main program or * computed by earlier pipeline operations, but is the same for all * instances of this {@link DoFn} for all program executions, say * setting up empty caches or initializing constant data. * * </ul> * * <h2>No Global Shared State</h2> * * <p>{@link ParDo} operations are intended to be able to run in * parallel across multiple worker machines. This precludes easy * sharing and updating mutable state across those machines. There is * no support in the Beam model for communicating * and synchronizing updates to shared state across worker machines, * so programs should not access any mutable static variable state in * their {@link DoFn}, without understanding that the Java processes * for the main program and workers will each have its own independent * copy of such state, and there won't be any automatic copying of * that state across Java processes. All information should be * communicated to {@link DoFn} instances via main and side inputs and * serialized state, and all output should be communicated from a * {@link DoFn} instance via output {@link PCollection PCollections}, in the absence of * external communication mechanisms written by user code. * * <h2>Fault Tolerance</h2> * * <p>In a distributed system, things can fail: machines can crash, * machines can be unable to communicate across the network, etc. 
* While individual failures are rare, the larger the job, the greater * the chance that something, somewhere, will fail. Beam runners may strive * to mask such failures by retrying failed {@link DoFn} bundle. This means * that a {@link DoFn} instance might process a bundle partially, then * crash for some reason, then be rerun (often in a new JVM) on that * same bundle and on the same elements as before. * Sometimes two or more {@link DoFn} instances will be running on the * same bundle simultaneously, with the system taking the results of * the first instance to complete successfully. Consequently, the * code in a {@link DoFn} needs to be written such that these * duplicate (sequential or concurrent) executions do not cause * problems. If the outputs of a {@link DoFn} are a pure function of * its inputs, then this requirement is satisfied. However, if a * {@link DoFn DoFn's} execution has external side-effects, such as performing * updates to external HTTP services, then the {@link DoFn DoFn's} code * needs to take care to ensure that those updates are idempotent and * that concurrent updates are acceptable. This property can be * difficult to achieve, so it is advisable to strive to keep * {@link DoFn DoFns} as pure functions as much as possible. * * <h2>Optimization</h2> * * <p>Beam runners may choose to apply optimizations to a * pipeline before it is executed. A key optimization, <i>fusion</i>, * relates to {@link ParDo} operations. If one {@link ParDo} operation produces a * {@link PCollection} that is then consumed as the main input of another * {@link ParDo} operation, the two {@link ParDo} operations will be <i>fused</i> * together into a single ParDo operation and run in a single pass; * this is "producer-consumer fusion". Similarly, if * two or more ParDo operations have the same {@link PCollection} main input, * they will be fused into a single {@link ParDo} that makes just one pass * over the input {@link PCollection}; this is "sibling fusion". 
* * <p>If after fusion there are no more unfused references to a * {@link PCollection} (e.g., one between a producer ParDo and a consumer * {@link ParDo}), the {@link PCollection} itself is "fused away" and won't ever be * written to disk, saving all the I/O and space expense of * constructing it. * * <p>When Beam runners apply fusion optimization, it is essentially "free" * to write {@link ParDo} operations in a * very modular, composable style, each {@link ParDo} operation doing one * clear task, and stringing together sequences of {@link ParDo} operations to * get the desired overall effect. Such programs can be easier to * understand, easier to unit-test, easier to extend and evolve, and * easier to reuse in new programs. The predefined library of * PTransforms that come with Beam makes heavy use of * this modular, composable style, trusting to the runner to * "flatten out" all the compositions into highly optimized stages. * * @see <a href= * "https://beam.apache.org/documentation/programming-guide/#transforms-pardo"> * the web documentation for ParDo</a> */ public class ParDo { /** * Creates a {@link ParDo} {@link PTransform} that will invoke the * given {@link DoFn} function. * * <p>The resulting {@link PTransform PTransform} is ready to be applied, or further * properties can be set on it first. */ public static <InputT, OutputT> SingleOutput<InputT, OutputT> of(DoFn<InputT, OutputT> fn) { validate(fn); return new SingleOutput<InputT, OutputT>( fn, Collections.<PCollectionView<?>>emptyList(), displayDataForFn(fn)); } private static <T> DisplayData.ItemSpec<? 
extends Class<?>> displayDataForFn(T fn) { return DisplayData.item("fn", fn.getClass()).withLabel("Transform Function"); } private static void finishSpecifyingStateSpecs( DoFn<?, ?> fn, CoderRegistry coderRegistry, Coder<?> inputCoder) { DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass()); Map<String, DoFnSignature.StateDeclaration> stateDeclarations = signature.stateDeclarations(); for (DoFnSignature.StateDeclaration stateDeclaration : stateDeclarations.values()) { try { StateSpec<?> stateSpec = (StateSpec<?>) stateDeclaration.field().get(fn); stateSpec.offerCoders(codersForStateSpecTypes(stateDeclaration, coderRegistry, inputCoder)); stateSpec.finishSpecifying(); } catch (IllegalAccessException e) { throw new RuntimeException(e); } } } /** * Try to provide coders for as many of the type arguments of given * {@link DoFnSignature.StateDeclaration} as possible. */ private static <InputT> Coder[] codersForStateSpecTypes( DoFnSignature.StateDeclaration stateDeclaration, CoderRegistry coderRegistry, Coder<InputT> inputCoder) { Type stateType = stateDeclaration.stateType().getType(); if (!(stateType instanceof ParameterizedType)) { // No type arguments means no coders to infer. return new Coder[0]; } Type[] typeArguments = ((ParameterizedType) stateType).getActualTypeArguments(); Coder[] coders = new Coder[typeArguments.length]; for (int i = 0; i < typeArguments.length; i++) { Type typeArgument = typeArguments[i]; TypeDescriptor<?> typeDescriptor = TypeDescriptor.of(typeArgument); try { coders[i] = coderRegistry.getCoder(typeDescriptor); } catch (CannotProvideCoderException e) { try { coders[i] = coderRegistry.getCoder( typeDescriptor, inputCoder.getEncodedTypeDescriptor(), inputCoder); } catch (CannotProvideCoderException ignored) { // Since not all type arguments will have a registered coder we ignore this exception. 
} } } return coders; } /** * Perform common validations of the {@link DoFn} against the input {@link PCollection}, for * example ensuring that the window type expected by the {@link DoFn} matches the window type of * the {@link PCollection}. */ private static <InputT, OutputT> void validateWindowType( PCollection<? extends InputT> input, DoFn<InputT, OutputT> fn) { DoFnSignature signature = DoFnSignatures.getSignature((Class) fn.getClass()); TypeDescriptor<? extends BoundedWindow> actualWindowT = input.getWindowingStrategy().getWindowFn().getWindowTypeDescriptor(); validateWindowTypeForMethod(actualWindowT, signature.processElement()); for (OnTimerMethod method : signature.onTimerMethods().values()) { validateWindowTypeForMethod(actualWindowT, method); } } private static void validateWindowTypeForMethod( TypeDescriptor<? extends BoundedWindow> actualWindowT, MethodWithExtraParameters methodSignature) { if (methodSignature.windowT() != null) { checkArgument( methodSignature.windowT().isSupertypeOf(actualWindowT), "%s expects window type %s, which is not a supertype of actual window type %s", methodSignature.targetMethod(), methodSignature.windowT(), actualWindowT); } } /** * Perform common validations of the {@link DoFn}, for example ensuring that state is used * correctly and that its features can be supported. 
*/ private static <InputT, OutputT> void validate(DoFn<InputT, OutputT> fn) { DoFnSignature signature = DoFnSignatures.getSignature((Class) fn.getClass()); // State is semantically incompatible with splitting if (!signature.stateDeclarations().isEmpty() && signature.processElement().isSplittable()) { throw new UnsupportedOperationException( String.format("%s is splittable and uses state, but these are not compatible", fn.getClass().getName())); } // Timers are semantically incompatible with splitting if (!signature.timerDeclarations().isEmpty() && signature.processElement().isSplittable()) { throw new UnsupportedOperationException( String.format("%s is splittable and uses timers, but these are not compatible", fn.getClass().getName())); } } /** * A {@link PTransform} that, when applied to a {@code PCollection<InputT>}, * invokes a user-specified {@code DoFn<InputT, OutputT>} on all its elements, * with all its outputs collected into an output * {@code PCollection<OutputT>}. * * <p>A multi-output form of this transform can be created with * {@link SingleOutput#withOutputTags}. * * @param <InputT> the type of the (main) input {@link PCollection} elements * @param <OutputT> the type of the (main) output {@link PCollection} elements */ public static class SingleOutput<InputT, OutputT> extends PTransform<PCollection<? extends InputT>, PCollection<OutputT>> { private final List<PCollectionView<?>> sideInputs; private final DoFn<InputT, OutputT> fn; private final DisplayData.ItemSpec<? extends Class<?>> fnDisplayData; SingleOutput( DoFn<InputT, OutputT> fn, List<PCollectionView<?>> sideInputs, DisplayData.ItemSpec<? extends Class<?>> fnDisplayData) { this.fn = SerializableUtils.clone(fn); this.fnDisplayData = fnDisplayData; this.sideInputs = sideInputs; } /** * Returns a new {@link ParDo} {@link PTransform} that's like this * {@link PTransform} but with the specified additional side inputs. Does not * modify this {@link PTransform}. 
* * <p>See the discussion of Side Inputs above for more explanation. */ public SingleOutput<InputT, OutputT> withSideInputs(PCollectionView<?>... sideInputs) { return withSideInputs(Arrays.asList(sideInputs)); } /** * Returns a new {@link ParDo} {@link PTransform} that's like this * {@link PTransform} but with the specified additional side inputs. Does not * modify this {@link PTransform}. * * <p>See the discussion of Side Inputs above for more explanation. */ public SingleOutput<InputT, OutputT> withSideInputs( Iterable<? extends PCollectionView<?>> sideInputs) { return new SingleOutput<>( fn, ImmutableList.<PCollectionView<?>>builder() .addAll(this.sideInputs) .addAll(sideInputs) .build(), fnDisplayData); } /** * Returns a new multi-output {@link ParDo} {@link PTransform} that's like this {@link * PTransform} but with the specified output tags. Does not modify this {@link * PTransform}. * * <p>See the discussion of Additional Outputs above for more explanation. */ public MultiOutput<InputT, OutputT> withOutputTags( TupleTag<OutputT> mainOutputTag, TupleTagList additionalOutputTags) { return new MultiOutput<>(fn, sideInputs, mainOutputTag, additionalOutputTags, fnDisplayData); } @Override public PCollection<OutputT> expand(PCollection<? extends InputT> input) { finishSpecifyingStateSpecs(fn, input.getPipeline().getCoderRegistry(), input.getCoder()); TupleTag<OutputT> mainOutput = new TupleTag<>(); return input.apply(withOutputTags(mainOutput, TupleTagList.empty())).get(mainOutput); } @Override @SuppressWarnings("unchecked") protected Coder<OutputT> getDefaultOutputCoder(PCollection<? 
extends InputT> input) throws CannotProvideCoderException { return input.getPipeline().getCoderRegistry().getCoder( getFn().getOutputTypeDescriptor(), getFn().getInputTypeDescriptor(), ((PCollection<InputT>) input).getCoder()); } @Override protected String getKindString() { return String.format("ParDo(%s)", NameUtils.approximateSimpleName(getFn())); } /** * {@inheritDoc} * * <p>{@link ParDo} registers its internal {@link DoFn} as a subcomponent for display data. * {@link DoFn} implementations can register display data by overriding * {@link DoFn#populateDisplayData}. */ @Override public void populateDisplayData(Builder builder) { super.populateDisplayData(builder); ParDo.populateDisplayData(builder, (HasDisplayData) fn, fnDisplayData); } public DoFn<InputT, OutputT> getFn() { return fn; } public List<PCollectionView<?>> getSideInputs() { return sideInputs; } /** * Returns the side inputs of this {@link ParDo}, tagged with the tag of the * {@link PCollectionView}. The values of the returned map will be equal to the result of * {@link #getSideInputs()}. */ @Override public Map<TupleTag<?>, PValue> getAdditionalInputs() { ImmutableMap.Builder<TupleTag<?>, PValue> additionalInputs = ImmutableMap.builder(); for (PCollectionView<?> sideInput : sideInputs) { additionalInputs.put(sideInput.getTagInternal(), sideInput.getPCollection()); } return additionalInputs.build(); } } /** * A {@link PTransform} that, when applied to a {@code PCollection<InputT>}, invokes a * user-specified {@code DoFn<InputT, OutputT>} on all its elements, which can emit elements to * any of the {@link PTransform}'s output {@code PCollection}s, which are bundled into a result * {@code PCollectionTuple}. * * @param <InputT> the type of the (main) input {@code PCollection} elements * @param <OutputT> the type of the main output {@code PCollection} elements */ public static class MultiOutput<InputT, OutputT> extends PTransform<PCollection<? 
extends InputT>, PCollectionTuple> { private final List<PCollectionView<?>> sideInputs; private final TupleTag<OutputT> mainOutputTag; private final TupleTagList additionalOutputTags; private final DisplayData.ItemSpec<? extends Class<?>> fnDisplayData; private final DoFn<InputT, OutputT> fn; MultiOutput( DoFn<InputT, OutputT> fn, List<PCollectionView<?>> sideInputs, TupleTag<OutputT> mainOutputTag, TupleTagList additionalOutputTags, ItemSpec<? extends Class<?>> fnDisplayData) { this.sideInputs = sideInputs; this.mainOutputTag = mainOutputTag; this.additionalOutputTags = additionalOutputTags; this.fn = SerializableUtils.clone(fn); this.fnDisplayData = fnDisplayData; } /** * Returns a new multi-output {@link ParDo} {@link PTransform} * that's like this {@link PTransform} but with the specified additional side * inputs. Does not modify this {@link PTransform}. * * <p>See the discussion of Side Inputs above for more explanation. */ public MultiOutput<InputT, OutputT> withSideInputs( PCollectionView<?>... sideInputs) { return withSideInputs(Arrays.asList(sideInputs)); } /** * Returns a new multi-output {@link ParDo} {@link PTransform} that's like this {@link * PTransform} but with the specified additional side inputs. Does not modify this {@link * PTransform}. * * <p>See the discussion of Side Inputs above for more explanation. */ public MultiOutput<InputT, OutputT> withSideInputs( Iterable<? extends PCollectionView<?>> sideInputs) { return new MultiOutput<>( fn, ImmutableList.<PCollectionView<?>>builder() .addAll(this.sideInputs) .addAll(sideInputs) .build(), mainOutputTag, additionalOutputTags, fnDisplayData); } @Override public PCollectionTuple expand(PCollection<? extends InputT> input) { // SplittableDoFn should be forbidden on the runner-side. validateWindowType(input, fn); // Use coder registry to determine coders for all StateSpec defined in the fn signature. 
finishSpecifyingStateSpecs(fn, input.getPipeline().getCoderRegistry(), input.getCoder()); PCollectionTuple outputs = PCollectionTuple.ofPrimitiveOutputsInternal( input.getPipeline(), TupleTagList.of(mainOutputTag).and(additionalOutputTags.getAll()), input.getWindowingStrategy(), input.isBounded()); // The fn will likely be an instance of an anonymous subclass // such as DoFn<Integer, String> { }, thus will have a high-fidelity // TypeDescriptor for the output type. outputs.get(mainOutputTag).setTypeDescriptor(getFn().getOutputTypeDescriptor()); return outputs; } @Override protected Coder<OutputT> getDefaultOutputCoder() { throw new RuntimeException( "internal error: shouldn't be calling this on a multi-output ParDo"); } @Override public <T> Coder<T> getDefaultOutputCoder( PCollection<? extends InputT> input, PCollection<T> output) throws CannotProvideCoderException { @SuppressWarnings("unchecked") Coder<InputT> inputCoder = ((PCollection<InputT>) input).getCoder(); return input.getPipeline().getCoderRegistry().getCoder( output.getTypeDescriptor(), getFn().getInputTypeDescriptor(), inputCoder); } @Override protected String getKindString() { return String.format("ParMultiDo(%s)", NameUtils.approximateSimpleName(getFn())); } @Override public void populateDisplayData(Builder builder) { super.populateDisplayData(builder); ParDo.populateDisplayData(builder, fn, fnDisplayData); } public DoFn<InputT, OutputT> getFn() { return fn; } public TupleTag<OutputT> getMainOutputTag() { return mainOutputTag; } public TupleTagList getAdditionalOutputTags() { return additionalOutputTags; } public List<PCollectionView<?>> getSideInputs() { return sideInputs; } /** * Returns the side inputs of this {@link ParDo}, tagged with the tag of the * {@link PCollectionView}. The values of the returned map will be equal to the result of * {@link #getSideInputs()}. 
*/ @Override public Map<TupleTag<?>, PValue> getAdditionalInputs() { ImmutableMap.Builder<TupleTag<?>, PValue> additionalInputs = ImmutableMap.builder(); for (PCollectionView<?> sideInput : sideInputs) { additionalInputs.put(sideInput.getTagInternal(), sideInput.getPCollection()); } return additionalInputs.build(); } } private static void populateDisplayData( DisplayData.Builder builder, HasDisplayData fn, DisplayData.ItemSpec<? extends Class<?>> fnDisplayData) { builder.include("fn", fn).add(fnDisplayData); } private static boolean isSplittable(DoFn<?, ?> fn) { return DoFnSignatures.signatureForDoFn(fn).processElement().isSplittable(); } }