/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.sdk.values;
import com.google.common.collect.ImmutableMap;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.annotations.Internal;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection.IsBounded;
/**
* A {@link PCollectionTuple} is an immutable tuple of
* heterogeneously-typed {@link PCollection PCollections}, "keyed" by
* {@link TupleTag TupleTags}. A {@link PCollectionTuple} can be used as the input or
* output of a
* {@link PTransform} taking
* or producing multiple PCollection inputs or outputs that can be of
* different types, for instance a
* {@link ParDo} with multiple outputs.
*
* <p>A {@link PCollectionTuple} can be created and accessed like follows:
* <pre> {@code
* PCollection<String> pc1 = ...;
* PCollection<Integer> pc2 = ...;
* PCollection<Iterable<String>> pc3 = ...;
*
* // Create TupleTags for each of the PCollections to put in the
* // PCollectionTuple (the type of the TupleTag enables tracking the
* // static type of each of the PCollections in the PCollectionTuple):
* TupleTag<String> tag1 = new TupleTag<>();
* TupleTag<Integer> tag2 = new TupleTag<>();
* TupleTag<Iterable<String>> tag3 = new TupleTag<>();
*
* // Create a PCollectionTuple with three PCollections:
* PCollectionTuple pcs =
* PCollectionTuple.of(tag1, pc1)
* .and(tag2, pc2)
* .and(tag3, pc3);
*
* // Create an empty PCollectionTuple:
* Pipeline p = ...;
* PCollectionTuple pcs2 = PCollectionTuple.empty(p);
*
* // Get PCollections out of a PCollectionTuple, using the same tags
* // that were used to put them in:
* PCollection<Integer> pcX = pcs.get(tag2);
* PCollection<String> pcY = pcs.get(tag1);
* PCollection<Iterable<String>> pcZ = pcs.get(tag3);
*
* // Get a map of all PCollections in a PCollectionTuple:
* Map<TupleTag<?>, PCollection<?>> allPcs = pcs.getAll();
* } </pre>
*/
public class PCollectionTuple implements PInput, POutput {
/**
* Returns an empty {@link PCollectionTuple} that is part of the given {@link Pipeline}.
*
* <p>A {@link PCollectionTuple} containing additional elements can be created by calling
* {@link #and} on the result.
*/
public static PCollectionTuple empty(Pipeline pipeline) {
return new PCollectionTuple(pipeline);
}
/**
* Returns a singleton {@link PCollectionTuple} containing the given
* {@link PCollection} keyed by the given {@link TupleTag}.
*
* <p>A {@link PCollectionTuple} containing additional elements can be created by calling
* {@link #and} on the result.
*/
public static <T> PCollectionTuple of(TupleTag<T> tag, PCollection<T> pc) {
return empty(pc.getPipeline()).and(tag, pc);
}
/**
* Returns a new {@link PCollectionTuple} that has each {@link PCollection} and
* {@link TupleTag} of this {@link PCollectionTuple} plus the given {@link PCollection}
* associated with the given {@link TupleTag}.
*
* <p>The given {@link TupleTag} should not already be mapped to a
* {@link PCollection} in this {@link PCollectionTuple}.
*
* <p>Each {@link PCollection} in the resulting {@link PCollectionTuple} must be
* part of the same {@link Pipeline}.
*/
public <T> PCollectionTuple and(TupleTag<T> tag, PCollection<T> pc) {
if (pc.getPipeline() != pipeline) {
throw new IllegalArgumentException(
"PCollections come from different Pipelines");
}
return new PCollectionTuple(pipeline,
new ImmutableMap.Builder<TupleTag<?>, PCollection<?>>()
.putAll(pcollectionMap)
.put(tag, pc)
.build());
}
/**
* Returns whether this {@link PCollectionTuple} contains a {@link PCollection} with
* the given tag.
*/
public <T> boolean has(TupleTag<T> tag) {
return pcollectionMap.containsKey(tag);
}
/**
* Returns the {@link PCollection} associated with the given {@link TupleTag}
* in this {@link PCollectionTuple}. Throws {@link IllegalArgumentException} if there is no
* such {@link PCollection}, i.e., {@code !has(tag)}.
*/
public <T> PCollection<T> get(TupleTag<T> tag) {
@SuppressWarnings("unchecked")
PCollection<T> pcollection = (PCollection<T>) pcollectionMap.get(tag);
if (pcollection == null) {
throw new IllegalArgumentException(
"TupleTag not found in this PCollectionTuple tuple");
}
return pcollection;
}
/**
* Returns an immutable Map from {@link TupleTag} to corresponding
* {@link PCollection}, for all the members of this {@link PCollectionTuple}.
*/
public Map<TupleTag<?>, PCollection<?>> getAll() {
return pcollectionMap;
}
/**
* Like {@link #apply(String, PTransform)} but defaulting to the name
* of the {@link PTransform}.
*
* @return the output of the applied {@link PTransform}
*/
public <OutputT extends POutput> OutputT apply(
PTransform<PCollectionTuple, OutputT> t) {
return Pipeline.applyTransform(this, t);
}
/**
* Applies the given {@link PTransform} to this input {@link PCollectionTuple},
* using {@code name} to identify this specific application of the transform.
* This name is used in various places, including the monitoring UI, logging,
* and to stably identify this application node in the job graph.
*
* @return the output of the applied {@link PTransform}
*/
public <OutputT extends POutput> OutputT apply(
String name, PTransform<PCollectionTuple, OutputT> t) {
return Pipeline.applyTransform(name, this, t);
}
/////////////////////////////////////////////////////////////////////////////
// Internal details below here.
final Pipeline pipeline;
final Map<TupleTag<?>, PCollection<?>> pcollectionMap;
PCollectionTuple(Pipeline pipeline) {
this(pipeline, new LinkedHashMap<TupleTag<?>, PCollection<?>>());
}
PCollectionTuple(Pipeline pipeline,
Map<TupleTag<?>, PCollection<?>> pcollectionMap) {
this.pipeline = pipeline;
this.pcollectionMap = Collections.unmodifiableMap(pcollectionMap);
}
/**
* <b><i>For internal use only; no backwards-compatibility guarantees.</i></b>
*
* <p>Returns a {@link PCollectionTuple} with each of the given tags mapping to a new
* output {@link PCollection}.
*
* <p>For use by primitive transformations only.
*/
@Internal
public static PCollectionTuple ofPrimitiveOutputsInternal(
Pipeline pipeline,
TupleTagList outputTags,
WindowingStrategy<?, ?> windowingStrategy,
IsBounded isBounded) {
Map<TupleTag<?>, PCollection<?>> pcollectionMap = new LinkedHashMap<>();
for (TupleTag<?> outputTag : outputTags.tupleTags) {
if (pcollectionMap.containsKey(outputTag)) {
throw new IllegalArgumentException(
"TupleTag already present in this tuple");
}
// In fact, `token` and `outputCollection` should have
// types TypeDescriptor<T> and PCollection<T> for some
// unknown T. It is safe to create `outputCollection`
// with type PCollection<Object> because it has the same
// erasure as the correct type. When a transform adds
// elements to `outputCollection` they will be of type T.
@SuppressWarnings("unchecked")
TypeDescriptor<Object> token = (TypeDescriptor<Object>) outputTag.getTypeDescriptor();
PCollection<Object> outputCollection = PCollection
.createPrimitiveOutputInternal(pipeline, windowingStrategy, isBounded)
.setTypeDescriptor(token);
pcollectionMap.put(outputTag, outputCollection);
}
return new PCollectionTuple(pipeline, pcollectionMap);
}
@Override
public Pipeline getPipeline() {
return pipeline;
}
@Override
public Map<TupleTag<?>, PValue> expand() {
return ImmutableMap.<TupleTag<?>, PValue>copyOf(pcollectionMap);
}
@Override
public void finishSpecifyingOutput(
String transformName, PInput input, PTransform<?, ?> transform) {
// All component PCollections will already have been finished. Update their names if
// appropriate.
int i = 0;
for (Map.Entry<TupleTag<?>, PCollection<?>> entry
: pcollectionMap.entrySet()) {
TupleTag<?> tag = entry.getKey();
PCollection<?> pc = entry.getValue();
if (pc.getName().equals(PValueBase.defaultName(transformName))) {
pc.setName(String.format("%s.%s", transformName, tag.getOutName(i)));
}
i++;
}
}
@Override
public boolean equals(Object other) {
if (!(other instanceof PCollectionTuple)) {
return false;
}
PCollectionTuple that = (PCollectionTuple) other;
return this.pipeline.equals(that.pipeline) && this.pcollectionMap.equals(that.pcollectionMap);
}
@Override
public int hashCode() {
return Objects.hash(this.pipeline, this.pcollectionMap);
}
}