/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.runners.spark.translation;
import java.io.Serializable;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.beam.runners.spark.coders.CoderHelpers;
import org.apache.beam.runners.spark.util.SideInputBroadcast;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;
/**
* SparkPCollectionView is used to pass serialized views to lambdas.
*/
public class SparkPCollectionView implements Serializable {
// Holds the view --> broadcast mapping. Transient so it will be null from resume
private transient volatile Map<PCollectionView<?>, SideInputBroadcast>
broadcastHelperMap = null;
// Holds the Actual data of the views in serialize form
private Map<PCollectionView<?>,
Tuple2<byte[], Coder<Iterable<WindowedValue<?>>>>> pviews =
new LinkedHashMap<>();
// Driver only - during evaluation stage
void putPView(
PCollectionView<?> view,
Iterable<WindowedValue<?>> value,
Coder<Iterable<WindowedValue<?>>> coder) {
pviews.put(view, new Tuple2<>(CoderHelpers.toByteArray(value, coder), coder));
// Currently unsynchronized unpersist, if needed can be changed to blocking
if (broadcastHelperMap != null) {
synchronized (SparkPCollectionView.class) {
SideInputBroadcast helper = broadcastHelperMap.get(view);
if (helper != null) {
helper.unpersist();
broadcastHelperMap.remove(view);
}
}
}
}
SideInputBroadcast getPCollectionView(
PCollectionView<?> view,
JavaSparkContext context) {
// initialize broadcastHelperMap if needed
if (broadcastHelperMap == null) {
synchronized (SparkPCollectionView.class) {
if (broadcastHelperMap == null) {
broadcastHelperMap = new LinkedHashMap<>();
}
}
}
//lazily broadcast views
SideInputBroadcast helper = broadcastHelperMap.get(view);
if (helper == null) {
synchronized (SparkPCollectionView.class) {
helper = broadcastHelperMap.get(view);
if (helper == null) {
helper = createBroadcastHelper(view, context);
}
}
}
return helper;
}
private SideInputBroadcast createBroadcastHelper(
PCollectionView<?> view,
JavaSparkContext context) {
Tuple2<byte[], Coder<Iterable<WindowedValue<?>>>> tuple2 = pviews.get(view);
SideInputBroadcast helper = SideInputBroadcast.create(tuple2._1, tuple2._2);
helper.broadcast(context);
broadcastHelperMap.put(view, helper);
return helper;
}
}