/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.runners.direct;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.common.base.MoreObjects;
import com.google.common.base.Optional;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import javax.annotation.Nullable;
import org.apache.beam.runners.core.ReadyCheckingSideInputReader;
import org.apache.beam.runners.core.SideInputReader;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.WindowingStrategy;
/**
 * An in-process holder for the contents of {@link PCollectionView PCollectionViews}.
 *
 * <p>Contents are tracked separately for every view and window. The container accepts writes of
 * new contents for a {@link PCollectionView} and hands out {@link SideInputReader
 * SideInputReaders} that can report whether a side input is ready in a window before it is read.
 */
class SideInputContainer {
  /** The views served by this container; readers can only be created over a subset of them. */
  private final Collection<PCollectionView<?>> containedViews;

  /**
   * Holder of the current contents for each view/window pair that has been looked up so far. A
   * holder whose value is {@code null} has not had any contents set yet.
   */
  private final LoadingCache<
          PCollectionViewWindow<?>, AtomicReference<Iterable<? extends WindowedValue<?>>>>
      viewByWindows;

  /**
   * Builds a {@link SideInputContainer} for the given views.
   *
   * @param context the evaluation context handed to the loader that populates the contents cache
   * @param containedViews the views the new container serves
   * @return a container backed by a freshly built contents cache
   */
  public static SideInputContainer create(
      final EvaluationContext context, Collection<PCollectionView<?>> containedViews) {
    CallbackSchedulingLoader loader = new CallbackSchedulingLoader(context);
    LoadingCache<PCollectionViewWindow<?>, AtomicReference<Iterable<? extends WindowedValue<?>>>>
        contentsCache = CacheBuilder.newBuilder().build(loader);
    return new SideInputContainer(containedViews, contentsCache);
  }

  /** Stores an immutable copy of the views together with the supplied contents cache. */
  private SideInputContainer(
      Collection<PCollectionView<?>> containedViews,
      LoadingCache<PCollectionViewWindow<?>, AtomicReference<Iterable<? extends WindowedValue<?>>>>
          viewByWindows) {
    this.containedViews = ImmutableSet.copyOf(containedViews);
    this.viewByWindows = viewByWindows;
  }

  /**
   * Creates a reader restricted to the given views. The reader takes its values from this
   * container's contents.
   *
   * @param newContainedViews the views the reader may access; all must belong to this container
   * @return a reader over exactly the requested views
   * @throws IllegalArgumentException if any requested view is not contained in this container
   */
  public ReadyCheckingSideInputReader createReaderForViews(
      Collection<PCollectionView<?>> newContainedViews) {
    if (containedViews.containsAll(newContainedViews)) {
      return new SideInputContainerSideInputReader(newContainedViews);
    }
    // At least one requested view is unknown; report exactly which ones.
    Set<PCollectionView<?>> known = ImmutableSet.copyOf(containedViews);
    Set<PCollectionView<?>> requested = ImmutableSet.copyOf(newContainedViews);
    throw new IllegalArgumentException(
        "Can't create a SideInputReader with unknown views " + Sets.difference(requested, known));
  }

  /**
   * Writes the given values to the given view.
   *
   * <p>The values are grouped by every window they belong to. For each window, the group becomes
   * the contents of the view in that window if nothing has been written there yet, or if the
   * group's pane index is greater than the pane index of the contents currently stored.
   *
   * <p>Only the first value of a group is consulted for its pane, so the values supplied for a
   * window are presumed to belong to a single pane.
   *
   * @param view the view to write to
   * @param values the values to store
   */
  public void write(PCollectionView<?> view, Iterable<? extends WindowedValue<?>> values) {
    for (Map.Entry<BoundedWindow, Collection<WindowedValue<?>>> entry :
        indexValuesByWindow(values).entrySet()) {
      BoundedWindow window = entry.getKey();
      Collection<WindowedValue<?>> valuesInWindow = entry.getValue();
      updatePCollectionViewWindowValues(view, window, valuesInWindow);
    }
  }

  /**
   * Groups the given values by window. A value that is in several {@link BoundedWindow windows}
   * appears in the group of each of them.
   */
  private Map<BoundedWindow, Collection<WindowedValue<?>>> indexValuesByWindow(
      Iterable<? extends WindowedValue<?>> values) {
    Map<BoundedWindow, Collection<WindowedValue<?>>> byWindow = new HashMap<>();
    for (WindowedValue<?> element : values) {
      for (BoundedWindow window : element.getWindows()) {
        if (!byWindow.containsKey(window)) {
          byWindow.put(window, new ArrayList<WindowedValue<?>>());
        }
        byWindow.get(window).add(element);
      }
    }
    return byWindow;
  }

  /**
   * Installs {@code windowValues} as the contents of the {@link PCollectionView} in the given
   * {@link BoundedWindow}, unless the contents already stored for that
   * {@link PCollectionViewWindow} come from a pane with an equal or greater index.
   */
  private void updatePCollectionViewWindowValues(
      PCollectionView<?> view, BoundedWindow window, Collection<WindowedValue<?>> windowValues) {
    AtomicReference<Iterable<? extends WindowedValue<?>>> holder =
        viewByWindows.getUnchecked(PCollectionViewWindow.of(view, window));
    if (holder.compareAndSet(null, windowValues)) {
      // Nothing had been stored for this view and window before; the first write always wins.
      return;
    }

    PaneInfo incomingPane = windowValues.iterator().next().getPane();
    long incomingIndex = incomingPane.getIndex();
    while (true) {
      Iterable<? extends WindowedValue<?>> current = holder.get();
      // Empty contents are treated as older than any real pane.
      long currentIndex = -1L;
      if (!Iterables.isEmpty(current)) {
        currentIndex = current.iterator().next().getPane().getIndex();
      }
      if (incomingIndex <= currentIndex) {
        // The stored contents are at least as recent as the incoming ones; keep them.
        return;
      }
      if (holder.compareAndSet(current, windowValues)) {
        return;
      }
      // The holder changed between the read and the swap; re-read and compare again.
    }
  }

  /**
   * A {@link CacheLoader} that creates the (initially unset) contents holder for a
   * {@link PCollectionViewWindow} and registers a {@link WriteEmptyViewContents} callback for it
   * with the {@link EvaluationContext}.
   */
  private static class CallbackSchedulingLoader
      extends CacheLoader<
          PCollectionViewWindow<?>, AtomicReference<Iterable<? extends WindowedValue<?>>>> {
    private final EvaluationContext context;

    public CallbackSchedulingLoader(EvaluationContext context) {
      this.context = context;
    }

    @Override
    public AtomicReference<Iterable<? extends WindowedValue<?>>> load(
        PCollectionViewWindow<?> viewWindow) {
      PCollectionView<?> view = viewWindow.getView();
      BoundedWindow window = viewWindow.getWindow();
      WindowingStrategy<?, ?> strategy = view.getWindowingStrategyInternal();
      AtomicReference<Iterable<? extends WindowedValue<?>>> holder = new AtomicReference<>();
      WriteEmptyViewContents writeEmpty = new WriteEmptyViewContents(view, window, holder);
      context.scheduleAfterOutputWouldBeProduced(view, window, strategy, writeEmpty);
      return holder;
    }
  }

  /**
   * A callback that marks a view/window pair as having empty contents, provided no contents have
   * been set for it by the time the callback runs.
   */
  private static class WriteEmptyViewContents implements Runnable {
    private final PCollectionView<?> view;
    private final BoundedWindow window;
    private final AtomicReference<Iterable<? extends WindowedValue<?>>> contents;

    private WriteEmptyViewContents(
        PCollectionView<?> view,
        BoundedWindow window,
        AtomicReference<Iterable<? extends WindowedValue<?>>> contents) {
      this.view = view;
      this.window = window;
      this.contents = contents;
    }

    @Override
    public void run() {
      // The window closed without any elements being produced for the view, so record an empty
      // result. This is a no-op when contents have already been written.
      contents.compareAndSet(null, Collections.<WindowedValue<?>>emptyList());
    }

    @Override
    public String toString() {
      return MoreObjects.toStringHelper(this).add("view", view).add("window", window).toString();
    }
  }

  /**
   * A {@link ReadyCheckingSideInputReader} over a subset of the enclosing container's views.
   *
   * <p>Lookups go through a cache owned by the reader that is built without any eviction
   * settings, so for a given view and window the reader keeps answering with the contents (or
   * absence of contents) it saw on its first lookup.
   */
  private final class SideInputContainerSideInputReader implements ReadyCheckingSideInputReader {
    private final Collection<PCollectionView<?>> readerViews;
    private final LoadingCache<
            PCollectionViewWindow<?>, Optional<? extends Iterable<? extends WindowedValue<?>>>>
        viewContents;

    private SideInputContainerSideInputReader(Collection<PCollectionView<?>> readerViews) {
      this.readerViews = ImmutableSet.copyOf(readerViews);
      this.viewContents = CacheBuilder.newBuilder().build(new CurrentViewContentsLoader());
    }

    /**
     * Reports whether contents are available for the view in the window.
     *
     * @throws IllegalArgumentException if the view is not one of this reader's views
     */
    @Override
    public boolean isReady(final PCollectionView<?> view, final BoundedWindow window) {
      boolean knownView = readerViews.contains(view);
      checkArgument(
          knownView,
          "Tried to check if view %s was ready in a SideInputReader that does not contain it. "
              + "Contained views; %s",
          view,
          readerViews);
      Optional<? extends Iterable<? extends WindowedValue<?>>> loaded =
          viewContents.getUnchecked(PCollectionViewWindow.of(view, window));
      return loaded.isPresent();
    }

    /**
     * Applies the view's function to the contents stored for the view in the window.
     *
     * @throws IllegalArgumentException if the view is not one of this reader's views, or if it
     *     is not ready in the window
     */
    @Override
    @Nullable
    public <T> T get(final PCollectionView<T> view, final BoundedWindow window) {
      checkArgument(
          readerViews.contains(view), "call to get(PCollectionView) with unknown view: %s", view);
      boolean ready = isReady(view, window);
      checkArgument(
          ready,
          "calling get() on PCollectionView %s that is not ready in window %s",
          view,
          window);
      Optional<? extends Iterable<? extends WindowedValue<?>>> loaded =
          viewContents.getUnchecked(PCollectionViewWindow.of(view, window));
      // Covariant cast: the elements are only read, never added to.
      @SuppressWarnings("unchecked")
      Iterable<WindowedValue<?>> values = (Iterable<WindowedValue<?>>) loaded.get();
      return view.getViewFn().apply(values);
    }

    @Override
    public <T> boolean contains(PCollectionView<T> view) {
      return readerViews.contains(view);
    }

    @Override
    public boolean isEmpty() {
      return readerViews.isEmpty();
    }
  }

  /**
   * A {@link CacheLoader} that captures the contents the enclosing container currently holds for
   * a {@link PCollectionViewWindow}, yielding an absent optional when none have been set.
   */
  private class CurrentViewContentsLoader
      extends CacheLoader<
          PCollectionViewWindow<?>, Optional<? extends Iterable<? extends WindowedValue<?>>>> {

    @Override
    public Optional<? extends Iterable<? extends WindowedValue<?>>> load(
        PCollectionViewWindow<?> key) {
      AtomicReference<Iterable<? extends WindowedValue<?>>> holder =
          viewByWindows.getUnchecked(key);
      return Optional.fromNullable(holder.get());
    }
  }
}