/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.beam.runners.core; import com.google.common.collect.FluentIterable; import com.google.common.collect.ImmutableList; import com.google.protobuf.BytesValue; import com.google.protobuf.InvalidProtocolBufferException; import java.io.IOException; import java.util.Collection; import java.util.Map; import org.apache.beam.fn.harness.fn.ThrowingConsumer; import org.apache.beam.fn.v1.BeamFnApi; import org.apache.beam.sdk.io.BoundedSource; import org.apache.beam.sdk.io.Source.Reader; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.util.SerializableUtils; import org.apache.beam.sdk.util.WindowedValue; /** * A runner which creates {@link Reader}s for each {@link BoundedSource} and executes * the {@link Reader}s read loop. */ public class BoundedSourceRunner<InputT extends BoundedSource<OutputT>, OutputT> { private final PipelineOptions pipelineOptions; private final BeamFnApi.FunctionSpec definition; private final Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers; public BoundedSourceRunner( PipelineOptions pipelineOptions, BeamFnApi.FunctionSpec definition, Map<String, Collection<ThrowingConsumer<WindowedValue<OutputT>>>> outputMap) { this.pipelineOptions = pipelineOptions; this.definition = definition; this.consumers = ImmutableList.copyOf(FluentIterable.concat(outputMap.values())); } /** * The runner harness is meant to send the source over the Beam Fn Data API which would be * consumed by the {@link #runReadLoop}. Drop this method once the runner harness sends the * source instead of unpacking it from the data block of the function specification. */ @Deprecated public void start() throws Exception { try { // The representation here is defined as the java serialized representation of the // bounded source object packed into a protobuf Any using a protobuf BytesValue wrapper. byte[] bytes = definition.getData().unpack(BytesValue.class).getValue().toByteArray(); @SuppressWarnings("unchecked") InputT boundedSource = (InputT) SerializableUtils.deserializeFromByteArray(bytes, definition.toString()); runReadLoop(WindowedValue.valueInGlobalWindow(boundedSource)); } catch (InvalidProtocolBufferException e) { throw new IOException( String.format("Failed to decode %s, expected %s", definition.getData().getTypeUrl(), BytesValue.getDescriptor().getFullName()), e); } } /** * Creates a {@link Reader} for each {@link BoundedSource} and executes the {@link Reader}s * read loop. See {@link Reader} for further details of the read loop. * * <p>Propagates any exceptions caused during reading or processing via a consumer to the * caller. */ public void runReadLoop(WindowedValue<InputT> value) throws Exception { try (Reader<OutputT> reader = value.getValue().createReader(pipelineOptions)) { if (!reader.start()) { // Reader has no data, immediately return return; } do { // TODO: Should this use the input window as the window for all the outputs? WindowedValue<OutputT> nextValue = WindowedValue.timestampedValueInGlobalWindow( reader.getCurrent(), reader.getCurrentTimestamp()); for (ThrowingConsumer<WindowedValue<OutputT>> consumer : consumers) { consumer.accept(nextValue); } } while (reader.advance()); } } @Override public String toString() { return definition.toString(); } }