/* * Copyright © 2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.etl.api.batch; import co.cask.cdap.api.annotation.Beta; import co.cask.cdap.api.dataset.lib.KeyValue; import co.cask.cdap.etl.api.Emitter; import co.cask.cdap.etl.api.StageLifecycle; import co.cask.cdap.etl.api.Transformation; /** * Batch Source forms the first stage of a Batch ETL Pipeline. In addition to configuring the Batch run, it * also transforms the key value pairs provided by the Batch run into a single output type to be consumed by * subsequent transforms. By default, the value of the key value pair will be emitted. * * {@link BatchSource#initialize}, {@link BatchSource#transform} and {@link BatchSource#destroy} methods are called * inside the Batch Run while {@link BatchSource#prepareRun} and {@link BatchSource#onRunFinish} methods are called * on the client side, which launches the Batch run, before the Batch run starts and after it finishes respectively. * * @param <KEY_IN> the type of input key from the Batch run * @param <VAL_IN> the type of input value from the Batch run * @param <OUT> the type of output for the source */ @Beta public abstract class BatchSource<KEY_IN, VAL_IN, OUT> extends BatchConfigurable<BatchSourceContext> implements Transformation<KeyValue<KEY_IN, VAL_IN>, OUT>, StageLifecycle<BatchRuntimeContext> { public static final String PLUGIN_TYPE = "batchsource"; /** * Initialize the Batch Source stage. Executed inside the Batch Run. This method is guaranteed to be invoked * before any calls to {@link BatchSource#transform} are made. * * @param context {@link BatchRuntimeContext} * @throws Exception if there is any error during initialization */ @Override public void initialize(BatchRuntimeContext context) throws Exception { // no-op } /** * Transform the {@link KeyValue} pair produced by the input, as set in {@link BatchSource#prepareRun}, * to a single object and emit it to the next stage. By default it emits the value. * This method is invoked inside the Batch run. * * @param input the input to transform * @param emitter {@link Emitter} to emit data to the next stage * @throws Exception if there's an error during this method invocation */ @Override public void transform(KeyValue<KEY_IN, VAL_IN> input, Emitter<OUT> emitter) throws Exception { emitter.emit((OUT) input.getValue()); } /** * Destroy the Batch Source stage. Executed at the end of the Batch run. */ @Override public void destroy() { // no-op } }