/* * Copyright © 2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.etl.api.batch; import co.cask.cdap.api.annotation.Beta; import co.cask.cdap.api.dataset.lib.KeyValue; import co.cask.cdap.etl.api.Emitter; import co.cask.cdap.etl.api.StageLifecycle; import co.cask.cdap.etl.api.Transformation; /** * Batch Sink forms the last stage of a Batch ETL Pipeline. In addition to configuring the Batch run, it * also transforms a single input object into a key value pair that the Batch run will output. By default, the input * object is used as both the key and value. * * {@link BatchSink#initialize}, {@link BatchSink#transform} and {@link BatchSink#destroy} methods are called inside * the Batch Run while {@link BatchSink#prepareRun} and {@link BatchSink#onRunFinish} methods are called on the * client side, which launches the Batch run, before the Batch run starts and after it finishes respectively. * * @param <IN> the type of input object to the sink * @param <KEY_OUT> the type of key the sink outputs * @param <VAL_OUT> the type of value the sink outputs */ @Beta public abstract class BatchSink<IN, KEY_OUT, VAL_OUT> extends BatchConfigurable<BatchSinkContext> implements Transformation<IN, KeyValue<KEY_OUT, VAL_OUT>>, StageLifecycle<BatchRuntimeContext> { public static final String PLUGIN_TYPE = "batchsink"; /** * Initialize the Batch Sink stage. Executed inside the Batch Run. This method is guaranteed to be invoked * before any calls to {@link BatchSink#transform} are made. * * @param context {@link BatchRuntimeContext} * @throws Exception if there is any error during initialization */ @Override public void initialize(BatchRuntimeContext context) throws Exception { // no-op } /** * Transform the input received from previous stage to a {@link KeyValue} pair which can be consumed by the output, * as set in {@link BatchSink#prepareRun}. By default, the input object is used as both key and value. * This method is invoked inside the Batch run. * * @param input the input to transform * @param emitter {@link Emitter} to emit data to the next stage * @throws Exception if there's an error during this method invocation */ @Override public void transform(IN input, Emitter<KeyValue<KEY_OUT, VAL_OUT>> emitter) throws Exception { emitter.emit(new KeyValue<>((KEY_OUT) input, (VAL_OUT) input)); } /** * Destroy the Batch Sink stage. Executed at the end of the Batch Run. */ @Override public void destroy() { // no-op } }