/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.etl.api.batch;
import co.cask.cdap.api.annotation.Beta;
import co.cask.cdap.etl.api.PipelineConfigurable;
import co.cask.cdap.etl.api.PipelineConfigurer;
import org.apache.spark.api.java.JavaRDD;
import java.io.Serializable;
/**
 * Spark Compute stage. Implementations receive the records of a pipeline stage as a
 * {@link JavaRDD}, transform them, and return a {@link JavaRDD} that is handed to the
 * next stage in the pipeline.
 *
 * <p>Implementations must be {@link Serializable} — presumably because instances are
 * serialized for distribution to Spark executors; confirm against the pipeline runtime.</p>
 *
 * @param <IN> Type of input object
 * @param <OUT> Type of output object
 */
@Beta
public abstract class SparkCompute<IN, OUT> implements PipelineConfigurable, Serializable {
// Plugin type string under which SparkCompute plugins are registered and looked up.
public static final String PLUGIN_TYPE = "sparkcompute";
private static final long serialVersionUID = -8156450728774382658L;
/**
 * Configure an ETL pipeline, registering any datasets and streams this stage requires
 * and validating the stage configuration. The default implementation is a no-op;
 * override to add resources or perform validation.
 *
 * @param pipelineConfigurer the configurer used to add required datasets and streams
 * @throws IllegalArgumentException if the given config is invalid
 */
@Override
public void configurePipeline(PipelineConfigurer pipelineConfigurer) throws IllegalArgumentException {
//no-op
}
/**
 * Transform the input and return the output to be sent to the next stage in the pipeline.
 *
 * @param context {@link SparkExecutionPluginContext} for this job
 * @param input input data to be transformed
 * @return the transformed {@link JavaRDD} to pass to the next pipeline stage
 * @throws Exception if there's an error during this method invocation
 */
public abstract JavaRDD<OUT> transform(SparkExecutionPluginContext context, JavaRDD<IN> input) throws Exception;
}