/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.api.spark;
import co.cask.cdap.api.annotation.Beta;
import java.io.NotSerializableException;
import java.io.Serializable;
/**
 * A Java interface for Spark program to implement. It provides access to {@link JavaSparkExecutionContext} for
 * interacting with CDAP.
 * <p>
 * <pre><code>
 * public class JavaSparkTest implements JavaSparkMain {
 *
 *   {@literal @}Override
 *   public void run(JavaSparkExecutionContext sec) throws Exception {
 *     JavaSparkContext sc = new JavaSparkContext();
 *
 *     // Create a RDD from stream "input", with event body decoded as UTF-8 String
 *     JavaRDD&lt;String&gt; streamRDD = sec.fromStreamAsStringPair("input").values();
 *
 *     // Create a RDD from dataset "lookup", which represents a lookup table from String to Long
 *     JavaPairRDD&lt;String, Long&gt; lookupRDD = sec.fromDataset("lookup");
 *
 *     // Join the "input" stream with the "lookup" dataset and save it to "output" dataset
 *     JavaPairRDD&lt;String, Long&gt; resultRDD = streamRDD
 *       .mapToPair(new PairFunction&lt;String, String, String&gt;() {
 *         {@literal @}Override
 *         public Tuple2&lt;String, String&gt; call(String s) throws Exception {
 *           return Tuple2.apply(s, s);
 *         }
 *       })
 *       .join(lookupRDD)
 *       .mapValues(new Function&lt;Tuple2&lt;String, Long&gt;, Long&gt;() {
 *         {@literal @}Override
 *         public Long call(Tuple2&lt;String, Long&gt; v1) throws Exception {
 *           return v1._2;
 *         }
 *       });
 *
 *     sec.saveAsDataset(resultRDD, "output");
 *   }
 * }
 * </code></pre>
 * </p>
 *
 * This interface extends {@link Serializable} because closures are anonymous classes in Java, and Spark serializes
 * closures before sending them to worker nodes. Serialization of an inner anonymous class requires the outer
 * containing class to be serializable, otherwise a {@link NotSerializableException} is thrown. Having this interface
 * extend {@link Serializable} gives a neater API.
 */
@Beta
public interface JavaSparkMain extends Serializable {
  /**
   * This method will be called when the Spark program starts.
   *
   * @param sec the context for interacting with CDAP
   * @throws Exception if the Spark program fails; the exception is propagated to the framework
   */
  void run(JavaSparkExecutionContext sec) throws Exception;
}