package com.thinkbiganalytics.spark; /*- * #%L * thinkbig-commons-spark-api * %% * Copyright (C) 2017 ThinkBig Analytics * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; import org.apache.spark.sql.hive.HiveContext; import org.apache.spark.sql.types.StructType; import org.springframework.stereotype.Service; import java.io.Serializable; /** * Creates structured data sets using a {@link SQLContext}. */ @Service public interface SparkContextService extends Serializable { /** * Creates a copy of the specified data set. * * @param adaptee an existing data set * @return a copy */ DataSet toDataSet(Object adaptee); /** * Creates a data set from the specified table. * * @param context the Spark SQL context * @param tableName the name of the table * @return the table data */ DataSet toDataSet(SQLContext context, String tableName); /** * Creates a data set from a Spark RDD. * * @param context the Spark SQL context * @param rdd the Spark RDD * @param schema the schema for the RDD * @return a data set */ DataSet toDataSet(SQLContext context, JavaRDD<Row> rdd, StructType schema); /** * Creates a data set from a Spark RDD. * * @param context the Spark SQL context * @param rdd the Spark RDD * @param beanClass the type of RDD * @return a data set */ DataSet toDataSet(SQLContext context, JavaRDD<?> rdd, Class<?> beanClass); /** * Creates a data set from the specified Hive query. * * @param context the Hive context * @param sql the Hive query * @return a data set */ DataSet sql(HiveContext context, String sql); /** * Creates a data set from the specified Spark SQL query. * * @param context the Spark SQL context * @param sql the Spark SQL query * @return a data set */ DataSet sql(SQLContext context, String sql); }