/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.LazyOpenInterpreter;
import org.apache.zeppelin.interpreter.WrappedInterpreter;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Spark SQL interpreter for Zeppelin.
*/
public class SparkSqlInterpreter extends Interpreter {
private Logger logger = LoggerFactory.getLogger(SparkSqlInterpreter.class);
public static final String MAX_RESULTS = "zeppelin.spark.maxResult";
AtomicInteger num = new AtomicInteger(0);
private int maxResult;
public SparkSqlInterpreter(Properties property) {
super(property);
}
@Override
public void open() {
this.maxResult = Integer.parseInt(getProperty(MAX_RESULTS));
}
private SparkInterpreter getSparkInterpreter() {
LazyOpenInterpreter lazy = null;
SparkInterpreter spark = null;
Interpreter p = getInterpreterInTheSameSessionByClassName(SparkInterpreter.class.getName());
while (p instanceof WrappedInterpreter) {
if (p instanceof LazyOpenInterpreter) {
lazy = (LazyOpenInterpreter) p;
}
p = ((WrappedInterpreter) p).getInnerInterpreter();
}
spark = (SparkInterpreter) p;
if (lazy != null) {
lazy.open();
}
return spark;
}
public boolean concurrentSQL() {
return Boolean.parseBoolean(getProperty("zeppelin.spark.concurrentSQL"));
}
@Override
public void close() {}
@Override
public InterpreterResult interpret(String st, InterpreterContext context) {
SQLContext sqlc = null;
SparkInterpreter sparkInterpreter = getSparkInterpreter();
if (sparkInterpreter.isUnsupportedSparkVersion()) {
return new InterpreterResult(Code.ERROR, "Spark "
+ sparkInterpreter.getSparkVersion().toString() + " is not supported");
}
sparkInterpreter.populateSparkWebUrl(context);
sparkInterpreter.getZeppelinContext().setInterpreterContext(context);
sqlc = sparkInterpreter.getSQLContext();
SparkContext sc = sqlc.sparkContext();
if (concurrentSQL()) {
sc.setLocalProperty("spark.scheduler.pool", "fair");
} else {
sc.setLocalProperty("spark.scheduler.pool", null);
}
sc.setJobGroup(Utils.buildJobGroupId(context), "Zeppelin", false);
Object rdd = null;
try {
// method signature of sqlc.sql() is changed
// from def sql(sqlText: String): SchemaRDD (1.2 and prior)
// to def sql(sqlText: String): DataFrame (1.3 and later).
// Therefore need to use reflection to keep binary compatibility for all spark versions.
Method sqlMethod = sqlc.getClass().getMethod("sql", String.class);
rdd = sqlMethod.invoke(sqlc, st);
} catch (InvocationTargetException ite) {
if (Boolean.parseBoolean(getProperty("zeppelin.spark.sql.stacktrace"))) {
throw new InterpreterException(ite);
}
logger.error("Invocation target exception", ite);
String msg = ite.getTargetException().getMessage()
+ "\nset zeppelin.spark.sql.stacktrace = true to see full stacktrace";
return new InterpreterResult(Code.ERROR, msg);
} catch (NoSuchMethodException | SecurityException | IllegalAccessException
| IllegalArgumentException e) {
throw new InterpreterException(e);
}
String msg = sparkInterpreter.getZeppelinContext().showData(rdd);
sc.clearJobGroup();
return new InterpreterResult(Code.SUCCESS, msg);
}
@Override
public void cancel(InterpreterContext context) {
SparkInterpreter sparkInterpreter = getSparkInterpreter();
SQLContext sqlc = sparkInterpreter.getSQLContext();
SparkContext sc = sqlc.sparkContext();
sc.cancelJobGroup(Utils.buildJobGroupId(context));
}
@Override
public FormType getFormType() {
return FormType.SIMPLE;
}
@Override
public int getProgress(InterpreterContext context) {
SparkInterpreter sparkInterpreter = getSparkInterpreter();
return sparkInterpreter.getProgress(context);
}
@Override
public Scheduler getScheduler() {
if (concurrentSQL()) {
int maxConcurrency = 10;
return SchedulerFactory.singleton().createOrGetParallelScheduler(
SparkSqlInterpreter.class.getName() + this.hashCode(), maxConcurrency);
} else {
// getSparkInterpreter() calls open() inside.
// That means if SparkInterpreter is not opened, it'll wait until SparkInterpreter open.
// In this moment UI displays 'READY' or 'FINISHED' instead of 'PENDING' or 'RUNNING'.
// It's because of scheduler is not created yet, and scheduler is created by this function.
// Therefore, we can still use getSparkInterpreter() here, but it's better and safe
// to getSparkInterpreter without opening it.
Interpreter intp =
getInterpreterInTheSameSessionByClassName(SparkInterpreter.class.getName());
if (intp != null) {
return intp.getScheduler();
} else {
return null;
}
}
}
@Override
public List<InterpreterCompletion> completion(String buf, int cursor,
InterpreterContext interpreterContext) {
return null;
}
}