/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.zeppelin.spark; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.List; import java.util.Properties; import java.util.concurrent.atomic.AtomicInteger; import org.apache.spark.SparkContext; import org.apache.spark.sql.SQLContext; import org.apache.zeppelin.interpreter.Interpreter; import org.apache.zeppelin.interpreter.InterpreterContext; import org.apache.zeppelin.interpreter.InterpreterGroup; import org.apache.zeppelin.interpreter.InterpreterException; import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder; import org.apache.zeppelin.interpreter.InterpreterResult; import org.apache.zeppelin.interpreter.InterpreterResult.Code; import org.apache.zeppelin.interpreter.LazyOpenInterpreter; import org.apache.zeppelin.interpreter.WrappedInterpreter; import org.apache.zeppelin.scheduler.Scheduler; import org.apache.zeppelin.scheduler.SchedulerFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Spark SQL interpreter for Zeppelin. */ public class SparkSqlInterpreter extends Interpreter { Logger logger = LoggerFactory.getLogger(SparkSqlInterpreter.class); AtomicInteger num = new AtomicInteger(0); static { Interpreter.register( "sql", "spark", SparkSqlInterpreter.class.getName(), new InterpreterPropertyBuilder() .add("zeppelin.spark.maxResult", SparkInterpreter.getSystemDefault("ZEPPELIN_SPARK_MAXRESULT", "zeppelin.spark.maxResult", "1000"), "Max number of SparkSQL result to display.") .add("zeppelin.spark.concurrentSQL", SparkInterpreter.getSystemDefault("ZEPPELIN_SPARK_CONCURRENTSQL", "zeppelin.spark.concurrentSQL", "false"), "Execute multiple SQL concurrently if set true.") .build()); } private String getJobGroup(InterpreterContext context){ return "zeppelin-" + context.getParagraphId(); } private int maxResult; public SparkSqlInterpreter(Properties property) { super(property); } @Override public void open() { this.maxResult = Integer.parseInt(getProperty("zeppelin.spark.maxResult")); } private SparkInterpreter getSparkInterpreter() { InterpreterGroup intpGroup = getInterpreterGroup(); LazyOpenInterpreter lazy = null; SparkInterpreter spark = null; synchronized (intpGroup) { for (Interpreter intp : getInterpreterGroup()){ if (intp.getClassName().equals(SparkInterpreter.class.getName())) { Interpreter p = intp; while (p instanceof WrappedInterpreter) { if (p instanceof LazyOpenInterpreter) { lazy = (LazyOpenInterpreter) p; } p = ((WrappedInterpreter) p).getInnerInterpreter(); } spark = (SparkInterpreter) p; } } } if (lazy != null) { lazy.open(); } return spark; } public boolean concurrentSQL() { return Boolean.parseBoolean(getProperty("zeppelin.spark.concurrentSQL")); } @Override public void close() {} @Override public InterpreterResult interpret(String st, InterpreterContext context) { SQLContext sqlc = null; SparkInterpreter sparkInterpreter = getSparkInterpreter(); if (sparkInterpreter.getSparkVersion().isUnsupportedVersion()) { return new InterpreterResult(Code.ERROR, "Spark " + sparkInterpreter.getSparkVersion().toString() + " is not supported"); } sqlc = getSparkInterpreter().getSQLContext(); SparkContext sc = sqlc.sparkContext(); if (concurrentSQL()) { sc.setLocalProperty("spark.scheduler.pool", "fair"); } else { sc.setLocalProperty("spark.scheduler.pool", null); } sc.setJobGroup(getJobGroup(context), "Zeppelin", false); Object rdd = null; try { // method signature of sqlc.sql() is changed // from def sql(sqlText: String): SchemaRDD (1.2 and prior) // to def sql(sqlText: String): DataFrame (1.3 and later). // Therefore need to use reflection to keep binary compatibility for all spark versions. Method sqlMethod = sqlc.getClass().getMethod("sql", String.class); rdd = sqlMethod.invoke(sqlc, st); } catch (NoSuchMethodException | SecurityException | IllegalAccessException | IllegalArgumentException | InvocationTargetException e) { throw new InterpreterException(e); } String msg = ZeppelinContext.showDF(sc, context, rdd, maxResult); sc.clearJobGroup(); return new InterpreterResult(Code.SUCCESS, msg); } @Override public void cancel(InterpreterContext context) { SQLContext sqlc = getSparkInterpreter().getSQLContext(); SparkContext sc = sqlc.sparkContext(); sc.cancelJobGroup(getJobGroup(context)); } @Override public FormType getFormType() { return FormType.SIMPLE; } @Override public int getProgress(InterpreterContext context) { SparkInterpreter sparkInterpreter = getSparkInterpreter(); return sparkInterpreter.getProgress(context); } @Override public Scheduler getScheduler() { if (concurrentSQL()) { int maxConcurrency = 10; return SchedulerFactory.singleton().createOrGetParallelScheduler( SparkSqlInterpreter.class.getName() + this.hashCode(), maxConcurrency); } else { // getSparkInterpreter() calls open() inside. // That means if SparkInterpreter is not opened, it'll wait until SparkInterpreter open. // In this moment UI displays 'READY' or 'FINISHED' instead of 'PENDING' or 'RUNNING'. // It's because of scheduler is not created yet, and scheduler is created by this function. // Therefore, we can still use getSparkInterpreter() here, but it's better and safe // to getSparkInterpreter without opening it. for (Interpreter intp : getInterpreterGroup()) { if (intp.getClassName().equals(SparkInterpreter.class.getName())) { Interpreter p = intp; return p.getScheduler(); } else { continue; } } throw new InterpreterException("Can't find SparkInterpreter"); } } @Override public List<String> completion(String buf, int cursor) { return null; } }