SparkSqlInterpreter.java example

Explorer

incubator-zeppelin-master
- incubator-zeppelin-druid-master
  - angular
    - src
      - main
        java
        org
        apache
        zeppelin
        angular
        AngularInterpreter.java
  - cassandra
    - src
      - main
        java
        org
        apache
        zeppelin
        cassandra
        CassandraInterpreter.java
        ParsingException.java
      - test
        java
        org
        apache
        zeppelin
        cassandra
        CassandraInterpreterTest.java
        InterpreterLogicTest.java
  - druid
    - src
      - main
        java
        org
        apache
        zeppelin
        druid
        DruidSqlInterpreter.java
      - test
        java
        org
        apache
        zeppelin
        druid
        DruidSqlInterpreterTest.java
  - elasticsearch
    - src
      - main
        java
        org
        apache
        zeppelin
        elasticsearch
        ElasticsearchInterpreter.java
      - test
        java
        org
        apache
        zeppelin
        elasticsearch
        ElasticsearchInterpreterTest.java
  - flink
    - src
      - main
        java
        org
        apache
        zeppelin
        flink
        FlinkInterpreter.java
      - test
        java
        org
        apache
        zeppelin
        flink
        FlinkInterpreterTest.java
  - geode
    - src
      - main
        java
        org
        apache
        zeppelin
        geode
        GeodeOqlInterpreter.java
      - test
        java
        org
        apache
        zeppelin
        geode
        GeodeOqlInterpreterTest.java
  - hive
    - src
      - main
        java
        org
        apache
        zeppelin
        hive
        HiveInterpreter.java
      - test
        java
        org
        apache
        zeppelin
        hive
        HiveInterpreterTest.java
  - ignite
    - src
      - main
        java
        org
        apache
        zeppelin
        ignite
        IgniteInterpreter.java
        IgniteInterpreterUtils.java
        IgniteSqlInterpreter.java
      - test
        java
        org
        apache
        zeppelin
        ignite
        IgniteInterpreterTest.java
        IgniteSqlInterpreterTest.java
        Person.java
  - kylin
    - src
      - main
        java
        org
        apache
        zeppelin
        kylin
        KylinInterpreter.java
      - test
        java
        KylinInterpreterTest.java
  - lens
    - src
      - main
        java
        org
        apache
        zeppelin
        lens
        ExecutionDetail.java
        LensBootstrap.java
        LensInterpreter.java
        LensJLineShellComponent.java
        LensSimpleExecutionStrategy.java
      - test
        java
        org
        apache
        zeppelin
        lens
        LensInterpreterTest.java
  - markdown
    - src
      - main
        java
        org
        apache
        zeppelin
        markdown
        Markdown.java
      - test
        java
        org
        apache
        zeppelin
        markdown
        MarkdownTest.java
  - phoenix
    - src
      - main
        java
        org
        apache
        zeppelin
        phoenix
        PhoenixInterpreter.java
      - test
        java
        org
        apache
        zeppelin
        phoenix
        PhoenixInterpreterTest.java
  - postgresql
    - src
      - main
        java
        org
        apache
        zeppelin
        postgresql
        PostgreSqlInterpreter.java
        SqlCompleter.java
      - test
        java
        org
        apache
        zeppelin
        postgresql
        PostgreSqlInterpreterTest.java
        SqlCompleterTest.java
  - scalding
    - src
      - main
        java
        org
        apache
        zeppelin
        scalding
        ScaldingInterpreter.java
      - test
        java
        org
        apache
        zeppelin
        scalding
        ScaldingInterpreterTest.java
  - shell
    - src
      - main
        java
        org
        apache
        zeppelin
        shell
        ShellInterpreter.java
  - spark
    - src
      - main
        java
        org
        apache
        zeppelin
        spark
        DepInterpreter.java
        PySparkInterpreter.java
        SparkInterpreter.java
        SparkSqlInterpreter.java
        SparkVersion.java
        ZeppelinContext.java
        dep
        Booter.java
        Dependency.java
        DependencyContext.java
        DependencyResolver.java
        Repository.java
        RepositoryListener.java
        RepositorySystemFactory.java
        TransferListener.java
      - test
        java
        org
        apache
        zeppelin
        spark
        DepInterpreterTest.java
        SparkInterpreterTest.java
        SparkSqlInterpreterTest.java
        SparkVersionTest.java
        dep
        DependencyResolverTest.java
  - tajo
    - src
      - main
        java
        org
        apache
        zeppelin
        tajo
        TajoInterpreter.java
      - test
        java
        org
        apache
        zeppelin
        tajo
        TajoInterpreterTest.java
        TesterConnection.java
        TesterDatabaseMetaData.java
        TesterResultSet.java
        TesterResultSetMetaData.java
        TesterStatement.java
        TesterTajoInterpreter.java
  - zeppelin-interpreter
    - src
      - main
        java
        org
        apache
        zeppelin
        display
        AngularObject.java
        AngularObjectListener.java
        AngularObjectRegistry.java
        AngularObjectRegistryListener.java
        AngularObjectWatcher.java
        GUI.java
        Input.java
        interpreter
        ClassloaderInterpreter.java
        Interpreter.java
        InterpreterContext.java
        InterpreterContextRunner.java
        InterpreterException.java
        InterpreterGroup.java
        InterpreterProperty.java
        InterpreterPropertyBuilder.java
        InterpreterResult.java
        InterpreterUtils.java
        LazyOpenInterpreter.java
        WrappedInterpreter.java
        remote
        ClientFactory.java
        InterpreterContextRunnerPool.java
        RemoteAngularObject.java
        RemoteAngularObjectRegistry.java
        RemoteInterpreter.java
        RemoteInterpreterContextRunner.java
        RemoteInterpreterEventPoller.java
        RemoteInterpreterProcess.java
        RemoteInterpreterServer.java
        RemoteInterpreterUtils.java
        thrift
        RemoteInterpreterContext.java
        RemoteInterpreterEvent.java
        RemoteInterpreterEventType.java
        RemoteInterpreterResult.java
        RemoteInterpreterService.java
        scheduler
        ExecutorFactory.java
        FIFOScheduler.java
        Job.java
        JobListener.java
        JobProgressPoller.java
        ParallelScheduler.java
        RemoteScheduler.java
        Scheduler.java
        SchedulerFactory.java
        SchedulerListener.java
      - test
        java
        org
        apache
        zeppelin
        display
        AngularObjectRegistryTest.java
        AngularObjectTest.java
        InputTest.java
        interpreter
        InterpreterContextTest.java
        InterpreterResultTest.java
        InterpreterTest.java
        remote
        RemoteAngularObjectTest.java
        RemoteInterpreterProcessTest.java
        RemoteInterpreterServerTest.java
        RemoteInterpreterTest.java
        RemoteInterpreterUtilsTest.java
        mock
        MockInterpreterA.java
        MockInterpreterAngular.java
        MockInterpreterB.java
        scheduler
        FIFOSchedulerTest.java
        ParallelSchedulerTest.java
        RemoteSchedulerTest.java
        SleepingJob.java
  - zeppelin-server
    - src
      - main
        java
        org
        apache
        zeppelin
        rest
        InterpreterRestApi.java
        NotebookResponse.java
        NotebookRestApi.java
        ZeppelinRestApi.java
        message
        CronRequest.java
        InterpreterSettingListForNoteBind.java
        NewInterpreterSettingRequest.java
        NewNotebookRequest.java
        NewParagraphRequest.java
        RunParagraphWithParametersRequest.java
        UpdateInterpreterSettingRequest.java
        server
        CorsFilter.java
        JsonExclusionStrategy.java
        JsonResponse.java
        ZeppelinServer.java
        socket
        Message.java
        NotebookServer.java
        NotebookSocket.java
        NotebookSocketListener.java
        utils
        SecurityUtils.java
      - test
        java
        com
        webautomation
        ScreenCaptureHtmlUnitDriver.java
        org
        apache
        zeppelin
        CommandExecutor.java
        ProcessData.java
        WebDriverManager.java
        ZeppelinIT.java
        ZeppelinITUtils.java
        interpreter
        mock
        MockInterpreter1.java
        rest
        AbstractTestRestApi.java
        ZeppelinRestApiTest.java
        ZeppelinSparkClusterTest.java
        security
        SecurityUtilsTest.java
        server
        CorsFilterTest.java
        socket
        NotebookServerTest.java
        TestHttpServletRequest.java
  - zeppelin-zengine
    - src
      - main
        java
        org
        apache
        zeppelin
        conf
        ZeppelinConfiguration.java
        interpreter
        InterpreterFactory.java
        InterpreterInfoSaving.java
        InterpreterOption.java
        InterpreterSerializer.java
        InterpreterSetting.java
        notebook
        JobListenerFactory.java
        Note.java
        NoteInfo.java
        NoteInterpreterLoader.java
        Notebook.java
        Paragraph.java
        repo
        GitNotebookRepo.java
        NotebookRepo.java
        NotebookRepoSync.java
        NotebookRepoVersioned.java
        S3NotebookRepo.java
        VFSNotebookRepo.java
        utility
        IdHashes.java
        search
        LuceneSearch.java
        SearchService.java
        util
        Util.java
      - test
        java
        org
        apache
        zeppelin
        conf
        ZeppelinConfigurationTest.java
        interpreter
        InterpreterFactoryTest.java
        mock
        MockInterpreter1.java
        MockInterpreter11.java
        MockInterpreter2.java
        notebook
        NoteInterpreterLoaderTest.java
        NotebookTest.java
        ParagraphTest.java
        repo
        GitNotebookRepoTest.java
        NotebookRepoSyncInitializationTest.java
        NotebookRepoSyncTest.java
        VFSNotebookRepoTest.java
        mock
        VFSNotebookRepoMock.java
        search
        LuceneSearchTest.java
        util
        UtilTest.java
        UtilsForTests.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.zeppelin.spark;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.spark.SparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.LazyOpenInterpreter;
import org.apache.zeppelin.interpreter.WrappedInterpreter;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Spark SQL interpreter for Zeppelin.
 *
 * <p>Registers itself as the {@code sql} interpreter of the {@code spark} group. Every statement
 * is executed through the {@link SQLContext} of the {@link SparkInterpreter} that lives in the
 * same interpreter group, so this class holds no Spark state of its own.
 */
public class SparkSqlInterpreter extends Interpreter {
  /** Upper bound of paragraphs run in parallel when {@code zeppelin.spark.concurrentSQL} is on. */
  private static final int MAX_CONCURRENT_SQL = 10;

  Logger logger = LoggerFactory.getLogger(SparkSqlInterpreter.class);
  AtomicInteger num = new AtomicInteger(0);

  static {
    Interpreter.register(
        "sql",
        "spark",
        SparkSqlInterpreter.class.getName(),
        new InterpreterPropertyBuilder()
            .add("zeppelin.spark.maxResult",
                SparkInterpreter.getSystemDefault("ZEPPELIN_SPARK_MAXRESULT",
                    "zeppelin.spark.maxResult", "1000"),
                "Max number of SparkSQL result to display.")
            .add("zeppelin.spark.concurrentSQL",
                SparkInterpreter.getSystemDefault("ZEPPELIN_SPARK_CONCURRENTSQL",
                    "zeppelin.spark.concurrentSQL", "false"),
                "Execute multiple SQL concurrently if set true.")
            .build());
  }

  /**
   * Builds the Spark job group name used for the given paragraph.
   *
   * @param context context of the paragraph being run or cancelled
   * @return {@code "zeppelin-"} followed by the paragraph id
   */
  private String getJobGroup(InterpreterContext context){
    return "zeppelin-" + context.getParagraphId();
  }

  /** Value of {@code zeppelin.spark.maxResult}, read in {@link #open()}. */
  private int maxResult;

  /**
   * Creates the interpreter with the given properties.
   *
   * @param property interpreter properties, handed to the {@link Interpreter} base class
   */
  public SparkSqlInterpreter(Properties property) {
    super(property);
  }

  /**
   * Reads {@code zeppelin.spark.maxResult} into {@link #maxResult}.
   *
   * @throws NumberFormatException if the property is missing or not an integer
   */
  @Override
  public void open() {
    this.maxResult = Integer.parseInt(getProperty("zeppelin.spark.maxResult"));
  }

  /**
   * Looks up the {@link SparkInterpreter} of this interpreter group, unwrapping any
   * {@link WrappedInterpreter} layers, and opens it through its {@link LazyOpenInterpreter}
   * wrapper when one is present.
   *
   * @return the unwrapped {@link SparkInterpreter}, never {@code null}
   * @throws InterpreterException if the group contains no {@link SparkInterpreter}
   */
  private SparkInterpreter getSparkInterpreter() {
    InterpreterGroup intpGroup = getInterpreterGroup();
    LazyOpenInterpreter lazy = null;
    SparkInterpreter spark = null;
    synchronized (intpGroup) {
      // Iterate the very instance we hold the lock on.
      for (Interpreter intp : intpGroup) {
        if (intp.getClassName().equals(SparkInterpreter.class.getName())) {
          Interpreter p = intp;
          while (p instanceof WrappedInterpreter) {
            if (p instanceof LazyOpenInterpreter) {
              lazy = (LazyOpenInterpreter) p;
            }
            p = ((WrappedInterpreter) p).getInnerInterpreter();
          }
          spark = (SparkInterpreter) p;
        }
      }
    }
    if (spark == null) {
      // Fail with the same explicit error getScheduler() uses instead of handing
      // callers a null that surfaces later as a bare NullPointerException.
      throw new InterpreterException("Can't find SparkInterpreter");
    }
    // open() is invoked outside the synchronized block, as before.
    if (lazy != null) {
      lazy.open();
    }
    return spark;
  }

  /**
   * Tells whether concurrent SQL execution is enabled.
   *
   * @return the boolean value of the {@code zeppelin.spark.concurrentSQL} property
   */
  public boolean concurrentSQL() {
    return Boolean.parseBoolean(getProperty("zeppelin.spark.concurrentSQL"));
  }

  /** Nothing to release: this interpreter owns no resources. */
  @Override
  public void close() {}

  /**
   * Executes one SQL statement and renders the result with {@link ZeppelinContext#showDF}.
   *
   * <p>The statement runs inside a Spark job group named after the paragraph so that
   * {@link #cancel(InterpreterContext)} can target it. The job group is always cleared
   * afterwards, including when execution or rendering fails.
   *
   * @param st the SQL text to run
   * @param context context of the paragraph being run
   * @return an {@code ERROR} result when the Spark version is unsupported, otherwise a
   *     {@code SUCCESS} result carrying the rendered output
   * @throws InterpreterException if the reflective {@code sql} call fails, or if no
   *     {@link SparkInterpreter} is available
   */
  @Override
  public InterpreterResult interpret(String st, InterpreterContext context) {
    SparkInterpreter sparkInterpreter = getSparkInterpreter();

    if (sparkInterpreter.getSparkVersion().isUnsupportedVersion()) {
      return new InterpreterResult(Code.ERROR, "Spark "
          + sparkInterpreter.getSparkVersion().toString() + " is not supported");
    }

    // Reuse the interpreter resolved above rather than scanning the group a second time.
    SQLContext sqlc = sparkInterpreter.getSQLContext();
    SparkContext sc = sqlc.sparkContext();
    if (concurrentSQL()) {
      sc.setLocalProperty("spark.scheduler.pool", "fair");
    } else {
      sc.setLocalProperty("spark.scheduler.pool", null);
    }

    sc.setJobGroup(getJobGroup(context), "Zeppelin", false);
    try {
      Object rdd = invokeSql(sqlc, st);
      String msg = ZeppelinContext.showDF(sc, context, rdd, maxResult);
      return new InterpreterResult(Code.SUCCESS, msg);
    } finally {
      // Must run on the failure path too, otherwise the job group set above is left in place.
      sc.clearJobGroup();
    }
  }

  /**
   * Calls {@code sqlc.sql(st)} reflectively.
   *
   * <p>The method signature of sqlc.sql() changed
   * from {@code def sql(sqlText: String): SchemaRDD} (1.2 and prior)
   * to {@code def sql(sqlText: String): DataFrame} (1.3 and later),
   * therefore reflection is needed to keep binary compatibility for all spark versions.
   *
   * @param sqlc the SQL context to run the statement on
   * @param st the SQL text to run
   * @return whatever {@code sql} returned, untyped
   * @throws InterpreterException wrapping any reflection or invocation failure
   */
  private Object invokeSql(SQLContext sqlc, String st) {
    try {
      Method sqlMethod = sqlc.getClass().getMethod("sql", String.class);
      return sqlMethod.invoke(sqlc, st);
    } catch (NoSuchMethodException | SecurityException | IllegalAccessException
        | IllegalArgumentException | InvocationTargetException e) {
      throw new InterpreterException(e);
    }
  }

  /**
   * Cancels the Spark job group that belongs to the given paragraph.
   *
   * @param context context of the paragraph to cancel
   */
  @Override
  public void cancel(InterpreterContext context) {
    SQLContext sqlc = getSparkInterpreter().getSQLContext();
    SparkContext sc = sqlc.sparkContext();

    sc.cancelJobGroup(getJobGroup(context));
  }

  /**
   * @return {@link FormType#SIMPLE}
   */
  @Override
  public FormType getFormType() {
    return FormType.SIMPLE;
  }


  /**
   * Reports progress by delegating to the {@link SparkInterpreter} of this group.
   *
   * @param context context of the paragraph whose progress is requested
   * @return the value returned by {@link SparkInterpreter#getProgress(InterpreterContext)}
   */
  @Override
  public int getProgress(InterpreterContext context) {
    SparkInterpreter sparkInterpreter = getSparkInterpreter();
    return sparkInterpreter.getProgress(context);
  }

  /**
   * Picks the scheduler for this interpreter: a parallel scheduler of its own when concurrent
   * SQL is enabled, otherwise the scheduler of the {@link SparkInterpreter} in the same group.
   *
   * @return the scheduler to run paragraphs on
   * @throws InterpreterException if concurrent SQL is off and the group has no
   *     {@link SparkInterpreter}
   */
  @Override
  public Scheduler getScheduler() {
    if (concurrentSQL()) {
      return SchedulerFactory.singleton().createOrGetParallelScheduler(
          SparkSqlInterpreter.class.getName() + this.hashCode(), MAX_CONCURRENT_SQL);
    }

    // getSparkInterpreter() opens the SparkInterpreter, so calling it here would block until
    // that open finishes. Per the original note, the scheduler does not exist yet at that
    // moment and the UI would show 'READY' or 'FINISHED' instead of 'PENDING' or 'RUNNING'.
    // Hence the group is searched directly, without opening anything.
    for (Interpreter intp : getInterpreterGroup()) {
      if (intp.getClassName().equals(SparkInterpreter.class.getName())) {
        return intp.getScheduler();
      }
    }
    throw new InterpreterException("Can't find SparkInterpreter");
  }

  /**
   * Code completion is not provided by this interpreter.
   *
   * @param buf the text being edited
   * @param cursor cursor position within {@code buf}
   * @return always {@code null}
   */
  @Override
  public List<String> completion(String buf, int cursor) {
    return null;
  }
}