/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.LogUtils;
import org.apache.hadoop.hive.common.LogUtils.LogInitializationException;
import org.apache.hadoop.hive.common.ServerUtils;
import org.apache.hadoop.hive.common.cli.CommonCliOptions;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStore;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Schema;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.plan.api.QueryPlan;
import org.apache.hadoop.hive.ql.processors.CommandProcessor;
import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.thrift.TException;
import org.apache.thrift.TProcessor;
import org.apache.thrift.TProcessorFactory;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.server.TServer;
import org.apache.thrift.server.TThreadPoolServer;
import org.apache.thrift.transport.TServerSocket;
import org.apache.thrift.transport.TServerTransport;
import org.apache.thrift.transport.TTransport;
import org.apache.thrift.transport.TTransportFactory;

import com.facebook.fb303.fb_status;

/**
 * Thrift Hive Server Implementation.
 */
public class HiveServer extends ThriftHive {
  private static final String VERSION = "1";

  /**
   * Default port on which to start the Hive server.
   */
  private static final int DEFAULT_HIVE_SERVER_PORT = 10000;

  /**
   * Default minimum number of threads serving the Hive server.
   */
  private static final int DEFAULT_MIN_WORKER_THREADS = 100;

  /**
   * Default maximum number of threads serving the Hive server.
   */
  private static final int DEFAULT_MAX_WORKER_THREADS = Integer.MAX_VALUE;

  /**
   * Handler which implements the Hive interface. This class can be used in
   * lieu of the HiveClient class to get an embedded server.
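   * <p>
   * A minimal sketch of embedded use (illustrative only; configuration and
   * error handling are elided):
   *
   * <pre>
   * HiveConf conf = new HiveConf(SessionState.class);
   * HiveInterface client = new HiveServer.HiveServerHandler(conf);
   * client.execute("SHOW TABLES");
   * List&lt;String&gt; rows = client.fetchAll();
   * client.clean();
   * </pre>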
   */
  public static class HiveServerHandler extends HiveMetaStore.HMSHandler
      implements HiveInterface {

    /**
     * Hive server uses org.apache.hadoop.hive.ql.Driver for the run() and
     * getResults() methods. It is the instance for the last Hive query.
     */
    private Driver driver;

    private CommandProcessorResponse response;

    /**
     * Processors other than Hive queries (Driver) write their output to
     * session.out (a temp file) first, and the fetchOne/fetchN/fetchAll
     * methods read that output back through pipeIn.
     */
    private BufferedReader pipeIn;

    /**
     * Flag that indicates whether the last executed command was a Hive query.
     */
    private boolean isHiveQuery;

    public static final Log LOG = LogFactory.getLog(HiveServer.class.getName());

    /**
     * Construct a new handler.
     *
     * @throws MetaException unable to create metastore
     */
    public HiveServerHandler() throws MetaException {
      this(new HiveConf(SessionState.class));
    }

    /**
     * Construct a new handler with the specified hive configuration.
     *
     * @param conf caller specified hive configuration
     * @throws MetaException unable to create metastore
     */
    public HiveServerHandler(HiveConf conf) throws MetaException {
      super(HiveServer.class.getName(), conf);

      isHiveQuery = false;
      driver = null;
      SessionState session = new SessionState(conf);
      SessionState.start(session);
      setupSessionIO(session);
    }

    private void setupSessionIO(SessionState session) {
      try {
        LOG.info("Putting temp output to file " + session.getTmpOutputFile().toString());
        // The Hive server's session input stream is not used.
        session.in = null;
        // Open a per-session file in auto-flush mode for writing temp results.
        session.out =
            new PrintStream(new FileOutputStream(session.getTmpOutputFile()), true, "UTF-8");
        // TODO: for hadoop jobs, progress is printed out to session.err;
        // we should find a way to feed back job progress to the client.
        session.err = new PrintStream(System.err, true, "UTF-8");
      } catch (IOException e) {
        LOG.error("Error in creating temp output file ", e);
        try {
          session.in = null;
          session.out = new PrintStream(System.out, true, "UTF-8");
          session.err = new PrintStream(System.err, true, "UTF-8");
        } catch (UnsupportedEncodingException ee) {
          ee.printStackTrace();
          session.out = null;
          session.err = null;
        }
      }
    }

    /**
     * Executes a query.
     *
     * @param cmd HiveQL query to execute
     */
    public void execute(String cmd) throws HiveServerException, TException {
      HiveServerHandler.LOG.info("Running the query: " + cmd);
      SessionState session = SessionState.get();

      String cmd_trimmed = cmd.trim();
      String[] tokens = cmd_trimmed.split("\\s");
      String cmd_1 = cmd_trimmed.substring(tokens[0].length()).trim();

      int ret = 0;
      String errorMessage = "";
      String SQLState = null;

      try {
        CommandProcessor proc = CommandProcessorFactory.get(tokens[0]);
        if (proc != null) {
          if (proc instanceof Driver) {
            isHiveQuery = true;
            driver = (Driver) proc;
            // In Hive server mode, we are not able to retry in the FetchTask
            // case when calling fetch queries, since execute() has returned.
            // For now, we disable the retry attempts.
            driver.setTryCount(Integer.MAX_VALUE);
            response = driver.run(cmd);
          } else {
            isHiveQuery = false;
            driver = null;
            // Need to reset output for each non-Hive command.
            setupSessionIO(session);
            response = proc.run(cmd_1);
          }

          ret = response.getResponseCode();
          SQLState = response.getSQLState();
          errorMessage = response.getErrorMessage();
        }
      } catch (Exception e) {
        HiveServerException ex = new HiveServerException();
        ex.setMessage("Error running query: " + e.toString());
        ex.setErrorCode(ret == 0 ? -10000 : ret);
        throw ex;
      }

      if (ret != 0) {
        throw new HiveServerException("Query returned non-zero code: " + ret
            + ", cause: " + errorMessage, ret, SQLState);
      }
    }
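
    // Note for clients (illustrative): each execute() call replaces the
    // result state of the previous command (driver, response, temp output
    // file), so pending results should be drained via fetchOne()/fetchN()/
    // fetchAll() before the next execute().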

    /**
     * Should be called by the client at the end of a session.
     */
    public void clean() {
      if (driver != null) {
        driver.close();
        driver.destroy();
      }

      SessionState session = SessionState.get();
      if (session.getTmpOutputFile() != null) {
        session.getTmpOutputFile().delete();
      }
      pipeIn = null;
    }

    /**
     * Return the status information about the Map-Reduce cluster.
     */
    public HiveClusterStatus getClusterStatus() throws HiveServerException, TException {
      HiveClusterStatus hcs;
      try {
        Driver drv = new Driver();
        drv.init();

        ClusterStatus cs = drv.getClusterStatus();
        JobTrackerState state =
            JobTrackerState.valueOf(ShimLoader.getHadoopShims().getJobTrackerState(cs).name());

        hcs = new HiveClusterStatus(cs.getTaskTrackers(), cs.getMapTasks(),
            cs.getReduceTasks(), cs.getMaxMapTasks(), cs.getMaxReduceTasks(), state);
      } catch (Exception e) {
        LOG.error(e.toString());
        e.printStackTrace();
        HiveServerException ex = new HiveServerException();
        ex.setMessage("Unable to get cluster status: " + e.toString());
        throw ex;
      }
      return hcs;
    }

    /**
     * Return the Hive schema of the query result.
     */
    public Schema getSchema() throws HiveServerException, TException {
      if (!isHiveQuery) {
        Schema schema = response.getSchema();
        if (schema == null) {
          // Return an empty schema if the last command was not a Hive query.
          return new Schema();
        } else {
          return schema;
        }
      }

      assert driver != null : "getSchema() is called on a Hive query and driver is NULL.";

      try {
        Schema schema = driver.getSchema();
        if (schema == null) {
          schema = new Schema();
        }
        LOG.info("Returning schema: " + schema);
        return schema;
      } catch (Exception e) {
        LOG.error(e.toString());
        e.printStackTrace();
        HiveServerException ex = new HiveServerException();
        ex.setMessage("Unable to get schema: " + e.toString());
        throw ex;
      }
    }

    /**
     * Return the Thrift schema of the query result.
     */
    public Schema getThriftSchema() throws HiveServerException, TException {
      if (!isHiveQuery) {
        // Return an empty schema if the last command was not a Hive query.
        return new Schema();
      }

      assert driver != null : "getThriftSchema() is called on a Hive query and driver is NULL.";

      try {
        Schema schema = driver.getThriftSchema();
        if (schema == null) {
          schema = new Schema();
        }
        LOG.info("Returning schema: " + schema);
        return schema;
      } catch (Exception e) {
        LOG.error(e.toString());
        e.printStackTrace();
        HiveServerException ex = new HiveServerException();
        ex.setMessage("Unable to get schema: " + e.toString());
        throw ex;
      }
    }

    /**
     * Fetches the next row in a query result set.
     *
     * @return the next row in a query result set. null if there is no more
     *         row to fetch.
     */
    public String fetchOne() throws HiveServerException, TException {
      if (!isHiveQuery) {
        // For commands other than Hive queries, read one line from the
        // session's temp output file.
        List<String> results = new ArrayList<String>(1);
        readResults(results, 1);
        if (results.size() > 0) {
          return results.get(0);
        } else {
          // Throw an EOF exception.
          throw new HiveServerException("OK", 0, "");
        }
      }

      assert driver != null : "fetchOne() is called on a Hive query and driver is NULL.";

      ArrayList<String> result = new ArrayList<String>();
      driver.setMaxRows(1);
      try {
        if (driver.getResults(result)) {
          return result.get(0);
        }
        // TODO: Cannot return null here because thrift cannot handle nulls.
        // TODO: Returning an empty string for now. Need to figure out how to
        // TODO: return null in some other way.
        throw new HiveServerException("OK", 0, "");
        // return "";
      } catch (CommandNeedRetryException e) {
        HiveServerException ex = new HiveServerException();
        ex.setMessage(e.getMessage());
        throw ex;
      } catch (IOException e) {
        HiveServerException ex = new HiveServerException();
        ex.setMessage(e.getMessage());
        throw ex;
      }
    }
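
    // Note for clients (illustrative): because Thrift cannot return null
    // strings, end-of-results from fetchOne() is signaled by a
    // HiveServerException whose message is "OK" and whose error code is 0.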

    private void cleanTmpFile() {
      if (pipeIn != null) {
        SessionState session = SessionState.get();
        File tmp = session.getTmpOutputFile();
        tmp.delete();
        pipeIn = null;
      }
    }

    /**
     * Reads the temporary results of non-Hive (non-Driver) commands into the
     * given list of strings.
     *
     * @param results list of strings that receives the results
     * @param nLines number of lines to read at once. If it is <= 0, read all
     *          remaining lines.
     */
    private void readResults(List<String> results, int nLines) {
      if (pipeIn == null) {
        SessionState session = SessionState.get();
        File tmp = session.getTmpOutputFile();
        try {
          pipeIn = new BufferedReader(new FileReader(tmp));
        } catch (FileNotFoundException e) {
          LOG.error("File " + tmp + " not found. ", e);
          return;
        }
      }

      boolean readAll = false;
      for (int i = 0; i < nLines || nLines <= 0; ++i) {
        try {
          String line = pipeIn.readLine();
          if (line == null) {
            // Reached the end of the result file.
            readAll = true;
            break;
          } else {
            results.add(line);
          }
        } catch (IOException e) {
          LOG.error("Reading temp results encountered an exception: ", e);
          readAll = true;
        }
      }

      if (readAll) {
        cleanTmpFile();
      }
    }

    /**
     * Fetches numRows rows.
     *
     * @param numRows Number of rows to fetch.
     * @return A list of rows. The size of the list is numRows if there are at
     *         least numRows rows available to return. The size is smaller
     *         than numRows if there aren't enough rows. The list will be
     *         empty if there is no more row to fetch or numRows == 0.
     * @throws HiveServerException Invalid value for numRows (numRows < 0)
     */
    public List<String> fetchN(int numRows) throws HiveServerException, TException {
      if (numRows < 0) {
        HiveServerException ex = new HiveServerException();
        ex.setMessage("Invalid argument for number of rows: " + numRows);
        throw ex;
      }

      ArrayList<String> result = new ArrayList<String>();
      if (!isHiveQuery) {
        readResults(result, numRows);
        return result;
      }

      assert driver != null : "fetchN() is called on a Hive query and driver is NULL.";

      driver.setMaxRows(numRows);
      try {
        driver.getResults(result);
      } catch (CommandNeedRetryException e) {
        HiveServerException ex = new HiveServerException();
        ex.setMessage(e.getMessage());
        throw ex;
      } catch (IOException e) {
        HiveServerException ex = new HiveServerException();
        ex.setMessage(e.getMessage());
        throw ex;
      }
      return result;
    }

    /**
     * Fetches all the rows in a result set.
     *
     * @return All the rows in the result set of a query executed using the
     *         execute method.
     *
     *         TODO: Currently the server buffers all the rows before
     *         returning them to the client. Decide whether the buffering
     *         should be done in the client.
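     *         <p>
     *         A client can avoid this server-side buffering by paging with
     *         {@link #fetchN} instead (illustrative sketch relying on the
     *         documented fetchN contract; the batch size is arbitrary and
     *         process() is a hypothetical consumer):
     *
     *         <pre>
     *         List&lt;String&gt; batch;
     *         do {
     *           batch = client.fetchN(1000);
     *           process(batch);
     *         } while (batch.size() == 1000);
     *         </pre>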
     */
    public List<String> fetchAll() throws HiveServerException, TException {
      ArrayList<String> rows = new ArrayList<String>();
      ArrayList<String> result = new ArrayList<String>();

      if (!isHiveQuery) {
        // For commands other than Hive queries, return everything left in
        // the temp output file (readResults reads all lines when nLines <= 0).
        readResults(result, 0);
        return result;
      }

      try {
        while (driver.getResults(result)) {
          rows.addAll(result);
          result.clear();
        }
      } catch (CommandNeedRetryException e) {
        HiveServerException ex = new HiveServerException();
        ex.setMessage(e.getMessage());
        throw ex;
      } catch (IOException e) {
        HiveServerException ex = new HiveServerException();
        ex.setMessage(e.getMessage());
        throw ex;
      }
      return rows;
    }

    /**
     * Return the status of the server.
     */
    @Override
    public fb_status getStatus() {
      return fb_status.ALIVE;
    }

    /**
     * Return the version of the server software.
     */
    @Override
    public String getVersion() {
      return VERSION;
    }

    @Override
    public QueryPlan getQueryPlan() throws HiveServerException, TException {
      QueryPlan qp = new QueryPlan();
      if (!isHiveQuery) {
        return qp;
      }

      assert driver != null : "getQueryPlan() is called on a Hive query and driver is NULL.";

      // TODO: for now only return one query at a time; going forward, all
      // queries associated with a single statement will be returned in a
      // single QueryPlan.
      try {
        qp.addToQueries(driver.getQueryPlan());
      } catch (Exception e) {
        HiveServerException ex = new HiveServerException();
        ex.setMessage(e.toString());
        throw ex;
      }
      return qp;
    }
  }

  /**
   * ThriftHiveProcessorFactory. Creates a new handler, and thus a new
   * session, for each incoming Thrift connection.
   */
  public static class ThriftHiveProcessorFactory extends TProcessorFactory {
    private final HiveConf conf;

    public ThriftHiveProcessorFactory(TProcessor processor, HiveConf conf) {
      super(processor);
      this.conf = conf;
    }

    @Override
    public TProcessor getProcessor(TTransport trans) {
      try {
        Iface handler = new HiveServerHandler(new HiveConf(conf));
        return new ThriftHive.Processor(handler);
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }
  }

  /**
   * HiveServer specific CLI.
   */
  public static class HiveServerCli extends CommonCliOptions {
    private static final String OPTION_MAX_WORKER_THREADS = "maxWorkerThreads";
    private static final String OPTION_MIN_WORKER_THREADS = "minWorkerThreads";

    public int port = DEFAULT_HIVE_SERVER_PORT;
    public int minWorkerThreads = DEFAULT_MIN_WORKER_THREADS;
    public int maxWorkerThreads = DEFAULT_MAX_WORKER_THREADS;

    @SuppressWarnings("static-access")
    public HiveServerCli() {
      super("hiveserver", true);

      // -p port
      OPTIONS.addOption(OptionBuilder
          .hasArg()
          .withArgName("port")
          .withDescription("Hive Server port number, default:" + DEFAULT_HIVE_SERVER_PORT)
          .create('p'));

      // min worker thread count
      OPTIONS.addOption(OptionBuilder
          .hasArg()
          .withLongOpt(OPTION_MIN_WORKER_THREADS)
          .withDescription("minimum number of worker threads, default:"
              + DEFAULT_MIN_WORKER_THREADS)
          .create());

      // max worker thread count
      OPTIONS.addOption(OptionBuilder
          .hasArg()
          .withLongOpt(OPTION_MAX_WORKER_THREADS)
          .withDescription("maximum number of worker threads, default:"
              + DEFAULT_MAX_WORKER_THREADS)
          .create());
    }
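
    // Example invocations (illustrative):
    //   hiveserver -p 10000 --minWorkerThreads 50 --maxWorkerThreads 500
    //   hiveserver 10000 100    (deprecated positional "port [threads]" form)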

    @Override
    public void parse(String[] args) {
      super.parse(args);

      // Support the old syntax "hiveserver [port [threads]]" but complain.
      args = commandLine.getArgs();
      if (args.length >= 1) {
        // Complain about the deprecated syntax -- but still run.
        System.err.println(
            "This usage has been deprecated, consider using the new command "
                + "line syntax (run with -h to see usage information)");
        port = Integer.parseInt(args[0]);
      }
      if (args.length >= 2) {
        minWorkerThreads = Integer.parseInt(args[1]);
      }

      // Notice that command line options take precedence over the
      // deprecated (old style) naked args.
      if (commandLine.hasOption('p')) {
        port = Integer.parseInt(commandLine.getOptionValue('p'));
      } else {
        // Legacy handling.
        String hivePort = System.getenv("HIVE_PORT");
        if (hivePort != null) {
          port = Integer.parseInt(hivePort);
        }
      }
      if (commandLine.hasOption(OPTION_MIN_WORKER_THREADS)) {
        minWorkerThreads = Integer.parseInt(
            commandLine.getOptionValue(OPTION_MIN_WORKER_THREADS));
      }
      if (commandLine.hasOption(OPTION_MAX_WORKER_THREADS)) {
        maxWorkerThreads = Integer.parseInt(
            commandLine.getOptionValue(OPTION_MAX_WORKER_THREADS));
      }
    }
  }

  public static void main(String[] args) {
    try {
      HiveServerCli cli = new HiveServerCli();

      cli.parse(args);

      // NOTE: It is critical to do this prior to initializing log4j, otherwise
      // any log-specific settings via hiveconf will be ignored.
      Properties hiveconf = cli.addHiveconfToSystemProperties();

      // NOTE: It is critical to do this here so that log4j is reinitialized
      // before any of the other core hive classes are loaded.
      try {
        LogUtils.initHiveLog4j();
      } catch (LogInitializationException e) {
        HiveServerHandler.LOG.warn(e.getMessage());
      }

      HiveConf conf = new HiveConf(HiveServerHandler.class);
      ServerUtils.cleanUpScratchDir(conf);
      TServerTransport serverTransport = new TServerSocket(cli.port);

      // Set all properties specified on the command line.
      for (Map.Entry<Object, Object> item : hiveconf.entrySet()) {
        conf.set((String) item.getKey(), (String) item.getValue());
      }

      ThriftHiveProcessorFactory hfactory = new ThriftHiveProcessorFactory(null, conf);
      TThreadPoolServer.Args sargs = new TThreadPoolServer.Args(serverTransport)
          .processorFactory(hfactory)
          .transportFactory(new TTransportFactory())
          .protocolFactory(new TBinaryProtocol.Factory())
          .minWorkerThreads(cli.minWorkerThreads)
          .maxWorkerThreads(cli.maxWorkerThreads);

      TServer server = new TThreadPoolServer(sargs);

      String msg = "Starting hive server on port " + cli.port
          + " with " + cli.minWorkerThreads + " min worker threads and "
          + cli.maxWorkerThreads + " max worker threads";
      HiveServerHandler.LOG.info(msg);
      if (cli.isVerbose()) {
        System.err.println(msg);
      }

      server.serve();
    } catch (Exception x) {
      x.printStackTrace();
    }
  }
}
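
// A minimal sketch of a remote client talking to this server over Thrift
// (illustrative only; assumes the generated ThriftHive.Client and the Thrift
// runtime are on the classpath, and a server listening on localhost:10000):
//
//   TTransport transport = new TSocket("localhost", 10000);
//   transport.open();
//   ThriftHive.Client client =
//       new ThriftHive.Client(new TBinaryProtocol(transport));
//   client.execute("SHOW TABLES");
//   List<String> rows = client.fetchAll();
//   transport.close();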