/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.service.cli.operation;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.security.PrivilegedExceptionAction;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.CharEncoding;
import org.apache.hadoop.hive.common.LogUtils;
import org.apache.hadoop.hive.common.metrics.common.Metrics;
import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
import org.apache.hadoop.hive.common.metrics.common.MetricsScope;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Schema;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.QueryDisplay;
import org.apache.hadoop.hive.ql.QueryInfo;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.ExplainTask;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hive.service.cli.FetchOrientation;
import org.apache.hive.service.cli.HiveSQLException;
import org.apache.hive.service.cli.OperationState;
import org.apache.hive.service.cli.RowSet;
import org.apache.hive.service.cli.RowSetFactory;
import org.apache.hive.service.cli.TableSchema;
import org.apache.hive.service.cli.session.HiveSession;
import org.apache.hive.service.server.ThreadWithGarbageCleanup;
import org.codehaus.jackson.JsonGenerationException;
import org.codehaus.jackson.map.JsonMappingException;
import org.codehaus.jackson.map.ObjectMapper;

/**
 * SQLOperation.
 *
 */
@SuppressWarnings("deprecation")
public class SQLOperation extends ExecuteStatementOperation {
  private Driver driver = null;
  private CommandProcessorResponse response;
  private TableSchema resultSchema = null;
  private Schema mResultSchema = null;
  private AbstractSerDe serde = null;
  private boolean fetchStarted = false;
  private volatile MetricsScope currentSQLStateScope;
  private QueryInfo queryInfo;
  private long queryTimeout;
  private ScheduledExecutorService timeoutExecutor;
  private final boolean runAsync;

  /**
   * A map to track the number of queries currently running for each user.
   */
  private static Map<String, AtomicInteger> userQueries = new HashMap<String, AtomicInteger>();
  private static final String ACTIVE_SQL_USER = MetricsConstant.SQL_OPERATION_PREFIX + "active_user";
  private MetricsScope submittedQryScp;

  public SQLOperation(HiveSession parentSession, String statement, Map<String, String> confOverlay,
      boolean runInBackground, long queryTimeout) {
    // TODO: call setRemoteUser in ExecuteStatementOperation or higher.
    super(parentSession, statement, confOverlay, runInBackground);
    this.runAsync = runInBackground;
    this.queryTimeout = queryTimeout;
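    // The server-level timeout (HIVE_QUERY_TIMEOUT_SECONDS) caps the client-requested timeout:
    // if it is set and smaller, it takes effect instead.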
    long timeout = HiveConf.getTimeVar(queryState.getConf(),
        HiveConf.ConfVars.HIVE_QUERY_TIMEOUT_SECONDS, TimeUnit.SECONDS);
    if (timeout > 0 && (queryTimeout <= 0 || timeout < queryTimeout)) {
      this.queryTimeout = timeout;
    }

    setupSessionIO(parentSession.getSessionState());

    queryInfo = new QueryInfo(getState().toString(), getParentSession().getUserName(),
        getExecutionEngine(), getHandle().getHandleIdentifier().toString());

    Metrics metrics = MetricsFactory.getInstance();
    if (metrics != null) {
      submittedQryScp = metrics.createScope(MetricsConstant.HS2_SUBMITTED_QURIES);
    }
  }

  @Override
  public boolean shouldRunAsync() {
    return runAsync;
  }

  private void setupSessionIO(SessionState sessionState) {
    try {
      sessionState.in = null; // hive server's session input stream is not used
      sessionState.out = new PrintStream(System.out, true, CharEncoding.UTF_8);
      sessionState.info = new PrintStream(System.err, true, CharEncoding.UTF_8);
      sessionState.err = new PrintStream(System.err, true, CharEncoding.UTF_8);
    } catch (UnsupportedEncodingException e) {
      LOG.error("Error creating PrintStream", e);
      e.printStackTrace();
      sessionState.out = null;
      sessionState.info = null;
      sessionState.err = null;
    }
  }

  /**
   * Compile the query and extract metadata
   *
   * @throws HiveSQLException
   */
  public void prepare(QueryState queryState) throws HiveSQLException {
    setState(OperationState.RUNNING);
    try {
      driver = new Driver(queryState, getParentSession().getUserName(), queryInfo);

      // Start the timer thread for cancelling the query when query timeout is reached
      // queryTimeout == 0 means no timeout
      if (queryTimeout > 0) {
        timeoutExecutor = new ScheduledThreadPoolExecutor(1);
        Runnable timeoutTask = new Runnable() {
          @Override
          public void run() {
            try {
              String queryId = queryState.getQueryId();
              LOG.info("Query timed out after: " + queryTimeout
                  + " seconds. Cancelling the execution now: " + queryId);
              SQLOperation.this.cancel(OperationState.TIMEDOUT);
            } catch (HiveSQLException e) {
              LOG.error("Error cancelling the query after timeout: " + queryTimeout + " seconds", e);
            } finally {
              // Stop
              timeoutExecutor.shutdown();
            }
          }
        };
        timeoutExecutor.schedule(timeoutTask, queryTimeout, TimeUnit.SECONDS);
      }

      queryInfo.setQueryDisplay(driver.getQueryDisplay());

      // set the operation handle information in Driver, so that thrift API users
      // can use the operation handle they receive, to lookup query information in
      // Yarn ATS
      String guid64 = Base64.encodeBase64URLSafeString(getHandle().getHandleIdentifier()
          .toTHandleIdentifier().getGuid()).trim();
      driver.setOperationId(guid64);

      // In Hive server mode, we are not able to retry in the FetchTask
      // case, when calling fetch queries since execute() has returned.
      // For now, we disable the test attempts.
      driver.setTryCount(Integer.MAX_VALUE);
      response = driver.compileAndRespond(statement);
      if (0 != response.getResponseCode()) {
        throw toSQLException("Error while compiling statement", response);
      }

      mResultSchema = driver.getSchema();

      // hasResultSet should be true only if the query has a FetchTask
      // "explain" is an exception for now
      if (driver.getPlan().getFetchTask() != null) {
        // Schema has to be set
        if (mResultSchema == null || !mResultSchema.isSetFieldSchemas()) {
          throw new HiveSQLException("Error compiling query: Schema and FieldSchema "
              + "should be set when query plan has a FetchTask");
        }
        resultSchema = new TableSchema(mResultSchema);
        setHasResultSet(true);
      } else {
        setHasResultSet(false);
      }

      // Set hasResultSet true if the plan has ExplainTask
      // TODO explain should use a FetchTask for reading
      for (Task<? extends Serializable> task : driver.getPlan().getRootTasks()) {
        if (task.getClass() == ExplainTask.class) {
          resultSchema = new TableSchema(mResultSchema);
          setHasResultSet(true);
          break;
        }
      }
    } catch (HiveSQLException e) {
      setState(OperationState.ERROR);
      throw e;
    } catch (Throwable e) {
      setState(OperationState.ERROR);
      throw new HiveSQLException("Error running query: " + e.toString(), e);
    }
  }

  private void runQuery() throws HiveSQLException {
    try {
      OperationState opState = getStatus().getState();
      // Operation may have been cancelled by another thread
      if (opState.isTerminal()) {
        LOG.info("Not running the query. Operation is already in terminal state: " + opState
            + ", perhaps cancelled due to query timeout or by another thread.");
        return;
      }
      // In Hive server mode, we are not able to retry in the FetchTask
      // case, when calling fetch queries since execute() has returned.
      // For now, we disable the test attempts.
      driver.setTryCount(Integer.MAX_VALUE);
      response = driver.run();
      if (0 != response.getResponseCode()) {
        throw toSQLException("Error while processing statement", response);
      }
    } catch (Throwable e) {
      /**
       * If the operation was cancelled by another thread, or the execution timed out, Driver#run
       * may return a non-zero response code. We will simply return if the operation state is
       * CANCELED, TIMEDOUT or CLOSED, otherwise throw an exception
       */
      if ((getStatus().getState() == OperationState.CANCELED)
          || (getStatus().getState() == OperationState.TIMEDOUT)
          || (getStatus().getState() == OperationState.CLOSED)) {
        LOG.warn("Ignore exception in terminal state", e);
        return;
      }
      setState(OperationState.ERROR);
      if (e instanceof HiveSQLException) {
        throw (HiveSQLException) e;
      } else {
        throw new HiveSQLException("Error running query: " + e.toString(), e);
      }
    }
    setState(OperationState.FINISHED);
  }

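  // Entry point for executing the statement: in synchronous mode the query is compiled and run
  // on the calling (handler) thread; in asynchronous mode a BackgroundWork runnable is submitted
  // to the session's background pool, and when HIVE_SERVER2_ASYNC_EXEC_ASYNC_COMPILE is enabled
  // the compilation step itself is also deferred to that background thread.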
  @Override
  public void runInternal() throws HiveSQLException {
    setState(OperationState.PENDING);

    boolean runAsync = shouldRunAsync();
    final boolean asyncPrepare = runAsync
        && HiveConf.getBoolVar(queryState.getConf(),
            HiveConf.ConfVars.HIVE_SERVER2_ASYNC_EXEC_ASYNC_COMPILE);
    if (!asyncPrepare) {
      prepare(queryState);
    }
    if (!runAsync) {
      runQuery();
    } else {
      // We'll pass ThreadLocals to the background thread from the foreground (handler) thread.
      // 1) The ThreadLocal Hive object needs to be set in the background thread.
      // 2) The metastore client in Hive is associated with the right user.
      // 3) The current UGI will get used by the metastore when the metastore is in embedded mode.
      Runnable work = new BackgroundWork(getCurrentUGI(), parentSession.getSessionHive(),
          SessionState.getPerfLogger(), SessionState.get(), asyncPrepare);

      try {
        // This submit blocks if no background threads are available to run this operation
        Future<?> backgroundHandle = getParentSession().submitBackgroundOperation(work);
        setBackgroundHandle(backgroundHandle);
      } catch (RejectedExecutionException rejected) {
        setState(OperationState.ERROR);
        throw new HiveSQLException("The background threadpool cannot accept"
            + " new task for execution, please retry the operation", rejected);
      }
    }
  }

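  /**
   * Runnable that executes the query on a background thread. It re-establishes the foreground
   * thread's context (Hive object, SessionState, PerfLogger, logging context) on the background
   * thread and runs the query under the UGI captured from the handler thread via doAs.
   */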
  private final class BackgroundWork implements Runnable {
    private final UserGroupInformation currentUGI;
    private final Hive parentHive;
    private final PerfLogger parentPerfLogger;
    private final SessionState parentSessionState;
    private final boolean asyncPrepare;

    private BackgroundWork(UserGroupInformation currentUGI, Hive parentHive,
        PerfLogger parentPerfLogger, SessionState parentSessionState, boolean asyncPrepare) {
      this.currentUGI = currentUGI;
      this.parentHive = parentHive;
      this.parentPerfLogger = parentPerfLogger;
      this.parentSessionState = parentSessionState;
      this.asyncPrepare = asyncPrepare;
    }

    @Override
    public void run() {
      PrivilegedExceptionAction<Object> doAsAction = new PrivilegedExceptionAction<Object>() {
        @Override
        public Object run() throws HiveSQLException {
          Hive.set(parentHive);
          // TODO: can this result in cross-thread reuse of session state?
          SessionState.setCurrentSessionState(parentSessionState);
          PerfLogger.setPerfLogger(parentPerfLogger);
          LogUtils.registerLoggingContext(queryState.getConf());
          try {
            if (asyncPrepare) {
              prepare(queryState);
            }
            runQuery();
          } catch (HiveSQLException e) {
            // TODO: why do we invent our own error path on top of the one from Future.get?
            setOperationException(e);
            LOG.error("Error running hive query: ", e);
          } finally {
            LogUtils.unregisterLoggingContext();
          }
          return null;
        }
      };

      try {
        currentUGI.doAs(doAsAction);
      } catch (Exception e) {
        setOperationException(new HiveSQLException(e));
        LOG.error("Error running hive query as user : " + currentUGI.getShortUserName(), e);
      } finally {
        /**
         * We'll cache the ThreadLocal RawStore object for this background thread for an orderly
         * cleanup when this thread is garbage collected later.
         * @see org.apache.hive.service.server.ThreadWithGarbageCleanup#finalize()
         */
        if (ThreadWithGarbageCleanup.currentThread() instanceof ThreadWithGarbageCleanup) {
          ThreadWithGarbageCleanup currentThread =
              (ThreadWithGarbageCleanup) ThreadWithGarbageCleanup.currentThread();
          currentThread.cacheThreadLocalRawStore();
        }
      }
    }
  }

  /**
   * Returns the current UGI on the stack
   *
   * @return UserGroupInformation
   *
   * @throws HiveSQLException
   */
  private UserGroupInformation getCurrentUGI() throws HiveSQLException {
    try {
      return Utils.getUGI();
    } catch (Exception e) {
      throw new HiveSQLException("Unable to get current user", e);
    }
  }

  private synchronized void cleanup(OperationState state) throws HiveSQLException {
    setState(state);

    // Need to shut down the background thread gracefully; driver.close will inform the background
    // thread that a cancel request has been sent.
    if (shouldRunAsync() && state != OperationState.CANCELED && state != OperationState.TIMEDOUT) {
      Future<?> backgroundHandle = getBackgroundHandle();
      if (backgroundHandle != null) {
        boolean success = backgroundHandle.cancel(true);
        String queryId = queryState.getQueryId();
        if (success) {
          LOG.info("The running operation has been successfully interrupted: " + queryId);
        } else if (state == OperationState.CANCELED) {
          LOG.info("The running operation could not be cancelled, typically because it has already completed normally: " + queryId);
        }
      }
    }

    if (driver != null) {
      driver.close();
      driver.destroy();
    }
    driver = null;

    SessionState ss = SessionState.get();
    if (ss == null) {
      LOG.warn("Operation seems to be in invalid state, SessionState is null");
    } else {
      ss.deleteTmpOutputFile();
      ss.deleteTmpErrOutputFile();
    }

    // Shut down the timeout thread, if any, while closing this operation
    if ((timeoutExecutor != null) && (state != OperationState.TIMEDOUT) && (state.isTerminal())) {
      timeoutExecutor.shutdownNow();
    }
  }

  @Override
  public void cancel(OperationState stateAfterCancel) throws HiveSQLException {
    String queryId = null;
    if (stateAfterCancel == OperationState.CANCELED) {
      queryId = queryState.getQueryId();
      LOG.info("Cancelling the query execution: " + queryId);
    }
    cleanup(stateAfterCancel);
    cleanupOperationLog();
    if (stateAfterCancel == OperationState.CANCELED) {
      LOG.info("Successfully cancelled the query: " + queryId);
    }
  }

  @Override
  public void close() throws HiveSQLException {
    cleanup(OperationState.CLOSED);
    cleanupOperationLog();
  }

  @Override
  public TableSchema getResultSetSchema() throws HiveSQLException {
    // Since compilation is always a blocking RPC call, and the schema is ready after compilation,
    // we can return it once we are in the RUNNING state.
    assertState(new ArrayList<OperationState>(
        Arrays.asList(OperationState.RUNNING, OperationState.FINISHED)));
    if (resultSchema == null) {
      resultSchema = new TableSchema(driver.getSchema());
    }
    return resultSchema;
  }

  private transient final List<Object> convey = new ArrayList<Object>();

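  // Fetches the next batch of rows from the Driver. When the plan serialized its results with
  // ThriftJDBCBinarySerDe, a single pre-encoded blob is fetched per call; otherwise each row is
  // decoded from its string form (see decodeFromString) before being added to the RowSet.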
  @Override
  public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException {
    validateDefaultFetchOrientation(orientation);
    assertState(new ArrayList<OperationState>(Arrays.asList(OperationState.FINISHED)));

    FetchTask fetchTask = driver.getFetchTask();
    boolean isBlobBased = false;

    if (fetchTask != null && fetchTask.getWork().isUsingThriftJDBCBinarySerDe()) {
      // Just fetch one blob if we've serialized thrift objects in final tasks
      maxRows = 1;
      isBlobBased = true;
    }
    driver.setMaxRows((int) maxRows);
    RowSet rowSet = RowSetFactory.create(resultSchema, getProtocolVersion(), isBlobBased);
    try {
      /* If the client is requesting fetch-from-start and it's not the first time reading from
       * this operation, then reset the fetch position to the beginning.
       */
      if (orientation.equals(FetchOrientation.FETCH_FIRST) && fetchStarted) {
        driver.resetFetch();
      }
      fetchStarted = true;
      driver.setMaxRows((int) maxRows);
      if (driver.getResults(convey)) {
        return decode(convey, rowSet);
      }
      return rowSet;
    } catch (IOException e) {
      throw new HiveSQLException(e);
    } catch (CommandNeedRetryException e) {
      throw new HiveSQLException(e);
    } catch (Exception e) {
      throw new HiveSQLException(e);
    } finally {
      convey.clear();
    }
  }

  @Override
  public String getTaskStatus() throws HiveSQLException {
    if (driver != null) {
      List<QueryDisplay.TaskDisplay> statuses = driver.getQueryDisplay().getTaskDisplays();
      if (statuses != null) {
        ByteArrayOutputStream out = null;
        try {
          ObjectMapper mapper = new ObjectMapper();
          out = new ByteArrayOutputStream();
          mapper.writeValue(out, statuses);
          return out.toString("UTF-8");
        } catch (JsonGenerationException e) {
          throw new HiveSQLException(e);
        } catch (JsonMappingException e) {
          throw new HiveSQLException(e);
        } catch (IOException e) {
          throw new HiveSQLException(e);
        } finally {
          if (out != null) {
            try {
              out.close();
            } catch (IOException e) {
              throw new HiveSQLException(e);
            }
          }
        }
      }
    }
    // Driver not initialized
    return null;
  }

  private RowSet decode(List<Object> rows, RowSet rowSet) throws Exception {
    if (driver.isFetchingTable()) {
      return prepareFromRow(rows, rowSet);
    }
    return decodeFromString(rows, rowSet);
  }

  // already encoded to thrift-able object in ThriftFormatter
  private RowSet prepareFromRow(List<Object> rows, RowSet rowSet) throws Exception {
    for (Object row : rows) {
      rowSet.addRow((Object[]) row);
    }
    return rowSet;
  }

  private RowSet decodeFromString(List<Object> rows, RowSet rowSet)
      throws SQLException, SerDeException {
    getSerDe();
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();

    Object[] deserializedFields = new Object[fieldRefs.size()];
    Object rowObj;
    ObjectInspector fieldOI;

    int protocol = getProtocolVersion().getValue();
    for (Object rowString : rows) {
      try {
        rowObj = serde.deserialize(new BytesWritable(((String) rowString).getBytes("UTF-8")));
      } catch (UnsupportedEncodingException e) {
        throw new SerDeException(e);
      }
      for (int i = 0; i < fieldRefs.size(); i++) {
        StructField fieldRef = fieldRefs.get(i);
        fieldOI = fieldRef.getFieldObjectInspector();
        Object fieldData = soi.getStructFieldData(rowObj, fieldRef);
        deserializedFields[i] = SerDeUtils.toThriftPayload(fieldData, fieldOI, protocol);
      }
      rowSet.addRow(deserializedFields);
    }
    return rowSet;
  }

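  /**
   * Lazily creates the SerDe used to decode string-serialized rows: a LazySimpleSerDe configured
   * with the column names and types taken from the compiled result schema.
   */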
  private AbstractSerDe getSerDe() throws SQLException {
    if (serde != null) {
      return serde;
    }
    try {
      List<FieldSchema> fieldSchemas = mResultSchema.getFieldSchemas();
      StringBuilder namesSb = new StringBuilder();
      StringBuilder typesSb = new StringBuilder();

      if (fieldSchemas != null && !fieldSchemas.isEmpty()) {
        for (int pos = 0; pos < fieldSchemas.size(); pos++) {
          if (pos != 0) {
            namesSb.append(",");
            typesSb.append(",");
          }
          namesSb.append(fieldSchemas.get(pos).getName());
          typesSb.append(fieldSchemas.get(pos).getType());
        }
      }
      String names = namesSb.toString();
      String types = typesSb.toString();

      serde = new LazySimpleSerDe();
      Properties props = new Properties();
      if (names.length() > 0) {
        LOG.debug("Column names: " + names);
        props.setProperty(serdeConstants.LIST_COLUMNS, names);
      }
      if (types.length() > 0) {
        LOG.debug("Column types: " + types);
        props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types);
      }
      SerDeUtils.initializeSerDe(serde, queryState.getConf(), props, null);
    } catch (Exception ex) {
      ex.printStackTrace();
      throw new SQLException("Could not create ResultSet: " + ex.getMessage(), ex);
    }
    return serde;
  }

  /**
   * Get summary information of this SQLOperation for display in WebUI.
   */
  public QueryInfo getQueryInfo() {
    return queryInfo;
  }

  @Override
  protected void onNewState(OperationState state, OperationState prevState) {
    super.onNewState(state, prevState);

    currentSQLStateScope = updateOperationStateMetrics(currentSQLStateScope,
        MetricsConstant.SQL_OPERATION_PREFIX,
        MetricsConstant.COMPLETED_SQL_OPERATION_PREFIX, state);

    Metrics metrics = MetricsFactory.getInstance();
    if (metrics != null) {
      // New state is changed to running from something else (user is active)
      if (state == OperationState.RUNNING && prevState != state) {
        incrementUserQueries(metrics);
      }
      // New state is not running (user not active) any more
      if (prevState == OperationState.RUNNING && prevState != state) {
        decrementUserQueries(metrics);
      }
    }

    if (state == OperationState.FINISHED || state == OperationState.CANCELED
        || state == OperationState.ERROR) {
      // update runtime
      queryInfo.setRuntime(getOperationComplete() - getOperationStart());
      if (metrics != null && submittedQryScp != null) {
        metrics.endScope(submittedQryScp);
      }
    }

    if (state == OperationState.CLOSED) {
      queryInfo.setEndTime();
    } else {
      // The CLOSED state is not interesting; the state before it (FINISHED, ERROR) is.
      queryInfo.updateState(state.toString());
    }

    if (state == OperationState.ERROR) {
      markQueryMetric(MetricsFactory.getInstance(), MetricsConstant.HS2_FAILED_QUERIES);
    }

    if (state == OperationState.FINISHED) {
      markQueryMetric(MetricsFactory.getInstance(), MetricsConstant.HS2_SUCCEEDED_QUERIES);
    }
  }

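  // Tracks how many queries each user currently has in the RUNNING state; the ACTIVE_SQL_USER
  // counter is incremented when a user's first query starts running and decremented once their
  // last running query finishes.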
  private void incrementUserQueries(Metrics metrics) {
    String username = parentSession.getUserName();
    if (username != null) {
      synchronized (userQueries) {
        AtomicInteger count = userQueries.get(username);
        if (count == null) {
          count = new AtomicInteger(0);
          AtomicInteger prev = userQueries.put(username, count);
          if (prev == null) {
            metrics.incrementCounter(ACTIVE_SQL_USER);
          } else {
            count = prev;
          }
        }
        count.incrementAndGet();
      }
    }
  }

  private void decrementUserQueries(Metrics metrics) {
    String username = parentSession.getUserName();
    if (username != null) {
      synchronized (userQueries) {
        AtomicInteger count = userQueries.get(username);
        if (count != null && count.decrementAndGet() <= 0) {
          metrics.decrementCounter(ACTIVE_SQL_USER);
          userQueries.remove(username);
        }
      }
    }
  }

  private void markQueryMetric(Metrics metric, String name) {
    if (metric != null) {
      metric.markMeter(name);
    }
  }

  public String getExecutionEngine() {
    return queryState.getConf().getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
  }
}