/*
* Copyright © 2014-2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.explore.service.hive;
import co.cask.cdap.api.dataset.Dataset;
import co.cask.cdap.api.dataset.DatasetSpecification;
import co.cask.cdap.api.dataset.lib.PartitionDetail;
import co.cask.cdap.api.dataset.lib.TimePartitionedFileSet;
import co.cask.cdap.app.runtime.scheduler.SchedulerQueueResolver;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.ConfigurationUtil;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.data.dataset.SystemDatasetInstantiator;
import co.cask.cdap.data.dataset.SystemDatasetInstantiatorFactory;
import co.cask.cdap.data2.dataset2.DatasetFramework;
import co.cask.cdap.data2.transaction.stream.StreamAdmin;
import co.cask.cdap.data2.transaction.stream.StreamConfig;
import co.cask.cdap.explore.service.Explore;
import co.cask.cdap.explore.service.ExploreException;
import co.cask.cdap.explore.service.ExploreService;
import co.cask.cdap.explore.service.ExploreServiceUtils;
import co.cask.cdap.explore.service.ExploreTableManager;
import co.cask.cdap.explore.service.HandleNotFoundException;
import co.cask.cdap.explore.service.HiveStreamRedirector;
import co.cask.cdap.explore.service.MetaDataInfo;
import co.cask.cdap.explore.service.TableNotFoundException;
import co.cask.cdap.explore.utils.ExploreTableNaming;
import co.cask.cdap.hive.context.CConfCodec;
import co.cask.cdap.hive.context.ContextManager;
import co.cask.cdap.hive.context.HConfCodec;
import co.cask.cdap.hive.context.TxnCodec;
import co.cask.cdap.hive.datasets.DatasetStorageHandler;
import co.cask.cdap.hive.stream.StreamStorageHandler;
import co.cask.cdap.proto.ColumnDesc;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.QueryHandle;
import co.cask.cdap.proto.QueryInfo;
import co.cask.cdap.proto.QueryResult;
import co.cask.cdap.proto.QueryStatus;
import co.cask.cdap.proto.TableInfo;
import co.cask.cdap.proto.TableNameInfo;
import co.cask.cdap.store.NamespaceStore;
import co.cask.tephra.Transaction;
import co.cask.tephra.TransactionSystemClient;
import com.google.common.base.Charsets;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.base.Throwables;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import com.google.common.reflect.TypeToken;
import com.google.common.util.concurrent.AbstractIdleService;
import com.google.gson.Gson;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hive.service.auth.HiveAuthFactory;
import org.apache.hive.service.cli.CLIService;
import org.apache.hive.service.cli.ColumnDescriptor;
import org.apache.hive.service.cli.FetchOrientation;
import org.apache.hive.service.cli.GetInfoType;
import org.apache.hive.service.cli.GetInfoValue;
import org.apache.hive.service.cli.HiveSQLException;
import org.apache.hive.service.cli.OperationHandle;
import org.apache.hive.service.cli.SessionHandle;
import org.apache.hive.service.cli.TableSchema;
import org.apache.hive.service.cli.thrift.TColumnValue;
import org.apache.thrift.TException;
import org.apache.twill.common.Threads;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.lang.ref.Reference;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.WeakReference;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.nio.file.attribute.FileAttribute;
import java.nio.file.attribute.PosixFilePermission;
import java.nio.file.attribute.PosixFilePermissions;
import java.sql.SQLException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import javax.annotation.Nullable;
/**
* Defines common functionality used by different HiveExploreServices. The common functionality includes
* starting/stopping transactions, serializing configuration and saving operation information.
*
* Overridden {@link co.cask.cdap.explore.service.Explore} methods also call {@code startAndWait()},
* which effectively defers starting this {@link com.google.common.util.concurrent.Service} until the
* first call to an explore method. This supports {@link Constants.Explore#START_ON_DEMAND},
* which, if true, does not start the {@link ExploreService} when the explore HTTP services are started.
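*
* <p>A minimal usage sketch (hypothetical caller): since every explore method first calls
* {@code startAndWait()}, callers never need to start the service explicitly.
* <pre>{@code
* ExploreService explore = ...; // e.g. obtained through injection
* QueryHandle handle = explore.getTables(null, null, "%", null); // first call triggers startUp()
* }</pre>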
*/
public abstract class BaseHiveExploreService extends AbstractIdleService implements ExploreService {
private static final Logger LOG = LoggerFactory.getLogger(BaseHiveExploreService.class);
private static final Gson GSON = new Gson();
private static final int PREVIEW_COUNT = 5;
private static final long METASTORE_CLIENT_CLEANUP_PERIOD = 60;
public static final String HIVE_METASTORE_TOKEN_KEY = "hive.metastore.token.signature";
public static final String SPARK_YARN_DIST_FILES = "spark.yarn.dist.files";
private final CConfiguration cConf;
private final Configuration hConf;
private final TransactionSystemClient txClient;
private final SchedulerQueueResolver schedulerQueueResolver;
// Handles that are running, or whose results have not yet been completely fetched; they have a longer timeout.
private final Cache<QueryHandle, OperationInfo> activeHandleCache;
// Handles that have no more results to be fetched; they can be timed out aggressively.
private final Cache<QueryHandle, InactiveOperationInfo> inactiveHandleCache;
private final CLIService cliService;
private final ScheduledExecutorService scheduledExecutorService;
private final long cleanupJobSchedule;
private final File previewsDir;
private final ScheduledExecutorService metastoreClientsExecutorService;
private final StreamAdmin streamAdmin;
private final DatasetFramework datasetFramework;
private final ExploreTableManager exploreTableManager;
private final SystemDatasetInstantiatorFactory datasetInstantiatorFactory;
private final ExploreTableNaming tableNaming;
private final ThreadLocal<Supplier<IMetaStoreClient>> metastoreClientLocal;
// The following two fields track GC'ed metastore clients so that close() can be called on them.
private final Map<Reference<? extends Supplier<IMetaStoreClient>>, IMetaStoreClient> metastoreClientReferences;
private final ReferenceQueue<Supplier<IMetaStoreClient>> metastoreClientReferenceQueue;
private final Map<String, String> sparkConf = new HashMap<>();
protected abstract QueryStatus doFetchStatus(OperationHandle handle) throws HiveSQLException, ExploreException,
HandleNotFoundException;
protected abstract OperationHandle doExecute(SessionHandle sessionHandle, String statement)
throws HiveSQLException, ExploreException;
protected BaseHiveExploreService(TransactionSystemClient txClient, DatasetFramework datasetFramework,
CConfiguration cConf, Configuration hConf,
File previewsDir, StreamAdmin streamAdmin, NamespaceStore store,
SystemDatasetInstantiatorFactory datasetInstantiatorFactory,
ExploreTableNaming tableNaming) {
this.cConf = cConf;
this.hConf = hConf;
this.schedulerQueueResolver = new SchedulerQueueResolver(cConf, store);
this.previewsDir = previewsDir;
this.metastoreClientLocal = new ThreadLocal<>();
this.metastoreClientReferences = Maps.newConcurrentMap();
this.metastoreClientReferenceQueue = new ReferenceQueue<>();
this.datasetFramework = datasetFramework;
this.streamAdmin = streamAdmin;
this.exploreTableManager = new ExploreTableManager(this, datasetInstantiatorFactory, new ExploreTableNaming());
this.datasetInstantiatorFactory = datasetInstantiatorFactory;
this.tableNaming = tableNaming;
// Create a daemon thread to periodically collect metastore clients that are no longer in use and call close() on them.
this.metastoreClientsExecutorService =
Executors.newSingleThreadScheduledExecutor(Threads.createDaemonThreadFactory("metastore-client-gc"));
this.scheduledExecutorService =
Executors.newSingleThreadScheduledExecutor(Threads.createDaemonThreadFactory("explore-handle-timeout"));
this.activeHandleCache =
CacheBuilder.newBuilder()
.expireAfterWrite(cConf.getLong(Constants.Explore.ACTIVE_OPERATION_TIMEOUT_SECS), TimeUnit.SECONDS)
.removalListener(new ActiveOperationRemovalHandler(this, scheduledExecutorService))
.build();
this.inactiveHandleCache =
CacheBuilder.newBuilder()
.expireAfterWrite(cConf.getLong(Constants.Explore.INACTIVE_OPERATION_TIMEOUT_SECS), TimeUnit.SECONDS)
.build();
this.cliService = createCLIService();
this.txClient = txClient;
ContextManager.saveContext(datasetFramework, streamAdmin, datasetInstantiatorFactory);
cleanupJobSchedule = cConf.getLong(Constants.Explore.CLEANUP_JOB_SCHEDULE_SECS);
LOG.info("Active handle timeout = {} secs", cConf.getLong(Constants.Explore.ACTIVE_OPERATION_TIMEOUT_SECS));
LOG.info("Inactive handle timeout = {} secs", cConf.getLong(Constants.Explore.INACTIVE_OPERATION_TIMEOUT_SECS));
LOG.info("Cleanup job schedule = {} secs", cleanupJobSchedule);
}
protected CLIService createCLIService() {
return new CLIService(null);
}
protected HiveConf getHiveConf() {
HiveConf conf = new HiveConf();
// Read delegation token if security is enabled.
if (UserGroupInformation.isSecurityEnabled()) {
conf.set(HIVE_METASTORE_TOKEN_KEY, HiveAuthFactory.HS2_CLIENT_TOKEN);
// mapreduce.job.credentials.binary is added by Hive only if Kerberos credentials are present and
// impersonation is enabled. However, in our case we don't have Kerberos credentials for the Explore
// service, so it will not be added automatically by Hive; instead, we have to add it ourselves.
// TODO: When Explore does secure impersonation this has to be the tokens of the user,
// TODO: ... and not the tokens of the service itself.
String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
if (hadoopAuthToken != null) {
conf.set("mapreduce.job.credentials.binary", hadoopAuthToken);
if ("tez".equals(conf.get("hive.execution.engine"))) {
// Add token file location property for tez if engine is tez
conf.set("tez.credentials.path", hadoopAuthToken);
}
}
}
// Since we use a delegation token in Hive, unset the SPNEGO authentication settings if they are
// enabled. See CDAP-3452 for details.
conf.unset("hive.server2.authentication.spnego.keytab");
conf.unset("hive.server2.authentication.spnego.principal");
return conf;
}
protected CLIService getCliService() {
return cliService;
}
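/**
* Returns the metastore client for the current thread, creating it on first use. The client is
* held through a thread-local {@link Supplier}; a weak reference to that supplier, enqueued on a
* {@link ReferenceQueue} when the thread goes away, lets the periodic cleanup task close the
* client afterwards.
*/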
private IMetaStoreClient getMetaStoreClient() throws ExploreException {
if (metastoreClientLocal.get() == null) {
try {
IMetaStoreClient client = new HiveMetaStoreClient(getHiveConf());
Supplier<IMetaStoreClient> supplier = Suppliers.ofInstance(client);
metastoreClientLocal.set(supplier);
// We use GC of the supplier as the signal that a thread is gone:
// the supplier is set into the thread local, which will get GC'ed when the thread dies.
// Since the key in the metastoreClientReferences map is only a weak reference to the supplier,
// it won't block GC of the supplier instance.
// Polling the ReferenceQueue retrieves the enqueued weak reference, and looking it up in the
// map gives us back the client so that close() can be called on it.
metastoreClientReferences.put(
new WeakReference<>(supplier, metastoreClientReferenceQueue), client);
} catch (MetaException e) {
throw new ExploreException("Error initializing Hive Metastore client", e);
}
}
return metastoreClientLocal.get().get();
}
private void closeMetastoreClient(IMetaStoreClient client) {
try {
client.close();
} catch (Throwable t) {
LOG.error("Exception raised in closing Metastore client", t);
}
}
@Override
protected void startUp() throws Exception {
LOG.info("Starting {}...", BaseHiveExploreService.class.getSimpleName());
HiveConf hiveConf = getHiveConf();
if (ExploreServiceUtils.isSparkEngine(hiveConf)) {
LOG.info("Engine is spark");
setupSparkConf();
}
cliService.init(hiveConf);
cliService.start();
metastoreClientsExecutorService.scheduleWithFixedDelay(
new Runnable() {
@Override
public void run() {
Reference<? extends Supplier<IMetaStoreClient>> ref = metastoreClientReferenceQueue.poll();
while (ref != null) {
IMetaStoreClient client = metastoreClientReferences.remove(ref);
if (client != null) {
closeMetastoreClient(client);
}
ref = metastoreClientReferenceQueue.poll();
}
}
},
METASTORE_CLIENT_CLEANUP_PERIOD, METASTORE_CLIENT_CLEANUP_PERIOD, TimeUnit.SECONDS);
// Schedule the cache cleanup
scheduledExecutorService.scheduleWithFixedDelay(new Runnable() {
@Override
public void run() {
runCacheCleanup();
}
}, cleanupJobSchedule, cleanupJobSchedule, TimeUnit.SECONDS
);
}
@Override
protected void shutDown() throws Exception {
LOG.info("Stopping {}...", BaseHiveExploreService.class.getSimpleName());
// By this time we should not get any more new requests, since the HTTP service has already been stopped.
// Close all handles
if (!activeHandleCache.asMap().isEmpty()) {
LOG.info("Timing out active handles...");
}
activeHandleCache.invalidateAll();
// Make sure the cache entries get expired.
runCacheCleanup();
// Shut down the executor first so that awaitTermination can actually complete, then wait for
// all cleanup jobs to finish.
scheduledExecutorService.shutdown();
scheduledExecutorService.awaitTermination(10, TimeUnit.SECONDS);
metastoreClientsExecutorService.shutdownNow();
// Go through all clients that were not cleaned up and call close() on them.
for (IMetaStoreClient client : metastoreClientReferences.values()) {
closeMetastoreClient(client);
}
cliService.stop();
}
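/**
* Populates the Spark configuration used when Hive's execution engine is Spark: the Hadoop
* configuration is copied into {@code spark.hadoop.*} properties, config localization is
* disabled, and security-related properties are adjusted when Kerberos is enabled.
*/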
private void setupSparkConf() {
// Copy over the Hadoop configuration as Spark properties, since we don't localize Hadoop conf dirs due to CDAP-5019.
for (Map.Entry<String, String> entry : hConf) {
sparkConf.put("spark.hadoop." + entry.getKey(), hConf.get(entry.getKey()));
}
// Don't localize the config; we pass all Hadoop configuration in Spark properties.
sparkConf.put("spark.yarn.localizeConfig", "false");
// Setup files to be copied over to spark containers
sparkConf.put(BaseHiveExploreService.SPARK_YARN_DIST_FILES,
System.getProperty(BaseHiveExploreService.SPARK_YARN_DIST_FILES));
if (UserGroupInformation.isSecurityEnabled()) {
// define metastore token key name
sparkConf.put("spark.hadoop." + HIVE_METASTORE_TOKEN_KEY, HiveAuthFactory.HS2_CLIENT_TOKEN);
// tokens are already provided for spark client
sparkConf.put("spark.yarn.security.tokens.hive.enabled", "false");
sparkConf.put("spark.yarn.security.tokens.hbase.enabled", "false");
// Hive needs to ignore security settings while running the Spark job.
sparkConf.put(HiveConf.ConfVars.HIVE_SERVER2_AUTHENTICATION.toString(), "NONE");
sparkConf.put(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS.toString(), "false");
}
}
@Override
public QueryHandle getColumns(String catalog, String schemaPattern, String tableNamePattern, String columnNamePattern)
throws ExploreException, SQLException {
startAndWait();
try {
SessionHandle sessionHandle = null;
OperationHandle operationHandle = null;
Map<String, String> sessionConf = startSession();
try {
sessionHandle = openHiveSession(sessionConf);
String database = getHiveDatabase(schemaPattern);
operationHandle = cliService.getColumns(sessionHandle, catalog, database,
tableNamePattern, columnNamePattern);
QueryHandle handle = saveReadOnlyOperation(operationHandle, sessionHandle, sessionConf, "", database);
LOG.trace("Retrieving columns: catalog {}, schemaPattern {}, tableNamePattern {}, columnNamePattern {}",
catalog, database, tableNamePattern, columnNamePattern);
return handle;
} catch (Throwable e) {
closeInternal(getQueryHandle(sessionConf),
new ReadOnlyOperationInfo(sessionHandle, operationHandle, sessionConf, "", ""));
throw e;
}
} catch (HiveSQLException e) {
throw getSqlException(e);
} catch (Throwable e) {
throw new ExploreException(e);
}
}
@Override
public QueryHandle getCatalogs() throws ExploreException, SQLException {
startAndWait();
try {
SessionHandle sessionHandle = null;
OperationHandle operationHandle = null;
Map<String, String> sessionConf = startSession();
try {
sessionHandle = openHiveSession(sessionConf);
operationHandle = cliService.getCatalogs(sessionHandle);
QueryHandle handle = saveReadOnlyOperation(operationHandle, sessionHandle, sessionConf, "", "");
LOG.trace("Retrieving catalogs");
return handle;
} catch (Throwable e) {
closeInternal(getQueryHandle(sessionConf),
new ReadOnlyOperationInfo(sessionHandle, operationHandle, sessionConf, "", ""));
throw e;
}
} catch (HiveSQLException e) {
throw getSqlException(e);
} catch (Throwable e) {
throw new ExploreException(e);
}
}
@Override
public QueryHandle getSchemas(String catalog, String schemaPattern) throws ExploreException, SQLException {
startAndWait();
try {
SessionHandle sessionHandle = null;
OperationHandle operationHandle = null;
Map<String, String> sessionConf = startSession();
try {
sessionHandle = openHiveSession(sessionConf);
String database = getHiveDatabase(schemaPattern);
operationHandle = cliService.getSchemas(sessionHandle, catalog, database);
QueryHandle handle = saveReadOnlyOperation(operationHandle, sessionHandle, sessionConf, "", database);
LOG.trace("Retrieving schemas: catalog {}, schema {}", catalog, database);
return handle;
} catch (Throwable e) {
closeInternal(getQueryHandle(sessionConf),
new ReadOnlyOperationInfo(sessionHandle, operationHandle, sessionConf, "", ""));
throw e;
}
} catch (HiveSQLException e) {
throw getSqlException(e);
} catch (Throwable e) {
throw new ExploreException(e);
}
}
@Override
public QueryHandle getFunctions(String catalog, String schemaPattern, String functionNamePattern)
throws ExploreException, SQLException {
startAndWait();
try {
SessionHandle sessionHandle = null;
OperationHandle operationHandle = null;
Map<String, String> sessionConf = startSession();
try {
sessionHandle = openHiveSession(sessionConf);
String database = getHiveDatabase(schemaPattern);
operationHandle = cliService.getFunctions(sessionHandle, catalog,
database, functionNamePattern);
QueryHandle handle = saveReadOnlyOperation(operationHandle, sessionHandle, sessionConf, "", database);
LOG.trace("Retrieving functions: catalog {}, schema {}, function {}",
catalog, database, functionNamePattern);
return handle;
} catch (Throwable e) {
closeInternal(getQueryHandle(sessionConf),
new ReadOnlyOperationInfo(sessionHandle, operationHandle, sessionConf, "", ""));
throw e;
}
} catch (HiveSQLException e) {
throw getSqlException(e);
} catch (Throwable e) {
throw new ExploreException(e);
}
}
@Override
public MetaDataInfo getInfo(MetaDataInfo.InfoType infoType) throws ExploreException, SQLException {
startAndWait();
try {
MetaDataInfo ret = infoType.getDefaultValue();
if (ret != null) {
return ret;
}
SessionHandle sessionHandle = null;
Map<String, String> sessionConf = startSession();
try {
sessionHandle = openHiveSession(sessionConf);
// Convert to GetInfoType
GetInfoType hiveInfoType = null;
for (GetInfoType t : GetInfoType.values()) {
if (t.name().equals("CLI_" + infoType.name())) {
hiveInfoType = t;
break;
}
}
if (hiveInfoType == null) {
// Should not come here, unless there is a mismatch between Explore and Hive info types.
LOG.warn("Could not find Hive info type %s", infoType);
return null;
}
GetInfoValue val = cliService.getInfo(sessionHandle, hiveInfoType);
LOG.trace("Retrieving info: {}, got value {}", infoType, val);
return new MetaDataInfo(val.getStringValue(), val.getShortValue(), val.getIntValue(), val.getLongValue());
} finally {
closeInternal(getQueryHandle(sessionConf), new ReadOnlyOperationInfo(sessionHandle, null, sessionConf, "", ""));
}
} catch (HiveSQLException e) {
throw getSqlException(e);
} catch (Throwable e) {
throw new ExploreException(e);
}
}
@Override
public QueryHandle getTables(String catalog, String schemaPattern, String tableNamePattern,
List<String> tableTypes) throws ExploreException, SQLException {
startAndWait();
try {
SessionHandle sessionHandle = null;
OperationHandle operationHandle = null;
Map<String, String> sessionConf = startSession();
try {
sessionHandle = openHiveSession(sessionConf);
String database = getHiveDatabase(schemaPattern);
operationHandle = cliService.getTables(sessionHandle, catalog, database,
tableNamePattern, tableTypes);
QueryHandle handle = saveReadOnlyOperation(operationHandle, sessionHandle, sessionConf, "", database);
LOG.trace("Retrieving tables: catalog {}, schemaNamePattern {}, tableNamePattern {}, tableTypes {}",
catalog, database, tableNamePattern, tableTypes);
return handle;
} catch (Throwable e) {
closeInternal(getQueryHandle(sessionConf),
new ReadOnlyOperationInfo(sessionHandle, operationHandle, sessionConf, "", ""));
throw e;
}
} catch (HiveSQLException e) {
throw getSqlException(e);
} catch (Throwable e) {
throw new ExploreException(e);
}
}
@Override
public List<TableNameInfo> getTables(@Nullable final String database) throws ExploreException {
startAndWait();
// TODO check if the database user is allowed to access if security is enabled
try {
List<String> databases;
if (database == null) {
databases = getMetaStoreClient().getAllDatabases();
} else {
databases = ImmutableList.of(getHiveDatabase(database));
}
ImmutableList.Builder<TableNameInfo> builder = ImmutableList.builder();
for (String db : databases) {
List<String> tables = getMetaStoreClient().getAllTables(db);
for (String table : tables) {
builder.add(new TableNameInfo(db, table));
}
}
return builder.build();
} catch (TException e) {
throw new ExploreException("Error connecting to Hive metastore", e);
}
}
@Override
public TableInfo getTableInfo(@Nullable String database, String table)
throws ExploreException, TableNotFoundException {
startAndWait();
// TODO check if the database user is allowed to access if security is enabled
try {
String db = database == null ? "default" : getHiveDatabase(database);
Table tableInfo = getMetaStoreClient().getTable(db, table);
List<FieldSchema> tableFields = tableInfo.getSd().getCols();
// For whatever reason, the table columns for partitioned tables do not seem to be present
// in the storage descriptor. If columns are missing, make a separate call for the schema.
if (tableFields == null || tableFields.isEmpty()) {
// Don't call .getSchema(); doing so triggers a ClassNotFoundException in the Thrift code.
tableFields = getMetaStoreClient().getFields(db, table);
}
ImmutableList.Builder<TableInfo.ColumnInfo> schemaBuilder = ImmutableList.builder();
Set<String> fieldNames = Sets.newHashSet();
for (FieldSchema column : tableFields) {
schemaBuilder.add(new TableInfo.ColumnInfo(column.getName(), column.getType(), column.getComment()));
fieldNames.add(column.getName());
}
ImmutableList.Builder<TableInfo.ColumnInfo> partitionKeysBuilder = ImmutableList.builder();
for (FieldSchema column : tableInfo.getPartitionKeys()) {
TableInfo.ColumnInfo columnInfo = new TableInfo.ColumnInfo(column.getName(), column.getType(),
column.getComment());
partitionKeysBuilder.add(columnInfo);
// add partition keys to the schema if they are not already there,
// since they show up when you do a 'describe <table>' command.
if (!fieldNames.contains(column.getName())) {
schemaBuilder.add(columnInfo);
}
}
// It's a CDAP-generated table if it uses one of our storage handlers, or if a property is set on the table.
String cdapName = null;
Map<String, String> tableParameters = tableInfo.getParameters();
if (tableParameters != null) {
cdapName = tableParameters.get(Constants.Explore.CDAP_NAME);
}
// tables created after CDAP 2.6 should set the "cdap.name" property, but older ones
// do not. So also check if it uses a cdap storage handler.
String storageHandler = tableInfo.getParameters().get("storage_handler");
boolean isDatasetTable = cdapName != null ||
DatasetStorageHandler.class.getName().equals(storageHandler) ||
StreamStorageHandler.class.getName().equals(storageHandler);
return new TableInfo(tableInfo.getTableName(), tableInfo.getDbName(), tableInfo.getOwner(),
(long) tableInfo.getCreateTime() * 1000, (long) tableInfo.getLastAccessTime() * 1000,
tableInfo.getRetention(), partitionKeysBuilder.build(), tableInfo.getParameters(),
tableInfo.getTableType(), schemaBuilder.build(), tableInfo.getSd().getLocation(),
tableInfo.getSd().getInputFormat(), tableInfo.getSd().getOutputFormat(),
tableInfo.getSd().isCompressed(), tableInfo.getSd().getNumBuckets(),
tableInfo.getSd().getSerdeInfo().getSerializationLib(),
tableInfo.getSd().getSerdeInfo().getParameters(), isDatasetTable);
} catch (NoSuchObjectException e) {
throw new TableNotFoundException(e);
} catch (TException e) {
throw new ExploreException(e);
}
}
@Override
public QueryHandle getTableTypes() throws ExploreException, SQLException {
startAndWait();
try {
SessionHandle sessionHandle = null;
OperationHandle operationHandle = null;
Map<String, String> sessionConf = startSession();
try {
sessionHandle = openHiveSession(sessionConf);
operationHandle = cliService.getTableTypes(sessionHandle);
QueryHandle handle = saveReadOnlyOperation(operationHandle, sessionHandle, sessionConf, "", "");
LOG.trace("Retrieving table types");
return handle;
} catch (Throwable e) {
closeInternal(getQueryHandle(sessionConf),
new ReadOnlyOperationInfo(sessionHandle, operationHandle, sessionConf, "", ""));
throw e;
}
} catch (HiveSQLException e) {
throw getSqlException(e);
} catch (Throwable e) {
throw new ExploreException(e);
}
}
@Override
public QueryHandle getTypeInfo() throws ExploreException, SQLException {
startAndWait();
try {
SessionHandle sessionHandle = null;
OperationHandle operationHandle = null;
Map<String, String> sessionConf = startSession();
try {
sessionHandle = openHiveSession(sessionConf);
operationHandle = cliService.getTypeInfo(sessionHandle);
QueryHandle handle = saveReadOnlyOperation(operationHandle, sessionHandle, sessionConf, "", "");
LOG.trace("Retrieving type info");
return handle;
} catch (Throwable e) {
closeInternal(getQueryHandle(sessionConf),
new ReadOnlyOperationInfo(sessionHandle, operationHandle, sessionConf, "", ""));
throw e;
}
} catch (HiveSQLException e) {
throw getSqlException(e);
} catch (Throwable e) {
throw new ExploreException(e);
}
}
@Override
public QueryHandle createNamespace(Id.Namespace namespace) throws ExploreException, SQLException {
startAndWait();
try {
// Even with the "IF NOT EXISTS" in the create command, Hive still logs a non-fatal warning internally
// when attempting to create the "default" namespace (since it already exists in Hive).
// This check prevents the extra warn log.
if (Id.Namespace.DEFAULT.equals(namespace)) {
return QueryHandle.NO_OP;
}
Map<String, String> sessionConf = startSession();
SessionHandle sessionHandle = null;
OperationHandle operationHandle = null;
try {
sessionHandle = cliService.openSession("", "", sessionConf);
String database = getHiveDatabase(namespace.getId());
// "IF NOT EXISTS" so that this operation is idempotent.
String statement = String.format("CREATE DATABASE IF NOT EXISTS %s", database);
operationHandle = doExecute(sessionHandle, statement);
QueryHandle handle = saveReadOnlyOperation(operationHandle, sessionHandle, sessionConf, statement, database);
LOG.info("Creating database {} with handle {}", namespace, handle);
return handle;
} catch (Throwable e) {
closeInternal(getQueryHandle(sessionConf),
new ReadOnlyOperationInfo(sessionHandle, operationHandle, sessionConf, "", ""));
throw e;
}
} catch (HiveSQLException e) {
throw getSqlException(e);
} catch (Throwable e) {
throw new ExploreException(e);
}
}
@Override
public QueryHandle deleteNamespace(Id.Namespace namespace) throws ExploreException, SQLException {
startAndWait();
try {
SessionHandle sessionHandle = null;
OperationHandle operationHandle = null;
Map<String, String> sessionConf = startSession();
try {
sessionHandle = openHiveSession(sessionConf);
String database = getHiveDatabase(namespace.getId());
String statement = String.format("DROP DATABASE %s", database);
operationHandle = doExecute(sessionHandle, statement);
QueryHandle handle = saveReadOnlyOperation(operationHandle, sessionHandle, sessionConf, statement, database);
LOG.info("Deleting database {} with handle {}", database, handle);
return handle;
} catch (Throwable e) {
closeInternal(getQueryHandle(sessionConf),
new ReadOnlyOperationInfo(sessionHandle, operationHandle, sessionConf, "", ""));
throw e;
}
} catch (HiveSQLException e) {
throw getSqlException(e);
} catch (Throwable e) {
throw new ExploreException(e);
}
}
@Override
public QueryHandle execute(Id.Namespace namespace, String statement) throws ExploreException, SQLException {
startAndWait();
try {
SessionHandle sessionHandle = null;
OperationHandle operationHandle = null;
LOG.trace("Got statement: {}", statement);
Map<String, String> sessionConf = startSession(namespace);
try {
sessionHandle = openHiveSession(sessionConf);
String database = getHiveDatabase(namespace.getId());
// Switch database to the one being passed in.
setCurrentDatabase(database);
operationHandle = doExecute(sessionHandle, statement);
QueryHandle handle = saveReadWriteOperation(operationHandle, sessionHandle, sessionConf,
statement, database);
LOG.trace("Executing statement: {} with handle {}", statement, handle);
return handle;
} catch (Throwable e) {
closeInternal(getQueryHandle(sessionConf),
new ReadWriteOperationInfo(sessionHandle, operationHandle, sessionConf, "", ""));
throw e;
}
} catch (HiveSQLException e) {
throw getSqlException(e);
} catch (Throwable e) {
throw new ExploreException(e);
}
}
@Override
public QueryStatus getStatus(QueryHandle handle) throws ExploreException, HandleNotFoundException, SQLException {
startAndWait();
InactiveOperationInfo inactiveOperationInfo = inactiveHandleCache.getIfPresent(handle);
if (inactiveOperationInfo != null) {
// Operation has been made inactive, so return the saved status.
LOG.trace("Returning saved status for inactive handle {}", handle);
return inactiveOperationInfo.getStatus();
}
try {
// Fetch status from Hive
QueryStatus status = fetchStatus(getOperationInfo(handle));
LOG.trace("Status of handle {} is {}", handle, status);
// No results or error, so can be timed out aggressively
if (status.getStatus() == QueryStatus.OpStatus.FINISHED && !status.hasResults()) {
// In case of a query that writes to a Dataset, we will always fall into this condition,
// and timing out aggressively will also close the transaction and make the writes visible
timeoutAggressively(handle, getResultSchema(handle), status);
} else if (status.getStatus() == QueryStatus.OpStatus.ERROR) {
// getResultSchema will fail if the query is in error
timeoutAggressively(handle, ImmutableList.<ColumnDesc>of(), status);
}
return status;
} catch (HiveSQLException e) {
throw getSqlException(e);
}
}
@Override
public List<QueryResult> nextResults(QueryHandle handle, int size)
throws ExploreException, HandleNotFoundException, SQLException {
startAndWait();
InactiveOperationInfo inactiveOperationInfo = inactiveHandleCache.getIfPresent(handle);
if (inactiveOperationInfo != null) {
// Operation has been made inactive, so all results should have been fetched already - return empty list.
LOG.trace("Returning empty result for inactive handle {}", handle);
return ImmutableList.of();
}
try {
List<QueryResult> results = fetchNextResults(handle, size);
QueryStatus status = getStatus(handle);
if (results.isEmpty() && status.getStatus() == QueryStatus.OpStatus.FINISHED) {
// Since operation has fetched all the results, handle can be timed out aggressively.
timeoutAggressively(handle, getResultSchema(handle), status);
}
return results;
} catch (HiveSQLException e) {
throw getSqlException(e);
}
}
protected abstract List<QueryResult> doFetchNextResults(OperationHandle handle,
FetchOrientation fetchOrientation,
int size) throws Exception;
@SuppressWarnings("unchecked")
protected List<QueryResult> fetchNextResults(QueryHandle handle, int size)
throws HiveSQLException, ExploreException, HandleNotFoundException {
startAndWait();
Lock nextLock = getOperationInfo(handle).getNextLock();
nextLock.lock();
try {
// Fetch results from Hive
LOG.trace("Getting results for handle {}", handle);
OperationHandle operationHandle = getOperationHandle(handle);
if (operationHandle.hasResultSet()) {
return doFetchNextResults(operationHandle, FetchOrientation.FETCH_NEXT, size);
} else {
return Collections.emptyList();
}
} catch (Exception e) {
throw Throwables.propagate(e);
} finally {
nextLock.unlock();
}
}
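/**
* Returns a preview of the query results. On the first call this fetches up to PREVIEW_COUNT rows
* and caches them in a file under the previews directory, so that subsequent calls return the same
* rows.
*/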
@Override
public List<QueryResult> previewResults(QueryHandle handle)
throws ExploreException, HandleNotFoundException, SQLException {
startAndWait();
if (inactiveHandleCache.getIfPresent(handle) != null) {
throw new HandleNotFoundException("Query is inactive.", true);
}
OperationInfo operationInfo = getOperationInfo(handle);
Lock previewLock = operationInfo.getPreviewLock();
previewLock.lock();
try {
File previewFile = operationInfo.getPreviewFile();
if (previewFile != null) {
try {
Reader reader = com.google.common.io.Files.newReader(previewFile, Charsets.UTF_8);
try {
return GSON.fromJson(reader, new TypeToken<List<QueryResult>>() { }.getType());
} finally {
Closeables.closeQuietly(reader);
}
} catch (FileNotFoundException e) {
LOG.error("Could not retrieve preview result file {}", previewFile, e);
throw new ExploreException(e);
}
}
try {
// Create preview results for query
previewFile = new File(previewsDir, handle.getHandle());
try (FileWriter fileWriter = new FileWriter(previewFile)) {
List<QueryResult> results = fetchNextResults(handle, PREVIEW_COUNT);
GSON.toJson(results, fileWriter);
operationInfo.setPreviewFile(previewFile);
return results;
}
} catch (IOException e) {
LOG.error("Could not write preview results into file", e);
throw new ExploreException(e);
}
} finally {
previewLock.unlock();
}
}
@Override
public List<ColumnDesc> getResultSchema(QueryHandle handle)
throws ExploreException, HandleNotFoundException, SQLException {
startAndWait();
try {
InactiveOperationInfo inactiveOperationInfo = inactiveHandleCache.getIfPresent(handle);
if (inactiveOperationInfo != null) {
// Operation has been made inactive, so return saved schema.
LOG.trace("Returning saved schema for inactive handle {}", handle);
return inactiveOperationInfo.getSchema();
}
// Fetch schema from hive
LOG.trace("Getting schema for handle {}", handle);
OperationHandle operationHandle = getOperationHandle(handle);
return getResultSchemaInternal(operationHandle);
} catch (HiveSQLException e) {
throw getSqlException(e);
}
}
protected List<ColumnDesc> getResultSchemaInternal(OperationHandle operationHandle) throws SQLException {
ImmutableList.Builder<ColumnDesc> listBuilder = ImmutableList.builder();
if (operationHandle.hasResultSet()) {
TableSchema tableSchema = cliService.getResultSetMetadata(operationHandle);
for (ColumnDescriptor colDesc : tableSchema.getColumnDescriptors()) {
listBuilder.add(new ColumnDesc(colDesc.getName(), colDesc.getTypeName(),
colDesc.getOrdinalPosition(), colDesc.getComment()));
}
}
return listBuilder.build();
}
private void setCurrentDatabase(String dbName) throws Throwable {
SessionState.get().setCurrentDatabase(dbName);
}
/**
* Cancels a running Hive operation. After the operation moves into the
* {@link QueryStatus.OpStatus#CANCELED} state, {@link #close(QueryHandle)} needs to be called
* to release resources.
*
* @param handle handle returned by {@link Explore#execute(Id.Namespace, String)}.
* @throws ExploreException on any error cancelling the operation.
* @throws HandleNotFoundException when handle is not found.
* @throws SQLException if there are errors in the SQL statement.
*/
void cancelInternal(QueryHandle handle) throws ExploreException, HandleNotFoundException, SQLException {
try {
InactiveOperationInfo inactiveOperationInfo = inactiveHandleCache.getIfPresent(handle);
if (inactiveOperationInfo != null) {
// Operation has been made inactive, so no point in cancelling it.
LOG.trace("Not running cancel for inactive handle {}", handle);
return;
}
LOG.trace("Cancelling operation {}", handle);
cliService.cancelOperation(getOperationHandle(handle));
} catch (HiveSQLException e) {
throw getSqlException(e);
}
}
@Override
public void close(QueryHandle handle) throws ExploreException, HandleNotFoundException {
startAndWait();
inactiveHandleCache.invalidate(handle);
activeHandleCache.invalidate(handle);
}
@Override
public List<QueryInfo> getQueries(Id.Namespace namespace) throws ExploreException, SQLException {
startAndWait();
List<QueryInfo> result = Lists.newArrayList();
String namespaceHiveDb = getHiveDatabase(namespace.getId());
for (Map.Entry<QueryHandle, OperationInfo> entry : activeHandleCache.asMap().entrySet()) {
try {
if (entry.getValue().getNamespace().equals(namespaceHiveDb)) {
// We use an empty query statement for getTables, getSchemas, etc.; we don't need to return those from this method.
if (!entry.getValue().getStatement().isEmpty()) {
QueryStatus status = getStatus(entry.getKey());
result.add(new QueryInfo(entry.getValue().getTimestamp(), entry.getValue().getStatement(),
entry.getKey(), status, true));
}
}
} catch (HandleNotFoundException e) {
// Ignore the HandleNotFoundException: this method returns all queries and handles, and if a
// handle has been removed from the internal cache, there is no point returning it from here.
}
}
for (Map.Entry<QueryHandle, InactiveOperationInfo> entry : inactiveHandleCache.asMap().entrySet()) {
InactiveOperationInfo inactiveOperationInfo = entry.getValue();
if (inactiveOperationInfo.getNamespace().equals(getHiveDatabase(namespace.getId()))) {
// We use an empty query statement for getTables, getSchemas, etc.; we don't need to return those from this method.
if (!inactiveOperationInfo.getStatement().isEmpty()) {
if (inactiveOperationInfo.getStatus() == null) {
LOG.error("Null status for query {}, handle {}", inactiveOperationInfo.getStatement(), entry.getKey());
}
result.add(new QueryInfo(inactiveOperationInfo.getTimestamp(),
inactiveOperationInfo.getStatement(), entry.getKey(),
inactiveOperationInfo.getStatus(), false));
}
}
}
Collections.sort(result);
return result;
}
@Override
public int getActiveQueryCount(Id.Namespace namespace) throws ExploreException {
startAndWait();
int count = 0;
String namespaceHiveDb = getHiveDatabase(namespace.getId());
for (Map.Entry<QueryHandle, OperationInfo> entry : activeHandleCache.asMap().entrySet()) {
if (entry.getValue().getNamespace().equals(namespaceHiveDb)) {
// We use an empty query statement for getTables, getSchemas, etc.; we don't need to return those from this method.
if (!entry.getValue().getStatement().isEmpty()) {
count++;
}
}
}
return count;
}
// this upgrade code is for upgrading CDAP v2.6 to v2.8 and above.
@Override
public void upgrade() throws Exception {
// all old CDAP tables used to be in the default database
LOG.info("Checking for tables that need upgrade...");
List<TableNameInfo> tables = getTables("default");
for (TableNameInfo tableNameInfo : tables) {
String tableName = tableNameInfo.getTableName();
TableInfo tableInfo = getTableInfo(tableNameInfo.getDatabaseName(), tableName);
if (!requiresUpgrade(tableInfo)) {
continue;
}
// Wait for the dataset service to come up; it will be needed when creating tables.
waitForDatasetService(600);
String storageHandler = tableInfo.getParameters().get("storage_handler");
if (StreamStorageHandler.class.getName().equals(storageHandler) && tableName.startsWith("cdap_")) {
LOG.info("Upgrading stream table {}", tableName);
upgradeStreamTable(tableInfo);
} else if (DatasetStorageHandler.class.getName().equals(storageHandler) && tableName.startsWith("cdap_")) {
LOG.info("Upgrading record scannable dataset table {}.", tableName);
upgradeRecordScannableTable(tableInfo);
} else if (tableName.startsWith("cdap_")) {
LOG.info("Upgrading file set table {}.", tableName);
// handle filesets differently since they can have partitions,
// and dropping the table will remove all partitions
upgradeFilesetTable(tableInfo);
}
}
}
private void waitForDatasetService(int secondsToWait) throws InterruptedException {
int count = 0;
LOG.info("Waiting for dataset service to come up before upgrading Explore.");
while (count < secondsToWait) {
try {
datasetFramework.getInstances(Id.Namespace.DEFAULT);
LOG.info("Dataset service is up and running, proceeding with explore upgrade.");
return;
} catch (Exception e) {
count++;
TimeUnit.SECONDS.sleep(1);
}
}
LOG.error("Timed out waiting for dataset service to come up. Restart CDAP Master to upgrade old Hive tables.");
}
private void upgradeFilesetTable(TableInfo tableInfo) throws Exception {
// These tables were only available starting from CDAP 2.7, which stores the cdap name in table properties.
String dsName = tableInfo.getParameters().get(Constants.Explore.CDAP_NAME);
// Except the name was always prefixed with "cdap.user.", so strip that prefix.
dsName = dsName.substring("cdap.user.".length());
Id.DatasetInstance datasetID = Id.DatasetInstance.from(Id.Namespace.DEFAULT, dsName);
DatasetSpecification spec = datasetFramework.getDatasetSpec(datasetID);
// enable the new table
enableDataset(datasetID, spec);
try (SystemDatasetInstantiator datasetInstantiator = datasetInstantiatorFactory.create()) {
Dataset dataset = datasetInstantiator.getDataset(datasetID);
// if this is a time partitioned file set, we need to add all partitions
if (dataset instanceof TimePartitionedFileSet) {
TimePartitionedFileSet tpfs = (TimePartitionedFileSet) dataset;
Set<PartitionDetail> partitionDetails = tpfs.getPartitions(null);
if (!partitionDetails.isEmpty()) {
QueryHandle handle = exploreTableManager.addPartitions(datasetID, partitionDetails);
QueryStatus status = waitForCompletion(handle);
// if add partitions failed, stop
if (status.getStatus() != QueryStatus.OpStatus.FINISHED) {
throw new ExploreException("Failed to add all partitions to dataset " + datasetID);
}
}
}
}
// now it is safe to drop the old table
dropTable(tableInfo.getTableName());
}
private void upgradeRecordScannableTable(TableInfo tableInfo) throws Exception {
// get the dataset name from the serde properties.
Map<String, String> serdeProperties = tableInfo.getSerdeParameters();
String datasetName = serdeProperties.get(Constants.Explore.DATASET_NAME);
// Except the name was always prefixed with "cdap.user.", so strip that prefix.
datasetName = datasetName.substring("cdap.user.".length());
Id.DatasetInstance datasetID = Id.DatasetInstance.from(Id.Namespace.DEFAULT, datasetName);
DatasetSpecification spec = datasetFramework.getDatasetSpec(datasetID);
// if there are no partitions, we can just enable the new table and drop the old one.
enableDataset(datasetID, spec);
dropTable(tableInfo.getTableName());
}
private void enableDataset(Id.DatasetInstance datasetID, DatasetSpecification spec) throws Exception {
LOG.info("Enabling exploration on dataset {}", datasetID);
QueryHandle enableHandle = exploreTableManager.enableDataset(datasetID, spec);
// wait until enable is done
QueryStatus status = waitForCompletion(enableHandle);
// if enable failed, stop
if (status.getStatus() != QueryStatus.OpStatus.FINISHED) {
throw new ExploreException("Failed to enable exploration of dataset " + datasetID);
}
}
private void dropTable(String tableName) throws Exception {
LOG.info("Dropping old upgraded table {}", tableName);
QueryHandle dropHandle = execute(Id.Namespace.DEFAULT, "DROP TABLE IF EXISTS " + tableName);
// make sure the drop finished
QueryStatus status = waitForCompletion(dropHandle);
if (status.getStatus() != QueryStatus.OpStatus.FINISHED) {
throw new ExploreException("Failed to drop old Hive table " + tableName);
}
}
private void upgradeStreamTable(TableInfo tableInfo) throws Exception {
// get the stream name from the serde properties.
Map<String, String> serdeProperties = tableInfo.getSerdeParameters();
String streamName = serdeProperties.get(Constants.Explore.STREAM_NAME);
Id.Stream streamID = Id.Stream.from(Id.Namespace.DEFAULT, streamName);
// enable the table in the default namespace
LOG.info("Enabling exploration on stream {}", streamID);
StreamConfig streamConfig = streamAdmin.getConfig(streamID);
QueryHandle enableHandle = exploreTableManager.enableStream(
tableNaming.getTableName(streamID), streamID, streamConfig.getFormat());
// Wait until enable is done.
QueryStatus status = waitForCompletion(enableHandle);
// if enable failed, stop
if (status.getStatus() != QueryStatus.OpStatus.FINISHED) {
throw new ExploreException("Failed to enable exploration of stream " + streamID);
}
// safe to disable old table now
dropTable(tableInfo.getTableName());
}
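/**
* Polls the status of the given query once per second until it reaches a terminal state.
*/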
private QueryStatus waitForCompletion(QueryHandle handle) throws HandleNotFoundException, SQLException,
ExploreException, InterruptedException {
QueryStatus status = getStatus(handle);
while (!status.getStatus().isDone()) {
TimeUnit.SECONDS.sleep(1);
status = getStatus(handle);
}
return status;
}
private boolean requiresUpgrade(TableInfo tableInfo) {
// if this is a cdap dataset.
if (tableInfo.isBackedByDataset()) {
String cdapVersion = tableInfo.getParameters().get(Constants.Explore.CDAP_VERSION);
// for now, good enough to check if it contains the version or not.
// In the future we can actually do version comparison with ProjectInfo.Version
return cdapVersion == null;
}
return false;
}
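/**
* Closes the Hive operation and session associated with the given handle, then cleans up the
* handle's metadata and transaction. Null operation and session handles are tolerated.
*/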
void closeInternal(QueryHandle handle, OperationInfo opInfo)
throws ExploreException, SQLException {
try {
LOG.trace("Closing operation {}", handle);
if (opInfo.getOperationHandle() != null) {
cliService.closeOperation(opInfo.getOperationHandle());
}
} catch (HiveSQLException e) {
throw getSqlException(e);
} finally {
try {
if (opInfo.getSessionHandle() != null) {
closeHiveSession(opInfo.getSessionHandle());
}
} finally {
cleanUp(handle, opInfo);
}
}
}
private SessionHandle openHiveSession(Map<String, String> sessionConf) throws HiveSQLException {
SessionHandle sessionHandle = doOpenHiveSession(sessionConf);
try {
HiveStreamRedirector.redirectToLogger(SessionState.get());
} catch (Throwable t) {
LOG.error("Error redirecting Hive output streams to logs.", t);
}
return sessionHandle;
}
protected SessionHandle doOpenHiveSession(Map<String, String> sessionConf) throws HiveSQLException {
return cliService.openSession("", "", sessionConf);
}
private void closeHiveSession(SessionHandle sessionHandle) {
try {
cliService.closeSession(sessionHandle);
} catch (Throwable e) {
LOG.error("Got error closing session", e);
}
}
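/**
* Returns the Hive database name for the given namespace: the default namespace maps to itself,
* while any other namespace is prefixed with the configured dataset table prefix. For example,
* with a (hypothetical) prefix "cdap", namespace "marketing" would map to "cdap_marketing".
*/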
private String getHiveDatabase(@Nullable String namespace) {
// null namespace implies that the operation happens across all databases
if (namespace == null) {
return null;
}
String tablePrefix = cConf.get(Constants.Dataset.TABLE_PREFIX);
return namespace.equals(Id.Namespace.DEFAULT.getId()) ? namespace : String.format("%s_%s", tablePrefix, namespace);
}
/**
* Starts a long-running transaction and sets up the session configuration.
* @return configuration for a Hive session that contains a transaction, plus the serialized CDAP and
* HBase configurations; this will be used by the MapReduce tasks started by Hive. The generated
* query id is stored under {@link Constants.Explore#QUERY_ID}.
* @throws IOException
* @throws ExploreException
*/
protected Map<String, String> startSession() throws IOException, ExploreException {
return startSession(null);
}
protected Map<String, String> startSession(Id.Namespace namespace) throws IOException, ExploreException {
Map<String, String> sessionConf = Maps.newHashMap();
QueryHandle queryHandle = QueryHandle.generate();
sessionConf.put(Constants.Explore.QUERY_ID, queryHandle.getHandle());
String schedulerQueue = namespace != null ? schedulerQueueResolver.getQueue(namespace)
: schedulerQueueResolver.getDefaultQueue();
if (schedulerQueue != null && !schedulerQueue.isEmpty()) {
sessionConf.put(JobContext.QUEUE_NAME, schedulerQueue);
}
Transaction tx = startTransaction();
ConfigurationUtil.set(sessionConf, Constants.Explore.TX_QUERY_KEY, TxnCodec.INSTANCE, tx);
ConfigurationUtil.set(sessionConf, Constants.Explore.CCONF_KEY, CConfCodec.INSTANCE, cConf);
ConfigurationUtil.set(sessionConf, Constants.Explore.HCONF_KEY, HConfCodec.INSTANCE, hConf);
if (ExploreServiceUtils.isSparkEngine(getHiveConf())) {
sessionConf.putAll(sparkConf);
}
if (UserGroupInformation.isSecurityEnabled()) {
// make sure RM does not cancel delegation tokens after the query is run
sessionConf.put("mapreduce.job.complete.cancel.delegation.tokens", "false");
// refresh delegations for the job - TWILL-170
updateTokenStore();
}
return sessionConf;
}
/**
* Updates the token store to be used for the Hive job, based on the Explore container's credentials.
* This is because Twill doesn't update the container_tokens file upon token refresh.
* See: https://issues.apache.org/jira/browse/TWILL-170
*/
private void updateTokenStore() throws IOException, ExploreException {
String hadoopTokenFileLocation = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
if (hadoopTokenFileLocation == null) {
LOG.warn("Skipping update of token store due to failure to find environment variable '{}'.",
UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
return;
}
Path credentialsFile = Paths.get(hadoopTokenFileLocation);
FileAttribute<Set<PosixFilePermission>> originalPermissionAttributes =
PosixFilePermissions.asFileAttribute(Files.getPosixFilePermissions(credentialsFile));
Path tmpFile = Files.createTempFile(credentialsFile.getParent(), "credentials.store", null,
originalPermissionAttributes);
LOG.debug("Writing to temporary file: {}", tmpFile);
try (DataOutputStream os = new DataOutputStream(Files.newOutputStream(tmpFile))) {
Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
credentials.writeTokenStorageToStream(os);
}
Files.move(tmpFile, credentialsFile, StandardCopyOption.ATOMIC_MOVE);
LOG.debug("Secure store saved to {}", credentialsFile);
}
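/**
* Extracts the {@link QueryHandle} that {@link #startSession()} stored in the session configuration.
*/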
protected QueryHandle getQueryHandle(Map<String, String> sessionConf) throws HandleNotFoundException {
return QueryHandle.fromId(sessionConf.get(Constants.Explore.QUERY_ID));
}
/**
* Returns {@link OperationHandle} associated with Explore {@link QueryHandle}.
* @param handle explore handle.
* @return OperationHandle.
* @throws ExploreException
*/
protected OperationHandle getOperationHandle(QueryHandle handle) throws ExploreException, HandleNotFoundException {
return getOperationInfo(handle).getOperationHandle();
}
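/**
* Fetches the current status from Hive and caches it on the given {@link OperationInfo}.
*/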
protected QueryStatus fetchStatus(OperationInfo operationInfo)
throws ExploreException, HandleNotFoundException, HiveSQLException {
QueryStatus queryStatus = doFetchStatus(operationInfo.getOperationHandle());
operationInfo.setStatus(queryStatus);
return queryStatus;
}
/**
* Saves information associated with a Hive operation that is read-only on Datasets.
* @param operationHandle {@link OperationHandle} of the Hive operation running.
* @param sessionHandle {@link SessionHandle} for the Hive operation running.
* @param sessionConf configuration for the session running the Hive operation.
* @param statement SQL statement executed with the call.
* @param namespace Hive database in which the operation runs.
* @return {@link QueryHandle} that represents the Hive operation being run.
*/
protected QueryHandle saveReadOnlyOperation(OperationHandle operationHandle, SessionHandle sessionHandle,
Map<String, String> sessionConf, String statement, String namespace) {
QueryHandle handle = QueryHandle.fromId(sessionConf.get(Constants.Explore.QUERY_ID));
activeHandleCache.put(handle,
new ReadOnlyOperationInfo(sessionHandle, operationHandle, sessionConf, statement, namespace));
return handle;
}
/**
* Saves information associated with a Hive operation that writes to a Dataset.
* @param operationHandle {@link OperationHandle} of the Hive operation running.
* @param sessionHandle {@link SessionHandle} for the Hive operation running.
* @param sessionConf configuration for the session running the Hive operation.
* @param statement SQL statement executed with the call.
* @param namespace Hive database in which the operation runs.
* @return {@link QueryHandle} that represents the Hive operation being run.
*/
protected QueryHandle saveReadWriteOperation(OperationHandle operationHandle, SessionHandle sessionHandle,
Map<String, String> sessionConf, String statement, String namespace) {
QueryHandle handle = QueryHandle.fromId(sessionConf.get(Constants.Explore.QUERY_ID));
activeHandleCache.put(handle,
new ReadWriteOperationInfo(sessionHandle, operationHandle,
sessionConf, statement, namespace));
return handle;
}
/**
* Called after a handle has been used to fetch all of its results. Such a handle can be timed out
* aggressively. This also closes the associated transaction.
*
* @param handle operation handle.
* @param schema result schema to save with the inactive handle.
* @param status last known status to save with the inactive handle.
*/
private void timeoutAggressively(QueryHandle handle, List<ColumnDesc> schema, QueryStatus status)
throws HandleNotFoundException {
OperationInfo opInfo = activeHandleCache.getIfPresent(handle);
if (opInfo == null) {
LOG.trace("Could not find OperationInfo for handle {}, it might already have been moved to inactive list",
handle);
return;
}
closeTransaction(handle, opInfo);
LOG.trace("Timing out handle {} aggressively", handle);
inactiveHandleCache.put(handle, new InactiveOperationInfo(opInfo, schema, status));
activeHandleCache.invalidate(handle);
}
private OperationInfo getOperationInfo(QueryHandle handle) throws HandleNotFoundException {
// First look in running handles and handles that still can be fetched.
OperationInfo opInfo = activeHandleCache.getIfPresent(handle);
if (opInfo != null) {
return opInfo;
}
throw new HandleNotFoundException("Invalid handle provided");
}
/**
* Cleans up the metadata associated with an active {@link QueryHandle}. It also closes the
* associated transaction.
* @param handle handle of the running Hive operation.
* @param opInfo operation info associated with the handle.
*/
protected void cleanUp(QueryHandle handle, OperationInfo opInfo) {
try {
if (opInfo.getPreviewFile() != null) {
opInfo.getPreviewFile().delete();
}
closeTransaction(handle, opInfo);
} finally {
activeHandleCache.invalidate(handle);
}
}
private Transaction startTransaction() throws IOException {
Transaction tx = txClient.startLong();
LOG.trace("Transaction {} started.", tx);
return tx;
}
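/**
* Closes the transaction associated with the given handle: the transaction is committed if the
* operation was read-only or finished successfully, and invalidated otherwise (or if the commit
* fails). The session configuration is marked afterwards so that this runs at most once per handle.
*/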
private void closeTransaction(QueryHandle handle, OperationInfo opInfo) {
try {
String txCommitted = opInfo.getSessionConf().get(Constants.Explore.TX_QUERY_CLOSED);
if (txCommitted != null && Boolean.parseBoolean(txCommitted)) {
LOG.trace("Transaction for handle {} has already been closed", handle);
return;
}
Transaction tx = ConfigurationUtil.get(opInfo.getSessionConf(),
Constants.Explore.TX_QUERY_KEY,
TxnCodec.INSTANCE);
LOG.trace("Closing transaction {} for handle {}", tx, handle);
if (opInfo.isReadOnly() ||
(opInfo.getStatus() != null && opInfo.getStatus().getStatus() == QueryStatus.OpStatus.FINISHED)) {
if (!(txClient.commit(tx))) {
txClient.invalidate(tx.getWritePointer());
LOG.info("Invalidating transaction: {}", tx);
}
} else {
txClient.invalidate(tx.getWritePointer());
}
} catch (Throwable e) {
LOG.error("Got exception while closing transaction.", e);
} finally {
opInfo.getSessionConf().put(Constants.Explore.TX_QUERY_CLOSED, "true");
}
}
private void runCacheCleanup() {
LOG.trace("Running cache cleanup");
activeHandleCache.cleanUp();
inactiveHandleCache.cleanUp();
}
// Hive wraps all exceptions, including SQL exceptions in HiveSQLException. We would like to surface the SQL
// exception to the user, and not other Hive server exceptions. We are using a heuristic to determine whether a
// HiveSQLException is a SQL exception or not by inspecting the SQLState of HiveSQLException. If SQLState is not
// null then we surface the SQL exception.
private RuntimeException getSqlException(HiveSQLException e) throws ExploreException, SQLException {
if (e.getSQLState() != null) {
throw e;
}
throw new ExploreException(e);
}
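/**
* Converts a Thrift column value into the corresponding Java object (Boolean, Byte, Double,
* Short, Integer, Long or String), based on which field of the union is set.
*/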
protected Object tColumnToObject(TColumnValue tColumnValue) throws ExploreException {
if (tColumnValue.isSetBoolVal()) {
return tColumnValue.getBoolVal().isValue();
} else if (tColumnValue.isSetByteVal()) {
return tColumnValue.getByteVal().getValue();
} else if (tColumnValue.isSetDoubleVal()) {
return tColumnValue.getDoubleVal().getValue();
} else if (tColumnValue.isSetI16Val()) {
return tColumnValue.getI16Val().getValue();
} else if (tColumnValue.isSetI32Val()) {
return tColumnValue.getI32Val().getValue();
} else if (tColumnValue.isSetI64Val()) {
return tColumnValue.getI64Val().getValue();
} else if (tColumnValue.isSetStringVal()) {
return tColumnValue.getStringVal().getValue();
}
throw new ExploreException("Unknown column value encountered: " + tColumnValue);
}
}