/* * Copyright 2013 Klarna AB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.klarna.hiverunner; import com.klarna.hiverunner.config.HiveRunnerConfig; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.hsqldb.jdbc.JDBCDriver; import org.junit.rules.TemporaryFolder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.util.Map; import java.util.UUID; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.*; /** * Responsible for common configuration for running the HiveServer within this JVM with zero external dependencies. * <p/> * This class contains a bunch of methods meant to be overridden in order to create slightly different contexts. * * This context configures HiveServer for both mr and tez. There's nothing contradicting with those configurations so * they may coexist in order to allow test cases to alter execution engines within the same test by * E.g: 'set hive.execution.engine=tez;'. */ public class StandaloneHiveServerContext implements HiveServerContext { private static final Logger LOGGER = LoggerFactory.getLogger(StandaloneHiveServerContext.class); private String metaStorageUrl; protected HiveConf hiveConf = new HiveConf(); private final TemporaryFolder basedir; private final HiveRunnerConfig hiveRunnerConfig; public StandaloneHiveServerContext(TemporaryFolder basedir, HiveRunnerConfig hiveRunnerConfig) { this.basedir = basedir; this.hiveRunnerConfig = hiveRunnerConfig; } @Override public final void init() { configureMiscHiveSettings(hiveConf); configureMetaStore(hiveConf); configureMrExecutionEngine(hiveConf); configureTezExecutionEngine(hiveConf); configureJavaSecurityRealm(hiveConf); configureSupportConcurrency(hiveConf); configureFileSystem(basedir, hiveConf); configureAssertionStatus(hiveConf); overrideHiveConf(hiveConf); } protected void configureMiscHiveSettings(HiveConf hiveConf) { hiveConf.setBoolVar(HIVESTATSAUTOGATHER, false); // Turn of dependency to calcite library hiveConf.setBoolVar(HIVE_CBO_ENABLED, false); // Disable to get rid of clean up exception when stopping the Session. hiveConf.setBoolVar(HIVE_SERVER2_LOGGING_OPERATION_ENABLED, false); hiveConf.setVar(HADOOPBIN, "NO_BIN!"); } protected void overrideHiveConf(HiveConf hiveConf) { for (Map.Entry<String, String> hiveConfEntry : hiveRunnerConfig.getHiveConfSystemOverride().entrySet()) { hiveConf.set(hiveConfEntry.getKey(), hiveConfEntry.getValue()); } } protected void configureMrExecutionEngine(HiveConf conf) { /* * Switch off all optimizers otherwise we didn't * manage to contain the map reduction within this JVM. */ conf.setBoolVar(HIVE_INFER_BUCKET_SORT, false); conf.setBoolVar(HIVEMETADATAONLYQUERIES, false); conf.setBoolVar(HIVEOPTINDEXFILTER, false); conf.setBoolVar(HIVECONVERTJOIN, false); conf.setBoolVar(HIVESKEWJOIN, false); // Defaults to a 1000 millis sleep in. We can speed up the tests a bit by setting this to 1 millis instead. // org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper. hiveConf.setLongVar(HiveConf.ConfVars.HIVECOUNTERSPULLINTERVAL, 1L); hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_RPC_QUERY_PLAN, true); } protected void configureTezExecutionEngine(HiveConf conf) { /* Tez local mode settings */ conf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); conf.set("fs.defaultFS", "file:///"); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true); /* Set to be able to run tests offline */ conf.set(TezConfiguration.TEZ_AM_DISABLE_CLIENT_VERSION_CHECK, "true"); /* General attempts to strip of unnecessary functionality to speed up test execution and increase stability */ conf.set(TezConfiguration.TEZ_AM_USE_CONCURRENT_DISPATCHER, "false"); conf.set(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, "false"); conf.set(TezConfiguration.DAG_RECOVERY_ENABLED, "false"); conf.set(TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX, "1"); conf.set(TezConfiguration.TEZ_AM_WEBSERVICE_ENABLE, "false"); conf.set(TezConfiguration.DAG_RECOVERY_ENABLED, "false"); conf.set(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, "false"); } protected void configureJavaSecurityRealm(HiveConf hiveConf) { // These three properties gets rid of: 'Unable to load realm info from SCDynamicStore' // which seems to have a timeout of about 5 secs. System.setProperty("java.security.krb5.realm", ""); System.setProperty("java.security.krb5.kdc", ""); System.setProperty("java.security.krb5.conf", "/dev/null"); } protected void configureAssertionStatus(HiveConf conf) { ClassLoader.getSystemClassLoader().setPackageAssertionStatus("org.apache.hadoop.hive.serde2.objectinspector", false); } protected void configureSupportConcurrency(HiveConf conf) { hiveConf.setBoolVar(HIVE_SUPPORT_CONCURRENCY, false); } protected void configureMetaStore(HiveConf conf) { String jdbcDriver = JDBCDriver.class.getName(); try { Class.forName(jdbcDriver); } catch (ClassNotFoundException e) { throw new RuntimeException(e); } // Set the hsqldb driver metaStorageUrl = "jdbc:hsqldb:mem:" + UUID.randomUUID().toString(); hiveConf.set("datanucleus.connectiondrivername", jdbcDriver); hiveConf.set("javax.jdo.option.ConnectionDriverName", jdbcDriver); // No pooling needed. This will save us a lot of threads hiveConf.set("datanucleus.connectionPoolingType", "None"); conf.setBoolVar(METASTORE_VALIDATE_CONSTRAINTS, true); conf.setBoolVar(METASTORE_VALIDATE_COLUMNS, true); conf.setBoolVar(METASTORE_VALIDATE_TABLES, true); } protected void configureFileSystem(TemporaryFolder basedir, HiveConf conf) { conf.setVar(METASTORECONNECTURLKEY, metaStorageUrl + ";create=true"); createAndSetFolderProperty(METASTOREWAREHOUSE, "warehouse", conf, basedir); createAndSetFolderProperty(SCRATCHDIR, "scratchdir", conf, basedir); createAndSetFolderProperty(LOCALSCRATCHDIR, "localscratchdir", conf, basedir); createAndSetFolderProperty(HIVEHISTORYFILELOC, "tmp", conf, basedir); conf.setBoolVar(HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS, true); createAndSetFolderProperty("hadoop.tmp.dir", "hadooptmp", conf, basedir); createAndSetFolderProperty("test.log.dir", "logs", conf, basedir); /* Tez specific configurations below */ /* Tez will upload a hive-exec.jar to this location. It looks like it will do this only once per test suite so it makes sense to keep this in a central location rather than in the tmp dir of each test. */ File installation_dir = newFolder(getBaseDir(), "tez_installation_dir"); conf.setVar(HiveConf.ConfVars.HIVE_JAR_DIRECTORY, installation_dir.getAbsolutePath()); conf.setVar(HiveConf.ConfVars.HIVE_USER_INSTALL_DIR, installation_dir.getAbsolutePath()); } File newFolder(TemporaryFolder basedir, String folder) { try { File newFolder = basedir.newFolder(folder); FileUtil.setPermission(newFolder, FsPermission.getDirDefault()); return newFolder; } catch (IOException e) { throw new IllegalStateException("Failed to create tmp dir: " + e.getMessage(), e); } } public HiveConf getHiveConf() { return hiveConf; } @Override public TemporaryFolder getBaseDir() { return basedir; } protected final void createAndSetFolderProperty(HiveConf.ConfVars var, String folder, HiveConf conf, TemporaryFolder basedir) { conf.setVar(var, newFolder(basedir, folder).getAbsolutePath()); } protected final void createAndSetFolderProperty(String key, String folder, HiveConf conf, TemporaryFolder basedir) { conf.set(key, newFolder(basedir, folder).getAbsolutePath()); } }