/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hive.hcatalog.templeton;

import java.io.File;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConfUtil;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.VersionInfo;
import org.apache.hive.hcatalog.templeton.tool.JobState;
import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;
import org.apache.hive.hcatalog.templeton.tool.ZooKeeperCleanup;
import org.apache.hive.hcatalog.templeton.tool.ZooKeeperStorage;

/**
 * The configuration for Templeton.  This merges the normal Hadoop
 * configuration with the Templeton specific variables.
 *
 * The Templeton configuration variables are described in
 * webhcat-default.xml
 *
 * The Templeton specific configuration is split into two layers
 *
 * 1. webhcat-default.xml - All the configuration variables that
 *    Templeton needs.  These are the defaults that ship with the app
 *    and should only be changed by the app developers.
 *
 * 2. webhcat-site.xml - The (possibly empty) configuration that the
 *    system administrator can set variables for their Hadoop cluster.
 *
 * The configuration files are loaded in this order with later files
 * overriding earlier ones.
 *
 * To find the configuration files, we first attempt to load a file
 * from the CLASSPATH and then look in the directory specified in the
 * TEMPLETON_HOME environment variable.
 *
 * In addition the configuration files may access the special env
 * variable env for all environment variables.  For example, the
 * hadoop executable could be specified using:
 *<pre>
 *      ${env.HADOOP_PREFIX}/bin/hadoop
 *</pre>
 */
public class AppConfig extends Configuration {
  public static final String[] HADOOP_CONF_FILENAMES = {
    "core-default.xml", "core-site.xml", "mapred-default.xml", "mapred-site.xml", "hdfs-site.xml"
  };

  public static final String[] HADOOP_PREFIX_VARS = {
    "HADOOP_PREFIX", "HADOOP_HOME"
  };

  public static final String TEMPLETON_HOME_VAR = "TEMPLETON_HOME";
  public static final String WEBHCAT_CONF_DIR = "WEBHCAT_CONF_DIR";

  public static final String[] TEMPLETON_CONF_FILENAMES = {
    "webhcat-default.xml",
    "webhcat-site.xml"
  };

  /**
   * Orderings accepted for {@link #TEMPLETON_JOBSLIST_ORDER}. The constant names are the
   * (lower-case) values expected in the configuration; see {@link #getListJobsOrder()}.
   */
  public enum JobsListOrder {
    lexicographicalasc,
    lexicographicaldesc,
  }

  public static final String PORT                = "templeton.port";
  public static final String JETTY_CONFIGURATION = "templeton.jetty.configuration";
  public static final String EXEC_ENCODING_NAME  = "templeton.exec.encoding";
  public static final String EXEC_ENVS_NAME      = "templeton.exec.envs";
  public static final String EXEC_MAX_BYTES_NAME = "templeton.exec.max-output-bytes";
  public static final String EXEC_MAX_PROCS_NAME = "templeton.exec.max-procs";
  public static final String EXEC_TIMEOUT_NAME   = "templeton.exec.timeout";
  public static final String HADOOP_QUEUE_NAME   = "templeton.hadoop.queue.name";
  public static final String ENABLE_JOB_RECONNECT_DEFAULT = "templeton.enable.job.reconnect.default";
  public static final String HADOOP_NAME         = "templeton.hadoop";
  public static final String HADOOP_CONF_DIR     = "templeton.hadoop.conf.dir";
  public static final String HCAT_NAME           = "templeton.hcat";
  public static final String PYTHON_NAME         = "templeton.python";
  public static final String HIVE_ARCHIVE_NAME   = "templeton.hive.archive";
  public static final String HIVE_PATH_NAME      = "templeton.hive.path";
  public static final String MAPPER_MEMORY_MB    = "templeton.mapper.memory.mb";
  public static final String MR_AM_MEMORY_MB     = "templeton.mr.am.memory.mb";
  public static final String TEMPLETON_JOBSLIST_ORDER = "templeton.jobs.listorder";

  /*
   * These parameters control the maximum number of concurrent job submit/status/list
   * operations in templeton service. If more concurrent requests arrive then
   * they will be rejected with BusyException.
   */
  public static final String JOB_SUBMIT_MAX_THREADS = "templeton.parallellism.job.submit";
  public static final String JOB_STATUS_MAX_THREADS = "templeton.parallellism.job.status";
  public static final String JOB_LIST_MAX_THREADS = "templeton.parallellism.job.list";

  /*
   * These parameters control the maximum time a job submit/status/list operation is
   * executed in templeton service. On time out, the execution is interrupted and
   * TimeoutException is returned to client. On time out
   *   For list and status operation, there is no action needed as they are read requests.
   *   For submit operation, we do best effort to kill the job if it is generated. Enabling
   *     this parameter may have the following side effects
   *     1) There is a possibility of having an active job for some time when the client gets
   *        the response for the submit operation, and a list operation from the client could
   *        potentially show the newly created job which may eventually be killed with no
   *        guarantees.
   *     2) If the submit operation is retried by the client then there is a possibility of
   *        duplicate jobs being triggered.
   *
   * Time out configs should be configured in seconds.
   *
   */
  public static final String JOB_SUBMIT_TIMEOUT   = "templeton.job.submit.timeout";
  public static final String JOB_STATUS_TIMEOUT   = "templeton.job.status.timeout";
  public static final String JOB_LIST_TIMEOUT   = "templeton.job.list.timeout";

  /*
   * If task execution time out is configured for submit operation then job may need to
   * be killed on execution time out. These parameters control the maximum number of
   * retries and retry wait time in seconds for executing the time out task.
   */
  public static final String JOB_TIMEOUT_TASK_RETRY_COUNT = "templeton.job.timeout.task.retry.count";
  public static final String JOB_TIMEOUT_TASK_RETRY_INTERVAL = "templeton.job.timeout.task.retry.interval";

  /**
   * see webhcat-default.xml
   */
  public static final String HIVE_HOME_PATH      = "templeton.hive.home";
  /**
   * see webhcat-default.xml
   */
  public static final String HCAT_HOME_PATH      = "templeton.hcat.home";
  /**
   * is a comma separated list of name=value pairs;
   * In case some value is itself a comma-separated list, the comma needs to
   * be escaped with {@link org.apache.hadoop.util.StringUtils#ESCAPE_CHAR}.  See other usage
   * of escape/unescape methods in {@link org.apache.hadoop.util.StringUtils} in webhcat.
   */
  public static final String HIVE_PROPS_NAME     = "templeton.hive.properties";
  public static final String SQOOP_ARCHIVE_NAME  = "templeton.sqoop.archive";
  public static final String SQOOP_PATH_NAME     = "templeton.sqoop.path";
  public static final String SQOOP_HOME_PATH     = "templeton.sqoop.home";
  public static final String LIB_JARS_NAME       = "templeton.libjars";
  public static final String PIG_ARCHIVE_NAME    = "templeton.pig.archive";
  public static final String PIG_PATH_NAME       = "templeton.pig.path";
  public static final String STREAMING_JAR_NAME  = "templeton.streaming.jar";
  public static final String OVERRIDE_JARS_NAME  = "templeton.override.jars";
  public static final String OVERRIDE_JARS_ENABLED = "templeton.override.enabled";
  public static final String TEMPLETON_CONTROLLER_MR_CHILD_OPTS
    = "templeton.controller.mr.child.opts";
  public static final String TEMPLETON_CONTROLLER_MR_AM_JAVA_OPTS
    = "templeton.controller.mr.am.java.opts";

  public static final String KERBEROS_SECRET     = "templeton.kerberos.secret";
  public static final String KERBEROS_PRINCIPAL  = "templeton.kerberos.principal";
  public static final String KERBEROS_KEYTAB     = "templeton.kerberos.keytab";

  public static final String CALLBACK_INTERVAL_NAME
    = "templeton.callback.retry.interval";
  public static final String CALLBACK_RETRY_NAME
    = "templeton.callback.retry.attempts";

  //Hadoop property names (set by templeton logic)
  public static final String HADOOP_END_INTERVAL_NAME = "job.end.retry.interval";
  public static final String HADOOP_END_RETRY_NAME    = "job.end.retry.attempts";
  public static final String HADOOP_END_URL_NAME      = "job.end.notification.url";
  public static final String HADOOP_SPECULATIVE_NAME
    = "mapred.map.tasks.speculative.execution";
  public static final String HADOOP_CHILD_JAVA_OPTS = "mapred.child.java.opts";
  public static final String HADOOP_MAP_MEMORY_MB = "mapreduce.map.memory.mb";
  public static final String HADOOP_MR_AM_JAVA_OPTS = "yarn.app.mapreduce.am.command-opts";
  public static final String HADOOP_MR_AM_MEMORY_MB = "yarn.app.mapreduce.am.resource.mb";
  public static final String UNIT_TEST_MODE     = "templeton.unit.test.mode";
  /**
   * comma-separated list of artifacts to add to HADOOP_CLASSPATH env var in
   * LaunchMapper before launching Hive command
   */
  public static final String HIVE_EXTRA_FILES = "templeton.hive.extra.files";

  public static final String XSRF_FILTER_ENABLED = "templeton.xsrf.filter.enabled";

  private static final Logger LOG = LoggerFactory.getLogger(AppConfig.class);

  /**
   * Builds the configuration by loading the Templeton and Hadoop config files
   * (see {@link #init()}) and then logs the Hadoop version in use.
   */
  public AppConfig() {
    init();
    LOG.info("Using Hadoop version {}", VersionInfo.getVersion());
  }

  /**
   * Populates this configuration. In order:
   * every environment variable is exposed as {@code env.<NAME>};
   * each file in {@link #TEMPLETON_CONF_FILENAMES} is loaded from the classpath, falling back
   * to the TEMPLETON_HOME directory; each file in {@link #HADOOP_CONF_FILENAMES} is loaded from
   * the directory named by {@link #HADOOP_CONF_DIR}; proxy-user settings and Hive properties
   * are processed; and finally the resulting state is dumped to the log.
   */
  private void init() {
    for (Map.Entry<String, String> e : System.getenv().entrySet()) {
      set("env." + e.getKey(), e.getValue());
    }

    String templetonDir = getTempletonDir();
    for (String fname : TEMPLETON_CONF_FILENAMES) {
      logConfigLoadAttempt(templetonDir + File.separator + fname);
      // The classpath copy wins; the TEMPLETON_HOME copy is only a fallback.
      if (!loadOneClasspathConfig(fname)) {
        loadOneFileConfig(templetonDir, fname);
      }
    }

    // Must be read after the Templeton files are added, since they define HADOOP_CONF_DIR.
    String hadoopConfDir = getHadoopConfDir();
    for (String fname : HADOOP_CONF_FILENAMES) {
      logConfigLoadAttempt(hadoopConfDir + File.separator + fname);
      loadOneFileConfig(hadoopConfDir, fname);
    }
    ProxyUserSupport.processProxyuserConfig(this);
    handleHiveProperties();
    // Building the dump is expensive (it walks the environment, system properties and two
    // configurations), so skip it entirely when INFO logging is disabled.
    if (LOG.isInfoEnabled()) {
      LOG.info(dumpEnvironment());
    }
  }

  /**
   * When auto-shipping hive tar (for example when hive query or pig script
   * is submitted via webhcat), Hive client is launched on some remote node where Hive has not
   * been installed.  We need pass some properties to that client to make sure it connects to the
   * right Metastore, configures Tez, etc.  Here we look for such properties in hive config,
   * and set a comma-separated list of key values in {@link #HIVE_PROPS_NAME}.
   * The HIVE_CONF_HIDDEN_LIST should be handled separately too - this also should be copied from
   * the hive config to the webhcat config if not defined there.
   * Note that the user may choose to set the same keys in HIVE_PROPS_NAME directly, in which case
   * those values should take precedence.
   */
  private void handleHiveProperties() {
    HiveConf hiveConf = new HiveConf();//load hive-site.xml from classpath
    List<String> interestingPropNames = Arrays.asList(
        HiveConf.ConfVars.METASTOREURIS.varname,
        HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL.varname,
        HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI.varname,
        HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname,
        HiveConf.ConfVars.HIVE_CONF_HIDDEN_LIST.varname);

    //each item is in "key=value" format
    List<String> webhcatHiveProps = new ArrayList<String>(hiveProps());
    for (String interestingPropName : interestingPropNames) {
      String value = hiveConf.get(interestingPropName);
      // Only inherit from hive config when webhcat config has not set the key itself.
      if (value != null && !hasProperty(webhcatHiveProps, interestingPropName)) {
        webhcatHiveProps.add(interestingPropName + "=" + value);
      }
    }

    StringBuilder joined = new StringBuilder();
    for (String whProp : webhcatHiveProps) {
      //make sure to escape separator char in prop values
      joined.append(joined.length() > 0 ? "," : "").append(StringUtils.escapeString(whProp));
    }
    set(HIVE_PROPS_NAME, joined.toString());

    // Setting the hidden list
    String hiddenListKey = HiveConf.ConfVars.HIVE_CONF_HIDDEN_LIST.varname;
    String hiddenProperties = hiveConf.get(hiddenListKey);
    if (this.get(hiddenListKey) == null && hiddenProperties != null) {
      set(hiddenListKey, hiddenProperties);
    }
  }

  /**
   * Tells whether {@code keyValues} (entries in "key=value" format) already has an entry
   * for the key {@code name}.
   */
  private static boolean hasProperty(Collection<String> keyValues, String name) {
    String prefix = name + "=";
    for (String keyValue : keyValues) {
      if (keyValue.startsWith(prefix)) {
        return true;
      }
    }
    return false;
  }

  /** Logs, at INFO level, that the config file at {@code path} is about to be looked up. */
  private static void logConfigLoadAttempt(String path) {
    LOG.info("Attempting to load config file: {}", path);
  }

  /**
   * Dumps all env and config state.  Should be called once on WebHCat start up to facilitate
   * support/debugging.  Later it may be worth adding a REST call which will return this data.
   *
   * NOTE(review): the process environment and system properties are included in the dump;
   * confirm that nothing sensitive can end up in the log this way.
   */
  private String dumpEnvironment() {
    StringBuilder sb = TempletonUtils.dumpPropMap("========WebHCat System.getenv()========",
        System.getenv());

    sb.append("START========WebHCat AppConfig.iterator()========: \n");
    HiveConfUtil.dumpConfig(this, sb);
    sb.append("END========WebHCat AppConfig.iterator()========: \n");

    sb.append(TempletonUtils.dumpPropMap("========WebHCat System.getProperties()========",
        System.getProperties()));

    sb.append(HiveConfUtil.dumpConfig(new HiveConf()));
    return sb.toString();
  }

  /**
   * Returns the job list ordering configured in {@link #TEMPLETON_JOBSLIST_ORDER}.
   * The value is matched case-insensitively against the {@link JobsListOrder} constants.
   *
   * @return the configured order, or {@link JobsListOrder#lexicographicalasc} when the
   *         setting is absent or does not name a known order (a warning is logged then)
   */
  public JobsListOrder getListJobsOrder() {
    String requestedOrder = get(TEMPLETON_JOBSLIST_ORDER);
    if (requestedOrder != null) {
      try {
        // Locale.ROOT keeps the mapping stable: with the default locale, e.g. Turkish,
        // 'I' lower-cases to a dotless 'ı' and a valid value would be rejected.
        return JobsListOrder.valueOf(requestedOrder.toLowerCase(Locale.ROOT));
      } catch (IllegalArgumentException ex) {
        LOG.warn("Ignoring setting {} configured with in-correct value {}",
            TEMPLETON_JOBSLIST_ORDER, requestedOrder);
      }
    }

    // Default to lexicographicalasc
    return JobsListOrder.lexicographicalasc;
  }

  /** Starts cleanup on the job state storage instance obtained for this configuration. */
  public void startCleanup() {
    JobState.getStorageInstance(this).startCleanup(this);
  }

  /** Returns the configured value of {@link #HADOOP_CONF_DIR}. */
  public String getHadoopConfDir() {
    return get(HADOOP_CONF_DIR);
  }

  /** Returns the value of the {@link #TEMPLETON_HOME_VAR} environment variable. */
  public static String getTempletonDir() {
    return System.getenv(TEMPLETON_HOME_VAR);
  }

  /** Returns the value of the {@link #WEBHCAT_CONF_DIR} environment variable. */
  public static String getWebhcatConfDir() {
    return System.getenv(WEBHCAT_CONF_DIR);
  }

  /**
   * Adds {@code dir/fname} as a configuration resource if that file exists.
   *
   * @param dir   directory to look in; when {@code null} nothing is loaded
   * @param fname name of the config file
   * @return {@code true} if the file existed and was added, {@code false} otherwise
   */
  private boolean loadOneFileConfig(String dir, String fname) {
    if (dir == null) {
      return false;
    }
    File f = new File(dir, fname);
    if (!f.exists()) {
      return false;
    }
    addResource(new Path(f.getAbsolutePath()));
    LOG.info("loaded config file {}", f.getAbsolutePath());
    return true;
  }

  /**
   * Adds the classpath resource named {@code fname} as a configuration resource if present.
   *
   * @param fname name of the config file to look up via {@code getResource}
   * @return {@code true} if the resource was found and added, {@code false} otherwise
   */
  private boolean loadOneClasspathConfig(String fname) {
    URL x = getResource(fname);
    if (x == null) {
      return false;
    }
    addResource(x);
    LOG.info("loaded config from classpath {}", x);
    return true;
  }

  /** Returns the configured value of {@link #JETTY_CONFIGURATION}. */
  public String jettyConfiguration() { return get(JETTY_CONFIGURATION); }
  /** Returns the configured value of {@link #LIB_JARS_NAME}. */
  public String libJars()          { return get(LIB_JARS_NAME); }
  /** Returns the configured value of {@link #HADOOP_QUEUE_NAME}. */
  public String hadoopQueueName()  { return get(HADOOP_QUEUE_NAME); }
  /** Returns the configured value of {@link #ENABLE_JOB_RECONNECT_DEFAULT}. */
  public String enableJobReconnectDefault() { return get(ENABLE_JOB_RECONNECT_DEFAULT); }
  /** Returns the configured value of {@link #HADOOP_NAME}. */
  public String clusterHadoop()    { return get(HADOOP_NAME); }
  /** Returns the configured value of {@link #HCAT_NAME}. */
  public String clusterHcat()      { return get(HCAT_NAME); }
  /** Returns the configured value of {@link #PYTHON_NAME}. */
  public String clusterPython()    { return get(PYTHON_NAME); }
  /** Returns the configured value of {@link #PIG_PATH_NAME}. */
  public String pigPath()          { return get(PIG_PATH_NAME); }
  /** Returns the configured value of {@link #PIG_ARCHIVE_NAME}. */
  public String pigArchive()       { return get(PIG_ARCHIVE_NAME); }
  /** Returns the configured value of {@link #HIVE_PATH_NAME}. */
  public String hivePath()         { return get(HIVE_PATH_NAME); }
  /** Returns the configured value of {@link #HIVE_ARCHIVE_NAME}. */
  public String hiveArchive()      { return get(HIVE_ARCHIVE_NAME); }
  /** Returns the configured value of {@link #SQOOP_PATH_NAME}. */
  public String sqoopPath()        { return get(SQOOP_PATH_NAME); }
  /** Returns the configured value of {@link #SQOOP_ARCHIVE_NAME}. */
  public String sqoopArchive()     { return get(SQOOP_ARCHIVE_NAME); }
  /** Returns the configured value of {@link #SQOOP_HOME_PATH}. */
  public String sqoopHome()        { return get(SQOOP_HOME_PATH); }
  /** Returns the configured value of {@link #STREAMING_JAR_NAME}. */
  public String streamingJar()     { return get(STREAMING_JAR_NAME); }
  /** Returns the configured value of {@link #KERBEROS_SECRET}. */
  public String kerberosSecret()   { return get(KERBEROS_SECRET); }
  /** Returns the configured value of {@link #KERBEROS_PRINCIPAL}. */
  public String kerberosPrincipal(){ return get(KERBEROS_PRINCIPAL); }
  /** Returns the configured value of {@link #KERBEROS_KEYTAB}. */
  public String kerberosKeytab()   { return get(KERBEROS_KEYTAB); }
  /** Returns the configured value of {@link #TEMPLETON_CONTROLLER_MR_CHILD_OPTS}. */
  public String controllerMRChildOpts() {
    return get(TEMPLETON_CONTROLLER_MR_CHILD_OPTS);
  }
  /** Returns the configured value of {@link #TEMPLETON_CONTROLLER_MR_AM_JAVA_OPTS}. */
  public String controllerAMChildOpts() {
    return get(TEMPLETON_CONTROLLER_MR_AM_JAVA_OPTS);
  }
  /** Returns the configured value of {@link #MAPPER_MEMORY_MB}. */
  public String mapperMemoryMb()   { return get(MAPPER_MEMORY_MB); }
  /** Returns the configured value of {@link #MR_AM_MEMORY_MB}. */
  public String amMemoryMb() {
    return get(MR_AM_MEMORY_MB);
  }

  /**
   * Splits the value of {@link #HIVE_PROPS_NAME} into its individual entries and removes
   * the escaping from each of them.
   *
   * @return the unescaped entries, or an empty list when the split yields {@code null}
   * @see #HIVE_PROPS_NAME
   */
  public Collection<String> hiveProps() {
    String[] props = StringUtils.split(get(HIVE_PROPS_NAME));
    if (props == null) {
      return Collections.emptyList();
    }
    //since raw data was (possibly) escaped to make split work,
    //now need to remove escape chars so they don't interfere with downstream processing
    for (int i = 0; i < props.length; i++) {
      props[i] = TempletonUtils.unEscapeString(props[i]);
    }
    return Arrays.asList(props);
  }

  /**
   * Returns the entries of {@link #OVERRIDE_JARS_NAME} as an array.
   *
   * @return the configured jars, or {@code null} when {@link #OVERRIDE_JARS_ENABLED}
   *         (which defaults to {@code true}) is set to false
   */
  public String[] overrideJars() {
    if (getBoolean(OVERRIDE_JARS_ENABLED, true)) {
      return getStrings(OVERRIDE_JARS_NAME);
    }
    return null;
  }

  /**
   * Returns the raw value of {@link #OVERRIDE_JARS_NAME}.
   *
   * @return the configured value, or {@code null} when {@link #OVERRIDE_JARS_ENABLED}
   *         (which defaults to {@code true}) is set to false
   */
  public String overrideJarsString() {
    if (getBoolean(OVERRIDE_JARS_ENABLED, true)) {
      return get(OVERRIDE_JARS_NAME);
    }
    return null;
  }

  /**
   * Returns the value of {@link ZooKeeperCleanup#ZK_CLEANUP_INTERVAL}; the default is
   * 1000 * 60 * 60 * 12 (12 hours, assuming the unit is milliseconds).
   */
  public long zkCleanupInterval()  {
    return getLong(ZooKeeperCleanup.ZK_CLEANUP_INTERVAL,
        (1000L * 60L * 60L * 12L));
  }

  /**
   * Returns the value of {@link ZooKeeperCleanup#ZK_CLEANUP_MAX_AGE}; the default is
   * 1000 * 60 * 60 * 24 * 7 (7 days, assuming the unit is milliseconds).
   */
  public long zkMaxAge() {
    return getLong(ZooKeeperCleanup.ZK_CLEANUP_MAX_AGE,
        (1000L * 60L * 60L * 24L * 7L));
  }

  /** Returns the configured value of {@link ZooKeeperStorage#ZK_HOSTS}. */
  public String zkHosts() { return get(ZooKeeperStorage.ZK_HOSTS); }

  /** Returns the value of {@link ZooKeeperStorage#ZK_SESSION_TIMEOUT}, 30000 by default. */
  public int zkSessionTimeout() {
    return getInt(ZooKeeperStorage.ZK_SESSION_TIMEOUT, 30000);
  }
}