/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.conf;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URL;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import javax.security.auth.login.LoginException;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.security.UserGroupInformation;
/**
* Hive Configuration.
*/
public class HiveConf extends Configuration {
// Path of the hive jar shipped with jobs — presumably populated from
// ConfVars.HIVEJAR; confirm against the constructor (not visible here).
protected String hiveJar;
// Properties snapshot taken when this conf was built — TODO confirm usage.
protected Properties origProp;
// Auxiliary jars path — presumably mirrors ConfVars.HIVEAUXJARS; verify.
protected String auxJars;
private static final Log l4j = LogFactory.getLog(HiveConf.class);
// Location of hive-site.xml on the CLASSPATH; resolved once in the static
// initializer below, and left null when no hive-site.xml is found.
private static URL hiveSiteURL = null;
// Lazily-created URL of a temp file holding the default ConfVars;
// populated on first call to getConfVarURL().
private static URL confVarURL = null;
static {
  // Prefer the thread context class loader for resource lookup; fall back
  // to the loader that defined HiveConf when no context loader is set.
  ClassLoader loader = Thread.currentThread().getContextClassLoader();
  if (loader == null) {
    loader = HiveConf.class.getClassLoader();
  }
  // hive-default.xml is no longer honored; warn if one is still deployed
  // so operators know it is being ignored.
  URL hiveDefaultURL = loader.getResource("hive-default.xml");
  if (hiveDefaultURL != null) {
    l4j.warn("DEPRECATED: Ignoring hive-default.xml found on the CLASSPATH at "
        + hiveDefaultURL.getPath());
  }
  // Record where hive-site.xml (the site-specific overrides) lives, if anywhere.
  hiveSiteURL = loader.getResource("hive-site.xml");
  if (hiveSiteURL != null) {
    l4j.debug("Using hive-site.xml found on CLASSPATH at " + hiveSiteURL.getPath());
  } else {
    l4j.warn("hive-site.xml not found on CLASSPATH");
  }
}
/**
 * Metastore related options that the db is initialized against. When a conf
 * var in this list is changed, the metastore instance for the CLI will
 * be recreated so that the change will take effect.
 */
public static final HiveConf.ConfVars[] metaVars = {
    HiveConf.ConfVars.METASTOREDIRECTORY,
    HiveConf.ConfVars.METASTOREWAREHOUSE,
    HiveConf.ConfVars.METASTOREURIS,
    HiveConf.ConfVars.METASTORE_MODE,
    HiveConf.ConfVars.METASTORETHRIFTRETRIES,
    HiveConf.ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY,
    HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT,
    HiveConf.ConfVars.METASTOREPWD,
    HiveConf.ConfVars.METASTORECONNECTURLHOOK,
    HiveConf.ConfVars.METASTORECONNECTURLKEY,
    HiveConf.ConfVars.METASTOREATTEMPTS,
    HiveConf.ConfVars.METASTOREINTERVAL,
    HiveConf.ConfVars.METASTOREFORCERELOADCONF,
    HiveConf.ConfVars.METASTORESERVERMINTHREADS,
    HiveConf.ConfVars.METASTORESERVERMAXTHREADS,
    HiveConf.ConfVars.METASTORE_TCP_KEEP_ALIVE,
    HiveConf.ConfVars.METASTORE_INT_ORIGINAL,
    HiveConf.ConfVars.METASTORE_INT_ARCHIVED,
    HiveConf.ConfVars.METASTORE_INT_EXTRACTED,
    HiveConf.ConfVars.METASTORE_KERBEROS_KEYTAB_FILE,
    HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL,
    HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL,
    HiveConf.ConfVars.METASTORE_CACHE_PINOBJTYPES,
    HiveConf.ConfVars.METASTORE_CONNECTION_POOLING_TYPE,
    HiveConf.ConfVars.METASTORE_VALIDATE_TABLES,
    HiveConf.ConfVars.METASTORE_VALIDATE_COLUMNS,
    HiveConf.ConfVars.METASTORE_VALIDATE_CONSTRAINTS,
    HiveConf.ConfVars.METASTORE_STORE_MANAGER_TYPE,
    HiveConf.ConfVars.METASTORE_AUTO_CREATE_SCHEMA,
    HiveConf.ConfVars.METASTORE_AUTO_START_MECHANISM_MODE,
    HiveConf.ConfVars.METASTORE_TRANSACTION_ISOLATION,
    HiveConf.ConfVars.METASTORE_CACHE_LEVEL2,
    HiveConf.ConfVars.METASTORE_CACHE_LEVEL2_TYPE,
    HiveConf.ConfVars.METASTORE_IDENTIFIER_FACTORY,
    HiveConf.ConfVars.METASTORE_PLUGIN_REGISTRY_BUNDLE_CHECK,
    HiveConf.ConfVars.METASTORE_AUTHORIZATION_STORAGE_AUTH_CHECKS,
    HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX,
    HiveConf.ConfVars.METASTORE_EVENT_LISTENERS,
    HiveConf.ConfVars.METASTORE_EVENT_CLEAN_FREQ,
    HiveConf.ConfVars.METASTORE_EVENT_EXPIRY_DURATION,
    HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL,
    HiveConf.ConfVars.METASTORE_END_FUNCTION_LISTENERS,
    HiveConf.ConfVars.METASTORE_PART_INHERIT_TBL_PROPS,
    HiveConf.ConfVars.METASTORE_PRE_EVENT_LISTENERS,
};
/**
 * dbVars are the parameters that can be set per database. If these
 * parameters are set as a database property, when switching to that
 * database, the HiveConf variable will be changed. The change of these
 * parameters will effectively change the DFS and MapReduce clusters
 * for different databases.
 */
public static final HiveConf.ConfVars[] dbVars = {
    HiveConf.ConfVars.HADOOPBIN,
    HiveConf.ConfVars.HADOOPJT,
    HiveConf.ConfVars.METASTOREWAREHOUSE,
    HiveConf.ConfVars.SCRATCHDIR
};
/**
 * ConfVars.
 *
 * These are the default configuration properties for Hive. Each HiveConf
 * object is initialized as follows:
 *
 * 1) Hadoop configuration properties are applied.
 * 2) ConfVar properties with non-null values are overlaid.
 * 3) hive-site.xml properties are overlaid.
 *
 * WARNING: think twice before adding any Hadoop configuration properties
 * with non-null values to this list as they will override any values defined
 * in the underlying Hadoop configuration.
 */
public enum ConfVars {
  // QL execution stuff
  SCRIPTWRAPPER("hive.exec.script.wrapper", null),
  PLAN("hive.exec.plan", ""),
  SCRATCHDIR("hive.exec.scratchdir", "/tmp/hive-" + System.getProperty("user.name")),
  SUBMITVIACHILD("hive.exec.submitviachild", false),
  SCRIPTERRORLIMIT("hive.exec.script.maxerrsize", 100000),
  ALLOWPARTIALCONSUMP("hive.exec.script.allow.partial.consumption", false),
  COMPRESSRESULT("hive.exec.compress.output", false),
  COMPRESSINTERMEDIATE("hive.exec.compress.intermediate", false),
  COMPRESSINTERMEDIATECODEC("hive.intermediate.compression.codec", ""),
  COMPRESSINTERMEDIATETYPE("hive.intermediate.compression.type", ""),
  BYTESPERREDUCER("hive.exec.reducers.bytes.per.reducer", (long) (1000 * 1000 * 1000)),
  MAXREDUCERS("hive.exec.reducers.max", 999),
  PREEXECHOOKS("hive.exec.pre.hooks", ""),
  POSTEXECHOOKS("hive.exec.post.hooks", ""),
  ONFAILUREHOOKS("hive.exec.failure.hooks", ""),
  CLIENTSTATSPUBLISHERS("hive.client.stats.publishers", ""),
  EXECPARALLEL("hive.exec.parallel", false), // parallel query launching
  EXECPARALLETHREADNUMBER("hive.exec.parallel.thread.number", 8),
  HIVESPECULATIVEEXECREDUCERS("hive.mapred.reduce.tasks.speculative.execution", true),
  HIVECOUNTERSPULLINTERVAL("hive.exec.counters.pull.interval", 1000L),
  DYNAMICPARTITIONING("hive.exec.dynamic.partition", true),
  DYNAMICPARTITIONINGMODE("hive.exec.dynamic.partition.mode", "strict"),
  DYNAMICPARTITIONMAXPARTS("hive.exec.max.dynamic.partitions", 1000),
  DYNAMICPARTITIONMAXPARTSPERNODE("hive.exec.max.dynamic.partitions.pernode", 100),
  MAXCREATEDFILES("hive.exec.max.created.files", 100000L),
  DOWNLOADED_RESOURCES_DIR("hive.downloaded.resources.dir", "/tmp/"+System.getProperty("user.name")+"/hive_resources"),
  DEFAULTPARTITIONNAME("hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__"),
  DEFAULT_ZOOKEEPER_PARTITION_NAME("hive.lockmgr.zookeeper.default.partition.name", "__HIVE_DEFAULT_ZOOKEEPER_PARTITION__"),
  // Whether to show a link to the most failed task + debugging tips
  SHOW_JOB_FAIL_DEBUG_INFO("hive.exec.show.job.failure.debug.info", true),
  JOB_DEBUG_CAPTURE_STACKTRACES("hive.exec.job.debug.capture.stacktraces", true),
  JOB_DEBUG_TIMEOUT("hive.exec.job.debug.timeout", 30000),
  TASKLOG_DEBUG_TIMEOUT("hive.exec.tasklog.debug.timeout", 20000),
  OUTPUT_FILE_EXTENSION("hive.output.file.extension", null),
  // should hive determine whether to run in local mode automatically ?
  LOCALMODEAUTO("hive.exec.mode.local.auto", false),
  // if yes:
  // run in local mode only if input bytes is less than this. 128MB by default
  LOCALMODEMAXBYTES("hive.exec.mode.local.auto.inputbytes.max", 134217728L),
  // run in local mode only if number of tasks (for map and reduce each) is
  // less than this
  LOCALMODEMAXINPUTFILES("hive.exec.mode.local.auto.input.files.max", 4),
  // if true, DROP TABLE/VIEW does not fail if table/view doesn't exist and IF EXISTS is
  // not specified
  DROPIGNORESNONEXISTENT("hive.exec.drop.ignorenonexistent", true),
  // Hadoop Configuration Properties
  // Properties with null values are ignored and exist only for the purpose of giving us
  // a symbolic name to reference in the Hive source code. Properties with non-null
  // values will override any values set in the underlying Hadoop configuration.
  HADOOPBIN("hadoop.bin.path", System.getenv("HADOOP_HOME") + "/bin/hadoop"),
  HADOOPCONF("hadoop.config.dir", System.getenv("HADOOP_HOME") + "/conf"),
  HADOOPFS("fs.default.name", null),
  HIVE_FS_HAR_IMPL("fs.har.impl", "org.apache.hadoop.hive.shims.HiveHarFileSystem"),
  HADOOPMAPFILENAME("map.input.file", null),
  HADOOPMAPREDINPUTDIR("mapred.input.dir", null),
  HADOOPMAPREDINPUTDIRRECURSIVE("mapred.input.dir.recursive", false),
  HADOOPJT("mapred.job.tracker", null),
  MAPREDMAXSPLITSIZE("mapred.max.split.size", 256000000L),
  MAPREDMINSPLITSIZE("mapred.min.split.size", 1L),
  // BUGFIX: the per-node/per-rack property names were previously swapped
  // (PERNODE was bound to "...per.rack" and vice versa); the symbols now
  // match the Hadoop property each one names.
  MAPREDMINSPLITSIZEPERNODE("mapred.min.split.size.per.node", 1L),
  MAPREDMINSPLITSIZEPERRACK("mapred.min.split.size.per.rack", 1L),
  // The number of reduce tasks per job. Hadoop sets this value to 1 by default
  // By setting this property to -1, Hive will automatically determine the correct
  // number of reducers.
  HADOOPNUMREDUCERS("mapred.reduce.tasks", -1),
  HADOOPJOBNAME("mapred.job.name", null),
  HADOOPSPECULATIVEEXECREDUCERS("mapred.reduce.tasks.speculative.execution", true),
  // Metastore stuff. Be sure to update HiveConf.metaVars when you add
  // something here!
  METASTOREDIRECTORY("hive.metastore.metadb.dir", ""),
  METASTOREWAREHOUSE("hive.metastore.warehouse.dir", "/user/hive/warehouse"),
  METASTOREURIS("hive.metastore.uris", ""),
  // Number of times to retry a connection to a Thrift metastore server
  METASTORETHRIFTRETRIES("hive.metastore.connect.retries", 5),
  // Number of seconds the client should wait between connection attempts
  METASTORE_CLIENT_CONNECT_RETRY_DELAY("hive.metastore.client.connect.retry.delay", 1),
  // Socket timeout for the client connection (in seconds)
  METASTORE_CLIENT_SOCKET_TIMEOUT("hive.metastore.client.socket.timeout", 20),
  METASTOREPWD("javax.jdo.option.ConnectionPassword", "mine"),
  // Class name of JDO connection url hook
  METASTORECONNECTURLHOOK("hive.metastore.ds.connection.url.hook", ""),
  METASTOREMULTITHREADED("javax.jdo.option.Multithreaded", true),
  // Name of the connection url in the configuration
  METASTORECONNECTURLKEY("javax.jdo.option.ConnectionURL",
      "jdbc:derby:;databaseName=metastore_db;create=true"),
  // Number of attempts to retry connecting after there is a JDO datastore err
  METASTOREATTEMPTS("hive.metastore.ds.retry.attempts", 1),
  // Number of milliseconds to wait between retry attempts
  METASTOREINTERVAL("hive.metastore.ds.retry.interval", 1000),
  // Whether to force reloading of the metastore configuration (including
  // the connection URL, before the next metastore query that accesses the
  // datastore. Once reloaded, this value is reset to false. Used for
  // testing only.
  METASTOREFORCERELOADCONF("hive.metastore.force.reload.conf", false),
  METASTORESERVERMINTHREADS("hive.metastore.server.min.threads", 200),
  METASTORESERVERMAXTHREADS("hive.metastore.server.max.threads", 100000),
  METASTORE_TCP_KEEP_ALIVE("hive.metastore.server.tcp.keepalive", true),
  // Intermediate dir suffixes used for archiving. Not important what they
  // are, as long as collisions are avoided
  METASTORE_INT_ORIGINAL("hive.metastore.archive.intermediate.original",
      "_INTERMEDIATE_ORIGINAL"),
  METASTORE_INT_ARCHIVED("hive.metastore.archive.intermediate.archived",
      "_INTERMEDIATE_ARCHIVED"),
  METASTORE_INT_EXTRACTED("hive.metastore.archive.intermediate.extracted",
      "_INTERMEDIATE_EXTRACTED"),
  METASTORE_KERBEROS_KEYTAB_FILE("hive.metastore.kerberos.keytab.file", ""),
  METASTORE_KERBEROS_PRINCIPAL("hive.metastore.kerberos.principal",
      "hive-metastore/_HOST@EXAMPLE.COM"),
  METASTORE_USE_THRIFT_SASL("hive.metastore.sasl.enabled", false),
  METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS(
      "hive.cluster.delegation.token.store.class",
      "org.apache.hadoop.hive.thrift.MemoryTokenStore"),
  METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_CONNECTSTR(
      "hive.cluster.delegation.token.store.zookeeper.connectString", ""),
  METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_ZNODE(
      "hive.cluster.delegation.token.store.zookeeper.znode", "/hive/cluster/delegation"),
  METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_ACL(
      "hive.cluster.delegation.token.store.zookeeper.acl", ""),
  METASTORE_CACHE_PINOBJTYPES("hive.metastore.cache.pinobjtypes", "Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order"),
  METASTORE_CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType", "DBCP"),
  METASTORE_VALIDATE_TABLES("datanucleus.validateTables", false),
  METASTORE_VALIDATE_COLUMNS("datanucleus.validateColumns", false),
  METASTORE_VALIDATE_CONSTRAINTS("datanucleus.validateConstraints", false),
  METASTORE_STORE_MANAGER_TYPE("datanucleus.storeManagerType", "rdbms"),
  METASTORE_AUTO_CREATE_SCHEMA("datanucleus.autoCreateSchema", true),
  METASTORE_AUTO_START_MECHANISM_MODE("datanucleus.autoStartMechanismMode", "checked"),
  METASTORE_TRANSACTION_ISOLATION("datanucleus.transactionIsolation", "read-committed"),
  METASTORE_CACHE_LEVEL2("datanucleus.cache.level2", false),
  METASTORE_CACHE_LEVEL2_TYPE("datanucleus.cache.level2.type", "none"),
  METASTORE_IDENTIFIER_FACTORY("datanucleus.identifierFactory", "datanucleus"),
  METASTORE_PLUGIN_REGISTRY_BUNDLE_CHECK("datanucleus.plugin.pluginRegistryBundleCheck", "LOG"),
  METASTORE_BATCH_RETRIEVE_MAX("hive.metastore.batch.retrieve.max", 300),
  METASTORE_PRE_EVENT_LISTENERS("hive.metastore.pre.event.listeners", ""),
  METASTORE_EVENT_LISTENERS("hive.metastore.event.listeners", ""),
  // should we do checks against the storage (usually hdfs) for operations like drop_partition
  METASTORE_AUTHORIZATION_STORAGE_AUTH_CHECKS("hive.metastore.authorization.storage.checks", false),
  METASTORE_EVENT_CLEAN_FREQ("hive.metastore.event.clean.freq",0L),
  METASTORE_EVENT_EXPIRY_DURATION("hive.metastore.event.expiry.duration",0L),
  METASTORE_EXECUTE_SET_UGI("hive.metastore.execute.setugi", false),
  METASTORE_MODE("hive.metastore.local",true),
  // Default parameters for creating tables
  NEWTABLEDEFAULTPARA("hive.table.parameters.default", ""),
  METASTORE_RAW_STORE_IMPL("hive.metastore.rawstore.impl",
      "org.apache.hadoop.hive.metastore.ObjectStore"),
  METASTORE_CONNECTION_DRIVER("javax.jdo.option.ConnectionDriverName",
      "org.apache.derby.jdbc.EmbeddedDriver"),
  METASTORE_MANAGER_FACTORY_CLASS("javax.jdo.PersistenceManagerFactoryClass",
      "org.datanucleus.jdo.JDOPersistenceManagerFactory"),
  METASTORE_DETACH_ALL_ON_COMMIT("javax.jdo.option.DetachAllOnCommit", true),
  METASTORE_NON_TRANSACTIONAL_READ("javax.jdo.option.NonTransactionalRead", true),
  METASTORE_CONNECTION_USER_NAME("javax.jdo.option.ConnectionUserName", "APP"),
  METASTORE_END_FUNCTION_LISTENERS("hive.metastore.end.function.listeners", ""),
  METASTORE_PART_INHERIT_TBL_PROPS("hive.metastore.partition.inherit.table.properties",""),
  // CLI
  CLIIGNOREERRORS("hive.cli.errors.ignore", false),
  CLIPRINTCURRENTDB("hive.cli.print.current.db", false),
  HIVE_METASTORE_FS_HANDLER_CLS("hive.metastore.fs.handler.class", "org.apache.hadoop.hive.metastore.HiveMetaStoreFsImpl"),
  // Things we log in the jobconf
  // session identifier
  HIVESESSIONID("hive.session.id", ""),
  // whether session is running in silent mode or not
  HIVESESSIONSILENT("hive.session.silent", false),
  // query being executed (multiple per session)
  HIVEQUERYSTRING("hive.query.string", ""),
  // id of query being executed (multiple per session)
  HIVEQUERYID("hive.query.id", ""),
  // id of the mapred plan being executed (multiple per query)
  HIVEPLANID("hive.query.planid", ""),
  // max jobname length
  HIVEJOBNAMELENGTH("hive.jobname.length", 50),
  // hive jar
  HIVEJAR("hive.jar.path", ""),
  HIVEAUXJARS("hive.aux.jars.path", ""),
  // hive added files and jars
  HIVEADDEDFILES("hive.added.files.path", ""),
  HIVEADDEDJARS("hive.added.jars.path", ""),
  HIVEADDEDARCHIVES("hive.added.archives.path", ""),
  // for hive script operator
  HIVES_AUTO_PROGRESS_TIMEOUT("hive.auto.progress.timeout", 0),
  HIVETABLENAME("hive.table.name", ""),
  HIVEPARTITIONNAME("hive.partition.name", ""),
  HIVESCRIPTAUTOPROGRESS("hive.script.auto.progress", false),
  HIVESCRIPTIDENVVAR("hive.script.operator.id.env.var", "HIVE_SCRIPT_OPERATOR_ID"),
  HIVEMAPREDMODE("hive.mapred.mode", "nonstrict"),
  HIVEALIAS("hive.alias", ""),
  HIVEMAPSIDEAGGREGATE("hive.map.aggr", true),
  HIVEGROUPBYSKEW("hive.groupby.skewindata", false),
  HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000),
  HIVEJOINCACHESIZE("hive.join.cache.size", 25000),
  HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100),
  HIVEMAPJOINROWSIZE("hive.mapjoin.size.key", 10000),
  HIVEMAPJOINCACHEROWS("hive.mapjoin.cache.numrows", 25000),
  HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000),
  HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5),
  HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3),
  HIVEMAPAGGRMEMORYTHRESHOLD("hive.map.aggr.hash.force.flush.memory.threshold", (float) 0.9),
  HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5),
  HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true),
  // for hive udtf operator
  HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false),
  // Default file format for CREATE TABLE statement
  // Options: TextFile, SequenceFile
  HIVEDEFAULTFILEFORMAT("hive.default.fileformat", "TextFile"),
  HIVEQUERYRESULTFILEFORMAT("hive.query.result.fileformat", "TextFile"),
  HIVECHECKFILEFORMAT("hive.fileformat.check", true),
  //Location of Hive run time structured log file
  HIVEHISTORYFILELOC("hive.querylog.location", "/tmp/" + System.getProperty("user.name")),
  // Default serde and record reader for user scripts
  HIVESCRIPTSERDE("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"),
  HIVESCRIPTRECORDREADER("hive.script.recordreader",
      "org.apache.hadoop.hive.ql.exec.TextRecordReader"),
  HIVESCRIPTRECORDWRITER("hive.script.recordwriter",
      "org.apache.hadoop.hive.ql.exec.TextRecordWriter"),
  // HWI
  HIVEHWILISTENHOST("hive.hwi.listen.host", "0.0.0.0"),
  HIVEHWILISTENPORT("hive.hwi.listen.port", "9999"),
  HIVEHWIWARFILE("hive.hwi.war.file", System.getenv("HWI_WAR_FILE")),
  // mapper/reducer memory in local mode
  HIVEHADOOPMAXMEM("hive.mapred.local.mem", 0),
  //small table file size
  HIVESMALLTABLESFILESIZE("hive.mapjoin.smalltable.filesize",25000000L), //25M
  // random number for split sampling
  HIVESAMPLERANDOMNUM("hive.sample.seednumber", 0),
  // test mode in hive mode
  HIVETESTMODE("hive.test.mode", false),
  HIVETESTMODEPREFIX("hive.test.mode.prefix", "test_"),
  HIVETESTMODESAMPLEFREQ("hive.test.mode.samplefreq", 32),
  HIVETESTMODENOSAMPLE("hive.test.mode.nosamplelist", ""),
  HIVEMERGEMAPFILES("hive.merge.mapfiles", true),
  HIVEMERGEMAPREDFILES("hive.merge.mapredfiles", false),
  HIVEMERGEMAPFILESSIZE("hive.merge.size.per.task", (long) (256 * 1000 * 1000)),
  HIVEMERGEMAPFILESAVGSIZE("hive.merge.smallfiles.avgsize", (long) (16 * 1000 * 1000)),
  HIVEMERGERCFILEBLOCKLEVEL("hive.merge.rcfile.block.level", true),
  HIVEMERGEINPUTFORMATBLOCKLEVEL("hive.merge.input.format.block.level",
      "org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat"),
  HIVEMERGECURRENTJOBHASDYNAMICPARTITIONS(
      "hive.merge.current.job.has.dynamic.partitions", false),
  HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true),
  HIVESKEWJOIN("hive.optimize.skewjoin", false),
  HIVECONVERTJOIN("hive.auto.convert.join", false),
  HIVESKEWJOINKEY("hive.skewjoin.key", 100000),
  HIVESKEWJOINMAPJOINNUMMAPTASK("hive.skewjoin.mapjoin.map.tasks", 10000),
  HIVESKEWJOINMAPJOINMINSPLIT("hive.skewjoin.mapjoin.min.split", 33554432L), //32M
  HIVEMERGEMAPONLY("hive.mergejob.maponly", true),
  HIVESENDHEARTBEAT("hive.heartbeat.interval", 1000),
  HIVELIMITMAXROWSIZE("hive.limit.row.max.size", 100000L),
  HIVELIMITOPTLIMITFILE("hive.limit.optimize.limit.file", 10),
  HIVELIMITOPTENABLE("hive.limit.optimize.enable", false),
  HIVELIMITOPTMAXFETCH("hive.limit.optimize.fetch.max", 50000),
  HIVEHASHTABLETHRESHOLD("hive.hashtable.initialCapacity", 100000),
  HIVEHASHTABLELOADFACTOR("hive.hashtable.loadfactor", (float) 0.75),
  HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE("hive.mapjoin.followby.gby.localtask.max.memory.usage", (float) 0.55),
  HIVEHASHTABLEMAXMEMORYUSAGE("hive.mapjoin.localtask.max.memory.usage", (float) 0.90),
  HIVEHASHTABLESCALE("hive.mapjoin.check.memory.rows", (long)100000),
  HIVEDEBUGLOCALTASK("hive.debug.localtask",false),
  HIVEJOBPROGRESS("hive.task.progress", false),
  HIVEINPUTFORMAT("hive.input.format", "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"),
  HIVEENFORCEBUCKETING("hive.enforce.bucketing", false),
  HIVEENFORCESORTING("hive.enforce.sorting", false),
  HIVEPARTITIONER("hive.mapred.partitioner", "org.apache.hadoop.hive.ql.io.DefaultHivePartitioner"),
  HIVESCRIPTOPERATORTRUST("hive.exec.script.trust", false),
  HIVEROWOFFSET("hive.exec.rowoffset", false),
  HIVE_COMBINE_INPUT_FORMAT_SUPPORTS_SPLITTABLE("hive.hadoop.supports.splittable.combineinputformat", false),
  // Optimizer
  HIVEOPTCP("hive.optimize.cp", true), // column pruner
  HIVEOPTINDEXFILTER("hive.optimize.index.filter", false), // automatically use indexes
  HIVEINDEXAUTOUPDATE("hive.optimize.index.autoupdate", false), //automatically update stale indexes
  HIVEOPTPPD("hive.optimize.ppd", true), // predicate pushdown
  HIVEPPDRECOGNIZETRANSITIVITY("hive.ppd.recognizetransivity", true), // predicate pushdown
  HIVEPPDREMOVEDUPLICATEFILTERS("hive.ppd.remove.duplicatefilters", true),
  HIVEMETADATAONLYQUERIES("hive.optimize.metadataonly", true),
  // push predicates down to storage handlers
  HIVEOPTPPD_STORAGE("hive.optimize.ppd.storage", true),
  HIVEOPTGROUPBY("hive.optimize.groupby", true), // optimize group by
  HIVEOPTBUCKETMAPJOIN("hive.optimize.bucketmapjoin", false), // optimize bucket map join
  HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false), // try to use sorted merge bucket map join
  HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),
  // Indexes
  HIVEOPTINDEXFILTER_COMPACT_MINSIZE("hive.optimize.index.filter.compact.minsize", (long) 5 * 1024 * 1024 * 1024), // 5G
  HIVEOPTINDEXFILTER_COMPACT_MAXSIZE("hive.optimize.index.filter.compact.maxsize", (long) -1), // infinity
  HIVE_INDEX_COMPACT_QUERY_MAX_ENTRIES("hive.index.compact.query.max.entries", (long) 10000000), // 10M
  HIVE_INDEX_COMPACT_QUERY_MAX_SIZE("hive.index.compact.query.max.size", (long) 10 * 1024 * 1024 * 1024), // 10G
  HIVE_INDEX_COMPACT_BINARY_SEARCH("hive.index.compact.binary.search", true),
  // Statistics
  HIVESTATSAUTOGATHER("hive.stats.autogather", true),
  HIVESTATSDBCLASS("hive.stats.dbclass",
      "jdbc:derby"), // other options are jdbc:mysql and hbase as defined in StatsSetupConst.java
  HIVESTATSJDBCDRIVER("hive.stats.jdbcdriver",
      "org.apache.derby.jdbc.EmbeddedDriver"), // JDBC driver specific to the dbclass
  HIVESTATSDBCONNECTIONSTRING("hive.stats.dbconnectionstring",
      "jdbc:derby:;databaseName=TempStatsStore;create=true"), // automatically create database
  HIVE_STATS_DEFAULT_PUBLISHER("hive.stats.default.publisher",
      ""), // default stats publisher if none of JDBC/HBase is specified
  HIVE_STATS_DEFAULT_AGGREGATOR("hive.stats.default.aggregator",
      ""), // default stats aggregator if none of JDBC/HBase is specified
  HIVE_STATS_JDBC_TIMEOUT("hive.stats.jdbc.timeout",
      30), // default timeout in sec for JDBC connection & SQL statements
  HIVE_STATS_ATOMIC("hive.stats.atomic",
      false), // whether to update metastore stats only if all stats are available
  HIVE_STATS_RETRIES_MAX("hive.stats.retries.max",
      0), // maximum # of retries to insert/select/delete the stats DB
  HIVE_STATS_RETRIES_WAIT("hive.stats.retries.wait",
      3000), // # milliseconds to wait before the next retry
  HIVE_STATS_COLLECT_RAWDATASIZE("hive.stats.collect.rawdatasize", true),
  // should the raw data size be collected when analyzing tables
  CLIENT_STATS_COUNTERS("hive.client.stats.counters", ""),
  // Subset of counters that should be of interest for hive.client.stats.publishers
  // (when one wants to limit their publishing). Non-display names should be used.
  // Concurrency
  HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", false),
  HIVE_LOCK_MANAGER("hive.lock.manager", "org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager"),
  HIVE_LOCK_NUMRETRIES("hive.lock.numretries", 100),
  HIVE_UNLOCK_NUMRETRIES("hive.unlock.numretries", 10),
  HIVE_LOCK_SLEEP_BETWEEN_RETRIES("hive.lock.sleep.between.retries", 60),
  HIVE_LOCK_MAPRED_ONLY("hive.lock.mapred.only.operation", false),
  HIVE_ZOOKEEPER_QUORUM("hive.zookeeper.quorum", ""),
  HIVE_ZOOKEEPER_CLIENT_PORT("hive.zookeeper.client.port", "2181"),
  HIVE_ZOOKEEPER_SESSION_TIMEOUT("hive.zookeeper.session.timeout", 600*1000),
  HIVE_ZOOKEEPER_NAMESPACE("hive.zookeeper.namespace", "hive_zookeeper_namespace"),
  HIVE_ZOOKEEPER_CLEAN_EXTRA_NODES("hive.zookeeper.clean.extra.nodes", false),
  // For HBase storage handler
  HIVE_HBASE_WAL_ENABLED("hive.hbase.wal.enabled", true),
  // For har files
  HIVEARCHIVEENABLED("hive.archive.enabled", false),
  HIVEHARPARENTDIRSETTABLE("hive.archive.har.parentdir.settable", false),
  //Enable/Disable gbToIdx rewrite rule
  HIVEOPTGBYUSINGINDEX("hive.optimize.index.groupby", false),
  HIVEOUTERJOINSUPPORTSFILTERS("hive.outerjoin.supports.filters", true),
  // Serde for FetchTask
  HIVEFETCHOUTPUTSERDE("hive.fetch.output.serde", "org.apache.hadoop.hive.serde2.DelimitedJSONSerDe"),
  // Hive Variables
  HIVEVARIABLESUBSTITUTE("hive.variable.substitute", true),
  SEMANTIC_ANALYZER_HOOK("hive.semantic.analyzer.hook", ""),
  HIVE_AUTHORIZATION_ENABLED("hive.security.authorization.enabled", false),
  HIVE_AUTHORIZATION_MANAGER("hive.security.authorization.manager",
      "org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider"),
  HIVE_AUTHENTICATOR_MANAGER("hive.security.authenticator.manager",
      "org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator"),
  HIVE_AUTHORIZATION_TABLE_USER_GRANTS("hive.security.authorization.createtable.user.grants", ""),
  HIVE_AUTHORIZATION_TABLE_GROUP_GRANTS("hive.security.authorization.createtable.group.grants",
      ""),
  HIVE_AUTHORIZATION_TABLE_ROLE_GRANTS("hive.security.authorization.createtable.role.grants", ""),
  HIVE_AUTHORIZATION_TABLE_OWNER_GRANTS("hive.security.authorization.createtable.owner.grants",
      ""),
  // Print column names in output
  HIVE_CLI_PRINT_HEADER("hive.cli.print.header", false),
  HIVE_ERROR_ON_EMPTY_PARTITION("hive.error.on.empty.partition", false),
  HIVE_INDEX_IGNORE_HDFS_LOC("hive.index.compact.file.ignore.hdfs", false),
  HIVE_EXIM_URI_SCHEME_WL("hive.exim.uri.scheme.whitelist", "hdfs,pfile"),
  // temporary variable for testing. This is added just to turn off this feature in case of a bug in
  // deployment. It has not been documented in hive-default.xml intentionally, this should be removed
  // once the feature is stable
  HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS("hive.mapper.cannot.span.multiple.partitions", false),
  HIVE_REWORK_MAPREDWORK("hive.rework.mapredwork", false),
  HIVE_CONCATENATE_CHECK_INDEX ("hive.exec.concatenate.check.index", true),
  HIVE_IO_EXCEPTION_HANDLERS("hive.io.exception.handlers", ""),
  //prefix used to auto generated column aliases
  HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL("hive.autogen.columnalias.prefix.label", "_c"),
  HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME(
      "hive.autogen.columnalias.prefix.includefuncname", false),
  // The class responsible for logging client side performance metrics
  // Must be a subclass of org.apache.hadoop.hive.ql.log.PerfLogger
  HIVE_PERF_LOGGER("hive.exec.perf.logger", "org.apache.hadoop.hive.ql.log.PerfLogger"),
  // Whether to delete the scratchdir while startup
  HIVE_START_CLEANUP_SCRATCHDIR("hive.start.cleanup.scratchdir", false),
  HIVE_INSERT_INTO_MULTILEVEL_DIRS("hive.insert.into.multilevel.dirs", false),
  HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS("hive.warehouse.subdir.inherit.perms", false),
  // whether insert into external tables is allowed
  HIVE_INSERT_INTO_EXTERNAL_TABLES("hive.insert.into.external.tables", true),
  // A comma separated list of hooks which implement HiveDriverRunHook and will be run at the
  // beginning and end of Driver.run, these will be run in the order specified
  HIVE_DRIVER_RUN_HOOKS("hive.exec.driver.run.hooks", ""),
  HIVE_DDL_OUTPUT_FORMAT("hive.ddl.output.format", null),
  //TODO(sameerag): Need to deprecate this for release 0.1
  //@sameerag: QuicksilverDB configuration parameters
  QUICKSILVER_SAMPLING_ENABLED("quicksilver.sample.enable", true),
  SAMPLES_PER_TABLE("quicksilver.sample.num", 3),
  SAMPLE_SIZE_LEVEL_1("quicksilver.sample.level.size.1", 2),
  SAMPLE_SIZE_LEVEL_2("quicksilver.sample.level.size.2", 4),
  SAMPLE_SIZE_LEVEL_3("quicksilver.sample.level.size.3", 8),
  SAMPLE_SIZE_LEVEL_4("quicksilver.sample.level.size.4", 16),
  SAMPLE_SIZE_LEVEL_5("quicksilver.sample.level.size.5", 32),
  ;

  // The property name as it appears in configuration files.
  public final String varname;
  // String form of the default (null only for String vars declared with null).
  public final String defaultVal;
  // Typed defaults; only the one matching valClass is meaningful, the
  // others are sentinel values (-1 / false).
  public final int defaultIntVal;
  public final long defaultLongVal;
  public final float defaultFloatVal;
  // The declared type of the variable: String, Integer, Long, Float or Boolean.
  public final Class<?> valClass;
  public final boolean defaultBoolVal;

  /** String-typed variable; defaultVal may be null (meaning "no default"). */
  ConfVars(String varname, String defaultVal) {
    this.varname = varname;
    this.valClass = String.class;
    this.defaultVal = defaultVal;
    this.defaultIntVal = -1;
    this.defaultLongVal = -1;
    this.defaultFloatVal = -1;
    this.defaultBoolVal = false;
  }

  /** Integer-typed variable. */
  ConfVars(String varname, int defaultIntVal) {
    this.varname = varname;
    this.valClass = Integer.class;
    this.defaultVal = Integer.toString(defaultIntVal);
    this.defaultIntVal = defaultIntVal;
    this.defaultLongVal = -1;
    this.defaultFloatVal = -1;
    this.defaultBoolVal = false;
  }

  /** Long-typed variable. */
  ConfVars(String varname, long defaultLongVal) {
    this.varname = varname;
    this.valClass = Long.class;
    this.defaultVal = Long.toString(defaultLongVal);
    this.defaultIntVal = -1;
    this.defaultLongVal = defaultLongVal;
    this.defaultFloatVal = -1;
    this.defaultBoolVal = false;
  }

  /** Float-typed variable. */
  ConfVars(String varname, float defaultFloatVal) {
    this.varname = varname;
    this.valClass = Float.class;
    this.defaultVal = Float.toString(defaultFloatVal);
    this.defaultIntVal = -1;
    this.defaultLongVal = -1;
    this.defaultFloatVal = defaultFloatVal;
    this.defaultBoolVal = false;
  }

  /** Boolean-typed variable. */
  ConfVars(String varname, boolean defaultBoolVal) {
    this.varname = varname;
    this.valClass = Boolean.class;
    this.defaultVal = Boolean.toString(defaultBoolVal);
    this.defaultIntVal = -1;
    this.defaultLongVal = -1;
    this.defaultFloatVal = -1;
    this.defaultBoolVal = defaultBoolVal;
  }

  /** Returns the configuration property name, not the enum constant name. */
  @Override
  public String toString() {
    return varname;
  }
}
/**
 * Writes the default ConfVars out to a temporary File and returns
 * a URL pointing to the temporary file.
 * We need this in order to initialize the ConfVar properties
 * in the underlying Configuration object using the addResource(URL)
 * method.
 *
 * Using Configuration.addResource(InputStream) would be a preferable
 * approach, but it turns out that method is broken since Configuration
 * tries to read the entire contents of the same InputStream repeatedly.
 */
private static synchronized URL getConfVarURL() {
  if (confVarURL == null) {
    try {
      Configuration conf = new Configuration();
      File confVarFile = File.createTempFile("hive-default-", ".xml");
      confVarFile.deleteOnExit();
      applyDefaultNonNullConfVars(conf);
      // Close the stream even when writeXml throws so the file descriptor
      // is not leaked (the original code skipped close() on failure).
      FileOutputStream fout = new FileOutputStream(confVarFile);
      try {
        conf.writeXml(fout);
      } finally {
        fout.close();
      }
      confVarURL = confVarFile.toURI().toURL();
    } catch (Exception e) {
      // We're pretty screwed if we can't load the default conf vars
      throw new RuntimeException("Failed to initialize default Hive configuration variables!", e);
    }
  }
  return confVarURL;
}
/**
 * Reads {@code var} from {@code conf} as an int, falling back to the
 * variable's built-in default when it is unset.
 */
public static int getIntVar(Configuration conf, ConfVars var) {
  assert var.valClass == Integer.class;
  int fallback = var.defaultIntVal;
  return conf.getInt(var.varname, fallback);
}
/** Stores {@code val} into {@code conf} under {@code var}'s property name. */
public static void setIntVar(Configuration conf, ConfVars var, int val) {
  assert var.valClass == Integer.class;
  conf.setInt(var.varname, val);
}
/** Convenience overload of {@link #getIntVar(Configuration, ConfVars)} on this conf. */
public int getIntVar(ConfVars var) {
  return HiveConf.getIntVar(this, var);
}
/** Convenience overload of {@link #setIntVar(Configuration, ConfVars, int)} on this conf. */
public void setIntVar(ConfVars var, int val) {
  HiveConf.setIntVar(this, var, val);
}
/**
 * Reads {@code var} from {@code conf} as a long, falling back to the
 * variable's built-in default when it is unset.
 */
public static long getLongVar(Configuration conf, ConfVars var) {
  assert var.valClass == Long.class;
  long fallback = var.defaultLongVal;
  return conf.getLong(var.varname, fallback);
}
/** Reads {@code var} as a long, using the caller-supplied default when unset. */
public static long getLongVar(Configuration conf, ConfVars var, long defaultVal) {
  String key = var.varname;
  return conf.getLong(key, defaultVal);
}
/** Stores {@code val} into {@code conf} under {@code var}'s property name. */
public static void setLongVar(Configuration conf, ConfVars var, long val) {
  assert var.valClass == Long.class;
  conf.setLong(var.varname, val);
}
/** Convenience overload of {@link #getLongVar(Configuration, ConfVars)} on this conf. */
public long getLongVar(ConfVars var) {
  return HiveConf.getLongVar(this, var);
}
/** Convenience overload of {@link #setLongVar(Configuration, ConfVars, long)} on this conf. */
public void setLongVar(ConfVars var, long val) {
  HiveConf.setLongVar(this, var, val);
}
/**
 * Reads {@code var} from {@code conf} as a float, falling back to the
 * variable's built-in default when it is unset.
 */
public static float getFloatVar(Configuration conf, ConfVars var) {
  assert var.valClass == Float.class;
  float fallback = var.defaultFloatVal;
  return conf.getFloat(var.varname, fallback);
}
/** Reads {@code var} as a float, using the caller-supplied default when unset. */
public static float getFloatVar(Configuration conf, ConfVars var, float defaultVal) {
  String key = var.varname;
  return conf.getFloat(key, defaultVal);
}
/**
 * Stores {@code val} into {@code conf} under {@code var}'s property name.
 * Delegates to the Hadoop shim so the call works across Hadoop versions.
 */
public static void setFloatVar(Configuration conf, ConfVars var, float val) {
  assert var.valClass == Float.class;
  ShimLoader.getHadoopShims().setFloatConf(conf, var.varname, val);
}
/** Convenience overload of {@link #getFloatVar(Configuration, ConfVars)} on this conf. */
public float getFloatVar(ConfVars var) {
  return HiveConf.getFloatVar(this, var);
}
/** Convenience overload of {@link #setFloatVar(Configuration, ConfVars, float)} on this conf. */
public void setFloatVar(ConfVars var, float val) {
  HiveConf.setFloatVar(this, var, val);
}
/**
 * Reads {@code var} from {@code conf} as a boolean, falling back to the
 * variable's built-in default when it is unset.
 */
public static boolean getBoolVar(Configuration conf, ConfVars var) {
  assert var.valClass == Boolean.class;
  boolean fallback = var.defaultBoolVal;
  return conf.getBoolean(var.varname, fallback);
}
/** Reads {@code var} as a boolean, using the caller-supplied default when unset. */
public static boolean getBoolVar(Configuration conf, ConfVars var, boolean defaultVal) {
  String key = var.varname;
  return conf.getBoolean(key, defaultVal);
}
/** Stores {@code val} into {@code conf} under {@code var}'s property name. */
public static void setBoolVar(Configuration conf, ConfVars var, boolean val) {
  assert var.valClass == Boolean.class;
  conf.setBoolean(var.varname, val);
}
/** Convenience overload of {@link #getBoolVar(Configuration, ConfVars)} on this conf. */
public boolean getBoolVar(ConfVars var) {
  return HiveConf.getBoolVar(this, var);
}
/** Convenience overload of {@link #setBoolVar(Configuration, ConfVars, boolean)} on this conf. */
public void setBoolVar(ConfVars var, boolean val) {
  HiveConf.setBoolVar(this, var, val);
}
/**
 * Reads {@code var} from {@code conf} as a string, falling back to the
 * variable's built-in default when it is unset.
 */
public static String getVar(Configuration conf, ConfVars var) {
  assert var.valClass == String.class;
  String fallback = var.defaultVal;
  return conf.get(var.varname, fallback);
}
/** Reads {@code var} as a string, using the caller-supplied default when unset. */
public static String getVar(Configuration conf, ConfVars var, String defaultVal) {
  String key = var.varname;
  return conf.get(key, defaultVal);
}
/** Stores {@code val} into {@code conf} under {@code var}'s property name. */
public static void setVar(Configuration conf, ConfVars var, String val) {
  assert var.valClass == String.class;
  conf.set(var.varname, val);
}
/** Convenience overload of {@link #getVar(Configuration, ConfVars)} on this conf. */
public String getVar(ConfVars var) {
  return HiveConf.getVar(this, var);
}
/** Convenience overload of {@link #setVar(Configuration, ConfVars, String)} on this conf. */
public void setVar(ConfVars var, String val) {
  HiveConf.setVar(this, var, val);
}
/**
 * Prints every known ConfVar as {@code name=value} to the given stream;
 * unset variables are printed with an empty value.
 */
public void logVars(PrintStream ps) {
  for (ConfVars confVar : ConfVars.values()) {
    String value = get(confVar.varname);
    ps.println(confVar.varname + "=" + (value == null ? "" : value));
  }
}
/** Creates a HiveConf, locating the hive jar from this class's own loader. */
public HiveConf() {
  // implicit super() — explicit call in the original was redundant
  initialize(this.getClass());
}
/** Creates a HiveConf, locating the hive jar from {@code cls}'s loader. */
public HiveConf(Class<?> cls) {
  // implicit super() — explicit call in the original was redundant
  initialize(cls);
}
/**
 * Creates a HiveConf seeded from an existing Hadoop configuration,
 * locating the hive jar from {@code cls}'s loader.
 */
public HiveConf(Configuration other, Class<?> cls) {
  super(other);
  initialize(cls);
}
/**
 * Copy constructor: duplicates the Hadoop configuration, the jar
 * locations, and the snapshot of the original properties.
 */
public HiveConf(HiveConf other) {
  super(other);
  this.hiveJar = other.hiveJar;
  this.auxJars = other.auxJars;
  this.origProp = (Properties) other.origProp.clone();
}
/** @return a snapshot of every property currently set on this configuration. */
public Properties getAllProperties() {
  return HiveConf.getProperties(this);
}
/**
 * Copies every key/value pair of a Hadoop configuration into a new
 * {@link Properties} instance.
 */
private static Properties getProperties(Configuration conf) {
  Properties props = new Properties();
  // Configuration is Iterable<Map.Entry<String, String>>
  for (Map.Entry<String, String> entry : conf) {
    props.setProperty(entry.getKey(), entry.getValue());
  }
  return props;
}
/**
 * Shared constructor body: snapshots the pre-Hive properties, then layers
 * (in order) the built-in ConfVars defaults, hive-site.xml, the BlinkDB
 * quicksilver-conf.xml, and finally any System properties named after a
 * ConfVar.  The order matters: each later resource overrides the earlier
 * ones, and origProp must be captured before any overlay is applied.
 */
private void initialize(Class<?> cls) {
hiveJar = (new JobConf(cls)).getJar();
// preserve the original configuration
origProp = getAllProperties();
// Overlay the ConfVars. Note that this ignores ConfVars with null values
addResource(getConfVarURL());
// Overlay hive-site.xml if it exists
if (hiveSiteURL != null) {
addResource(hiveSiteURL);
}
//@sameerag: Adding BlinkDB config file
URL qsconfurl = getClassLoader().getResource("quicksilver-conf.xml");
if (qsconfurl == null) {
l4j.debug("quicksilver-conf.xml not found.");
} else {
addResource(qsconfurl);
}
// if hadoop configuration files are already in our path - then define
// the containing directory as the configuration directory
URL hadoopconfurl = getClassLoader().getResource("core-site.xml");
if (hadoopconfurl != null) {
String conffile = hadoopconfurl.getPath();
// assumes the URL path contains at least one '/' (true for classpath
// resources); the substring is the resource's parent directory
this.setVar(ConfVars.HADOOPCONF, conffile.substring(0, conffile.lastIndexOf('/')));
}
// Overlay the values of any system properties whose names appear in the list of ConfVars
applySystemProperties();
// if the running class was loaded directly (through eclipse) rather than through a
// jar then this would be needed
if (hiveJar == null) {
hiveJar = this.get(ConfVars.HIVEJAR.varname);
}
if (auxJars == null) {
auxJars = this.get(ConfVars.HIVEAUXJARS.varname);
}
}
/**
 * Overlays onto this configuration every System property whose name is a
 * known ConfVar and whose value is non-null and non-empty (the filtering
 * is done by {@link #getConfSystemProperties()}).
 */
private void applySystemProperties() {
  for (Entry<String, String> prop : getConfSystemProperties().entrySet()) {
    this.set(prop.getKey(), prop.getValue());
  }
}
/**
 * Returns a mapping from config variable name to value for every ConfVar
 * that has been set as a System property with a non-empty value.
 */
public static Map<String, String> getConfSystemProperties() {
  Map<String, String> systemProperties = new HashMap<String, String>();
  for (ConfVars oneVar : ConfVars.values()) {
    // Read the property exactly once: the original called
    // System.getProperty three times, so a concurrent change between the
    // null check and the read could store a stale or empty value.
    String value = System.getProperty(oneVar.varname);
    if (value != null && value.length() > 0) {
      systemProperties.put(oneVar.varname, value);
    }
  }
  return systemProperties;
}
/**
 * Sets every ConfVar that has a non-null default onto {@code conf},
 * logging (at debug) any existing Hadoop value that gets overridden.
 */
private static void applyDefaultNonNullConfVars(Configuration conf) {
  for (ConfVars var : ConfVars.values()) {
    String defaultValue = var.defaultVal;
    if (defaultValue == null) {
      // ConfVars with null defaults never override anything
      continue;
    }
    String existing = conf.get(var.varname);
    if (existing != null) {
      l4j.debug("Overriding Hadoop conf property " + var.varname + "='" + existing
          + "' with Hive default value '" + defaultValue + "'");
    }
    conf.set(var.varname, defaultValue);
  }
}
/**
 * Returns the properties whose current value differs from the snapshot
 * taken in {@code origProp} at construction time (including properties
 * that were added after construction).
 */
public Properties getChangedProperties() {
  Properties changed = new Properties();
  Properties current = getAllProperties();
  for (String name : current.stringPropertyNames()) {
    String newValue = current.getProperty(name);
    if (!StringUtils.equals(origProp.getProperty(name), newValue)) {
      changed.setProperty(name, newValue);
    }
  }
  return changed;
}
/**
 * @return the filesystem path of the hive-site.xml found on the classpath.
 * NOTE(review): hiveSiteURL may be null when no hive-site.xml is on the
 * classpath, in which case this throws NullPointerException — confirm
 * callers only invoke it when the file exists.
 */
public String getHiveSitePath() {
return hiveSiteURL.getPath();
}
/** @return the path of the hive jar this configuration was created from. */
public String getJar() {
  return this.hiveJar;
}
/** @return the auxiliary jar paths, or null when none were configured. */
public String getAuxJars() {
  return this.auxJars;
}
/**
 * Sets the auxiliary jar paths, keeping the cached field and the
 * HIVEAUXJARS property in sync.
 *
 * @param auxJars the auxJars to set
 */
public void setAuxJars(String auxJars) {
  this.auxJars = auxJars;
  this.setVar(ConfVars.HIVEAUXJARS, auxJars);
}
/**
 * @return the user name from the UGI derived from this configuration
 *         (e.g. the hadoop.job.ugi param or the current system user)
 * @throws IOException if login fails
 */
public String getUser() throws IOException {
  try {
    return ShimLoader.getHadoopShims().getUGIForConf(this).getUserName();
  } catch (LoginException le) {
    throw new IOException(le);
  }
}
/** Builds the internal name of the column at {@code pos}, e.g. "_col0". */
public static String getColumnInternalName(int pos) {
  return "_col" + Integer.toString(pos);
}
/**
 * Inverse of {@link #getColumnInternalName(int)}: extracts the column
 * position from an internal name such as {@code "_col7"}.
 *
 * The previous implementation inspected only the final character, so a
 * multi-digit name like "_col15" was mis-reported as 5; this version
 * parses the entire trailing digit run and also tolerates empty input.
 *
 * @param internalName internal column name
 * @return the trailing decimal position, or -1 when the name does not
 *         end in a digit (or is empty)
 */
public static int getPositionFromInternalName(String internalName) {
  int end = internalName.length();
  int start = end;
  while (start > 0 && Character.isDigit(internalName.charAt(start - 1))) {
    start--;
  }
  if (start == end) {
    return -1; // no trailing digits (also covers the empty string)
  }
  return Integer.parseInt(internalName.substring(start));
}
}