/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.hive.hcatalog.common; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat; public final class HCatConstants { public static final String HIVE_RCFILE_IF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileInputFormat"; public static final String HIVE_RCFILE_OF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileOutputFormat"; public static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class.getName(); public static final String SEQUENCEFILE_OUTPUT = SequenceFileOutputFormat.class.getName(); public static final String HCAT_PIG_STORAGE_CLASS = "org.apache.pig.builtin.PigStorage"; public static final String HCAT_PIG_LOADER = "hcat.pig.loader"; public static final String HCAT_PIG_LOADER_LOCATION_SET = HCAT_PIG_LOADER + ".location.set"; public static final String HCAT_PIG_LOADER_ARGS = "hcat.pig.loader.args"; public static final String HCAT_PIG_STORER = "hcat.pig.storer"; public static final String HCAT_PIG_STORER_ARGS = "hcat.pig.storer.args"; public static final String HCAT_PIG_ARGS_DELIMIT = "hcat.pig.args.delimiter"; public static final String HCAT_PIG_ARGS_DELIMIT_DEFAULT = ","; public static final String HCAT_PIG_STORER_LOCATION_SET = HCAT_PIG_STORER + ".location.set"; public static final String HCAT_PIG_INNER_TUPLE_NAME = "hcat.pig.inner.tuple.name"; public static final String HCAT_PIG_INNER_TUPLE_NAME_DEFAULT = "innertuple"; public static final String HCAT_PIG_INNER_FIELD_NAME = "hcat.pig.inner.field.name"; public static final String HCAT_PIG_INNER_FIELD_NAME_DEFAULT = "innerfield"; /** * {@value} (default: null) * When the property is set in the UDFContext of the org.apache.hive.hcatalog.pig.HCatStorer, HCatStorer writes * to the location it specifies instead of the default HCatalog location format. An example can be found * in org.apache.hive.hcatalog.pig.HCatStorerWrapper. */ public static final String HCAT_PIG_STORER_EXTERNAL_LOCATION = HCAT_PIG_STORER + ".external.location"; //The keys used to store info into the job Configuration public static final String HCAT_KEY_BASE = "mapreduce.lib.hcat"; public static final String HCAT_KEY_OUTPUT_SCHEMA = HCAT_KEY_BASE + ".output.schema"; public static final String HCAT_KEY_JOB_INFO = HCAT_KEY_BASE + ".job.info"; // hcatalog specific configurations, that can be put in hive-site.xml public static final String HCAT_HIVE_CLIENT_EXPIRY_TIME = "hcatalog.hive.client.cache.expiry.time"; // config parameter that suggests to hcat that metastore clients not be cached - default is false // this parameter allows highly-parallel hcat usescases to not gobble up too many connections that // sit in the cache, while not in use. public static final String HCAT_HIVE_CLIENT_DISABLE_CACHE = "hcatalog.hive.client.cache.disabled"; // Indicates the initial capacity of the cache. public static final String HCAT_HIVE_CLIENT_CACHE_INITIAL_CAPACITY = "hcatalog.hive.client.cache.initial.capacity"; // Indicates the maximum capacity of the cache. Minimum value should be the number of threads. public static final String HCAT_HIVE_CLIENT_CACHE_MAX_CAPACITY = "hcatalog.hive.client.cache.max.capacity"; // Indicates whether cache statistics should be collected. public static final String HCAT_HIVE_CLIENT_CACHE_STATS_ENABLED = "hcatalog.hive.client.cache.stats.enabled"; private HCatConstants() { // restrict instantiation } public static final String HCAT_TABLE_SCHEMA = "hcat.table.schema"; public static final String HCAT_METASTORE_URI = HiveConf.ConfVars.METASTOREURIS.varname; public static final String HCAT_PERMS = "hcat.perms"; public static final String HCAT_GROUP = "hcat.group"; public static final String HCAT_CREATE_TBL_NAME = "hcat.create.tbl.name"; public static final String HCAT_CREATE_DB_NAME = "hcat.create.db.name"; public static final String HCAT_METASTORE_PRINCIPAL = HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.varname; /** * The desired number of input splits produced for each partition. When the * input files are large and few, we want to split them into many splits, * so as to increase the parallelizm of loading the splits. Try also two * other parameters, mapred.min.split.size and mapred.max.split.size for * hadoop 1.x, or mapreduce.input.fileinputformat.split.minsize and * mapreduce.input.fileinputformat.split.maxsize in hadoop 2.x to * control the number of input splits. */ public static final String HCAT_DESIRED_PARTITION_NUM_SPLITS = "hcat.desired.partition.num.splits"; /** * hcat.append.limit allows a hcat user to specify a custom append limit. * By default, while appending to an existing directory, hcat will attempt * to avoid naming clashes and try to append _a_NNN where NNN is a number to * the desired filename to avoid clashes. However, by default, it only tries * for NNN from 0 to 999 before giving up. This can cause an issue for some * tables with an extraordinarily large number of files. Ideally, this should * be fixed by the user changing their usage pattern and doing some manner of * compaction, but in the meanwhile, until they can, setting this parameter * can be used to bump that limit. */ public static final String HCAT_APPEND_LIMIT = "hcat.append.limit"; // IMPORTANT IMPORTANT IMPORTANT!!!!! //The keys used to store info into the job Configuration. //If any new keys are added, the HCatStorer needs to be updated. The HCatStorer //updates the job configuration in the backend to insert these keys to avoid //having to call setOutput from the backend (which would cause a metastore call //from the map jobs) public static final String HCAT_KEY_OUTPUT_BASE = "mapreduce.lib.hcatoutput"; public static final String HCAT_KEY_OUTPUT_INFO = HCAT_KEY_OUTPUT_BASE + ".info"; public static final String HCAT_KEY_HIVE_CONF = HCAT_KEY_OUTPUT_BASE + ".hive.conf"; public static final String HCAT_KEY_TOKEN_SIGNATURE = HCAT_KEY_OUTPUT_BASE + ".token.sig"; public static final String[] OUTPUT_CONFS_TO_SAVE = { HCAT_KEY_OUTPUT_INFO, HCAT_KEY_HIVE_CONF, HCAT_KEY_TOKEN_SIGNATURE }; public static final String HCAT_MSG_CLEAN_FREQ = "hcat.msg.clean.freq"; public static final String HCAT_MSG_EXPIRY_DURATION = "hcat.msg.expiry.duration"; public static final String HCAT_MSGBUS_TOPIC_NAME = "hcat.msgbus.topic.name"; public static final String HCAT_MSGBUS_TOPIC_NAMING_POLICY = "hcat.msgbus.topic.naming.policy"; public static final String HCAT_MSGBUS_TOPIC_PREFIX = "hcat.msgbus.topic.prefix"; public static final String HCAT_OUTPUT_ID_HASH = HCAT_KEY_OUTPUT_BASE + ".id"; public static final String HCAT_DYNAMIC_PTN_JOBID = HCAT_KEY_OUTPUT_BASE + ".dynamic.jobid"; public static final boolean HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED = false; public static final String HCAT_DYNAMIC_CUSTOM_PATTERN = "hcat.dynamic.partitioning.custom.pattern"; // Message Bus related properties. public static final String HCAT_DEFAULT_TOPIC_PREFIX = "hcat"; public static final String HCAT_EVENT = "HCAT_EVENT"; public static final String HCAT_ADD_PARTITION_EVENT = "ADD_PARTITION"; public static final String HCAT_DROP_PARTITION_EVENT = "DROP_PARTITION"; public static final String HCAT_ALTER_PARTITION_EVENT = "ALTER_PARTITION"; public static final String HCAT_PARTITION_DONE_EVENT = "PARTITION_DONE"; public static final String HCAT_CREATE_TABLE_EVENT = "CREATE_TABLE"; public static final String HCAT_ALTER_TABLE_EVENT = "ALTER_TABLE"; public static final String HCAT_DROP_TABLE_EVENT = "DROP_TABLE"; public static final String HCAT_CREATE_DATABASE_EVENT = "CREATE_DATABASE"; public static final String HCAT_DROP_DATABASE_EVENT = "DROP_DATABASE"; public static final String HCAT_CREATE_FUNCTION_EVENT = "CREATE_FUNCTION"; public static final String HCAT_DROP_FUNCTION_EVENT = "DROP_FUNCTION"; public static final String HCAT_CREATE_INDEX_EVENT = "CREATE_INDEX"; public static final String HCAT_DROP_INDEX_EVENT = "DROP_INDEX"; public static final String HCAT_ALTER_INDEX_EVENT = "ALTER_INDEX"; public static final String HCAT_INSERT_EVENT = "INSERT"; public static final String HCAT_MESSAGE_VERSION = "HCAT_MESSAGE_VERSION"; public static final String HCAT_MESSAGE_FORMAT = "HCAT_MESSAGE_FORMAT"; public static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX = "hcatalog.message.factory.impl."; public static final String CONF_LABEL_HCAT_MESSAGE_FORMAT = "hcatalog.message.format"; public static final String DEFAULT_MESSAGE_FACTORY_IMPL = "org.apache.hive.hcatalog.messaging.json.JSONMessageFactory"; // System environment variables public static final String SYSENV_HADOOP_TOKEN_FILE_LOCATION = "HADOOP_TOKEN_FILE_LOCATION"; // Hadoop Conf Var Names public static final String CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY = "mapreduce.job.credentials.binary"; //*************************************************************************** // Data-related configuration properties. //*************************************************************************** /** * {@value} (default: {@value #HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT}). * Pig < 0.10.0 does not have boolean support, and scripts written for pre-boolean Pig versions * will not expect boolean values when upgrading Pig. For integration the option is offered to * convert boolean fields to integers by setting this Hadoop configuration key. */ public static final String HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER = "hcat.data.convert.boolean.to.integer"; public static final boolean HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT = false; /** * {@value} (default: {@value #HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT}). * Hive tables support tinyint and smallint columns, while not all processing frameworks support * these types (Pig only has integer for example). Enable this property to promote tinyint and * smallint columns to integer at runtime. Note that writes to tinyint and smallint columns * enforce bounds checking and jobs will fail if attempting to write values outside the column * bounds. */ public static final String HCAT_DATA_TINY_SMALL_INT_PROMOTION = "hcat.data.tiny.small.int.promotion"; public static final boolean HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT = false; /** * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT}). * Threshold for the ratio of bad records that will be silently skipped without causing a task * failure. This is useful when processing large data sets with corrupt records, when its * acceptable to skip some bad records. */ public static final String HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY = "hcat.input.bad.record.threshold"; public static final float HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT = 0.0001f; /** * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_MIN_DEFAULT}). * Number of bad records that will be accepted before applying * {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY}. This is necessary to prevent an initial bad * record from causing a task failure. */ public static final String HCAT_INPUT_BAD_RECORD_MIN_KEY = "hcat.input.bad.record.min"; public static final int HCAT_INPUT_BAD_RECORD_MIN_DEFAULT = 2; public static final String HCAT_INPUT_IGNORE_INVALID_PATH_KEY = "hcat.input.ignore.invalid.path"; public static final boolean HCAT_INPUT_IGNORE_INVALID_PATH_DEFAULT = false; }