/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.conf;
import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.tajo.TajoConstants;
import org.apache.tajo.util.NetUtils;
import org.apache.tajo.util.TUtil;
import java.io.IOException;
import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.util.Map;
public class TajoConf extends Configuration {
static {
Configuration.addDefaultResource("catalog-default.xml");
Configuration.addDefaultResource("catalog-site.xml");
Configuration.addDefaultResource("storage-default.xml");
Configuration.addDefaultResource("storage-site.xml");
Configuration.addDefaultResource("tajo-default.xml");
Configuration.addDefaultResource("tajo-site.xml");
}
private static final String EMPTY_VALUE = "";
private static final Map<String, ConfVars> vars = TUtil.newHashMap();
public TajoConf() {
super();
}
/**
 * Creates a configuration by handing {@code conf} to the {@link Configuration}
 * constructor that takes another configuration.
 *
 * @param conf the configuration passed to the superclass constructor
 */
public TajoConf(Configuration conf) {
  super(conf);
}
/**
 * Creates a configuration and then adds the resource located at {@code path}.
 *
 * @param path the resource passed to {@code addResource}
 */
public TajoConf(Path path) {
  // the superclass no-arg constructor runs implicitly before the resource is added
  this.addResource(path);
}
/**
 * Registry of the configuration variables known to Tajo.
 *
 * <p>Each constant binds a property name ({@link #varname}) to a typed default. The
 * two-argument constructor overload chosen by the default's Java type determines
 * {@link #valClass}; the default fields that do not belong to that type hold the
 * placeholders {@code -1} / {@code false}. {@link #defaultVal} carries the default in
 * string form (it is {@code null} when a string variable is declared with a
 * {@code null} default).
 */
public static enum ConfVars {
  //////////////////////////////////
  // Tajo System Configuration
  //////////////////////////////////
  // the root directory of a Tajo cluster
  ROOT_DIR("tajo.rootdir", "file:///tmp/tajo-${user.name}/"),
  // a username for a running Tajo cluster
  USERNAME("tajo.username", "${user.name}"),
  // Configurable System Directories
  WAREHOUSE_DIR("tajo.warehouse.directory", EMPTY_VALUE),
  STAGING_ROOT_DIR("tajo.staging.directory", "/tmp/tajo-${user.name}/staging"),
  SYSTEM_CONF_PATH("tajo.system-conf.path", EMPTY_VALUE),
  SYSTEM_CONF_REPLICA_COUNT("tajo.system-conf.replica-count", 20),
  // Tajo Master Service Addresses
  TAJO_MASTER_UMBILICAL_RPC_ADDRESS("tajo.master.umbilical-rpc.address", "localhost:26001"),
  TAJO_MASTER_CLIENT_RPC_ADDRESS("tajo.master.client-rpc.address", "localhost:26002"),
  TAJO_MASTER_INFO_ADDRESS("tajo.master.info-http.address", "0.0.0.0:26080"),
  // QueryMaster resource
  TAJO_QUERYMASTER_DISK_SLOT("tajo.qm.resource.disk.slots", 0.0f),
  TAJO_QUERYMASTER_MEMORY_MB("tajo.qm.resource.memory-mb", 512),
  // Tajo Worker Service Addresses
  WORKER_INFO_ADDRESS("tajo.worker.info-http.address", "0.0.0.0:28080"),
  WORKER_QM_INFO_ADDRESS("tajo.worker.qm-info-http.address", "0.0.0.0:28081"),
  WORKER_PEER_RPC_ADDRESS("tajo.worker.peer-rpc.address", "0.0.0.0:28091"),
  WORKER_CLIENT_RPC_ADDRESS("tajo.worker.client-rpc.address", "0.0.0.0:28092"),
  WORKER_QM_RPC_ADDRESS("tajo.worker.qm-rpc.address", "0.0.0.0:28093"),
  // Tajo Worker Temporal Directories
  WORKER_TEMPORAL_DIR("tajo.worker.tmpdir.locations", "/tmp/tajo-${user.name}/tmpdir"),
  WORKER_TEMPORAL_DIR_CLEANUP("tajo.worker.tmpdir.cleanup-at-startup", false),
  // Tajo Worker Resources
  WORKER_RESOURCE_AVAILABLE_CPU_CORES("tajo.worker.resource.cpu-cores", 1),
  WORKER_RESOURCE_AVAILABLE_MEMORY_MB("tajo.worker.resource.memory-mb", 1024),
  WORKER_RESOURCE_AVAILABLE_DISKS("tajo.worker.resource.disks", 1.0f),
  WORKER_EXECUTION_MAX_SLOTS("tajo.worker.parallel-execution.max-num", 2),
  WORKER_RESOURCE_DFS_DIR_AWARE("tajo.worker.resource.dfs-dir-aware", false),
  // Tajo Worker Dedicated Resources
  WORKER_RESOURCE_DEDICATED("tajo.worker.resource.dedicated", false),
  WORKER_RESOURCE_DEDICATED_MEMORY_RATIO("tajo.worker.resource.dedicated-memory-ratio", 0.8f),
  // Tajo Worker History
  WORKER_HISTORY_EXPIRE_PERIOD("tajo.worker.history.expire-interval-minutes", 12 * 60), // 12 hours
  WORKER_HEARTBEAT_TIMEOUT("tajo.worker.heartbeat.timeout", 120 * 1000), // 120 sec
  // Resource Manager
  RESOURCE_MANAGER_CLASS("tajo.resource.manager", "org.apache.tajo.master.rm.TajoWorkerResourceManager"),
  // Catalog
  CATALOG_ADDRESS("tajo.catalog.client-rpc.address", "localhost:26005"),
  //////////////////////////////////
  // for Yarn Resource Manager
  //////////////////////////////////
  YARN_RM_QUERY_MASTER_MEMORY_MB("tajo.querymaster.memory-mb", 512),
  YARN_RM_QUERY_MASTER_DISKS("tajo.yarn-rm.querymaster.disks", 1),
  /** how many launching TaskRunners in parallel */
  YARN_RM_TASKRUNNER_LAUNCH_PARALLEL_NUM("tajo.yarn-rm.parallel-task-runner-launcher-num", 16),
  YARN_RM_WORKER_NUMBER_PER_NODE("tajo.yarn-rm.max-worker-num-per-node", 8),
  //////////////////////////////////
  // Query Configuration
  //////////////////////////////////
  QUERY_SESSION_TIMEOUT("tajo.query.session.timeout-sec", 60),
  //////////////////////////////////
  // Shuffle Configuration
  //////////////////////////////////
  PULLSERVER_PORT("tajo.pullserver.port", 0),
  SHUFFLE_SSL_ENABLED_KEY("tajo.pullserver.ssl.enabled", false),
  SHUFFLE_FILE_FORMAT("tajo.shuffle.file-format", "RAW"),
  SHUFFLE_FETCHER_PARALLEL_EXECUTION_MAX_NUM("tajo.shuffle.fetcher.parallel-execution.max-num", 2),
  //////////////////////////////////
  // Storage Configuration
  //////////////////////////////////
  // a null literal can only match the (String, String) constructor, so this is a
  // string variable whose default is null
  RAWFILE_SYNC_INTERVAL("rawfile.sync.interval", null),
  MINIMUM_SPLIT_SIZE("tajo.min.split.size", (long) 1),
  // for RCFile
  HIVEUSEEXPLICITRCFILEHEADER("tajo.exec.rcfile.use.explicit.header", true),
  // for Storage Manager v2
  STORAGE_MANAGER_VERSION_2("tajo.storage-manager.v2", false),
  STORAGE_MANAGER_DISK_SCHEDULER_MAX_READ_BYTES_PER_SLOT("tajo.storage-manager.max-read-bytes", 8 * 1024 * 1024),
  STORAGE_MANAGER_DISK_SCHEDULER_REPORT_INTERVAL("tajo.storage-manager.disk-scheduler.report-interval", 60 * 1000),
  STORAGE_MANAGER_CONCURRENCY_PER_DISK("tajo.storage-manager.disk-scheduler.per-disk-concurrency", 2),
  //////////////////////////////////////////
  // Distributed Query Execution Parameters
  //////////////////////////////////////////
  DIST_QUERY_BROADCAST_JOIN_AUTO("tajo.dist-query.join.auto-broadcast", true),
  DIST_QUERY_BROADCAST_JOIN_THRESHOLD("tajo.dist-query.join.broadcast.threshold-bytes", (long)5 * 1048576),
  DIST_QUERY_JOIN_TASK_VOLUME("tajo.dist-query.join.task-volume-mb", 128),
  DIST_QUERY_SORT_TASK_VOLUME("tajo.dist-query.sort.task-volume-mb", 128),
  DIST_QUERY_GROUPBY_TASK_VOLUME("tajo.dist-query.groupby.task-volume-mb", 128),
  DIST_QUERY_JOIN_PARTITION_VOLUME("tajo.dist-query.join.partition-volume-mb", 128),
  DIST_QUERY_SORT_PARTITION_VOLUME("tajo.dist-query.sort.partition-volume-mb", 256),
  DIST_QUERY_GROUPBY_PARTITION_VOLUME("tajo.dist-query.groupby.partition-volume-mb", 256),
  //////////////////////////////////
  // Physical Executors
  //////////////////////////////////
  EXECUTOR_EXTERNAL_SORT_THREAD_NUM("tajo.executor.external-sort.thread-num", 1),
  EXECUTOR_EXTERNAL_SORT_BUFFER_SIZE("tajo.executor.external-sort.buffer-mb", 200L),
  EXECUTOR_EXTERNAL_SORT_FANOUT("tajo.executor.external-sort.fanout-num", 8),
  EXECUTOR_INNER_JOIN_INMEMORY_HASH_TABLE_SIZE("tajo.executor.join.inner.in-memory-table-num", (long)1000000),
  EXECUTOR_INNER_JOIN_INMEMORY_HASH_THRESHOLD("tajo.executor.join.inner.in-memory-hash-threshold-bytes",
      (long)256 * 1048576),
  EXECUTOR_OUTER_JOIN_INMEMORY_HASH_THRESHOLD("tajo.executor.join.outer.in-memory-hash-threshold-bytes",
      (long)256 * 1048576),
  EXECUTOR_GROUPBY_INMEMORY_HASH_THRESHOLD("tajo.executor.groupby.in-memory-hash-threshold-bytes",
      (long)256 * 1048576),
  //////////////////////////////////
  // RPC
  //////////////////////////////////
  RPC_POOL_MAX_IDLE("tajo.rpc.pool.idle.max", 10),
  // Internal RPC Client
  INTERNAL_RPC_CLIENT_WORKER_THREAD_NUM("tajo.internal.rpc.client.worker-thread-num",
      Runtime.getRuntime().availableProcessors() * 2),
  // Internal RPC Server
  MASTER_RPC_SERVER_WORKER_THREAD_NUM("tajo.master.rpc.server.worker-thread-num",
      Runtime.getRuntime().availableProcessors() * 2),
  QUERY_MASTER_RPC_SERVER_WORKER_THREAD_NUM("tajo.querymaster.rpc.server.worker-thread-num",
      Runtime.getRuntime().availableProcessors() * 2),
  WORKER_RPC_SERVER_WORKER_THREAD_NUM("tajo.worker.rpc.server.worker-thread-num",
      Runtime.getRuntime().availableProcessors() * 2),
  CATALOG_RPC_SERVER_WORKER_THREAD_NUM("tajo.catalog.rpc.server.worker-thread-num",
      Runtime.getRuntime().availableProcessors() * 2),
  SHUFFLE_RPC_SERVER_WORKER_THREAD_NUM("tajo.shuffle.rpc.server.worker-thread-num",
      Runtime.getRuntime().availableProcessors() * 2),
  // Client RPC
  RPC_CLIENT_WORKER_THREAD_NUM("tajo.rpc.client.worker-thread-num", 4),
  // Client service RPC Server
  MASTER_SERVICE_RPC_SERVER_WORKER_THREAD_NUM("tajo.master.service.rpc.server.worker-thread-num",
      Runtime.getRuntime().availableProcessors() * 1),
  WORKER_SERVICE_RPC_SERVER_WORKER_THREAD_NUM("tajo.worker.service.rpc.server.worker-thread-num",
      Runtime.getRuntime().availableProcessors() * 1),
  //////////////////////////////////
  // The Below is reserved
  //////////////////////////////////
  // GeoIP
  GEOIP_DATA("tajo.function.geoip-database-location", ""),
  //////////////////////////////////
  // Hive Configuration
  //////////////////////////////////
  HIVE_QUERY_MODE("tajo.hive.query.mode", false),
  //////////////////////////////////
  // Task Configuration
  //////////////////////////////////
  TASK_DEFAULT_MEMORY("tajo.task.memory-slot-mb.default", 512),
  TASK_DEFAULT_DISK("tajo.task.disk-slot.default", 0.5f),
  TASK_DEFAULT_SIZE("tajo.task.size-mb", 128),
  //////////////////////////////////
  // User Session Configuration
  //////////////////////////////////
  CLIENT_SESSION_EXPIRY_TIME("tajo.client.session.expiry-time-sec", 3600), // default time is one hour.
  // Metrics
  METRICS_PROPERTY_FILENAME("tajo.metrics.property.file", "tajo-metrics.properties"),
  // CLI
  CLI_MAX_COLUMN("tajo.cli.max_columns", 120)
  ;

  /** Property name under which the variable is stored in a configuration. */
  public final String varname;
  /** Default value in string form; null only when declared with a null string default. */
  public final String defaultVal;
  /** Default for Integer variables; -1 for every other type. */
  public final int defaultIntVal;
  /** Default for Long variables; -1 for every other type. */
  public final long defaultLongVal;
  /** Default for Float variables; -1 for every other type. */
  public final float defaultFloatVal;
  /** Boxed Java type of the variable: String, Integer, Long, Float or Boolean. */
  public final Class<?> valClass;
  /** Default for Boolean variables; false for every other type. */
  public final boolean defaultBoolVal;
  /** Value-type tag matching {@link #valClass}. */
  private final VarType type;

  /** Canonical constructor; the typed two-argument overloads below all delegate here. */
  ConfVars(String varname, Class<?> valClass, VarType type, String defaultVal,
           int defaultIntVal, long defaultLongVal, float defaultFloatVal,
           boolean defaultBoolVal) {
    this.varname = varname;
    this.valClass = valClass;
    this.type = type;
    this.defaultVal = defaultVal;
    this.defaultIntVal = defaultIntVal;
    this.defaultLongVal = defaultLongVal;
    this.defaultFloatVal = defaultFloatVal;
    this.defaultBoolVal = defaultBoolVal;
  }

  /** Declares a String variable. */
  ConfVars(String varname, String defaultVal) {
    this(varname, String.class, VarType.STRING, defaultVal, -1, -1L, -1f, false);
  }

  /** Declares an Integer variable. */
  ConfVars(String varname, int defaultIntVal) {
    this(varname, Integer.class, VarType.INT, Integer.toString(defaultIntVal),
        defaultIntVal, -1L, -1f, false);
  }

  /** Declares a Long variable. */
  ConfVars(String varname, long defaultLongVal) {
    this(varname, Long.class, VarType.LONG, Long.toString(defaultLongVal),
        -1, defaultLongVal, -1f, false);
  }

  /** Declares a Float variable. */
  ConfVars(String varname, float defaultFloatVal) {
    this(varname, Float.class, VarType.FLOAT, Float.toString(defaultFloatVal),
        -1, -1L, defaultFloatVal, false);
  }

  /** Declares a Boolean variable. */
  ConfVars(String varname, boolean defaultBoolVal) {
    this(varname, Boolean.class, VarType.BOOLEAN, Boolean.toString(defaultBoolVal),
        -1, -1L, -1f, defaultBoolVal);
  }

  /**
   * Value types a variable can have, each able to check whether a raw string is a
   * well-formed value of that type.
   */
  enum VarType {
    STRING {
      @Override
      void checkType(String value) throws Exception {
        // every string is accepted
      }
    },
    INT {
      @Override
      void checkType(String value) throws Exception {
        Integer.valueOf(value);
      }
    },
    LONG {
      @Override
      void checkType(String value) throws Exception {
        Long.valueOf(value);
      }
    },
    FLOAT {
      @Override
      void checkType(String value) throws Exception {
        Float.valueOf(value);
      }
    },
    BOOLEAN {
      @Override
      void checkType(String value) throws Exception {
        // Boolean.valueOf never throws (anything other than "true" simply becomes
        // false), so it cannot be used for validation; check the literal explicitly.
        if (!"true".equalsIgnoreCase(value) && !"false".equalsIgnoreCase(value)) {
          throw new IllegalArgumentException("Not a boolean value: " + value);
        }
      }
    };

    /**
     * @param value raw text to validate
     * @return true when {@link #checkType(String)} accepts {@code value} without throwing
     */
    boolean isType(String value) {
      try {
        checkType(value);
      } catch (Exception e) {
        return false;
      }
      return true;
    }

    /** @return the constant's name, which is already upper case */
    String typeString() {
      return name();
    }

    /**
     * Validates {@code value} for this type.
     *
     * @throws Exception when {@code value} is not a well-formed value of this type
     */
    abstract void checkType(String value) throws Exception;
  }
}
/**
 * Reads an integer variable from {@code conf}.
 *
 * @param conf configuration to read from
 * @param var  variable to look up; asserted to be declared as an Integer
 * @return the result of {@code conf.getInt} for {@code var.varname}, with
 *         {@code var.defaultIntVal} supplied as the default
 */
public static int getIntVar(Configuration conf, ConfVars var) {
  assert (Integer.class == var.valClass);
  final String key = var.varname;
  final int fallback = var.defaultIntVal;
  return conf.getInt(key, fallback);
}
/**
 * Stores an integer value for {@code var} in {@code conf}.
 *
 * @param conf configuration to write to
 * @param var  variable to set; asserted to be declared as an Integer
 * @param val  value passed to {@code conf.setInt}
 */
public static void setIntVar(Configuration conf, ConfVars var, int val) {
  assert (Integer.class == var.valClass);
  final String key = var.varname;
  conf.setInt(key, val);
}
/**
 * Reads an integer variable from this configuration.
 *
 * @param var variable to look up
 * @return the value produced by the static {@code getIntVar} for this instance
 */
public int getIntVar(ConfVars var) {
  return TajoConf.getIntVar(this, var);
}
/**
 * Stores an integer value for {@code var} in this configuration.
 *
 * @param var variable to set
 * @param val value to store
 */
public void setIntVar(ConfVars var, int val) {
  TajoConf.setIntVar(this, var, val);
}
/**
 * Reads a long variable from {@code conf}.
 *
 * @param conf configuration to read from
 * @param var  variable to look up; asserted to be declared as a Long
 * @return the result of {@code conf.getLong} for {@code var.varname}, with
 *         {@code var.defaultLongVal} supplied as the default
 */
public static long getLongVar(Configuration conf, ConfVars var) {
  assert (Long.class == var.valClass);
  final String key = var.varname;
  final long fallback = var.defaultLongVal;
  return conf.getLong(key, fallback);
}
/**
 * Reads a long variable from {@code conf} using a caller-supplied default instead of
 * the variable's declared one. No type assertion is made on {@code var}.
 *
 * @param conf       configuration to read from
 * @param var        variable to look up
 * @param defaultVal default handed to {@code conf.getLong}
 * @return the result of {@code conf.getLong} for {@code var.varname}
 */
public static long getLongVar(Configuration conf, ConfVars var, long defaultVal) {
  final String key = var.varname;
  return conf.getLong(key, defaultVal);
}
/**
 * Stores a long value for {@code var} in {@code conf}.
 *
 * @param conf configuration to write to
 * @param var  variable to set; asserted to be declared as a Long
 * @param val  value passed to {@code conf.setLong}
 */
public static void setLongVar(Configuration conf, ConfVars var, long val) {
  assert (Long.class == var.valClass);
  final String key = var.varname;
  conf.setLong(key, val);
}
/**
 * Reads a long variable from this configuration.
 *
 * @param var variable to look up
 * @return the value produced by the static {@code getLongVar} for this instance
 */
public long getLongVar(ConfVars var) {
  return TajoConf.getLongVar(this, var);
}
/**
 * Stores a long value for {@code var} in this configuration.
 *
 * @param var variable to set
 * @param val value to store
 */
public void setLongVar(ConfVars var, long val) {
  TajoConf.setLongVar(this, var, val);
}
/**
 * Reads a float variable from {@code conf}.
 *
 * @param conf configuration to read from
 * @param var  variable to look up; asserted to be declared as a Float
 * @return the result of {@code conf.getFloat} for {@code var.varname}, with
 *         {@code var.defaultFloatVal} supplied as the default
 */
public static float getFloatVar(Configuration conf, ConfVars var) {
  assert (Float.class == var.valClass);
  final String key = var.varname;
  final float fallback = var.defaultFloatVal;
  return conf.getFloat(key, fallback);
}
/**
 * Reads a float variable from {@code conf} using a caller-supplied default instead of
 * the variable's declared one. No type assertion is made on {@code var}.
 *
 * @param conf       configuration to read from
 * @param var        variable to look up
 * @param defaultVal default handed to {@code conf.getFloat}
 * @return the result of {@code conf.getFloat} for {@code var.varname}
 */
public static float getFloatVar(Configuration conf, ConfVars var, float defaultVal) {
  final String key = var.varname;
  return conf.getFloat(key, defaultVal);
}
/**
 * Stores a float value for {@code var} in {@code conf}.
 *
 * @param conf configuration to write to
 * @param var  variable to set; asserted to be declared as a Float
 * @param val  value passed to {@code conf.setFloat}
 */
public static void setFloatVar(Configuration conf, ConfVars var, float val) {
  assert (Float.class == var.valClass);
  final String key = var.varname;
  conf.setFloat(key, val);
}
/**
 * Reads a float variable from this configuration.
 *
 * @param var variable to look up
 * @return the value produced by the static {@code getFloatVar} for this instance
 */
public float getFloatVar(ConfVars var) {
  return TajoConf.getFloatVar(this, var);
}
/**
 * Stores a float value for {@code var} in this configuration.
 *
 * @param var variable to set
 * @param val value to store
 */
public void setFloatVar(ConfVars var, float val) {
  TajoConf.setFloatVar(this, var, val);
}
/**
 * Reads a boolean variable from {@code conf}.
 *
 * @param conf configuration to read from
 * @param var  variable to look up; asserted to be declared as a Boolean
 * @return the result of {@code conf.getBoolean} for {@code var.varname}, with
 *         {@code var.defaultBoolVal} supplied as the default
 */
public static boolean getBoolVar(Configuration conf, ConfVars var) {
  assert (Boolean.class == var.valClass);
  final String key = var.varname;
  final boolean fallback = var.defaultBoolVal;
  return conf.getBoolean(key, fallback);
}
/**
 * Reads a boolean variable from {@code conf} using a caller-supplied default instead
 * of the variable's declared one. No type assertion is made on {@code var}.
 *
 * @param conf       configuration to read from
 * @param var        variable to look up
 * @param defaultVal default handed to {@code conf.getBoolean}
 * @return the result of {@code conf.getBoolean} for {@code var.varname}
 */
public static boolean getBoolVar(Configuration conf, ConfVars var, boolean defaultVal) {
  final String key = var.varname;
  return conf.getBoolean(key, defaultVal);
}
/**
 * Stores a boolean value for {@code var} in {@code conf}.
 *
 * @param conf configuration to write to
 * @param var  variable to set; asserted to be declared as a Boolean
 * @param val  value passed to {@code conf.setBoolean}
 */
public static void setBoolVar(Configuration conf, ConfVars var, boolean val) {
  assert (Boolean.class == var.valClass);
  final String key = var.varname;
  conf.setBoolean(key, val);
}
/**
 * Reads a boolean variable from this configuration.
 *
 * @param var variable to look up
 * @return the value produced by the static {@code getBoolVar} for this instance
 */
public boolean getBoolVar(ConfVars var) {
  return TajoConf.getBoolVar(this, var);
}
/**
 * Stores a boolean value for {@code var} in this configuration.
 *
 * @param var variable to set
 * @param val value to store
 */
public void setBoolVar(ConfVars var, boolean val) {
  TajoConf.setBoolVar(this, var, val);
}
/**
 * Reads a string variable from {@code conf}.
 *
 * @param conf configuration to read from
 * @param var  variable to look up; asserted to be declared as a String
 * @return the result of {@code conf.get} for {@code var.varname}, with
 *         {@code var.defaultVal} supplied as the default
 */
public static String getVar(Configuration conf, ConfVars var) {
  assert (String.class == var.valClass);
  final String key = var.varname;
  final String fallback = var.defaultVal;
  return conf.get(key, fallback);
}
/**
 * Reads a string variable from {@code conf} using a caller-supplied default instead of
 * the variable's declared one. No type assertion is made on {@code var}.
 *
 * @param conf       configuration to read from
 * @param var        variable to look up
 * @param defaultVal default handed to {@code conf.get}
 * @return the result of {@code conf.get} for {@code var.varname}
 */
public static String getVar(Configuration conf, ConfVars var, String defaultVal) {
  final String key = var.varname;
  return conf.get(key, defaultVal);
}
/**
 * Stores a string value for {@code var} in {@code conf}.
 *
 * @param conf configuration to write to
 * @param var  variable to set; asserted to be declared as a String
 * @param val  value passed to {@code conf.set}
 */
public static void setVar(Configuration conf, ConfVars var, String val) {
  assert (String.class == var.valClass);
  final String key = var.varname;
  conf.set(key, val);
}
/**
 * Looks up a variable by name in the static {@code vars} table.
 *
 * @param name key to look up in {@code vars}
 * @return the mapped {@link ConfVars}, or {@code null} when {@code vars} has no entry
 *         for {@code name}
 */
public static ConfVars getConfVars(String name) {
  final ConfVars registered = vars.get(name);
  return registered;
}
/**
 * Reads a string variable from this configuration.
 *
 * @param var variable to look up
 * @return the value produced by the static {@code getVar} for this instance
 */
public String getVar(ConfVars var) {
  return TajoConf.getVar(this, var);
}
/**
 * Stores a string value for {@code var} in this configuration.
 *
 * @param var variable to set
 * @param val value to store
 */
public void setVar(ConfVars var, String val) {
  TajoConf.setVar(this, var, val);
}
/**
 * Prints one {@code name=value} line per {@link ConfVars} constant to {@code ps},
 * in declaration order. A variable for which {@code get} returns {@code null} is
 * printed with an empty value.
 *
 * @param ps stream to print to
 */
public void logVars(PrintStream ps) {
  for (ConfVars confVar : ConfVars.values()) {
    final String key = confVar.varname;
    final String current = get(key);
    final String shown = (current == null) ? "" : current;
    ps.println(key + "=" + shown);
  }
}
/**
 * Reads the string value of {@code var} and hands it to
 * {@code NetUtils.createSocketAddr}.
 *
 * @param var variable holding the address text; read through {@code getVar}
 * @return the socket address created by {@code NetUtils.createSocketAddr}
 */
public InetSocketAddress getSocketAddrVar(ConfVars var) {
  return NetUtils.createSocketAddr(getVar(var));
}
/////////////////////////////////////////////////////////////////////////////
// Tajo System Specific Methods
/////////////////////////////////////////////////////////////////////////////
/**
 * Returns the Tajo root directory configured under {@code ConfVars.ROOT_DIR}.
 *
 * @param conf configuration to read from
 * @return the configured root directory as a {@link Path}
 * @throws NullPointerException if the configured value is {@code null}
 */
public static Path getTajoRootDir(TajoConf conf) {
  final String configuredRoot = conf.getVar(ConfVars.ROOT_DIR);
  final String message = ConfVars.ROOT_DIR.varname + " must be set before a Tajo Cluster starts up";
  // checkNotNull hands back its first argument once it has been verified
  return new Path(Preconditions.checkNotNull(configuredRoot, message));
}
/**
 * Returns the warehouse directory.
 *
 * <p>If {@code ConfVars.WAREHOUSE_DIR} holds a non-empty value, that value is used.
 * Otherwise the directory is derived as {@code TajoConstants.WAREHOUSE_DIR_NAME}
 * under the Tajo root directory, and the derived location is written back into
 * {@code conf}.
 *
 * @param conf configuration to read from (and, when deriving, to update)
 * @return the warehouse directory
 */
public static Path getWarehouseDir(TajoConf conf) {
  final String configured = conf.getVar(ConfVars.WAREHOUSE_DIR);
  if (configured != null && !configured.equals("")) {
    return new Path(configured);
  }

  // Nothing configured: derive the location from the root directory and remember it.
  final Path rootDir = getTajoRootDir(conf);
  final Path derivedDir = new Path(rootDir, TajoConstants.WAREHOUSE_DIR_NAME);
  final String derived = derivedDir.toUri().toString();
  conf.setVar(ConfVars.WAREHOUSE_DIR, derived);
  return new Path(derived);
}
/**
 * Returns the system directory: {@code TajoConstants.SYSTEM_DIR_NAME} under the
 * Tajo root directory.
 *
 * @param conf configuration used to resolve the root directory
 * @return the system directory
 */
public static Path getSystemDir(TajoConf conf) {
  return new Path(getTajoRootDir(conf), TajoConstants.SYSTEM_DIR_NAME);
}
/**
 * Returns the system resource directory:
 * {@code TajoConstants.SYSTEM_RESOURCE_DIR_NAME} under the system directory.
 *
 * @param conf configuration used to resolve the system directory
 * @return the system resource directory
 */
public static Path getSystemResourceDir(TajoConf conf) {
  final Path systemDir = getSystemDir(conf);
  return new Path(systemDir, TajoConstants.SYSTEM_RESOURCE_DIR_NAME);
}
/**
 * Tells whether {@code path} begins with one of the two recognized scheme
 * prefixes, {@code file:/} or {@code hdfs:/}.
 *
 * @param path path text to inspect
 * @return true when {@code path} starts with either prefix
 */
private static boolean hasScheme(String path) {
  if (path.startsWith("file:/")) {
    return true;
  }
  return path.startsWith("hdfs:/");
}
/**
 * Returns the staging root directory.
 *
 * <p>If the value of {@code ConfVars.STAGING_ROOT_DIR} already starts with a
 * recognized scheme prefix it is used as is. Otherwise it is combined with the URI
 * of the warehouse directory's file system, and the resulting path is written back
 * into {@code conf}.
 *
 * @param conf configuration to read from (and, when qualifying, to update)
 * @return the staging root directory
 * @throws IOException if obtaining the warehouse directory's file system fails
 */
public static Path getStagingDir(TajoConf conf) throws IOException {
  final String configured = conf.getVar(ConfVars.STAGING_ROOT_DIR);
  if (hasScheme(configured)) {
    return new Path(configured);
  }

  // No scheme prefix: anchor the directory on the warehouse file system's URI.
  final FileSystem warehouseFs = getWarehouseDir(conf).getFileSystem(conf);
  final Path qualified = new Path(warehouseFs.getUri().toString(), configured);
  conf.setVar(ConfVars.STAGING_ROOT_DIR, qualified.toString());
  return qualified;
}
/**
 * Returns the path of the system configuration file.
 *
 * <p>If {@code ConfVars.SYSTEM_CONF_PATH} holds a non-empty value, that value is
 * used. Otherwise the path is derived as {@code TajoConstants.SYSTEM_CONF_FILENAME}
 * under the system resource directory, and the derived path is written back into
 * {@code conf}.
 *
 * @param conf configuration to read from (and, when deriving, to update)
 * @return the system configuration path
 */
public static Path getSystemConfPath(TajoConf conf) {
  final String configured = conf.getVar(ConfVars.SYSTEM_CONF_PATH);
  if (configured != null && !configured.equals("")) {
    return new Path(configured);
  }

  // Nothing configured: derive the path from the system resource directory and remember it.
  final Path resourceDir = getSystemResourceDir(conf);
  final Path derived = new Path(resourceDir, TajoConstants.SYSTEM_CONF_FILENAME);
  conf.setVar(ConfVars.SYSTEM_CONF_PATH, derived.toString());
  return derived;
}
}