/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.llap.cli;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.llap.LlapUtil;
import org.apache.hadoop.hive.llap.configuration.LlapDaemonConfiguration;
import org.apache.hadoop.hive.llap.daemon.impl.LlapConstants;
import org.apache.hadoop.hive.llap.daemon.impl.StaticPermanentFunctionChecker;
import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos;
import org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils;
import org.apache.hadoop.registry.client.binding.RegistryUtils;
import org.apache.tez.dag.api.TezConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hive.common.CompressionUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.llap.cli.LlapOptionsProcessor.LlapOptions;
import org.apache.hadoop.hive.llap.io.api.impl.LlapInputFormat;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.ResourceUri;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.util.ResourceDownloader;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.eclipse.jetty.rewrite.handler.Rule;
import org.eclipse.jetty.util.ssl.SslContextFactory;
import org.joda.time.DateTime;
import org.json.JSONException;
import org.json.JSONObject;

import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
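/**
 * Driver behind the "llap" service command. It merges command-line options with the active Hive
 * configuration, stages the daemon configs, Tez/Hive/aux jars and the permanent UDF list into a
 * working directory, invokes the Slider packaging script (package.py), and optionally starts the
 * LLAP cluster.
 */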
public class LlapServiceDriver {

  protected static final Logger LOG = LoggerFactory.getLogger(LlapServiceDriver.class.getName());

  private static final String[] DEFAULT_AUX_CLASSES = new String[] {
      "org.apache.hive.hcatalog.data.JsonSerDe", "org.apache.hadoop.hive.druid.DruidStorageHandler",
      "org.apache.hive.storage.jdbc.JdbcStorageHandler" };
  private static final String HBASE_SERDE_CLASS = "org.apache.hadoop.hive.hbase.HBaseSerDe";
  private static final String[] NEEDED_CONFIGS = LlapDaemonConfiguration.DAEMON_CONFIGS;
  private static final String[] OPTIONAL_CONFIGS = LlapDaemonConfiguration.SSL_DAEMON_CONFIGS;
  private static final String OUTPUT_DIR_PREFIX = "llap-slider-";

  // This is not a config that users set in hive-site. Its only use is to share information
  // between the java component of the service driver and the python component.
  private static final String CONFIG_CLUSTER_NAME = "private.hive.llap.servicedriver.cluster.name";

  /**
   * This is a working configuration for the instance to merge various variables.
   * It is not written out for llap server usage.
   */
  private final HiveConf conf;

  public LlapServiceDriver() {
    SessionState ss = SessionState.get();
    conf = (ss != null) ? ss.getConf() : new HiveConf(SessionState.class);
  }

  public static void main(String[] args) throws Exception {
    LOG.info("LLAP service driver invoked with arguments={}", args);
    int ret = 0;
    try {
      ret = new LlapServiceDriver().run(args);
    } catch (Throwable t) {
      System.err.println("Failed: " + t.getMessage());
      t.printStackTrace();
      ret = 3;
    } finally {
      LOG.info("LLAP service driver finished");
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Completed processing - exiting with " + ret);
    }
    System.exit(ret);
  }

  private static Configuration resolve(Configuration configured, Properties direct,
      Properties hiveconf) {
    Configuration conf = new Configuration(false);

    populateConf(configured, conf, hiveconf, "CLI hiveconf");
    populateConf(configured, conf, direct, "CLI direct");

    return conf;
  }

  private static void populateConf(Configuration configured, Configuration target,
      Properties properties, String source) {
    for (Entry<Object, Object> entry : properties.entrySet()) {
      String key = (String) entry.getKey();
      String val = configured.get(key);
      if (val != null) {
        target.set(key, val, source);
      }
    }
  }

  static void populateConfWithLlapProperties(Configuration conf, Properties properties) {
    for (Entry<Object, Object> props : properties.entrySet()) {
      String key = (String) props.getKey();
      if (HiveConf.getLlapDaemonConfVars().contains(key)) {
        conf.set(key, (String) props.getValue());
      } else {
        if (key.startsWith(HiveConf.PREFIX_LLAP) || key.startsWith(HiveConf.PREFIX_HIVE_LLAP)) {
          LOG.warn("Adding key [{}] even though it is not in the set of known llap-server keys", key);
          conf.set(key, (String) props.getValue());
        } else {
          LOG.warn("Ignoring unknown llap server parameter: [{}]", key);
        }
      }
    }
  }

  private static abstract class NamedCallable<T> implements Callable<T> {
    public final String taskName;

    public NamedCallable(String name) {
      this.taskName = name;
    }

    public String getName() {
      return taskName;
    }
  }

  private int run(String[] args) throws Exception {
    LlapOptionsProcessor optionsProcessor = new LlapOptionsProcessor();
    final LlapOptions options = optionsProcessor.processOptions(args);

    final Properties propsDirectOptions = new Properties();

    if (options == null) {
      // help
      return 1;
    }

    // Working directory.
    Path tmpDir = new Path(options.getDirectory());

    if (conf == null) {
      throw new Exception("Cannot load any configuration to run command");
    }

    final long t0 = System.nanoTime();

    final FileSystem fs = FileSystem.get(conf);
    final FileSystem lfs = FileSystem.getLocal(conf).getRawFileSystem();

    int threadCount = Math.max(1, Runtime.getRuntime().availableProcessors() / 2);
    final ExecutorService executor = Executors.newFixedThreadPool(threadCount,
        new ThreadFactoryBuilder().setNameFormat("llap-pkg-%d").build());
    final CompletionService<Void> asyncRunner = new ExecutorCompletionService<Void>(executor);

    int rc = 0;
    try {

      // needed so that the file is actually loaded into configuration.
      for (String f : NEEDED_CONFIGS) {
        conf.addResource(f);
        if (conf.getResource(f) == null) {
          throw new Exception("Unable to find required config file: " + f);
        }
      }
      for (String f : OPTIONAL_CONFIGS) {
        conf.addResource(f);
      }

      conf.reloadConfiguration();

      populateConfWithLlapProperties(conf, options.getConfig());

      if (options.getName() != null) {
        // update service registry configs - caveat: this has nothing to do with the actual settings
        // as read by the AM
        // if needed, use --hiveconf llap.daemon.service.hosts=@llap0 to dynamically switch between
        // instances
        conf.set(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName());
        propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname,
            "@" + options.getName());
      }

      if (options.getLogger() != null) {
        HiveConf.setVar(conf, ConfVars.LLAP_DAEMON_LOGGER, options.getLogger());
        propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_LOGGER.varname, options.getLogger());
      }

      boolean isDirect = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT);

      if (options.getSize() != -1) {
        if (options.getCache() != -1) {
          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_MAPPED) == false) {
            // direct heap allocations need to be safer
            Preconditions.checkArgument(options.getCache() < options.getSize(),
                "Cache size (" + LlapUtil.humanReadableByteCount(options.getCache())
                    + ") has to be smaller than the container sizing ("
                    + LlapUtil.humanReadableByteCount(options.getSize()) + ")");
          } else if (options.getCache() < options.getSize()) {
            LOG.warn("Note that this might need YARN physical memory monitoring to be turned off "
                + "(yarn.nodemanager.pmem-check-enabled=false)");
          }
        }
        if (options.getXmx() != -1) {
          Preconditions.checkArgument(options.getXmx() < options.getSize(),
              "Working memory (Xmx=" + LlapUtil.humanReadableByteCount(options.getXmx())
                  + ") has to be smaller than the container sizing ("
                  + LlapUtil.humanReadableByteCount(options.getSize()) + ")");
        }
        if (isDirect && !HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_MAPPED)) {
          // direct and not memory mapped
          Preconditions.checkArgument(options.getXmx() + options.getCache() <= options.getSize(),
              "Working memory (Xmx=" + LlapUtil.humanReadableByteCount(options.getXmx())
                  + ") + cache size (" + LlapUtil.humanReadableByteCount(options.getCache())
                  + ") has to be smaller than the container sizing ("
                  + LlapUtil.humanReadableByteCount(options.getSize()) + ")");
        }
      }

      if (options.getExecutors() != -1) {
        conf.setLong(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, options.getExecutors());
        propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname,
            String.valueOf(options.getExecutors()));
        // TODO: vcpu settings - possibly when DRFA works right
      }

      if (options.getIoThreads() != -1) {
        conf.setLong(ConfVars.LLAP_IO_THREADPOOL_SIZE.varname, options.getIoThreads());
        propsDirectOptions.setProperty(ConfVars.LLAP_IO_THREADPOOL_SIZE.varname,
            String.valueOf(options.getIoThreads()));
      }

      long cache = -1, xmx = -1;
      if (options.getCache() != -1) {
        cache = options.getCache();
        conf.set(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, Long.toString(cache));
        propsDirectOptions.setProperty(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname,
            Long.toString(cache));
      }

      if (options.getXmx() != -1) {
        // Needs more explanation here
        // Xmx is not the max heap value in JDK8. You need to subtract 50% of the survivor fraction
        // from this, to get actual usable memory before it goes into GC
        xmx = options.getXmx();
        long xmxMb = (xmx / (1024L * 1024L));
        conf.setLong(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname, xmxMb);
        propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname,
            String.valueOf(xmxMb));
      }

      long size = options.getSize();
      if (size == -1) {
        long heapSize = xmx;
        if (!isDirect) {
          heapSize += cache;
        }
        size = Math.min((long) (heapSize * 1.2), heapSize + 1024L * 1024 * 1024);
        if (isDirect) {
          size += cache;
        }
      }
      long containerSize = size / (1024 * 1024);
      final long minAlloc = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1);
      Preconditions.checkArgument(containerSize >= minAlloc,
          "Container size (" + LlapUtil.humanReadableByteCount(options.getSize())
              + ") should be greater than minimum allocation ("
              + LlapUtil.humanReadableByteCount(minAlloc * 1024L * 1024L) + ")");
      conf.setLong(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize);
      propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname,
          String.valueOf(containerSize));

      LOG.info("Memory settings: container memory: {} executor memory: {} cache memory: {}",
          LlapUtil.humanReadableByteCount(options.getSize()),
          LlapUtil.humanReadableByteCount(options.getXmx()),
          LlapUtil.humanReadableByteCount(options.getCache()));

      if (options.getLlapQueueName() != null && !options.getLlapQueueName().isEmpty()) {
        conf.set(ConfVars.LLAP_DAEMON_QUEUE_NAME.varname, options.getLlapQueueName());
        propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_QUEUE_NAME.varname,
            options.getLlapQueueName());
      }

      final URL logger = conf.getResource(LlapConstants.LOG4j2_PROPERTIES_FILE);
      if (null == logger) {
        throw new Exception("Unable to find required config file: llap-daemon-log4j2.properties");
      }

      Path home = new Path(System.getenv("HIVE_HOME"));
      Path scriptParent = new Path(new Path(home, "scripts"), "llap");
      Path scripts = new Path(scriptParent, "bin");

      if (!lfs.exists(home)) {
        throw new Exception("Unable to find HIVE_HOME:" + home);
      } else if (!lfs.exists(scripts)) {
        LOG.warn("Unable to find llap scripts:" + scripts);
      }

      final Path libDir = new Path(tmpDir, "lib");
      final Path tezDir = new Path(libDir, "tez");
      final Path udfDir = new Path(libDir, "udfs");
      final Path confPath = new Path(tmpDir, "conf");
      if (!lfs.mkdirs(confPath)) {
        LOG.warn("mkdirs for " + confPath + " returned false");
      }
      if (!lfs.mkdirs(tezDir)) {
        LOG.warn("mkdirs for " + tezDir + " returned false");
      }
      if (!lfs.mkdirs(udfDir)) {
        LOG.warn("mkdirs for " + udfDir + " returned false");
      }

      NamedCallable<Void> downloadTez = new NamedCallable<Void>("downloadTez") {
        @Override
        public Void call() throws Exception {
          synchronized (fs) {
            String tezLibs = conf.get(TezConfiguration.TEZ_LIB_URIS);
            if (tezLibs == null) {
              LOG.warn("Missing tez.lib.uris in tez-site.xml");
            }
            if (LOG.isDebugEnabled()) {
              LOG.debug("Copying tez libs from " + tezLibs);
            }
            lfs.mkdirs(tezDir);
            fs.copyToLocalFile(new Path(tezLibs), new Path(libDir, "tez.tar.gz"));
            CompressionUtils.unTar(new Path(libDir, "tez.tar.gz").toString(), tezDir.toString(),
                true);
            lfs.delete(new Path(libDir, "tez.tar.gz"), false);
          }
          return null;
        }
      };

      NamedCallable<Void> copyLocalJars = new NamedCallable<Void>("copyLocalJars") {
        @Override
        public Void call() throws Exception {
          Class<?>[] dependencies = new Class<?>[] {
              LlapDaemonProtocolProtos.class, // llap-common
              LlapTezUtils.class, // llap-tez
              LlapInputFormat.class, // llap-server
              HiveInputFormat.class, // hive-exec
              SslContextFactory.class, // hive-common (https deps)
              Rule.class, // Jetty rewrite class
              RegistryUtils.ServiceRecordMarshal.class, // ZK registry
              // log4j2
              com.lmax.disruptor.RingBuffer.class, // disruptor
              org.apache.logging.log4j.Logger.class, // log4j-api
              org.apache.logging.log4j.core.Appender.class, // log4j-core
              org.apache.logging.slf4j.Log4jLogger.class, // log4j-slf4j
              // log4j-1.2-API needed for NDC
              org.apache.log4j.NDC.class,
          };

          for (Class<?> c : dependencies) {
            Path jarPath = new Path(Utilities.jarFinderGetJar(c));
            lfs.copyFromLocalFile(jarPath, libDir);
            if (LOG.isDebugEnabled()) {
              LOG.debug("Copying " + jarPath + " to " + libDir);
            }
          }
          return null;
        }
      };

      // copy default aux classes (json/hbase)
      NamedCallable<Void> copyAuxJars = new NamedCallable<Void>("copyAuxJars") {
        @Override
        public Void call() throws Exception {
          for (String className : DEFAULT_AUX_CLASSES) {
            localizeJarForClass(lfs, libDir, className, false);
          }
          Collection<String> codecs = conf.getStringCollection("io.compression.codecs");
          if (codecs != null) {
            for (String codecClassName : codecs) {
              localizeJarForClass(lfs, libDir, codecClassName, false);
            }
          }

          if (options.getIsHBase()) {
            try {
              localizeJarForClass(lfs, libDir, HBASE_SERDE_CLASS, true);
              Job fakeJob = new Job(new JobConf()); // HBase API is convoluted.
              TableMapReduceUtil.addDependencyJars(fakeJob);
              Collection<String> hbaseJars =
                  fakeJob.getConfiguration().getStringCollection("tmpjars");
              for (String jarPath : hbaseJars) {
                if (!jarPath.isEmpty()) {
                  lfs.copyFromLocalFile(new Path(jarPath), libDir);
                }
              }
            } catch (Throwable t) {
              String err = "Failed to add HBase jars. Use --auxhbase=false to avoid localizing them";
              LOG.error(err);
              System.err.println(err);
              throw new RuntimeException(t);
            }
          }

          HashSet<String> auxJars = new HashSet<>();
          // There are many ways to have AUX jars in Hive... sigh
          if (options.getIsHiveAux()) {
            // Note: we don't add ADDED jars, RELOADABLE jars, etc. That is by design; there are
            // too many ways to add jars in Hive, some of which are session/etc. specific.
            // Env + conf + arg should be enough.
            addAuxJarsToSet(auxJars, conf.getAuxJars());
            addAuxJarsToSet(auxJars, System.getenv("HIVE_AUX_JARS_PATH"));
            LOG.info("Adding the following aux jars from the environment and configs: " + auxJars);
          }

          addAuxJarsToSet(auxJars, options.getAuxJars());
          for (String jarPath : auxJars) {
            lfs.copyFromLocalFile(new Path(jarPath), libDir);
          }
          return null;
        }

        private void addAuxJarsToSet(HashSet<String> auxJarSet, String auxJars) {
          if (auxJars != null && !auxJars.isEmpty()) {
            // TODO: transitive dependencies warning?
            String[] jarPaths = auxJars.split(",");
            for (String jarPath : jarPaths) {
              if (!jarPath.isEmpty()) {
                auxJarSet.add(jarPath);
              }
            }
          }
        }
      };

      NamedCallable<Void> copyUdfJars = new NamedCallable<Void>("copyUdfJars") {
        @Override
        public Void call() throws Exception {
          // UDFs
          final Set<String> allowedUdfs;

          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOW_PERMANENT_FNS)) {
            synchronized (fs) {
              allowedUdfs = downloadPermanentFunctions(conf, udfDir);
            }
          } else {
            allowedUdfs = Collections.emptySet();
          }

          PrintWriter udfStream = new PrintWriter(lfs.create(
              new Path(confPath, StaticPermanentFunctionChecker.PERMANENT_FUNCTIONS_LIST)));
          for (String udfClass : allowedUdfs) {
            udfStream.println(udfClass);
          }

          udfStream.close();
          return null;
        }
      };

      String java_home;
      if (options.getJavaPath() == null || options.getJavaPath().isEmpty()) {
        java_home = System.getenv("JAVA_HOME");
        String jre_home = System.getProperty("java.home");
        if (java_home == null) {
          java_home = jre_home;
        } else if (!java_home.equals(jre_home)) {
          LOG.warn("Java versions might not match : JAVA_HOME=[{}],process jre=[{}]", java_home,
              jre_home);
        }
      } else {
        java_home = options.getJavaPath();
      }
      if (java_home == null || java_home.isEmpty()) {
        throw new RuntimeException(
            "Could not determine JAVA_HOME from command line parameters, environment or system properties");
      }
      LOG.info("Using [{}] for JAVA_HOME", java_home);

      NamedCallable<Void> copyConfigs = new NamedCallable<Void>("copyConfigs") {
        @Override
        public Void call() throws Exception {
          // Copy over the mandatory configs for the package.
          for (String f : NEEDED_CONFIGS) {
            copyConfig(lfs, confPath, f);
          }
          for (String f : OPTIONAL_CONFIGS) {
            try {
              copyConfig(lfs, confPath, f);
            } catch (Throwable t) {
              LOG.info("Error getting an optional config " + f + "; ignoring: " + t.getMessage());
            }
          }
          createLlapDaemonConfig(lfs, confPath, conf, propsDirectOptions, options.getConfig());
          setUpLogAndMetricConfigs(lfs, logger, confPath);
          return null;
        }
      };

      @SuppressWarnings("unchecked")
      final NamedCallable<Void>[] asyncWork =
          new NamedCallable[] { downloadTez, copyUdfJars, copyLocalJars, copyAuxJars, copyConfigs };
      @SuppressWarnings("unchecked")
      final Future<Void>[] asyncResults = new Future[asyncWork.length];
      for (int i = 0; i < asyncWork.length; i++) {
        asyncResults[i] = asyncRunner.submit(asyncWork[i]);
      }

      // TODO: need to move from Python to Java for the rest of the script.
      JSONObject configs = createConfigJson(containerSize, cache, xmx, java_home);

      writeConfigJson(tmpDir, lfs, configs);

      if (LOG.isDebugEnabled()) {
        LOG.debug("Config generation took " + (System.nanoTime() - t0) + " ns");
      }

      for (int i = 0; i < asyncWork.length; i++) {
        final long t1 = System.nanoTime();
        asyncResults[i].get();
        final long t2 = System.nanoTime();
        if (LOG.isDebugEnabled()) {
          LOG.debug(asyncWork[i].getName() + " waited for " + (t2 - t1) + " ns");
        }
      }

      if (options.isStarting()) {
        String version = System.getenv("HIVE_VERSION");
        if (version == null || version.isEmpty()) {
          version = DateTime.now().toString("ddMMMyyyy");
        }
        String outputDir = options.getOutput();
        Path packageDir = null;
        if (outputDir == null) {
          outputDir = OUTPUT_DIR_PREFIX + version;
          packageDir = new Path(Paths.get(".").toAbsolutePath().toString(),
              OUTPUT_DIR_PREFIX + version);
        } else {
          packageDir = new Path(outputDir);
        }
        rc = runPackagePy(args, tmpDir, scriptParent, version, outputDir);
        if (rc == 0) {
          LlapSliderUtils.startCluster(conf, options.getName(), "llap-" + version + ".zip",
              packageDir, HiveConf.getVar(conf, ConfVars.LLAP_DAEMON_QUEUE_NAME));
        }
      } else {
        rc = 0;
      }
    } finally {
      executor.shutdown();
      lfs.close();
      fs.close();
    }

    if (rc == 0) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Exiting successfully");
      }
    } else {
      LOG.info("Exiting with rc = " + rc);
    }
    return rc;
  }

  private int runPackagePy(String[] args, Path tmpDir, Path scriptParent, String version,
      String outputDir) throws IOException, InterruptedException {
    Path scriptPath = new Path(new Path(scriptParent, "slider"), "package.py");
    List<String> scriptArgs = new ArrayList<>(args.length + 7);
    scriptArgs.add("python");
    scriptArgs.add(scriptPath.toString());
    scriptArgs.add("--input");
    scriptArgs.add(tmpDir.toString());
    scriptArgs.add("--output");
    scriptArgs.add(outputDir);
    scriptArgs.add("--javaChild");
    for (String arg : args) {
      scriptArgs.add(arg);
    }
    LOG.debug("Calling package.py via: " + scriptArgs);
    ProcessBuilder builder = new ProcessBuilder(scriptArgs);
    builder.redirectError(ProcessBuilder.Redirect.INHERIT);
    builder.redirectOutput(ProcessBuilder.Redirect.INHERIT);
    builder.environment().put("HIVE_VERSION", version);
    return builder.start().waitFor();
  }

  private void writeConfigJson(Path tmpDir, final FileSystem lfs, JSONObject configs)
      throws IOException, JSONException {
    FSDataOutputStream os = lfs.create(new Path(tmpDir, "config.json"));
    OutputStreamWriter w = new OutputStreamWriter(os);
    configs.write(w);
    w.close();
    os.close();
  }

  private JSONObject createConfigJson(long containerSize, long cache, long xmx, String java_home)
      throws JSONException {
    // extract configs for processing by the python fragments in Slider
    JSONObject configs = new JSONObject();

    configs.put("java.home", java_home);

    configs.put(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB));
    configs.put(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize);

    configs.put(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname,
        HiveConf.getSizeVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE));

    configs.put(HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT.varname,
        HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT));

    configs.put(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB));

    configs.put(ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE));

    configs.put(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_NUM_EXECUTORS));
    // Let YARN pick the queue name, if it isn't provided in hive-site, or via the command-line
    if (HiveConf.getVar(conf, ConfVars.LLAP_DAEMON_QUEUE_NAME) != null) {
      configs.put(ConfVars.LLAP_DAEMON_QUEUE_NAME.varname,
          HiveConf.getVar(conf, ConfVars.LLAP_DAEMON_QUEUE_NAME));
    }

    // Propagate the cluster name to the script.
    String clusterHosts = HiveConf.getVar(conf, ConfVars.LLAP_DAEMON_SERVICE_HOSTS);
    if (!StringUtils.isEmpty(clusterHosts) && clusterHosts.startsWith("@")
        && clusterHosts.length() > 1) {
      configs.put(CONFIG_CLUSTER_NAME, clusterHosts.substring(1));
    }

    configs.put(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
        conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1));

    configs.put(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
        conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, -1));

    long maxDirect = (xmx > 0 && cache > 0 && xmx < cache * 1.25) ? (long) (cache * 1.25) : -1;
    configs.put("max_direct_memory", Long.toString(maxDirect));

    return configs;
  }

  private Set<String> downloadPermanentFunctions(Configuration conf, Path udfDir)
      throws HiveException, URISyntaxException, IOException {
    Map<String, String> udfs = new HashMap<String, String>();
    HiveConf hiveConf = new HiveConf();
    // disable expensive operations on the metastore
    hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_INIT_METADATA_COUNT_ENABLED, false);
    hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_METRICS, false);
    // performance problem: ObjectStore does its own new HiveConf()
    Hive hive = Hive.getWithFastCheck(hiveConf, false);
    ResourceDownloader resourceDownloader =
        new ResourceDownloader(conf, udfDir.toUri().normalize().getPath());
    List<Function> fns = hive.getAllFunctions();
    Set<URI> srcUris = new HashSet<>();
    for (Function fn : fns) {
      String fqfn = fn.getDbName() + "." + fn.getFunctionName();
      if (udfs.containsKey(fn.getClassName())) {
        LOG.warn("Duplicate function names found for " + fn.getClassName() + " with " + fqfn
            + " and " + udfs.get(fn.getClassName()));
      }
      udfs.put(fn.getClassName(), fqfn);
      List<ResourceUri> resources = fn.getResourceUris();
      if (resources == null || resources.isEmpty()) {
        LOG.warn("Missing resources for " + fqfn);
        continue;
      }
      for (ResourceUri resource : resources) {
        srcUris.add(ResourceDownloader.createURI(resource.getUri()));
      }
    }
    for (URI srcUri : srcUris) {
      List<URI> localUris = resourceDownloader.downloadExternal(srcUri, null, false);
      for (URI dst : localUris) {
        LOG.warn("Downloaded " + dst + " from " + srcUri);
      }
    }
    return udfs.keySet();
  }

  private void localizeJarForClass(FileSystem lfs, Path libDir, String className, boolean doThrow)
      throws IOException {
    String jarPath = null;
    boolean hasException = false;
    try {
      Class<?> auxClass = Class.forName(className);
      jarPath = Utilities.jarFinderGetJar(auxClass);
    } catch (Throwable t) {
      if (doThrow) {
        throw (t instanceof IOException) ? (IOException) t : new IOException(t);
      }
      hasException = true;
      String err = "Cannot find a jar for [" + className + "] due to an exception ("
          + t.getMessage() + "); not packaging the jar";
      LOG.error(err);
      System.err.println(err);
    }
    if (jarPath != null) {
      lfs.copyFromLocalFile(new Path(jarPath), libDir);
    } else if (!hasException) {
      String err = "Cannot find a jar for [" + className + "]; not packaging the jar";
      if (doThrow) {
        throw new IOException(err);
      }
      LOG.error(err);
      System.err.println(err);
    }
  }

  /**
   * @param lfs filesystem on which the file will be generated
   * @param confPath path where the config will be generated
   * @param configured the base configuration instance
   * @param direct properties specified directly - i.e. using the properties exact option
   * @param hiveconf properties specified via --hiveconf
   * @throws IOException
   */
  private void createLlapDaemonConfig(FileSystem lfs, Path confPath, Configuration configured,
      Properties direct, Properties hiveconf) throws IOException {
    FSDataOutputStream confStream =
        lfs.create(new Path(confPath, LlapDaemonConfiguration.LLAP_DAEMON_SITE));

    Configuration llapDaemonConf = resolve(configured, direct, hiveconf);

    llapDaemonConf.writeXml(confStream);
    confStream.close();
  }

  private void copyConfig(FileSystem lfs, Path confPath, String f) throws IOException {
    HiveConf.getBoolVar(new Configuration(false), ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS);
    // they will be file:// URLs
    lfs.copyFromLocalFile(new Path(conf.getResource(f).toString()), confPath);
  }

  private void setUpLogAndMetricConfigs(final FileSystem lfs, final URL logger,
      final Path confPath) throws IOException {
    // logger can be a resource stream or a real file (cannot use copy)
    InputStream loggerContent = logger.openStream();
    IOUtils.copyBytes(loggerContent,
        lfs.create(new Path(confPath, "llap-daemon-log4j2.properties"), true), conf, true);

    String metricsFile = LlapConstants.LLAP_HADOOP_METRICS2_PROPERTIES_FILE;
    URL metrics2 = conf.getResource(metricsFile);
    if (metrics2 == null) {
      LOG.warn(LlapConstants.LLAP_HADOOP_METRICS2_PROPERTIES_FILE + " cannot be found."
          + " Looking for " + LlapConstants.HADOOP_METRICS2_PROPERTIES_FILE);
      metricsFile = LlapConstants.HADOOP_METRICS2_PROPERTIES_FILE;
      metrics2 = conf.getResource(metricsFile);
    }
    if (metrics2 != null) {
      InputStream metrics2FileStream = metrics2.openStream();
      IOUtils.copyBytes(metrics2FileStream,
          lfs.create(new Path(confPath, metricsFile), true), conf, true);
      LOG.info("Copied hadoop metrics2 properties file from " + metrics2);
    } else {
      LOG.warn("Cannot find " + LlapConstants.LLAP_HADOOP_METRICS2_PROPERTIES_FILE + " or "
          + LlapConstants.HADOOP_METRICS2_PROPERTIES_FILE + " in classpath.");
    }
  }
}