/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.llap.daemon.impl;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.IdentityHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.ResourceUri;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.FunctionTask;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.FunctionInfo.FunctionResource;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.ql.util.ResourceDownloader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class localizes and manages jars for the functions allowed inside LLAP.
*/
public class FunctionLocalizer implements GenericUDFBridge.UdfWhitelistChecker {
// ---- Constants ----
private static final Logger LOG = LoggerFactory.getLogger(FunctionLocalizer.class);
/** Subdirectory, created under the configured local dir, that holds downloaded resources. */
private static final String DIR_NAME = "fnresources";

// ---- Assigned once in the constructor ----
private final Configuration conf;
private final File localDir;
private final URLClassLoader executorClassloader;
private final Thread workThread;

/** Created in init() rather than the constructor, hence not final. */
private ResourceDownloader resourceDownloader;

// ---- Work-thread plumbing ----
/** Work items consumed one at a time by runWorkThread(). */
private final LinkedBlockingQueue<LocalizerWork> workQueue = new LinkedBlockingQueue<>();
/** Set by close() and by the work thread itself when it is interrupted. */
private volatile boolean isClosed = false;

// ---- Pending state, drained by refreshClassloader() ----
/** Local paths of jars downloaded since the last classloader refresh. */
private final List<String> recentlyLocalizedJars = new LinkedList<>();
/** Class names of functions localized since the last classloader refresh. */
private final List<String> recentlyLocalizedClasses = new LinkedList<>();

// ---- Bookkeeping ----
// Used as a set of allowed classes; filled by refreshClassloader(), read by isUdfAllowed().
// NOTE(review): IdentityHashMap is not synchronized, yet both of those methods are public and
// the former also runs on the work thread -- confirm how callers coordinate access.
private final IdentityHashMap<Class<?>, Boolean> allowedUdfClasses = new IdentityHashMap<>();
/** Per-function resources, keyed by the "db.function" name. */
private final ConcurrentHashMap<String, FnResources> resourcesByFn = new ConcurrentHashMap<>();
/** Localized files, keyed by the source URI they were downloaded from. */
private final ConcurrentHashMap<URI, RefCountedResource> localFiles = new ConcurrentHashMap<>();
/**
 * Prepares a localizer that will store resources under {@code <localDir>/fnresources}.
 * Nothing is downloaded and the work thread is not started until {@link #init()} is called.
 *
 * @param conf configuration later handed to the resource downloader
 * @param localDir parent directory for the localizer's own subdirectory
 */
public FunctionLocalizer(Configuration conf, String localDir) {
  this.conf = conf;
  this.localDir = new File(localDir, DIR_NAME);

  // The cast requires the calling thread's context classloader to be a URLClassLoader.
  URLClassLoader contextCl = (URLClassLoader) Thread.currentThread().getContextClassLoader();
  String[] noInitialJars = new String[0];
  this.executorClassloader =
      (URLClassLoader) Utilities.createUDFClassLoader(contextCl, noInitialJars);

  Runnable workLoop = new Runnable() {
    @Override
    public void run() {
      runWorkThread();
    }
  };
  this.workThread = new Thread(workLoop);
}
/**
 * Wipes any pre-existing local resource directory, creates the downloader rooted at that
 * directory and starts the work thread.
 *
 * @throws IOException if the existing directory cannot be deleted
 */
public void init() throws IOException {
  // TODO: We don't want some random jars of unknown provenance sitting around. Or do we care?
  //       Ideally, we should try to reuse jars and verify using some checksum.
  boolean hasLeftovers = localDir.exists();
  if (hasLeftovers) {
    FileUtils.deleteDirectory(localDir);
  }
  String downloadRoot = localDir.getAbsolutePath();
  this.resourceDownloader = new ResourceDownloader(conf, downloadRoot);
  workThread.start();
}
/**
 * Tells whether the given UDF class may be used: either the function registry reports it as
 * built-in, or it was registered by a classloader refresh after being localized.
 *
 * @param clazz the UDF class to check
 * @return true if the class is built-in or has been registered as allowed
 */
public boolean isUdfAllowed(Class<?> clazz) {
  if (FunctionRegistry.isBuiltInFuncClass(clazz)) {
    return true;
  }
  return allowedUdfClasses.containsKey(clazz);
}
/**
* Returns the classloader created in the constructor via Utilities.createUDFClassLoader.
* refreshClassloader() adds newly localized jars to this same instance.
*/
public ClassLoader getClassLoader() {
return executorClassloader;
}
/**
 * Queues localization work for every function known to Hive that declares at least one
 * resource, followed by a single classloader refresh. The actual downloads happen later on
 * the work thread; this method only enqueues.
 *
 * @throws HiveException if the metastore client cannot be obtained or the function list
 *     cannot be fetched
 */
public void startLocalizeAllFunctions() throws HiveException {
  Hive hive = Hive.get(false);
  // Do not allow embedded metastore in LLAP unless we are in test.
  try {
    boolean inTest = HiveConf.getBoolVar(conf, ConfVars.HIVE_IN_TEST);
    hive.getMSC(inTest, true);
  } catch (MetaException e) {
    throw new HiveException(e);
  }

  for (Function function : hive.getAllFunctions()) {
    List<ResourceUri> uris = function.getResourceUris();
    boolean hasResources = uris != null && !uris.isEmpty();
    if (hasResources) {
      String fqfn = function.getDbName() + "." + function.getFunctionName();
      FnResources fnResources = new FnResources();
      resourcesByFn.put(fqfn, fnResources);
      // Refresh is deferred: one RefreshClassloader item is queued after all functions.
      workQueue.add(new LocalizeFn(fqfn, uris, fnResources, function.getClassName(), false));
    }
  }
  workQueue.add(new RefreshClassloader());
}
/**
 * Signals the work thread to stop and waits briefly for it to finish. The work thread
 * deletes the localized resources on its way out.
 *
 * <p>If the calling thread is interrupted while waiting, the wait is abandoned and the
 * caller's interrupt status is restored so code further up the stack can still observe it.
 */
public void close() {
  isClosed = true;
  workThread.interrupt();
  try {
    workThread.join(1000); // Give it some time, then don't delay shutdown too much.
  } catch (InterruptedException e) {
    LOG.info("Interrupted during close");
    // join() cleared the flag when it threw; put it back instead of swallowing it.
    Thread.currentThread().interrupt();
  }
}
/**
 * Body of the work thread: takes queued work items one at a time and runs them until the
 * localizer is closed or the thread is interrupted, then deletes all local resources.
 * A failure of an individual work item is logged and does not stop the loop.
 */
private void runWorkThread() {
  while (!isClosed) {
    LocalizerWork next = null;
    try {
      next = workQueue.take();
    } catch (InterruptedException ex) {
      LOG.debug("Localizer thread interrupted");
      isClosed = true;
    }
    // Covers both an interrupted take() and a close() that raced with a successful take().
    if (isClosed) {
      break;
    }
    try {
      next.run(this);
    } catch (InterruptedException ex) {
      LOG.debug("Localizer thread interrupted");
      isClosed = true;
    } catch (Exception ex) {
      LOG.error("Failed to run " + next, ex);
    }
  }
  deleteAllLocalResources();
}
/**
* A unit of work for the localizer's work thread. runWorkThread() takes instances from the
* work queue and calls run() with the owning localizer.
*/
private interface LocalizerWork {
/**
* Performs the work against the given localizer. In runWorkThread(), an InterruptedException
* thrown from here marks the localizer as closed; any other Exception is logged and the
* loop moves on to the next item.
*/
void run(FunctionLocalizer parent)
throws URISyntaxException, IOException, InterruptedException;
}
/**
 * Work item that localizes all resources of one function, optionally refreshing the
 * classloader right afterwards.
 */
private static class LocalizeFn implements LocalizerWork {
  private final String fqfn;
  private final String className;
  private final List<ResourceUri> resources;
  private final FnResources result;
  private final boolean doRefreshClassloader;

  /**
   * @param fqfn function name in "db.function" form
   * @param resources resource URIs declared by the function
   * @param result receives the localized resources
   * @param className name of the class implementing the function
   * @param doRefreshClassloader whether to refresh the classloader once localization is done
   */
  public LocalizeFn(String fqfn, List<ResourceUri> resources, FnResources result,
      String className, boolean doRefreshClassloader) {
    this.fqfn = fqfn;
    this.className = className;
    this.resources = resources;
    this.result = result;
    this.doRefreshClassloader = doRefreshClassloader;
  }

  /** Delegates to the owning localizer with the arguments captured at construction. */
  public void run(FunctionLocalizer parent) throws URISyntaxException, IOException {
    parent.localizeFunctionResources(fqfn, resources, className, result, doRefreshClassloader);
  }

  /** Short description used when logging a failure of this work item. */
  public String toString() {
    StringBuilder description = new StringBuilder("localize ");
    description.append(resources.size());
    description.append(" resources for ");
    description.append(fqfn);
    return description.toString();
  }
}
/** Work item that makes the owning localizer load the jars localized so far. */
private static class RefreshClassloader implements LocalizerWork {
  private static final String DESCRIPTION = "load the recently localized jars";

  /** Delegates to {@code FunctionLocalizer.refreshClassloader()}. */
  public void run(FunctionLocalizer parent) throws URISyntaxException, IOException {
    parent.refreshClassloader();
  }

  /** Short description used when logging a failure of this work item. */
  public String toString() {
    return DESCRIPTION;
  }
}
/**
 * Best-effort cleanup run by the work thread when it exits: closes the executor classloader,
 * forgets the per-function bookkeeping and deletes every localized file. Failures are logged
 * and otherwise ignored so that one bad file does not prevent the rest from being removed.
 */
private void deleteAllLocalResources() {
  try {
    executorClassloader.close();
  } catch (Exception ex) {
    // The message has to be concatenated: with no "{}" placeholder in the format string,
    // SLF4J would silently drop a String argument and the reason would never be logged.
    LOG.info("Failed to close the classloader: " + ex.getMessage());
  }
  resourcesByFn.clear();
  for (RefCountedResource rcr : localFiles.values()) {
    for (FunctionResource fr : rcr.resources) {
      // We ignore refcounts (and errors) for now.
      File file = new File(fr.getResourceURI());
      try {
        if (!file.delete()) {
          LOG.info("Failed to delete " + file);
        }
      } catch (Exception ex) {
        LOG.info("Failed to delete " + file + ": " + ex.getMessage());
      }
    }
  }
}
/**
 * Adds every jar localized since the last refresh to the executor classloader, then resolves
 * the pending function class names against it and records them as allowed UDF classes.
 * Does nothing when no jars are pending; pending class names are then kept for a later call.
 *
 * @throws IOException if the jars cannot be added to the classpath or a pending class cannot
 *     be loaded; the message lists the jars or class names involved and the cause is attached
 */
public void refreshClassloader() throws IOException {
  if (recentlyLocalizedJars.isEmpty()) return;
  String[] jars = recentlyLocalizedJars.toArray(new String[0]);
  recentlyLocalizedJars.clear();
  ClassLoader updatedCl = null;
  try {
    updatedCl = Utilities.addToClassPath(executorClassloader, jars);
    if (LOG.isInfoEnabled()) {
      LOG.info("Added " + jars.length + " jars to classpath");
    }
  } catch (Throwable t) {
    // TODO: we could fall back to trying one by one and only ignore the failed ones.
    logRefreshError("Unable to localize jars: ", jars, t);
    return; // logRefreshError always throws.
  }
  if (updatedCl != executorClassloader) {
    throw new AssertionError("Classloader was replaced despite using UDFClassLoader: new "
        + updatedCl + ", old " + executorClassloader);
  }
  // Use a fresh array. Passing the jars array to toArray() would reuse it whenever there are
  // at least as many jars as classes, leaving a null terminator and stale jar paths after the
  // class names, all of which would then be handed to Class.forName below.
  String[] classNames = recentlyLocalizedClasses.toArray(new String[0]);
  recentlyLocalizedClasses.clear();
  try {
    for (String className : classNames) {
      // Load without initializing; the class object itself is the whitelist key.
      allowedUdfClasses.put(Class.forName(className, false, executorClassloader), Boolean.TRUE);
    }
  } catch (Throwable t) {
    // TODO: we could fall back to trying one by one and only ignore the failed ones.
    logRefreshError("Unable to instantiate localized classes: ", classNames, t);
    return; // logRefreshError always throws.
  }
}
/**
 * Builds an error message from the given prefix followed by every item (each one followed by
 * ", ") and throws it as an IOException wrapping the original failure. Never returns normally.
 *
 * @param what message prefix describing what failed
 * @param items jar paths or class names involved in the failure
 * @param t the underlying failure, attached as the cause
 * @throws IOException always
 */
private void logRefreshError(String what, String[] items, Throwable t) throws IOException {
  StringBuilder message = new StringBuilder(what);
  for (int i = 0; i < items.length; ++i) {
    message.append(items[i]).append(", ");
  }
  throw new IOException(message.toString(), t);
}
/**
 * Localizes every resource of one function, remembers the function's class name for the next
 * classloader refresh, and performs that refresh immediately if asked to.
 *
 * @param fqfn function name in "db.function" form
 * @param resources resource URIs declared by the function
 * @param className name of the class implementing the function
 * @param result receives the localized resources
 * @param doRefreshClassloader whether to refresh the classloader before returning
 */
private void localizeFunctionResources(String fqfn, List<ResourceUri> resources,
    String className, FnResources result, boolean doRefreshClassloader)
    throws URISyntaxException, IOException {
  // We will download into fn-scoped subdirectories to avoid name collisions (we assume there
  // are no collisions within the same fn). That doesn't mean we download for every fn.
  if (LOG.isInfoEnabled()) {
    LOG.info("Localizing " + resources.size() + " resources for " + fqfn);
  }
  for (ResourceUri declared : resources) {
    URI source = ResourceDownloader.createURI(declared.getUri());
    ResourceType type = FunctionTask.getResourceType(declared.getResourceType());
    localizeOneResource(fqfn, source, type, result);
  }
  recentlyLocalizedClasses.add(className);
  if (!doRefreshClassloader) {
    return;
  }
  refreshClassloader();
}
/**
 * Makes a single source URI available locally for the given function. If the URI has already
 * been localized and is still referenced, the existing files are reused and the reference
 * count is bumped; otherwise the resource is downloaded and registered. Downloaded jars are
 * queued for the next classloader refresh. A failed download is logged and skipped.
 *
 * @param fqfn function name in "db.function" form
 * @param srcUri where the resource comes from
 * @param rt resource type applied to every file obtained from {@code srcUri}
 * @param result receives the localized resources
 */
private void localizeOneResource(String fqfn, URI srcUri, ResourceType rt, FnResources result)
    throws URISyntaxException, IOException {
  RefCountedResource known = localFiles.get(srcUri);
  boolean canReuse = known != null && known.refCount > 0;
  if (canReuse) {
    logFilesUsed("Reusing", fqfn, srcUri, known);
    known.refCount += 1;
    result.addResources(known);
    return;
  }

  List<URI> downloaded = resourceDownloader.downloadExternal(srcUri, fqfn, false);
  if (downloaded == null || downloaded.isEmpty()) {
    LOG.error("Cannot download " + srcUri + " for " + fqfn);
    return;
  }

  RefCountedResource fresh = new RefCountedResource();
  fresh.resources = new ArrayList<>();
  boolean isJar = (rt == ResourceType.JAR);
  for (URI localUri : downloaded) {
    // Reuse the same type for all. Only Ivy can return more than one, probably all jars.
    String localPath = localUri.getPath();
    fresh.resources.add(new FunctionResource(rt, localPath));
    if (isJar) {
      recentlyLocalizedJars.add(localPath);
    }
  }
  fresh.refCount += 1;
  logFilesUsed("Using", fqfn, srcUri, fresh);
  localFiles.put(srcUri, fresh);
  result.addResources(fresh);
}
/**
 * Logs, at info level, which local files back a source URI for a function. A single file is
 * printed in full; several files are summarized by their count.
 *
 * @param what leading verb of the message (the callers pass "Using" or "Reusing")
 * @param fqfn function name in "db.function" form
 * @param srcUri the source URI the files were obtained from
 * @param rcr the localized files to describe
 */
private void logFilesUsed(String what, String fqfn, URI srcUri, RefCountedResource rcr) {
  if (LOG.isInfoEnabled()) {
    int fileCount = rcr.resources.size();
    String desc;
    if (fileCount == 1) {
      desc = rcr.resources.get(0).toString();
    } else {
      desc = fileCount + " files";
    }
    LOG.info(what + " files [" + desc + "] for [" + srcUri + "] resource for " + fqfn);
  }
}
/**
* The local files obtained for one source URI, plus a count of how many times they are in use.
*/
private static class RefCountedResource {
// Local resources created in localizeOneResource() from the downloaded URIs.
List<FunctionResource> resources;
// Incremented in localizeOneResource() on first use and on every reuse. No code visible in
// this file decrements it, and deleteAllLocalResources() ignores it.
int refCount = 0;
}
/**
* The localized resources of a single function, accumulated as its source URIs are processed.
*/
private static class FnResources {
// Flattened list of every local resource added so far.
final List<FunctionResource> localResources = new ArrayList<>();
// The ref-counted entries the local resources came from, in the order they were added.
final List<RefCountedResource> originals = new ArrayList<>();
/**
* Appends all resources of the given entry to localResources and remembers the entry itself.
*/
public void addResources(RefCountedResource rcr) {
localResources.addAll(rcr.resources);
originals.add(rcr);
}
}
}