/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred; import co.cask.cdap.common.conf.Constants; import co.cask.cdap.common.utils.DirUtils; import com.google.common.collect.Maps; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.filecache.DistributedCache; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.FSDownload; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; import java.util.concurrent.atomic.AtomicLong; /** * A helper class for managing the distributed cache for {@link LocalJobRunner}. * * CDAP fix is applied on the ClassLoader so that it doesn't keep opened file when the ClassLoader * is pending for GC. */ @SuppressWarnings("deprecation") class LocalDistributedCacheManagerWithFix { public static final Logger LOG = LoggerFactory.getLogger(LocalDistributedCacheManagerWithFix.class); private List<String> localArchives = new ArrayList<>(); private List<String> localFiles = new ArrayList<>(); private List<String> localClasspaths = new ArrayList<>(); private List<File> jarExpandDirs = new ArrayList<>(); private List<File> symlinksCreated = new ArrayList<>(); private boolean setupCalled = false; private JobID jobId; public LocalDistributedCacheManagerWithFix(JobID jobId) { this.jobId = jobId; } /** * Set up the distributed cache by localizing the resources, and updating * the configuration with references to the localized resources. * @param conf * @throws IOException */ public void setup(JobConf conf) throws IOException { File workDir = new File(new File(conf.get(Constants.CFG_LOCAL_DATA_DIR)), conf.get(Constants.AppFabric.OUTPUT_DIR)); // Generate YARN local resources objects corresponding to the distributed // cache configuration Map<String, LocalResource> localResources = new LinkedHashMap<>(); MRApps.setupDistributedCache(conf, localResources); // Generating unique numbers for FSDownload. AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis()); // Find which resources are to be put on the local classpath Map<String, Path> classpaths = new HashMap<>(); Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf); if (archiveClassPaths != null) { for (Path p : archiveClassPaths) { FileSystem remoteFS = p.getFileSystem(conf); p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory())); classpaths.put(p.toUri().getPath().toString(), p); } } Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf); if (fileClassPaths != null) { for (Path p : fileClassPaths) { FileSystem remoteFS = p.getFileSystem(conf); p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory())); classpaths.put(p.toUri().getPath().toString(), p); } } // Localize the resources LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR); FileContext localFSFileContext = FileContext.getLocalFSFileContext(); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); ExecutorService exec = null; try { ThreadFactory tf = new ThreadFactoryBuilder() .setNameFormat("LocalDistributedCacheManagerWithFix Downloader #%d") .build(); exec = Executors.newCachedThreadPool(tf); Path destPath = localDirAllocator.getLocalPathForWrite(".", conf); Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap(); for (LocalResource resource : localResources.values()) { Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf, new Path(destPath, jobId.toString() + "_" + Long.toString(uniqueNumberGenerator.incrementAndGet())), resource); Future<Path> future = exec.submit(download); resourcesToPaths.put(resource, future); } for (Entry<String, LocalResource> entry : localResources.entrySet()) { LocalResource resource = entry.getValue(); Path path; try { path = resourcesToPaths.get(resource).get(); } catch (InterruptedException e) { throw new IOException(e); } catch (ExecutionException e) { throw new IOException(e); } String pathString = path.toUri().toString(); String link = entry.getKey(); String target = new File(path.toUri()).getPath(); symlink(workDir, target, link); if (resource.getType() == LocalResourceType.ARCHIVE) { localArchives.add(pathString); } else if (resource.getType() == LocalResourceType.FILE) { localFiles.add(pathString); } else if (resource.getType() == LocalResourceType.PATTERN) { //PATTERN is not currently used in local mode throw new IllegalArgumentException("Resource type PATTERN is not " + "implemented yet. " + resource.getResource()); } Path resourcePath; try { resourcePath = ConverterUtils.getPathFromYarnURL(resource.getResource()); } catch (URISyntaxException e) { throw new IOException(e); } LOG.info("Localized {} as {}", resourcePath, path); String cp = resourcePath.toUri().getPath(); if (classpaths.keySet().contains(cp)) { localClasspaths.add(path.toUri().getPath().toString()); } } } finally { if (exec != null) { exec.shutdown(); } } // Update the configuration object with localized data. if (!localArchives.isEmpty()) { conf.set(MRJobConfig.CACHE_LOCALARCHIVES, StringUtils .arrayToString(localArchives.toArray(new String[localArchives.size()]))); } if (!localFiles.isEmpty()) { conf.set(MRJobConfig.CACHE_LOCALFILES, StringUtils .arrayToString(localFiles.toArray(new String[localArchives.size()]))); } setupCalled = true; } /** * Utility method for creating a symlink and warning on errors. * * If link is null, does nothing. */ private void symlink(File workDir, String target, String link) throws IOException { if (link != null) { link = workDir.toString() + Path.SEPARATOR + link; File flink = new File(link); if (!flink.exists()) { LOG.info("Creating symlink: {} <- {}", target, link); if (0 != FileUtil.symLink(target, link)) { LOG.warn("Failed to create symlink: {} <- {}", target, link); } else { symlinksCreated.add(new File(link)); } } } } /** * Are the resources that should be added to the classpath? * Should be calle after setup(). * */ public boolean hasLocalClasspaths() { if (!setupCalled) { throw new IllegalStateException( "hasLocalClasspaths() should be called after setup()"); } return !localClasspaths.isEmpty(); } /** * Creates a class loader that includes the designated * files and archives. * * Cask fix : The ClassLoader has been setup through the MapReduceRuntimeService already. * Hence just return the parent. */ public ClassLoader makeClassLoader(final ClassLoader parent) throws MalformedURLException { return parent; } public void close() throws IOException { for (File symlink : symlinksCreated) { if (!symlink.delete()) { LOG.warn("Failed to delete symlink created by the local job runner: {}", symlink); } } FileContext localFSFileContext = FileContext.getLocalFSFileContext(); for (String archive : localArchives) { localFSFileContext.delete(new Path(archive), true); } for (String file : localFiles) { localFSFileContext.delete(new Path(file), true); } for (File dir : jarExpandDirs) { try { DirUtils.deleteDirectoryContents(dir); } catch (IOException e) { LOG.warn("Failed to delete jar directory " + dir); } } } }