/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.filecache;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLClassLoader;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.TrackerDistributedCacheManager.CacheStatus;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobLocalizer;

/**
 * Helper class of {@link TrackerDistributedCacheManager} that represents
 * the cached files of a single job.
 *
 * <b>This class is internal to Hadoop, and should not be treated as a public
 * interface.</b>
 */
public class TaskDistributedCacheManager {
  private final TrackerDistributedCacheManager distributedCacheManager;
  /** Every file and archive named in the task configuration, files first. */
  private final List<CacheFile> cacheFiles = new ArrayList<CacheFile>();
  /** Local paths collected by {@link #setupCache} for class-path entries. */
  private final List<String> classPaths = new ArrayList<String>();

  private boolean setupCalled = false;

  /**
   * Struct representing a single cached file.
   * There are four permutations (archive, file) and
   * (don't put in classpath, do put in classpath).
   */
  static class CacheFile {
    /** URI as in the configuration */
    final URI uri;

    enum FileType {
      REGULAR,
      ARCHIVE
    }

    boolean isPublic = true;
    /** Whether to decompress */
    final FileType type;
    final long timestamp;
    /** Whether this is to be added to the classpath */
    final boolean shouldBeAddedToClassPath;
    boolean localized = false;
    /** The owner of the localized file. Relevant only on the tasktrackers */
    final String owner;
    private CacheStatus status;

    CacheFile(URI uri, FileType type, boolean isPublic, long timestamp,
        boolean classPath) throws IOException {
      this.uri = uri;
      this.type = type;
      this.isPublic = isPublic;
      this.timestamp = timestamp;
      this.shouldBeAddedToClassPath = classPath;
      this.owner =
          TrackerDistributedCacheManager.getLocalizedCacheOwner(isPublic);
    }

    /**
     * Set the status for this cache file.
     * @param status the status object to associate with this file
     */
    public void setStatus(CacheStatus status) {
      this.status = status;
    }

    /**
     * Get the status for this cache file.
     * @return the status object, or {@code null} if none has been set
     */
    public CacheStatus getStatus() {
      return status;
    }

    /**
     * Converts the scheme used by DistributedCache to serialize what files to
     * cache in the configuration into CacheFile objects that represent those
     * files.
     *
     * @param uris the cache URIs; {@code null} yields an empty list
     * @param timestamps one timestamp per URI
     * @param cacheVisibilities one public/private flag per URI
     * @param paths the paths to put on the class path; may be {@code null}
     * @param type the file type assigned to every returned entry
     * @return one CacheFile per URI, in the order of {@code uris}
     * @throws IllegalArgumentException if {@code timestamps} does not have
     *         exactly one entry per URI, or {@code cacheVisibilities} has
     *         fewer entries than there are URIs
     */
    private static List<CacheFile> makeCacheFiles(URI[] uris,
        long[] timestamps, boolean cacheVisibilities[], Path[] paths,
        FileType type) throws IOException {
      List<CacheFile> ret = new ArrayList<CacheFile>();
      if (uris == null) {
        return ret;
      }

      // A missing array counts as length zero so that an inconsistent
      // configuration is reported as a mismatch instead of surfacing as a
      // NullPointerException or ArrayIndexOutOfBoundsException further down.
      int timestampCount = (timestamps == null) ? 0 : timestamps.length;
      if (uris.length != timestampCount) {
        throw new IllegalArgumentException("Mismatched uris and timestamps.");
      }
      int visibilityCount =
          (cacheVisibilities == null) ? 0 : cacheVisibilities.length;
      if (visibilityCount < uris.length) {
        throw new IllegalArgumentException(
            "Mismatched uris and visibilities.");
      }

      // Index the class-path entries by the path component of their URI;
      // a cache URI is matched against that component only.
      Map<String, Path> classPaths = new HashMap<String, Path>();
      if (paths != null) {
        for (Path p : paths) {
          classPaths.put(p.toUri().getPath(), p);
        }
      }

      for (int i = 0; i < uris.length; ++i) {
        URI u = uris[i];
        boolean isClassPath = classPaths.containsKey(u.getPath());
        ret.add(new CacheFile(u, type, cacheVisibilities[i], timestamps[i],
            isClassPath));
      }
      return ret;
    }

    /** @return the localized flag of this file */
    boolean getLocalized() {
      return localized;
    }

    /** @param val the new value of the localized flag */
    void setLocalized(boolean val) {
      localized = val;
    }
  }

  /**
   * Builds the list of cache files described by the task configuration:
   * first the regular files, then the archives.
   *
   * @param distributedCacheManager the tracker-wide manager this helper
   *        delegates to
   * @param taskConf the configuration listing the cache files and archives
   * @throws IOException if a CacheFile cannot be created
   */
  TaskDistributedCacheManager(
      TrackerDistributedCacheManager distributedCacheManager,
      Configuration taskConf) throws IOException {
    this.distributedCacheManager = distributedCacheManager;

    this.cacheFiles.addAll(
        CacheFile.makeCacheFiles(DistributedCache.getCacheFiles(taskConf),
            DistributedCache.getFileTimestamps(taskConf),
            TrackerDistributedCacheManager.getFileVisibilities(taskConf),
            DistributedCache.getFileClassPaths(taskConf),
            CacheFile.FileType.REGULAR));
    this.cacheFiles.addAll(
        CacheFile.makeCacheFiles(DistributedCache.getCacheArchives(taskConf),
            DistributedCache.getArchiveTimestamps(taskConf),
            TrackerDistributedCacheManager.getArchiveVisibilities(taskConf),
            DistributedCache.getArchiveClassPaths(taskConf),
            CacheFile.FileType.ARCHIVE));
  }

  /**
   * Retrieve public distributed cache files into the local cache and updates
   * the given task configuration.
   * The private distributed cache is just looked at and the paths where the
   * files/archives should go to is decided here. The actual localization is
   * done by {@link JobLocalizer}.
   *
   * It is the caller's responsibility to re-write the task configuration XML
   * file, if necessary.
   *
   * @param taskConf the configuration to read from and to update with the
   *        local file and archive paths
   * @param publicCacheSubdir the cache sub-directory used for public files
   * @param privateCacheSubdir the cache sub-directory used for private files
   * @throws IOException if a file status lookup or cache lookup fails
   */
  public void setupCache(Configuration taskConf, String publicCacheSubdir,
      String privateCacheSubdir) throws IOException {
    setupCalled = true;
    List<Path> localArchives = new ArrayList<Path>();
    List<Path> localFiles = new ArrayList<Path>();

    for (CacheFile cacheFile : cacheFiles) {
      URI uri = cacheFile.uri;
      FileSystem fileSystem = FileSystem.get(uri, taskConf);
      FileStatus fileStatus =
          fileSystem.getFileStatus(new Path(uri.getPath()));
      boolean isArchive = cacheFile.type == CacheFile.FileType.ARCHIVE;

      // Public and private files differ only in the sub-directory they use.
      String cacheSubdir =
          cacheFile.isPublic ? publicCacheSubdir : privateCacheSubdir;
      Path p = distributedCacheManager.getLocalCache(uri, taskConf,
          cacheSubdir, fileStatus, isArchive, cacheFile.timestamp,
          cacheFile.isPublic, cacheFile);
      cacheFile.setLocalized(true);

      if (isArchive) {
        localArchives.add(p);
      } else {
        localFiles.add(p);
      }
      if (cacheFile.shouldBeAddedToClassPath) {
        classPaths.add(p.toString());
      }
    }

    // Update the configuration object with localized data.
    if (!localArchives.isEmpty()) {
      DistributedCache.addLocalArchives(taskConf,
          stringifyPathList(localArchives));
    }
    if (!localFiles.isEmpty()) {
      DistributedCache.addLocalFiles(taskConf,
          stringifyPathList(localFiles));
    }
  }

  /*
   * This method is called from unit tests.
   */
  List<CacheFile> getCacheFiles() {
    return cacheFiles;
  }

  /**
   * Joins the given paths into a single comma-separated string.
   *
   * @param p the paths to join
   * @return the joined string, or {@code null} if the list is null or empty
   */
  private static String stringifyPathList(List<Path> p) {
    if (p == null || p.isEmpty()) {
      return null;
    }
    StringBuilder str = new StringBuilder(p.get(0).toString());
    for (int i = 1; i < p.size(); i++) {
      str.append(",");
      str.append(p.get(i).toString());
    }
    return str.toString();
  }

  /**
   * Retrieves class paths (as local references) to add.
   * Should be called after {@link #setupCache}.
   *
   * @return the live list of local class-path entries (not a copy)
   * @throws IllegalStateException if {@link #setupCache} has not been called
   */
  public List<String> getClassPaths() throws IOException {
    if (!setupCalled) {
      throw new IllegalStateException(
          "getClassPaths() should be called after setup()");
    }
    return classPaths;
  }

  /**
   * Releases the cached files/archives, so that space
   * can be reclaimed by the {@link TrackerDistributedCacheManager}.
   * Only entries that are marked localized and have a status are released.
   */
  public void release() throws IOException {
    for (CacheFile c : cacheFiles) {
      if (c.getLocalized() && c.status != null) {
        distributedCacheManager.releaseCache(c.status);
      }
    }
  }

  /**
   * Reports sizes for the private cache files that have a status.
   *
   * @param sizes indexed by each entry's position in the cache-file list
   *        (files first, then archives); positions that belong to public
   *        or status-less entries are skipped but still consume an index
   */
  public void setSizes(long[] sizes) throws IOException {
    int i = 0;
    for (CacheFile c : cacheFiles) {
      if (!c.isPublic && c.status != null) {
        distributedCacheManager.setSize(c.status, sizes[i]);
      }
      i++;
    }
  }

  /**
   * Creates a class loader that includes the designated
   * files and archives.
   * The class-path entries are only collected by {@link #setupCache}; before
   * that call the returned loader has no URLs of its own.
   *
   * @param parent the parent of the new class loader
   * @return a URLClassLoader over the collected class-path entries
   * @throws MalformedURLException if an entry cannot be converted to a URL
   */
  public ClassLoader makeClassLoader(final ClassLoader parent)
      throws MalformedURLException {
    final URL[] urls = new URL[classPaths.size()];
    for (int i = 0; i < classPaths.size(); ++i) {
      urls[i] = new File(classPaths.get(i)).toURI().toURL();
    }
    return AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
      @Override
      public ClassLoader run() {
        return new URLClassLoader(urls, parent);
      }
    });
  }
}