/* * Copyright © 2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.internal.app.runtime.spark; import co.cask.cdap.common.conf.Constants; import co.cask.cdap.common.utils.DirUtils; import co.cask.cdap.internal.app.runtime.distributed.LocalizeResource; import com.google.common.base.Preconditions; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.net.URISyntaxException; import java.net.URL; import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.PathMatcher; import java.nio.file.Paths; import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; import java.util.Map; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; /** * A utility class to help determine Spark support and locate the Spark assembly jar. * TODO: CDAP-5506. Ideally this class shouldn't be in app-fabric, but should be in spark-core.
* */ public final class SparkUtils { private static final Logger LOG = LoggerFactory.getLogger(SparkUtils.class); // Environment variable name for locating spark assembly jar file private static final String SPARK_ASSEMBLY_JAR = "SPARK_ASSEMBLY_JAR"; // Environment variable name for locating spark home directory private static final String SPARK_HOME = Constants.SPARK_HOME; // File name of the Spark conf directory as defined by the Spark framework // This is for the Hack to workaround CDAP-5019 (SPARK-13441) public static final String LOCALIZED_CONF_DIR = "__spark_conf__"; private static File sparkAssemblyJar; /** * Locates the spark-assembly jar from the local file system. * * @return the spark-assembly jar location * @throws IllegalStateException if cannot locate the spark assembly jar */ public static synchronized File locateSparkAssemblyJar() { if (sparkAssemblyJar != null) { return sparkAssemblyJar; } // If someone explicitly set the location, use it. // It's useful for overridding what being set for SPARK_HOME String jarEnv = System.getenv(SPARK_ASSEMBLY_JAR); if (jarEnv != null) { File file = new File(jarEnv); if (file.isFile()) { LOG.info("Located Spark Assembly JAR in {}", file); sparkAssemblyJar = file; return file; } LOG.warn("Env $" + SPARK_ASSEMBLY_JAR + "=" + jarEnv + " is not a file. " + "Will locate Spark Assembly JAR with $" + SPARK_HOME); } String sparkHome = System.getenv(SPARK_HOME); if (sparkHome == null) { throw new IllegalStateException("Spark library not found. " + "Please set environment variable " + SPARK_HOME + " or " + SPARK_ASSEMBLY_JAR); } // Look for spark-assembly.jar symlink Path assemblyJar = Paths.get(sparkHome, "lib", "spark-assembly.jar"); if (Files.isSymbolicLink(assemblyJar)) { sparkAssemblyJar = assemblyJar.toFile(); return sparkAssemblyJar; } // No symbolic link exists. 
Search for spark-assembly*.jar in the lib directory Path sparkLib = Paths.get(sparkHome, "lib"); final PathMatcher pathMatcher = sparkLib.getFileSystem().getPathMatcher("glob:spark-assembly*.jar"); try { Files.walkFileTree(sparkLib, new SimpleFileVisitor<Path>() { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { // Take the first file match if (attrs.isRegularFile() && pathMatcher.matches(file.getFileName())) { sparkAssemblyJar = file.toFile(); return FileVisitResult.TERMINATE; } return FileVisitResult.CONTINUE; } @Override public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { // Ignore error return FileVisitResult.CONTINUE; } }); } catch (IOException e) { // Just log, don't throw. // If we already located the Spark Assembly jar during visiting, we can still use the jar. LOG.warn("Exception raised while inspecting {}", sparkLib, e); } Preconditions.checkState(sparkAssemblyJar != null, "Failed to locate Spark library from %s", sparkHome); LOG.info("Located Spark Assembly JAR in {}", sparkAssemblyJar); return sparkAssemblyJar; } /** * Prepares the resources that need to be localized to the Spark client container. * * @param tempDir a temporary directory for file creation * @param localizeResources A map from localized name to {@link LocalizeResource} for this method to update * @return localized name of the Spark assembly jar file */ public static String prepareSparkResources(File tempDir, Map<String, LocalizeResource> localizeResources) { File sparkAssemblyJar = locateSparkAssemblyJar(); localizeResources.put(sparkAssemblyJar.getName(), new LocalizeResource(sparkAssemblyJar)); // Shallow copy all files under directory defined by $HADOOP_CONF_DIR // If $HADOOP_CONF_DIR is not defined, use the location of "yarn-site.xml" to determine the directory // This is part of workaround for CDAP-5019 (SPARK-13441). 
File hadoopConfDir = null; if (System.getenv().containsKey(ApplicationConstants.Environment.HADOOP_CONF_DIR.key())) { hadoopConfDir = new File(System.getenv(ApplicationConstants.Environment.HADOOP_CONF_DIR.key())); } else { URL yarnSiteLocation = SparkUtils.class.getClassLoader().getResource("yarn-site.xml"); if (yarnSiteLocation != null) { try { hadoopConfDir = new File(yarnSiteLocation.toURI()).getParentFile(); } catch (URISyntaxException e) { // Shouldn't happen LOG.warn("Failed to derive HADOOP_CONF_DIR from yarn-site.xml"); } } } if (hadoopConfDir != null && hadoopConfDir.isDirectory()) { try { final File targetFile = File.createTempFile(LOCALIZED_CONF_DIR, ".zip", tempDir); try ( ZipOutputStream zipOutput = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(targetFile))) ) { for (File file : DirUtils.listFiles(hadoopConfDir)) { // Shallow copy of files under the hadoop conf dir. Ignore files that cannot be read if (file.isFile() && file.canRead()) { zipOutput.putNextEntry(new ZipEntry(file.getName())); Files.copy(file.toPath(), zipOutput); } } } localizeResources.put(LOCALIZED_CONF_DIR, new LocalizeResource(targetFile, true)); } catch (IOException e) { LOG.warn("Failed to create archive from {}", hadoopConfDir, e); } } return sparkAssemblyJar.getName(); } private SparkUtils() { } }