/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.app.runtime.spark;

import co.cask.cdap.api.spark.JavaSparkExecutionContext;
import co.cask.cdap.api.spark.Spark;
import co.cask.cdap.api.spark.SparkClientContext;
import co.cask.cdap.api.spark.SparkExecutionContext;
import co.cask.cdap.common.lang.ClassLoaders;
import co.cask.cdap.common.lang.CombineClassLoader;
import co.cask.cdap.common.lang.ProgramClassLoader;
import co.cask.cdap.internal.app.runtime.plugin.PluginClassLoaders;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import org.apache.hadoop.yarn.api.ApplicationConstants;

import java.io.File;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;

/**
 * ClassLoader used in the Spark execution context, on the driver as well as on executor nodes.
 * It loads classes from the {@link ProgramClassLoader} first, then from the plugin ClassLoaders,
 * and finally from the CDAP system ClassLoader.
 */
public class SparkClassLoader extends CombineClassLoader {

  private final SparkRuntimeContext runtimeContext;
  // Nullable: absent when constructed via SparkClassLoader(SparkRuntimeContext), in which case
  // createExecutionContext() is not allowed.
  private final SparkExecutionContextFactory contextFactory;

  /**
   * Finds the SparkClassLoader from the context ClassLoader hierarchy.
   *
   * @return the SparkClassLoader found
   * @throws IllegalStateException if no SparkClassLoader was found
   */
  public static SparkClassLoader findFromContext() {
    ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    SparkClassLoader sparkClassLoader = ClassLoaders.find(contextClassLoader, SparkClassLoader.class);
    // Should find the Spark ClassLoader; if not, the current thread was not set up for Spark execution.
    Preconditions.checkState(sparkClassLoader != null,
                             "Cannot find SparkClassLoader from context ClassLoader %s", contextClassLoader);
    return sparkClassLoader;
  }

  /**
   * Creates a new SparkClassLoader from the execution context. It should only be called in distributed mode.
   *
   * @return a new SparkClassLoader backed by the {@link SparkRuntimeContext} of the current container
   */
  public static SparkClassLoader create() {
    return new SparkClassLoader(SparkRuntimeContextProvider.get(), createSparkExecutionContextFactory());
  }

  /**
   * Creates a new SparkClassLoader from the given {@link SparkRuntimeContext} without the ability to create
   * {@link SparkExecutionContext}. It is used in {@link Spark#beforeSubmit(SparkClientContext)} and
   * {@link Spark#onFinish(boolean, SparkClientContext)} methods.
   *
   * @param runtimeContext the runtime context of the Spark program
   */
  public SparkClassLoader(SparkRuntimeContext runtimeContext) {
    this(runtimeContext, null);
  }

  /**
   * Creates a new SparkClassLoader with the given {@link SparkRuntimeContext}.
   *
   * @param runtimeContext the runtime context of the Spark program
   * @param contextFactory factory for creating {@link SparkExecutionContext}, or {@code null} if
   *                       {@link #createExecutionContext()} is not allowed in this context
   */
  public SparkClassLoader(SparkRuntimeContext runtimeContext,
                          @Nullable SparkExecutionContextFactory contextFactory) {
    // Parent is null: delegation is handled entirely by the CombineClassLoader delegate list below.
    super(null, createDelegateClassLoaders(runtimeContext));
    this.runtimeContext = runtimeContext;
    this.contextFactory = contextFactory;
  }

  /**
   * Returns the program ClassLoader.
   */
  public ClassLoader getProgramClassLoader() {
    return runtimeContext.getProgram().getClassLoader();
  }

  /**
   * Returns the {@link SparkRuntimeContext}.
   */
  public SparkRuntimeContext getRuntimeContext() {
    return runtimeContext;
  }

  /**
   * Creates a new instance of {@link SparkExecutionContext}.
   *
   * @return a new {@link SparkExecutionContext} for the current runtime context
   * @throws IllegalStateException if this ClassLoader was constructed without a
   *         {@link SparkExecutionContextFactory}
   */
  public SparkExecutionContext createExecutionContext() {
    if (contextFactory == null) {
      // This shouldn't happen, but to safeguard
      throw new IllegalStateException("Creation of SparkExecutionContext is not allowed in the current context.");
    }
    return contextFactory.create(runtimeContext);
  }

  /**
   * Creates a new instance of {@link JavaSparkExecutionContext} by wrapping the given {@link SparkExecutionContext}.
   *
   * @param sec the Scala execution context to wrap
   * @return a Java-friendly view over the given execution context
   */
  public JavaSparkExecutionContext createJavaExecutionContext(SparkExecutionContext sec) {
    return new DefaultJavaSparkExecutionContext(sec);
  }

  /**
   * Creates the delegating list of ClassLoader. Used by constructor only.
   * Order matters: program classes win over plugin classes, which win over the CDAP system ClassLoader.
   */
  private static List<ClassLoader> createDelegateClassLoaders(SparkRuntimeContext context) {
    return Arrays.asList(
      context.getProgram().getClassLoader(),
      PluginClassLoaders.createFilteredPluginsClassLoader(context.getApplicationSpecification().getPlugins(),
                                                          context.getPluginInstantiator()),
      SparkClassLoader.class.getClassLoader()
    );
  }

  /**
   * Creates a {@link SparkExecutionContextFactory} to be used in distributed mode. This method only gets called
   * from the driver node if running in yarn-cluster mode.
   *
   * @return a factory that builds {@link DefaultSparkExecutionContext} instances from container-local state
   */
  private static SparkExecutionContextFactory createSparkExecutionContextFactory() {
    return new SparkExecutionContextFactory() {
      @Override
      public SparkExecutionContext create(SparkRuntimeContext runtimeContext) {
        SparkRuntimeContextConfig contextConfig = new SparkRuntimeContextConfig(runtimeContext.getConfiguration());

        Map<String, File> localizeResources = new HashMap<>();
        for (String name : contextConfig.getLocalizedResourceNames()) {
          // In distributed mode, files will be localized to the container local directory
          localizeResources.put(name, new File(name));
        }

        // Try to determine the hostname from the NM_HOST environment variable, which is set by NM
        String host = System.getenv(ApplicationConstants.Environment.NM_HOST.key());
        if (host == null) {
          // If it is missing, use the current hostname
          try {
            host = InetAddress.getLocalHost().getCanonicalHostName();
          } catch (UnknownHostException e) {
            // Nothing much we can do. Just throw exception since
            // we need the hostname to start the SparkTransactionService.
            // Wrap directly instead of Throwables.propagate (deprecated in later Guava); the
            // behavior is identical for this checked exception.
            throw new RuntimeException(e);
          }
        }

        return new DefaultSparkExecutionContext(runtimeContext, localizeResources, host);
      }
    };
  }
}