/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.app.runtime.spark;

import co.cask.cdap.app.runtime.ProgramRunner;
import co.cask.cdap.app.runtime.ProgramRuntimeProvider;
import co.cask.cdap.app.runtime.spark.distributed.DistributedSparkProgramRunner;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.common.lang.ClassLoaders;
import co.cask.cdap.proto.ProgramType;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.io.Closeables;
import com.google.inject.Inject;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.google.inject.ProvisionException;

import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.Type;
import java.net.URL;

/**
 * A {@link ProgramRuntimeProvider} that provides runtime system support for {@link ProgramType#SPARK} program.
 * This class shouldn't have dependency on Spark classes.
 */
@ProgramRuntimeProvider.SupportedProgramType(ProgramType.SPARK)
public class SparkProgramRuntimeProvider implements ProgramRuntimeProvider {

  // Classpath URLs captured from the first SparkRunnerClassLoader created; cached so that
  // subsequent loaders can be constructed from the same URLs (see createClassLoader).
  private URL[] classLoaderUrls;

  @Override
  public ProgramRunner createProgramRunner(ProgramType type, Mode mode, Injector injector) {
    Preconditions.checkArgument(type == ProgramType.SPARK,
                                "Unsupported program type %s. Only %s is supported", type, ProgramType.SPARK);
    switch (mode) {
      case LOCAL:
        // Rewrite YarnClient based on config. The LOCAL runner is used in both SDK and distributed mode
        // The actual mode that Spark is running is determined by the cdap.spark.cluster.mode attribute
        // in the hConf
        boolean rewriteYarnClient = injector.getInstance(CConfiguration.class)
                                            .getBoolean(Constants.AppFabric.SPARK_YARN_CLIENT_REWRITE);
        return createSparkProgramRunner(injector, SparkProgramRunner.class.getName(), rewriteYarnClient, true);
      case DISTRIBUTED:
        // The distributed program runner is only used by the CDAP master to launch the twill container
        // hence it doesn't need to do any class rewrite.
        return createSparkProgramRunner(injector, DistributedSparkProgramRunner.class.getName(), false, false);
      default:
        throw new IllegalArgumentException("Unsupported Spark execution mode " + mode);
    }
  }

  /**
   * Creates a {@link ProgramRunner} that execute Spark program from the given {@link Injector}.
   *
   * @param injector the Guice {@link Injector} for acquiring CDAP system instances
   * @param programRunnerClassName the name of the {@link ProgramRunner} class to load through
   *                               the {@link SparkRunnerClassLoader}
   * @param rewriteYarnClient whether the class loader should rewrite the YarnClient class
   * @param rewriteDStreamGraph whether the class loader should rewrite the DStreamGraph class
   * @return a new {@link ProgramRunner} instance loaded by a dedicated {@link SparkRunnerClassLoader}
   */
  private ProgramRunner createSparkProgramRunner(Injector injector,
                                                 String programRunnerClassName,
                                                 boolean rewriteYarnClient,
                                                 boolean rewriteDStreamGraph) {
    try {
      SparkRunnerClassLoader classLoader = createClassLoader(rewriteYarnClient, rewriteDStreamGraph);
      try {
        ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(classLoader);
        try {
          // Closing of the SparkRunnerClassLoader is done by the SparkProgramRunner when the program execution finished
          // The current CDAP call run right after it get a ProgramRunner and never reuse a ProgramRunner.
          // TODO: CDAP-5506 to refactor the program runtime architecture to remove the need of this assumption
          return createInstance(injector, classLoader.loadClass(programRunnerClassName), classLoader);
        } finally {
          // Always restore the caller's context ClassLoader
          ClassLoaders.setContextClassLoader(oldClassLoader);
        }
      } catch (Throwable t) {
        // If there is any exception, close the SparkRunnerClassLoader
        Closeables.closeQuietly(classLoader);
        throw t;
      }
    } catch (Throwable t) {
      throw Throwables.propagate(t);
    }
  }

  /**
   * Create a new instance of the given {@link Type} from the given {@link Injector}. This method
   * is doing Guice injection manually through the @Inject constructor to avoid ClassLoader leakage
   * due to the just-in-time binding map inside the Guice Injector that holds a strong reference to the type,
   * hence the ClassLoader of that type
   *
   * @param injector The Guice Injector for acquiring CDAP system instances
   * @param type the {@link Class} of the instance to create
   * @param sparkClassLoader the ClassLoader whose types must be instantiated manually (recursively)
   *                         rather than through the Injector, to avoid the leakage described above
   * @return a new instance of the given {@link Type}
   * @throws Exception if the instance cannot be constructed or a dependency cannot be acquired
   */
  private <T> T createInstance(Injector injector, Type type, ClassLoader sparkClassLoader) throws Exception {
    Key<?> typeKey = Key.get(type);
    @SuppressWarnings("unchecked")
    Class<T> rawType = (Class<T>) typeKey.getTypeLiteral().getRawType();

    Constructor<T> constructor = findInjectableConstructor(rawType);
    constructor.setAccessible(true);

    // Acquire the instances for each parameter for the constructor
    Type[] paramTypes = constructor.getGenericParameterTypes();
    Object[] args = new Object[paramTypes.length];
    int i = 0;
    for (Type paramType : paramTypes) {
      Key<?> paramTypeKey = Key.get(paramType);

      // If the classloader of the parameter is the same as the Spark ClassLoader, we need to create the
      // instance manually instead of getting through the Guice Injector to avoid ClassLoader leakage
      if (paramTypeKey.getTypeLiteral().getRawType().getClassLoader() == sparkClassLoader) {
        args[i++] = createInstance(injector, paramType, sparkClassLoader);
      } else {
        args[i++] = injector.getInstance(paramTypeKey);
      }
    }
    return constructor.newInstance(args);
  }

  /**
   * Finds the constructor of the given type that is suitable for Guice injection. If the given type has
   * a constructor annotated with {@link Inject}, then it will be returned. Otherwise, the default constructor
   * will be returned.
   *
   * @param type the {@link Class} to inspect
   * @return the injectable {@link Constructor} of the given type
   * @throws ProvisionException if failed to locate a constructor for the injection
   */
  @SuppressWarnings("unchecked")
  private <T> Constructor<T> findInjectableConstructor(Class<T> type) throws ProvisionException {
    for (Constructor<?> constructor : type.getDeclaredConstructors()) {
      // Find the @Inject constructor
      if (constructor.isAnnotationPresent(Inject.class)) {
        return (Constructor<T>) constructor;
      }
    }

    // If no @Inject constructor, use the default constructor
    try {
      return type.getDeclaredConstructor();
    } catch (NoSuchMethodException e) {
      throw new ProvisionException("No constructor is annotated with @Inject and there is no default constructor", e);
    }
  }

  /**
   * Creates a new {@link SparkRunnerClassLoader}. The first invocation captures and caches the
   * classpath {@link URL}s from the created loader; subsequent invocations construct the loader
   * directly from the cached URLs. Synchronized to guard the lazily-initialized cache.
   *
   * @param rewriteYarnClient whether the loader should rewrite the YarnClient class
   * @param rewriteDStreamGraph whether the loader should rewrite the DStreamGraph class
   * @return a new {@link SparkRunnerClassLoader}
   * @throws IOException if the class loader cannot be created
   */
  private synchronized SparkRunnerClassLoader createClassLoader(boolean rewriteYarnClient,
                                                                boolean rewriteDStreamGraph) throws IOException {
    SparkRunnerClassLoader classLoader;
    if (classLoaderUrls == null) {
      classLoader = new SparkRunnerClassLoader(getClass().getClassLoader(), rewriteYarnClient, rewriteDStreamGraph);
      classLoaderUrls = classLoader.getURLs();
    } else {
      classLoader = new SparkRunnerClassLoader(classLoaderUrls, getClass().getClassLoader(),
                                               rewriteYarnClient, rewriteDStreamGraph);
    }
    return classLoader;
  }
}