/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.app.runtime.spark.submit;

import co.cask.cdap.api.spark.SparkSpecification;
import co.cask.cdap.app.runtime.spark.SparkClassLoader;
import co.cask.cdap.app.runtime.spark.SparkExecutionContextFactory;
import co.cask.cdap.app.runtime.spark.SparkMainWrapper;
import co.cask.cdap.app.runtime.spark.SparkRuntimeContext;
import co.cask.cdap.app.runtime.spark.SparkRuntimeUtils;
import co.cask.cdap.internal.app.runtime.distributed.LocalizeResource;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.util.concurrent.AbstractFuture;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.Uninterruptibles;
import org.apache.spark.deploy.SparkSubmit;
import org.apache.twill.common.Cancellable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * Provides common implementation for different {@link SparkSubmitter}.
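 *
 * <p>Sub-classes supply the {@code --master} value via {@link #getMaster(Map)} and a way to stop a running
 * job via {@link #triggerShutdown()}. Below is a minimal, purely illustrative sketch of a concrete submitter;
 * the class name and the local-mode master value are hypothetical and not part of CDAP:
 *
 * <pre>{@code
 * public class LocalSparkSubmitter extends AbstractSparkSubmitter {
 *
 *   protected String getMaster(Map<String, String> configs) {
 *     // Run Spark in local mode, using as many worker threads as there are cores.
 *     return "local[*]";
 *   }
 *
 *   protected void triggerShutdown() {
 *     // Implementation specific: signal the running Spark program to stop.
 *   }
 * }
 * }</pre>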
 */
public abstract class AbstractSparkSubmitter implements SparkSubmitter {

  private static final Logger LOG = LoggerFactory.getLogger(AbstractSparkSubmitter.class);

  // Filter for getting archive resources only
  private static final Predicate<LocalizeResource> ARCHIVE_FILTER = new Predicate<LocalizeResource>() {
    @Override
    public boolean apply(LocalizeResource input) {
      return input.isArchive();
    }
  };

  // Transforms LocalizeResource to URI string
  private static final Function<LocalizeResource, String> RESOURCE_TO_PATH = new Function<LocalizeResource, String>() {
    @Override
    public String apply(LocalizeResource input) {
      return input.getURI().toString();
    }
  };

  @Override
  public final <V> ListenableFuture<V> submit(final SparkRuntimeContext runtimeContext,
                                              final SparkExecutionContextFactory contextFactory,
                                              Map<String, String> configs, List<LocalizeResource> resources,
                                              File jobJar, final V result) {
    final SparkSpecification spec = runtimeContext.getSparkSpecification();
    final List<String> args = createSubmitArguments(spec, configs, resources, jobJar);

    // Spark submit is called from this executor.
    // Using an executor simplifies the logic needed to interrupt the running thread on stopping.
    final ExecutorService executor = Executors.newSingleThreadExecutor(new ThreadFactory() {
      @Override
      public Thread newThread(Runnable r) {
        return new Thread(r, "spark-submitter-" + spec.getName() + "-" + runtimeContext.getRunId());
      }
    });

    // Latch for the Spark job completion
    final CountDownLatch completion = new CountDownLatch(1);
    final SparkJobFuture<V> resultFuture = new SparkJobFuture<V>(runtimeContext) {
      @Override
      protected void cancelTask() {
        // Try to shut down the running Spark job.
        triggerShutdown();
        // Wait for the Spark submission to return
        Uninterruptibles.awaitUninterruptibly(completion);
      }
    };

    // Submit the Spark job
    executor.submit(new Runnable() {
      @Override
      public void run() {
        List<String> extraArgs = beforeSubmit();
        try {
          String[] submitArgs = Iterables.toArray(Iterables.concat(args, extraArgs), String.class);
          submit(runtimeContext, contextFactory, submitArgs);
          onCompleted(true);
          resultFuture.set(result);
        } catch (Throwable t) {
          onCompleted(false);
          resultFuture.setException(t);
        } finally {
          completion.countDown();
        }
      }
    });

    // Shutdown the executor right after submit since the thread is only used for one submission.
    executor.shutdown();
    return resultFuture;
  }

  /**
   * Returns the value for the {@code --master} argument for the Spark submission.
   */
  protected abstract String getMaster(Map<String, String> configs);

  /**
   * Invoked for stopping the Spark job explicitly.
   */
  protected abstract void triggerShutdown();

  /**
   * Called before submitting the Spark job. Sub-classes can override this method to provide extra arguments
   * to append to the submission arguments.
   */
  protected List<String> beforeSubmit() {
    return Collections.emptyList();
  }

  /**
   * Called when the Spark submission completes.
   *
   * @param succeeded {@code true} if the submission completed successfully, {@code false} otherwise
   */
  protected void onCompleted(boolean succeeded) {
    // no-op
  }

  /**
   * Returns configs that are specific to the submission context.
   */
  protected Map<String, String> getSubmitConf() {
    return Collections.emptyMap();
  }

  /**
   * Submits the Spark job using {@link SparkSubmit}.
   *
   * @param runtimeContext context representing the Spark program
   * @param contextFactory factory for creating the Spark execution context
   * @param args arguments for the {@link SparkSubmit#main(String[])} method.
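   *             These are typically the arguments assembled by {@link #createSubmitArguments}; roughly of the
   *             form below, where every value is a placeholder shown for illustration only:
   *             {@code --master <master> --class SparkMainWrapper --conf key=value ...
   *             [--archives a,b] [--files c,d] <jobJar> --<userClassArg>=<mainClassName>}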
   */
  private void submit(SparkRuntimeContext runtimeContext,
                      SparkExecutionContextFactory contextFactory, String[] args) {
    Cancellable cancellable = SparkRuntimeUtils.setContextClassLoader(
      new SparkClassLoader(runtimeContext, contextFactory));
    try {
      LOG.debug("Calling SparkSubmit for {} {}: {}",
                runtimeContext.getProgram().getId(), runtimeContext.getRunId(), Arrays.toString(args));

      // Explicitly set the SPARK_SUBMIT property as it is no longer set on the System properties by the SparkSubmit
      // after the class rewrite. This property only controls logging of a warning when submitting the Spark job,
      // hence it's harmless to just leave it there.
      System.setProperty("SPARK_SUBMIT", "true");
      SparkSubmit.main(args);
      LOG.debug("SparkSubmit returned for {} {}", runtimeContext.getProgram().getId(), runtimeContext.getRunId());
    } finally {
      cancellable.cancel();
    }
  }

  /**
   * Creates the list of arguments that will be used for calling {@link SparkSubmit#main(String[])}.
   *
   * @param spec the {@link SparkSpecification} of the program
   * @param configs set of Spark configurations
   * @param resources list of resources that need to be localized to the Spark containers
   * @param jobJar the job jar file for Spark
   * @return a list of arguments
   */
  private List<String> createSubmitArguments(SparkSpecification spec, Map<String, String> configs,
                                             List<LocalizeResource> resources, File jobJar) {
    ImmutableList.Builder<String> builder = ImmutableList.builder();

    builder.add("--master").add(getMaster(configs));
    builder.add("--class").add(SparkMainWrapper.class.getName());
    builder.add("--conf").add("spark.app.name=" + spec.getName());

    for (Map.Entry<String, String> entry : configs.entrySet()) {
      builder.add("--conf").add(entry.getKey() + "=" + entry.getValue());
    }
    for (Map.Entry<String, String> entry : getSubmitConf().entrySet()) {
      builder.add("--conf").add(entry.getKey() + "=" + entry.getValue());
    }

    String archives = Joiner.on(',')
      .join(Iterables.transform(Iterables.filter(resources, ARCHIVE_FILTER), RESOURCE_TO_PATH));
    String files = Joiner.on(',')
      .join(Iterables.transform(Iterables.filter(resources, Predicates.not(ARCHIVE_FILTER)), RESOURCE_TO_PATH));

    if (!archives.isEmpty()) {
      builder.add("--archives").add(archives);
    }
    if (!files.isEmpty()) {
      builder.add("--files").add(files);
    }

    return builder
      .add(jobJar.getAbsolutePath())
      .add("--" + SparkMainWrapper.ARG_USER_CLASS() + "=" + spec.getMainClassName())
      .build();
  }

  /**
   * A {@link Future} implementation for representing a Spark job execution, which allows cancelling the job through
   * the {@link #cancel(boolean)} method. When the job execution completes, {@link #set(Object)} should be
   * called for a successful execution, or {@link #setException(Throwable)} for a failure. To terminate the
   * job execution while it is running, call the {@link #cancel(boolean)} method. Sub-classes should override the
   * {@link #cancelTask()} method for cancelling the execution; the state of this {@link Future} will change
   * to cancelled after the {@link #cancelTask()} call returns.
   *
   * @param <V> type of object returned by the {@link #get()} method.
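   *
   *           <p>A minimal usage sketch (the names below are illustrative), mirroring how {@code submit}
   *           above drives this future:
   *
   * <pre>{@code
   * SparkJobFuture<String> future = new SparkJobFuture<String>(runtimeContext) {
   *   protected void cancelTask() {
   *     // Signal the running job to stop and wait until the submission returns.
   *   }
   * };
   * try {
   *   // ... run the Spark submission on another thread ...
   *   future.set("completed");   // mark success
   * } catch (Throwable t) {
   *   future.setException(t);    // mark failure
   * }
   * }</pre>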
   */
  private abstract static class SparkJobFuture<V> extends AbstractFuture<V> {

    private static final Logger LOG = LoggerFactory.getLogger(SparkJobFuture.class);

    private final AtomicBoolean done;
    private final SparkRuntimeContext context;

    protected SparkJobFuture(SparkRuntimeContext context) {
      this.done = new AtomicBoolean();
      this.context = context;
    }

    @Override
    protected boolean set(V value) {
      if (done.compareAndSet(false, true)) {
        return super.set(value);
      }
      return false;
    }

    @Override
    protected boolean setException(Throwable throwable) {
      if (done.compareAndSet(false, true)) {
        return super.setException(throwable);
      }
      return false;
    }

    @Override
    public boolean cancel(boolean mayInterruptIfRunning) {
      if (!done.compareAndSet(false, true)) {
        return false;
      }
      try {
        cancelTask();
        return super.cancel(mayInterruptIfRunning);
      } catch (Throwable t) {
        // Only log and reset the state, but don't propagate, since Future.cancel() doesn't expect exceptions
        // to be thrown.
        LOG.warn("Failed to cancel Spark execution for {}.", context, t);
        done.set(false);
        return false;
      }
    }

    @Override
    protected final void interruptTask() {
      // Made final so that it cannot be overridden. This method gets called after the Future state has changed
      // to cancelled, hence it cannot have the caller block until the cancellation is done.
    }

    /**
     * Will be called to cancel an executing task. Sub-classes can override this method to provide
     * custom cancellation logic. This method will be called before the future changes to the cancelled state.
     */
    protected void cancelTask() {
      // no-op
    }
  }
}