/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.app.runtime.spark;
import co.cask.cdap.api.app.ApplicationSpecification;
import co.cask.cdap.api.metrics.MetricsCollectionService;
import co.cask.cdap.api.spark.Spark;
import co.cask.cdap.api.spark.SparkSpecification;
import co.cask.cdap.app.program.Program;
import co.cask.cdap.app.runtime.Arguments;
import co.cask.cdap.app.runtime.ProgramController;
import co.cask.cdap.app.runtime.ProgramOptions;
import co.cask.cdap.app.runtime.ProgramRunner;
import co.cask.cdap.app.runtime.spark.submit.DistributedSparkSubmitter;
import co.cask.cdap.app.runtime.spark.submit.LocalSparkSubmitter;
import co.cask.cdap.app.runtime.spark.submit.SparkSubmitter;
import co.cask.cdap.app.store.Store;
import co.cask.cdap.common.app.RunIds;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.common.lang.InstantiatorFactory;
import co.cask.cdap.common.lang.ProgramClassLoader;
import co.cask.cdap.common.lang.ProgramClassLoaderProvider;
import co.cask.cdap.common.lang.PropertyFieldSetter;
import co.cask.cdap.data2.dataset2.DatasetFramework;
import co.cask.cdap.data2.metadata.writer.ProgramContextAware;
import co.cask.cdap.data2.transaction.stream.StreamAdmin;
import co.cask.cdap.internal.app.runtime.AbstractProgramRunnerWithPlugin;
import co.cask.cdap.internal.app.runtime.DataSetFieldSetter;
import co.cask.cdap.internal.app.runtime.MetricsFieldSetter;
import co.cask.cdap.internal.app.runtime.ProgramOptionConstants;
import co.cask.cdap.internal.app.runtime.ProgramRunners;
import co.cask.cdap.internal.app.runtime.plugin.PluginInstantiator;
import co.cask.cdap.internal.app.runtime.workflow.NameMappedDatasetFramework;
import co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo;
import co.cask.cdap.internal.lang.Reflections;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.ProgramRunStatus;
import co.cask.cdap.proto.ProgramType;
import co.cask.tephra.TransactionSystemClient;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.io.Closeables;
import com.google.common.reflect.TypeToken;
import com.google.common.util.concurrent.Service;
import com.google.common.util.concurrent.Uninterruptibles;
import com.google.inject.Inject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.twill.api.RunId;
import org.apache.twill.common.Threads;
import org.apache.twill.discovery.DiscoveryServiceClient;
import org.apache.twill.internal.ServiceListenerAdapter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.Deque;
import java.util.LinkedList;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
/**
 * The {@link ProgramRunner} that executes Spark program.
 *
 * Each call to {@link #run(Program, ProgramOptions)} builds a {@link SparkRuntimeContext}, instantiates the user's
 * {@link Spark} class with field injection, and starts a {@link SparkRuntimeService}. State changes of that service
 * are recorded in the {@link Store}, and the resources created for the run are closed when the service terminates
 * or fails.
 */
final class SparkProgramRunner extends AbstractProgramRunnerWithPlugin
  implements ProgramClassLoaderProvider, Closeable {

  private static final Logger LOG = LoggerFactory.getLogger(SparkProgramRunner.class);

  private final CConfiguration cConf;
  private final Configuration hConf;
  private final TransactionSystemClient txClient;
  private final DatasetFramework datasetFramework;
  private final MetricsCollectionService metricsCollectionService;
  private final DiscoveryServiceClient discoveryServiceClient;
  private final StreamAdmin streamAdmin;
  private final Store store;

  @Inject
  SparkProgramRunner(CConfiguration cConf, Configuration hConf, TransactionSystemClient txClient,
                     DatasetFramework datasetFramework, MetricsCollectionService metricsCollectionService,
                     DiscoveryServiceClient discoveryServiceClient, StreamAdmin streamAdmin, Store store) {
    super(cConf);
    this.cConf = cConf;
    this.hConf = hConf;
    this.txClient = txClient;
    this.datasetFramework = datasetFramework;
    this.metricsCollectionService = metricsCollectionService;
    this.discoveryServiceClient = discoveryServiceClient;
    this.streamAdmin = streamAdmin;
    this.store = store;
  }

  /**
   * Starts the given Spark program.
   *
   * @param program the program to run; it must be of type {@link ProgramType#SPARK} and have a
   *                {@link SparkSpecification} registered under its name in the application specification
   * @param options the program options; the system arguments must carry the run id and the host name
   * @return a {@link ProgramController} for the started Spark runtime service
   * @throws RuntimeException if validation, setup or starting of the service fails. Resources created before
   *                          the failure are closed before the exception is propagated.
   */
  @Override
  public ProgramController run(Program program, ProgramOptions options) {
    // Get the RunId first. It identifies this run in the runtime context and in the run record.
    Arguments arguments = options.getArguments();
    RunId runId = RunIds.fromString(arguments.getOption(ProgramOptionConstants.RUN_ID));

    // Resources are always pushed to the front, so iterating the deque closes them in reverse order of creation.
    Deque<Closeable> closeables = new LinkedList<>();

    try {
      // Extract and verify parameters
      ApplicationSpecification appSpec = program.getApplicationSpecification();
      Preconditions.checkNotNull(appSpec, "Missing application specification.");

      ProgramType processorType = program.getType();
      Preconditions.checkNotNull(processorType, "Missing processor type.");
      Preconditions.checkArgument(processorType == ProgramType.SPARK, "Only Spark process type is supported.");

      SparkSpecification spec = appSpec.getSpark().get(program.getName());
      Preconditions.checkNotNull(spec, "Missing SparkSpecification for %s", program.getName());

      String host = arguments.getOption(ProgramOptionConstants.HOST);
      Preconditions.checkArgument(host != null, "No hostname is provided");

      // Get the WorkflowProgramInfo if it is started by Workflow
      WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
      DatasetFramework programDatasetFramework = workflowInfo == null ?
        datasetFramework :
        NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);

      // Setup dataset framework context, if required
      if (programDatasetFramework instanceof ProgramContextAware) {
        Id.Program programId = program.getId();
        ((ProgramContextAware) programDatasetFramework).initContext(new Id.Run(programId, runId.getId()));
      }

      PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
      if (pluginInstantiator != null) {
        closeables.addFirst(pluginInstantiator);
      }

      SparkRuntimeContext runtimeContext = new SparkRuntimeContext(new Configuration(hConf), program, runId,
                                                                   options.getUserArguments().asMap(),
                                                                   txClient, programDatasetFramework,
                                                                   discoveryServiceClient,
                                                                   metricsCollectionService, streamAdmin, workflowInfo,
                                                                   pluginInstantiator);
      closeables.addFirst(runtimeContext);

      Spark spark = createSpark(program, spec, runtimeContext);

      SparkSubmitter submitter = SparkRuntimeContextConfig.isLocal(hConf)
        ? new LocalSparkSubmitter()
        : new DistributedSparkSubmitter(hConf, host, runtimeContext,
                                        arguments.getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE));

      Service sparkRuntimeService = new SparkRuntimeService(cConf, spark, getPluginArchive(options),
                                                            runtimeContext, submitter, host);

      // The listener records the run in the store and closes the resources once the service is done.
      sparkRuntimeService.addListener(
        createRuntimeServiceListener(program.getId(), runId, arguments, options.getUserArguments(),
                                     closeables, store),
        Threads.SAME_THREAD_EXECUTOR);

      ProgramController controller = new SparkProgramController(sparkRuntimeService, runtimeContext);

      LOG.info("Starting Spark Job: {}", runtimeContext);
      // In local mode or with security enabled the service is started directly;
      // otherwise it is started as the user configured by Constants.CFG_HDFS_USER.
      if (SparkRuntimeContextConfig.isLocal(hConf) || UserGroupInformation.isSecurityEnabled()) {
        sparkRuntimeService.start();
      } else {
        ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), sparkRuntimeService);
      }
      return controller;
    } catch (Throwable t) {
      // closeAll never throws a RuntimeException, so the original failure is the one that gets propagated.
      closeAll(closeables);
      throw Throwables.propagate(t);
    }
  }

  @Override
  public ProgramClassLoader createProgramClassLoader(CConfiguration cConf, File dir) {
    return SparkRuntimeUtils.createProgramClassLoader(cConf, dir, getClass().getClassLoader());
  }

  /**
   * Closes the ClassLoader of this {@link SparkProgramRunner}. The
   * ClassLoader needs to be closed because there is one such ClassLoader created per program execution by
   * the {@link SparkProgramRuntimeProvider} to support concurrent Spark program execution in the same JVM.
   *
   * The close happens asynchronously on a daemon thread after a short delay. If the ClassLoader is not
   * {@link Closeable}, this method does nothing.
   */
  @Override
  public void close() throws IOException {
    ClassLoader classLoader = getClass().getClassLoader();
    if (!(classLoader instanceof Closeable)) {
      // Nothing to release, so there is no point in spawning the delayed-close thread.
      return;
    }

    final Closeable closeableClassLoader = (Closeable) classLoader;
    Thread t = new Thread("spark-program-runner-delay-close") {
      @Override
      public void run() {
        // Delay the closing of the ClassLoader because Spark, which uses akka, has an async cleanup process
        // for shutting down threads. During shutdown, there are new classes being loaded.
        Uninterruptibles.sleepUninterruptibly(2, TimeUnit.SECONDS);
        Closeables.closeQuietly(closeableClassLoader);
        LOG.debug("Closed ClassLoader for SparkProgramRunner");
      }
    };
    t.setDaemon(true);
    t.start();
  }

  /**
   * Instantiates the user's {@link Spark} class and injects its property, dataset and metrics fields.
   */
  private Spark createSpark(Program program, SparkSpecification spec, SparkRuntimeContext runtimeContext) {
    try {
      Spark spark = new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create();

      // Fields injection
      Reflections.visit(spark, spark.getClass(),
                        new PropertyFieldSetter(spec.getProperties()),
                        new DataSetFieldSetter(runtimeContext.getDatasetCache()),
                        new MetricsFieldSetter(runtimeContext));
      return spark;
    } catch (Exception e) {
      LOG.error("Failed to instantiate Spark class for {}", spec.getClassName(), e);
      throw Throwables.propagate(e);
    }
  }

  /**
   * Closes every given {@link Closeable} on a best-effort basis, in iteration order.
   *
   * A failure to close one resource is logged and does not prevent the remaining resources from being closed,
   * nor does it propagate to the caller as a {@link RuntimeException}.
   */
  private void closeAll(Iterable<Closeable> closeables) {
    for (Closeable closeable : closeables) {
      try {
        Closeables.closeQuietly(closeable);
      } catch (RuntimeException e) {
        // closeQuietly only swallows IOException; keep going so that one bad resource cannot leak the others
        // or stop the caller from finishing its own cleanup.
        LOG.warn("Exception raised when closing {}", closeable, e);
      }
    }
  }

  /**
   * Returns the plugin archive file named in the program arguments, or {@code null} if none is given.
   */
  @Nullable
  private File getPluginArchive(ProgramOptions options) {
    if (!options.getArguments().hasOption(ProgramOptionConstants.PLUGIN_ARCHIVE)) {
      return null;
    }
    return new File(options.getArguments().getOption(ProgramOptionConstants.PLUGIN_ARCHIVE));
  }

  /**
   * Returns the current wall clock time in seconds since epoch.
   */
  private static long currentTimeInSeconds() {
    return TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
  }

  /**
   * Creates a service listener to react to state changes on {@link SparkRuntimeService}.
   *
   * The listener records the start of the run when the service is starting, and on termination or failure
   * it closes the given resources and records the end of the run with the corresponding status.
   */
  private Service.Listener createRuntimeServiceListener(final Id.Program programId, final RunId runId,
                                                        final Arguments arguments, final Arguments userArgs,
                                                        final Iterable<Closeable> closeables, final Store store) {

    final String twillRunId = arguments.getOption(ProgramOptionConstants.TWILL_RUN_ID);

    return new ServiceListenerAdapter() {
      @Override
      public void starting() {
        //Get start time from RunId
        long startTimeInSeconds = RunIds.getTime(runId, TimeUnit.SECONDS);
        if (startTimeInSeconds == -1) {
          // If RunId is not time-based, use current time as start time
          startTimeInSeconds = currentTimeInSeconds();
        }
        store.setStart(programId, runId.getId(), startTimeInSeconds, twillRunId, userArgs.asMap(), arguments.asMap());
      }

      @Override
      public void terminated(Service.State from) {
        closeAll(closeables);
        ProgramRunStatus runStatus = ProgramController.State.COMPLETED.getRunStatus();
        if (from == Service.State.STOPPING) {
          // Service was killed
          runStatus = ProgramController.State.KILLED.getRunStatus();
        }
        store.setStop(programId, runId.getId(), currentTimeInSeconds(), runStatus);
      }

      @Override
      public void failed(Service.State from, @Nullable Throwable failure) {
        closeAll(closeables);
        store.setStop(programId, runId.getId(), currentTimeInSeconds(),
                      ProgramController.State.ERROR.getRunStatus(), failure);
      }
    };
  }
}