/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package co.cask.cdap.app.runtime.spark.distributed;

import co.cask.cdap.api.app.ApplicationSpecification;
import co.cask.cdap.api.spark.Spark;
import co.cask.cdap.api.spark.SparkSpecification;
import co.cask.cdap.app.program.Program;
import co.cask.cdap.app.runtime.ProgramController;
import co.cask.cdap.app.runtime.ProgramOptions;
import co.cask.cdap.app.runtime.ProgramRunner;
import co.cask.cdap.app.runtime.spark.SparkRuntimeContextConfig;
import co.cask.cdap.app.runtime.spark.SparkRuntimeUtils;
import co.cask.cdap.common.app.RunIds;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.lang.ProgramClassLoader;
import co.cask.cdap.common.lang.ProgramClassLoaderProvider;
import co.cask.cdap.internal.app.runtime.ProgramOptionConstants;
import co.cask.cdap.internal.app.runtime.distributed.AbstractDistributedProgramRunner;
import co.cask.cdap.internal.app.runtime.distributed.LocalizeResource;
import co.cask.cdap.internal.app.runtime.spark.SparkUtils;
import co.cask.cdap.proto.ProgramType;
import co.cask.cdap.security.TokenSecureStoreUpdater;
import com.google.common.base.Preconditions;
import com.google.inject.Inject;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.twill.api.RunId;
import org.apache.twill.api.TwillController;
import org.apache.twill.api.TwillRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.util.Map;

/**
* A {@link ProgramRunner} for launching a {@link Spark} program in distributed mode. It starts
* a YARN application that acts as the Spark client. A second YARN application is then launched
* by the Spark framework to carry out the actual Spark program execution.
*/
public final class DistributedSparkProgramRunner extends AbstractDistributedProgramRunner
implements ProgramClassLoaderProvider {
private static final Logger LOG = LoggerFactory.getLogger(DistributedSparkProgramRunner.class);
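
/**
* Constructor invoked through Guice injection. The given {@link YarnConfiguration} is copied
* and marked for cluster mode before being passed to the parent distributed runner.
*/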
@Inject
DistributedSparkProgramRunner(TwillRunner twillRunner, YarnConfiguration hConf, CConfiguration cConf,
TokenSecureStoreUpdater tokenSecureStoreUpdater) {
super(twillRunner, createConfiguration(hConf), cConf, tokenSecureStoreUpdater);
}

@Override
protected ProgramController launch(Program program, ProgramOptions options,
Map<String, LocalizeResource> localizeResources,
File tempDir, AbstractDistributedProgramRunner.ApplicationLauncher launcher) {
// Extract and verify parameters
ApplicationSpecification appSpec = program.getApplicationSpecification();
Preconditions.checkNotNull(appSpec, "Missing application specification for %s", program.getId());
ProgramType processorType = program.getType();
Preconditions.checkNotNull(processorType, "Missing processor type for %s", program.getId());
Preconditions.checkArgument(processorType == ProgramType.SPARK,
"Only SPARK process type is supported. Program type is %s for %s",
processorType, program.getId());
SparkSpecification spec = appSpec.getSpark().get(program.getName());
Preconditions.checkNotNull(spec, "Missing SparkSpecification for %s", program.getId());
// Localize the spark-assembly jar and spark conf zip
String sparkAssemblyJarName = SparkUtils.prepareSparkResources(tempDir, localizeResources);
LOG.info("Launching Spark program: {}", program.getId());
TwillController controller = launcher.launch(
new SparkTwillApplication(program, spec, localizeResources, eventHandler), sparkAssemblyJarName);
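// Reuse the run id that was assigned to this program run through the system arguments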
RunId runId = RunIds.fromString(options.getArguments().getOption(ProgramOptionConstants.RUN_ID));
return new SparkTwillProgramController(program.getId().toEntityId(), controller, runId).startListen();
}
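
/**
* Returns a copy of the given {@link YarnConfiguration} with the flag set that tells the Spark
* runtime it is executing in cluster (distributed) mode.
*/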
private static YarnConfiguration createConfiguration(YarnConfiguration hConf) {
YarnConfiguration configuration = new YarnConfiguration(hConf);
configuration.setBoolean(SparkRuntimeContextConfig.HCONF_ATTR_CLUSTER_MODE, true);
return configuration;
}
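
/**
* Creates the program {@link ClassLoader} for Spark programs by delegating to
* {@link SparkRuntimeUtils#createProgramClassLoader(CConfiguration, File, ClassLoader)}.
*/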
@Override
public ProgramClassLoader createProgramClassLoader(CConfiguration cConf, File dir) {
return SparkRuntimeUtils.createProgramClassLoader(cConf, dir, getClass().getClassLoader());
}
}