/*
 * Copyright © 2014-2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.internal.app.runtime.distributed;

import co.cask.cdap.api.Resources;
import co.cask.cdap.api.app.ApplicationSpecification;
import co.cask.cdap.api.mapreduce.MapReduceSpecification;
import co.cask.cdap.api.schedule.SchedulableProgramType;
import co.cask.cdap.api.spark.SparkSpecification;
import co.cask.cdap.api.workflow.ScheduleProgramInfo;
import co.cask.cdap.api.workflow.Workflow;
import co.cask.cdap.api.workflow.WorkflowActionNode;
import co.cask.cdap.api.workflow.WorkflowNode;
import co.cask.cdap.api.workflow.WorkflowNodeType;
import co.cask.cdap.api.workflow.WorkflowSpecification;
import co.cask.cdap.app.program.Program;
import co.cask.cdap.app.runtime.ProgramController;
import co.cask.cdap.app.runtime.ProgramOptions;
import co.cask.cdap.app.runtime.ProgramRunner;
import co.cask.cdap.app.runtime.ProgramRuntimeProvider;
import co.cask.cdap.common.app.RunIds;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.internal.app.runtime.ProgramOptionConstants;
import co.cask.cdap.internal.app.runtime.ProgramRuntimeProviderLoader;
import co.cask.cdap.internal.app.runtime.batch.distributed.MapReduceContainerHelper;
import co.cask.cdap.internal.app.runtime.spark.SparkUtils;
import co.cask.cdap.proto.ProgramType;
import co.cask.cdap.security.TokenSecureStoreUpdater;
import com.google.common.base.Preconditions;
import com.google.inject.Inject;
import org.apache.hadoop.mapred.YarnClientProtocolProvider;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.twill.api.RunId;
import org.apache.twill.api.TwillController;
import org.apache.twill.api.TwillRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * A {@link ProgramRunner} to start a {@link Workflow} program in distributed mode.
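 * <p>
 * The runner localizes the MapReduce framework classpath and, when the workflow contains Spark programs, the
 * spark-assembly jar and the Spark runtime classes, then launches the workflow driver as a Twill application.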
 */
public final class DistributedWorkflowProgramRunner extends AbstractDistributedProgramRunner {

  private static final Logger LOG = LoggerFactory.getLogger(DistributedWorkflowProgramRunner.class);
  private static final String HCONF_ATTR_CLUSTER_MODE = "cdap.spark.cluster.mode";

  private final ProgramRuntimeProviderLoader runtimeProviderLoader;

  @Inject
  public DistributedWorkflowProgramRunner(TwillRunner twillRunner, YarnConfiguration hConf, CConfiguration cConf,
                                          TokenSecureStoreUpdater tokenSecureStoreUpdater,
                                          ProgramRuntimeProviderLoader runtimeProviderLoader) {
    super(twillRunner, createConfiguration(hConf), cConf, tokenSecureStoreUpdater);
    this.runtimeProviderLoader = runtimeProviderLoader;
  }

  @Override
  protected ProgramController launch(Program program, ProgramOptions options,
                                     Map<String, LocalizeResource> localizeResources,
                                     File tempDir, ApplicationLauncher launcher) {
    // Extract and verify parameters
    ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");

    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(processorType == ProgramType.WORKFLOW, "Only WORKFLOW process type is supported.");

    WorkflowSpecification workflowSpec = appSpec.getWorkflows().get(program.getName());
    Preconditions.checkNotNull(workflowSpec, "Missing WorkflowSpecification for %s", program.getName());

    // If the workflow has Spark, localize the spark-assembly jar
    List<String> extraClassPaths = new ArrayList<>();
    List<Class<?>> extraDependencies = new ArrayList<>();

    // Adds the extra classes that MapReduce needs
    extraDependencies.add(YarnClientProtocolProvider.class);

    // See if the Workflow has Spark or MapReduce in it
    DriverMeta driverMeta = findDriverResources(program.getApplicationSpecification().getSpark(),
                                                program.getApplicationSpecification().getMapReduce(), workflowSpec);
    if (driverMeta.hasSpark) {
      // Adds the extra class that the Spark runtime needs
      ProgramRuntimeProvider provider = runtimeProviderLoader.get(ProgramType.SPARK);
      Preconditions.checkState(provider != null, "Missing Spark runtime system. Not able to run Spark program.");
      extraDependencies.add(provider.getClass());

      // Localize the spark-assembly jar and spark conf zip
      String sparkAssemblyJarName = SparkUtils.prepareSparkResources(tempDir, localizeResources);
      extraClassPaths.add(sparkAssemblyJarName);
    }

    // Add classpaths for MR framework
    extraClassPaths.addAll(MapReduceContainerHelper.localizeFramework(hConf, localizeResources));

    LOG.info("Launching distributed workflow: " + program.getName() + ":" + workflowSpec.getName());
    TwillController controller = launcher.launch(
      new WorkflowTwillApplication(program, workflowSpec, localizeResources, eventHandler, driverMeta.resources),
      extraClassPaths, extraDependencies
    );
    RunId runId = RunIds.fromString(options.getArguments().getOption(ProgramOptionConstants.RUN_ID));
    return new WorkflowTwillProgramController(program.getId(), controller, runId).startListen();
  }

  private static YarnConfiguration createConfiguration(YarnConfiguration hConf) {
    YarnConfiguration configuration = new YarnConfiguration(hConf);
    configuration.setBoolean(HCONF_ATTR_CLUSTER_MODE, true);
    return configuration;
  }

  /**
   * Returns the {@link DriverMeta}, which includes the resource requirements for the workflow runnable derived
   * from the Spark and MapReduce driver resource requirements. The {@link DriverMeta} also contains information
   * about whether the workflow contains Spark.
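   * <p>
   * For example, if the workflow contains a MapReduce program whose driver requests 1024 MB and a Spark program
   * whose driver requests 512 MB and 2 virtual cores, the returned requirement is 1024 MB and 2 virtual cores:
   * the component-wise maximum across all drivers, with 768 MB as the minimum memory.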
   */
  private DriverMeta findDriverResources(Map<String, SparkSpecification> sparkSpecs,
                                         Map<String, MapReduceSpecification> mrSpecs,
                                         WorkflowSpecification spec) {
    // Find the resource requirements for the workflow, with 768MB as the minimum.
    // It is the largest memory and cores required by any Spark or MapReduce driver inside the workflow.
    Resources resources = new Resources(768);
    boolean hasSpark = false;

    for (WorkflowNode node : spec.getNodeIdMap().values()) {
      if (WorkflowNodeType.ACTION == node.getType()) {
        ScheduleProgramInfo programInfo = ((WorkflowActionNode) node).getProgram();
        SchedulableProgramType programType = programInfo.getProgramType();
        if (programType == SchedulableProgramType.SPARK || programType == SchedulableProgramType.MAPREDUCE) {
          // The program spec shouldn't be null, otherwise the Workflow is not valid
          Resources driverResources;
          if (programType == SchedulableProgramType.SPARK) {
            hasSpark = true;
            driverResources = sparkSpecs.get(programInfo.getProgramName()).getDriverResources();
          } else {
            driverResources = mrSpecs.get(programInfo.getProgramName()).getDriverResources();
          }
          if (driverResources != null) {
            resources = max(resources, driverResources);
          }
        }
      }
    }
    return new DriverMeta(resources, hasSpark);
  }

  /**
   * Returns a {@link Resources} that has the maximum memory and virtual cores of the two given {@link Resources}.
   */
  private Resources max(Resources r1, Resources r2) {
    int memory1 = r1.getMemoryMB();
    int memory2 = r2.getMemoryMB();
    int vcores1 = r1.getVirtualCores();
    int vcores2 = r2.getVirtualCores();

    if (memory1 > memory2 && vcores1 > vcores2) {
      return r1;
    }
    if (memory1 < memory2 && vcores1 < vcores2) {
      return r2;
    }
    return new Resources(Math.max(memory1, memory2), Math.max(vcores1, vcores2));
  }

  /**
   * Class representing the meta information for the driver.
   */
  private static class DriverMeta {
    private final Resources resources;
    private final boolean hasSpark;

    DriverMeta(Resources resources, boolean hasSpark) {
      this.resources = resources;
      this.hasSpark = hasSpark;
    }
  }
}
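// A minimal usage sketch (hypothetical wiring; in CDAP the runner is normally obtained from a Guice-bound
// factory rather than instantiated directly, and "injector", "workflowProgram" and "programOptions" are
// assumed to exist in the caller's context):
//
//   ProgramRunner runner = injector.getInstance(DistributedWorkflowProgramRunner.class);
//   ProgramController controller = runner.run(workflowProgram, programOptions);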