/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package co.cask.cdap.app.runtime.spark.submit;

import co.cask.cdap.app.runtime.spark.SparkMainWrapper;
import co.cask.cdap.app.runtime.spark.SparkRuntimeContext;
import co.cask.cdap.app.runtime.spark.SparkRuntimeEnv;
import co.cask.cdap.app.runtime.spark.distributed.SparkExecutionService;
import co.cask.cdap.internal.app.runtime.workflow.BasicWorkflowToken;
import co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo;
import co.cask.cdap.proto.id.ProgramRunId;
import org.apache.hadoop.conf.Configuration;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;

/**
 * A {@link SparkSubmitter} that submits a Spark program to run in YARN cluster mode.
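 * <p>
 * Illustrative usage, as a hedged sketch only: the hostname, queue name, and
 * {@code runtimeContext} below are hypothetical placeholders, not values from a real deployment.
 * <pre>{@code
 * // Assumes a SparkRuntimeContext obtained from the surrounding Spark runtime setup.
 * SparkSubmitter submitter = new DistributedSparkSubmitter(
 *   new Configuration(), "submitter-host.example.com", runtimeContext, "default");
 * }</pre>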
*/
public class DistributedSparkSubmitter extends AbstractSparkSubmitter {
private final Configuration hConf;
private final String schedulerQueueName;
private final SparkExecutionService sparkExecutionService;

  public DistributedSparkSubmitter(Configuration hConf, String hostname, SparkRuntimeContext runtimeContext,
                                   @Nullable String schedulerQueueName) {
this.hConf = hConf;
this.schedulerQueueName = schedulerQueueName;
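    // Derive the run id of this program run; the workflow token is only present when the Spark
    // program is launched from within a workflow.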
ProgramRunId programRunId = runtimeContext.getProgram().getId().toEntityId().run(runtimeContext.getRunId().getId());
WorkflowProgramInfo workflowInfo = runtimeContext.getWorkflowInfo();
BasicWorkflowToken workflowToken = workflowInfo == null ? null : workflowInfo.getWorkflowToken();
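    // The execution service runs on the client side; its URI is passed to the Spark driver in
    // beforeSubmit() so that the driver can report completion back to this submitter.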
this.sparkExecutionService = new SparkExecutionService(hostname, programRunId, workflowToken);
}

  @Override
protected Map<String, String> getSubmitConf() {
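    // Route the job to the configured YARN scheduler queue, if one was given.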
if (schedulerQueueName != null && !schedulerQueueName.isEmpty()) {
return Collections.singletonMap("spark.yarn.queue", schedulerQueueName);
}
return Collections.emptyMap();
}

  @Override
protected String getMaster(Map<String, String> configs) {
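    // Run the Spark driver inside the YARN cluster rather than on the submitting client.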
return "yarn-cluster";
}

  @Override
protected List<String> beforeSubmit() {
    // Add all Hadoop configurations to the SparkRuntimeEnv, prefixed with "spark.hadoop.". This is
    // how the Spark YARN client gets hold of Hadoop configurations when they are not on the classpath,
    // which is the case on CM clusters due to the private hadoop conf directory (SPARK-13441, YARN-4727).
for (Map.Entry<String, String> entry : hConf) {
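      // hConf.get(key) is used rather than entry.getValue() so that variable references in the value
      // are expanded; Configuration's iterator returns the raw, unexpanded values.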
SparkRuntimeEnv.setProperty("spark.hadoop." + entry.getKey(), hConf.get(entry.getKey()));
}
sparkExecutionService.startAndWait();
return Collections.singletonList("--" + SparkMainWrapper.ARG_EXECUTION_SERVICE_URI()
+ "=" + sparkExecutionService.getBaseURI());
}

  @Override
protected void triggerShutdown() {
    // Just stop the execution service and block on it.
    // It will wait until the "completed" call arrives from the Spark driver.
sparkExecutionService.stopAndWait();
}

  @Override
protected void onCompleted(boolean succeeded) {
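    // On success, stop gracefully and wait for the "completed" call from the driver; on failure,
    // the driver may never call back, so shut the service down immediately.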
if (succeeded) {
sparkExecutionService.stopAndWait();
} else {
sparkExecutionService.shutdownNow();
}
}
}