/* * Copyright 2015 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.springframework.data.hadoop.batch.spark; import org.apache.hadoop.conf.Configuration; import org.apache.spark.SparkConf; import org.apache.spark.deploy.yarn.Client; import org.apache.spark.deploy.yarn.ClientArguments; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.StepContribution; import org.springframework.batch.core.StepExecution; import org.springframework.batch.core.StepExecutionListener; import org.springframework.batch.core.scope.context.ChunkContext; import org.springframework.batch.core.step.tasklet.Tasklet; import org.springframework.batch.repeat.RepeatStatus; import org.springframework.beans.factory.InitializingBean; import org.springframework.util.Assert; import org.springframework.util.StringUtils; import java.util.ArrayList; import java.util.List; /** * Spark tasklet running Spark jobs on demand on YARN cluster. * * @author Thomas Risberg */ public class SparkYarnTasklet implements InitializingBean, Tasklet, StepExecutionListener { private boolean complete = false; private String sparkAssemblyJar; private Configuration hadoopConfiguration; private String appName; private String appClass; private String appJar; private String resourceFiles; private String resourceArchives; private String executorMemory; private int numExecutors; private String[] arguments; public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception { SparkConf sparkConf = new SparkConf(); sparkConf.set("spark.yarn.jar", sparkAssemblyJar); List<String> submitArgs = new ArrayList<String>(); if (StringUtils.hasText(appName)) { submitArgs.add("--name"); submitArgs.add(appName); } submitArgs.add("--jar"); submitArgs.add(appJar); submitArgs.add("--class"); submitArgs.add(appClass); if (StringUtils.hasText(resourceFiles)) { submitArgs.add("--files"); submitArgs.add(resourceFiles); } if (StringUtils.hasText(resourceArchives)) { submitArgs.add("--archives"); submitArgs.add(resourceArchives); } submitArgs.add("--executor-memory"); submitArgs.add(executorMemory); submitArgs.add("--num-executors"); submitArgs.add("" + numExecutors); for (String arg : arguments) { submitArgs.add("--arg"); submitArgs.add(arg); } ClientArguments clientArguments = new ClientArguments(submitArgs.toArray(new String[submitArgs.size()]), sparkConf); Client client = new Client(clientArguments, hadoopConfiguration, sparkConf); System.setProperty("SPARK_YARN_MODE", "true"); client.run(); complete = true; return RepeatStatus.FINISHED; } @Override public void beforeStep(StepExecution stepExecution) { } @Override public ExitStatus afterStep(StepExecution stepExecution) { if (complete) { return ExitStatus.COMPLETED; } else { return ExitStatus.FAILED; } } @Override public void afterPropertiesSet() throws Exception { Assert.hasText(sparkAssemblyJar, "sparkAssemblyJar property was not set. " + "You must specify the path for the spark-assembly jar file. " + "It can either be a local file or stored in HDFS using an 'hdfs://' prefix."); Assert.notNull(hadoopConfiguration, "hadoopConfiguration property was not set. " + "You must provide a reference to the Hadoop configuration to be used."); Assert.hasText(appClass, "appClass property was not set. " + "You must specify the main class of the application to execute."); Assert.hasText(appJar, "appJar property was not set." + "You must specify the path to the jar that contains the app to execute."); if (!StringUtils.hasText(executorMemory)) { executorMemory = "1G"; } if (numExecutors == 0) { numExecutors = 1; } } public void setSparkAssemblyJar(String sparkAssemblyJar) { this.sparkAssemblyJar = sparkAssemblyJar; } public void setHadoopConfiguration(Configuration configuration) { this.hadoopConfiguration = configuration; } public void setAppName(String appName) { this.appName = appName; } public void setAppClass(String appClass) { this.appClass = appClass; } public void setAppJar(String appJar) { this.appJar = appJar; } public void setResourceFiles(String resourceFiles) { this.resourceFiles = resourceFiles; } public void setResourceArchives(String resourceArchives) { this.resourceArchives = resourceArchives; } public void setExecutorMemory(String executorMemory) { this.executorMemory = executorMemory; } public void setNumExecutors(int numExecutors) { this.numExecutors = numExecutors; } public void setArguments(String[] arguments) { this.arguments = arguments; } }