/* * Copyright 2013-2014 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.springframework.xd.spark.tasklet; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.net.URLClassLoader; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.StepContribution; import org.springframework.batch.core.StepExecution; import org.springframework.batch.core.StepExecutionListener; import org.springframework.batch.core.scope.context.ChunkContext; import org.springframework.batch.core.step.tasklet.Tasklet; import org.springframework.batch.repeat.RepeatStatus; import org.springframework.context.EnvironmentAware; import org.springframework.core.env.ConfigurableEnvironment; import org.springframework.core.env.Environment; import org.springframework.core.io.Resource; import org.springframework.core.io.support.PathMatchingResourcePatternResolver; import org.springframework.core.io.support.ResourcePatternResolver; import org.springframework.util.Assert; import org.springframework.util.StringUtils; /** * {@link Tasklet} for running Spark application. * * @author Thomas Risberg * @author Ilayaperumal Gopinathan */ public class SparkTasklet implements Tasklet, EnvironmentAware, StepExecutionListener { private final Logger logger = LoggerFactory.getLogger(this.getClass()); private static final String MODULE_HOME = "xd.module.home"; private static final String LIB_PATTERN = "/job/sparkapp/lib/*.jar"; private static final String SPARK_SUBMIT_CLASS = "org.apache.spark.deploy.SparkSubmit"; /** * Exit code of Spark app */ private int exitCode = -1; /** * Spark application name */ private String name; /** * Spark master URL */ private String master; /** * Spark application's main class */ private String mainClass; /** * Path to a bundled jar that includes your application and its * dependencies excluding spark */ private String appJar; /** * Comma separated list of config key-value pairs to Spark application */ private String conf; /** * Comma separated list of files to be placed in the * working directory of each executor */ private String files; /** * Program arguments for the application main class. */ private String programArgs; private ConfigurableEnvironment environment; private ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver(); public String getMaster() { return master; } public void setMaster(String master) { this.master = master; } public String getMainClass() { return mainClass; } public void setMainClass(String mainClass) { this.mainClass = mainClass; } public String getAppJar() { return appJar; } public void setAppJar(String appJar) { this.appJar = appJar; } public String getProgramArgs() { return programArgs; } public void setProgramArgs(String programArgs) { this.programArgs = programArgs; } public String getName() { return name; } public void setName(String name) { this.name = name; } public String getConf() { return conf; } public void setConf(String conf) { this.conf = conf; } public String getFiles() { return files; } public void setFiles(String files) { this.files = files; } @Override public void setEnvironment(Environment environment) { this.environment = (ConfigurableEnvironment) environment; } @Override public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception { StepExecution stepExecution = chunkContext.getStepContext().getStepExecution(); ExitStatus exitStatus = stepExecution.getExitStatus(); String moduleHome = environment.getProperty(MODULE_HOME); Assert.notNull(moduleHome, "Module home must not be null."); Resource[] resources = resolver.getResources(moduleHome + LIB_PATTERN); ArrayList<String> dependencies = new ArrayList<String>(); for (int i = 0; i < resources.length; i++) { dependencies.add(resources[i].getURL().getFile()); } ArrayList<String> args = new ArrayList<String>(); if (StringUtils.hasText(name)) { args.add("--name"); args.add(name); } args.add("--class"); args.add(mainClass); args.add("--master"); args.add(master); args.add("--deploy-mode"); args.add("client"); if (StringUtils.hasText(conf)) { Collection<String> configs = StringUtils.commaDelimitedListToSet(conf); for (String config : configs) { args.add("--conf"); args.add(config.trim()); } } if (StringUtils.hasText(files)) { args.add("--files"); args.add(files); } args.add("--jars"); args.add(StringUtils.collectionToCommaDelimitedString(dependencies)); if (StringUtils.hasText(appJar)) { args.add(appJar); } if (StringUtils.hasText(programArgs)) { args.addAll(StringUtils.commaDelimitedListToSet(programArgs)); } List<String> sparkCommand = new ArrayList<String>(); sparkCommand.add("java"); sparkCommand.add(SPARK_SUBMIT_CLASS); sparkCommand.addAll(args); URLClassLoader serverClassLoader; URLClassLoader taskletClassLoader; try { serverClassLoader = (URLClassLoader) Class.forName("org.springframework.xd.dirt.core.Job").getClassLoader(); taskletClassLoader = (URLClassLoader) this.getClass().getClassLoader(); } catch (Exception e) { throw new IllegalStateException("Unable to determine classpath from ClassLoader.", e); } List<String> classPath = new ArrayList<String>(); for (URL url : serverClassLoader.getURLs()) { String file = url.getFile().split("\\!/", 2)[0]; if (file.endsWith(".jar")) { classPath.add(file); } } for (URL url : taskletClassLoader.getURLs()) { String file = url.getFile().split("\\!/", 2)[0]; if (file.endsWith(".jar") && !classPath.contains(file)) { classPath.add(file); } } StringBuilder classPathBuilder = new StringBuilder(); String separator = System.getProperty("path.separator"); for (String url : classPath) { if (!url.contains("logback")) { if (classPathBuilder.length() > 0) { classPathBuilder.append(separator); } classPathBuilder.append(url); } } ProcessBuilder pb = new ProcessBuilder(sparkCommand).redirectErrorStream(true); Map<String, String> env = pb.environment(); env.put("CLASSPATH", classPathBuilder.toString()); String msg = "Spark application '" + mainClass + "' is being launched"; StringBuilder sparkCommandString = new StringBuilder(); for (String cmd : sparkCommand) { sparkCommandString.append(cmd).append(" "); } stepExecution.getExecutionContext().putString("spark.command", sparkCommandString.toString()); List<String> sparkLog = new ArrayList<String>(); try { Process p = pb.start(); p.waitFor(); exitCode = p.exitValue(); msg = "Spark application '" + mainClass + "' finished with exit code: " + exitCode; if (exitCode == 0) { logger.info(msg); } else { logger.error(msg); } sparkLog = getProcessOutput(p); p.destroy(); } catch (IOException e) { msg = "Starting Spark application '" + mainClass + "' failed with: " + e; logger.error(msg); } catch (InterruptedException e) { msg = "Executing Spark application '" + mainClass + "' failed with: " + e; logger.error(msg); } finally { printLog(sparkLog, exitCode); StringBuilder firstException = new StringBuilder(); if (exitCode != 0) { for (String line : sparkLog) { if (firstException.length() == 0) { if (line.contains("Exception")) { firstException.append(line).append("\n"); } } else { if (line.startsWith("\t")) { firstException.append(line).append("\n"); } else { break; } } } if (firstException.length() > 0) { msg = msg + "\n" + firstException.toString(); } } StringBuilder sparkLogMessages = new StringBuilder(); for (String line : sparkLog) { sparkLogMessages.append(line).append("</br>"); } stepExecution.getExecutionContext().putString("spark.log", sparkLogMessages.toString()); stepExecution.setExitStatus(exitStatus.addExitDescription(msg)); } return RepeatStatus.FINISHED; } @Override public ExitStatus afterStep(StepExecution stepExecution) { if (exitCode == 0) { return ExitStatus.COMPLETED; } else { return ExitStatus.FAILED; } } @Override public void beforeStep(StepExecution stepExecution) { } private List<String> getProcessOutput(Process p) { List<String> lines = new ArrayList<String>(); if (p == null) { return lines; } InputStream in = p.getInputStream(); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); String line; try { while ((line = reader.readLine()) != null) { lines.add(line); } } catch (IOException ignore) { } finally { try { reader.close(); } catch (IOException e) { } } return lines; } private void printLog(List<String> lines, int exitCode) { if (exitCode != 0) { for (String line : lines) { logger.error("Spark Logger: " + line); } } else { if (logger.isDebugEnabled()) { for (String line : lines) { logger.debug("Spark Logger: " + line); } } } } }