/*
* Copyright 2015 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.xd.dirt.plugins.spark.streaming;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.io.FilenameUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.StreamingContext;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.dstream.ReceiverInputDStream;
import org.apache.spark.streaming.receiver.Receiver;
import org.apache.spark.streaming.scheduler.StreamingListener;
import org.apache.spark.streaming.scheduler.StreamingListenerBatchCompleted;
import org.apache.spark.streaming.scheduler.StreamingListenerBatchStarted;
import org.apache.spark.streaming.scheduler.StreamingListenerBatchSubmitted;
import org.apache.spark.streaming.scheduler.StreamingListenerReceiverError;
import org.apache.spark.streaming.scheduler.StreamingListenerReceiverStarted;
import org.apache.spark.streaming.scheduler.StreamingListenerReceiverStopped;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.NoSuchBeanDefinitionException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.config.ConfigurableBeanFactory;
import org.springframework.context.ConfigurableApplicationContext;
import org.springframework.core.env.ConfigurableEnvironment;
import org.springframework.core.env.EnumerablePropertySource;
import org.springframework.core.env.Environment;
import org.springframework.core.env.PropertySource;
import org.springframework.core.io.Resource;
import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
import org.springframework.util.Assert;
import org.springframework.util.SocketUtils;
import org.springframework.util.StringUtils;
import org.springframework.xd.dirt.integration.bus.MessageBus;
import org.springframework.xd.dirt.plugins.AbstractStreamPlugin;
import org.springframework.xd.dirt.plugins.stream.ModuleTypeConversionSupport;
import org.springframework.xd.dirt.server.MessageBusClassLoaderFactory;
import org.springframework.xd.dirt.zookeeper.ZooKeeperConnection;
import org.springframework.xd.module.ModuleType;
import org.springframework.xd.module.core.Module;
import org.springframework.xd.module.core.ModuleFactory;
import org.springframework.xd.module.core.SimpleModule;
import org.springframework.xd.spark.streaming.SparkConfig;
import org.springframework.xd.spark.streaming.SparkMessageSender;
import org.springframework.xd.spark.streaming.SparkStreamingSupport;
import org.springframework.xd.spark.streaming.java.ModuleExecutor;
import org.springframework.xd.spark.streaming.java.Processor;
/**
* Plugin for Spark Streaming support. This plugin sets up the necessary beans for the spark streaming module
* that connects to the underlying {@link MessageBus} to receive/send messages.
*
* @author Ilayaperumal Gopinathan
* @author Mark Fisher
* @author Eric Bottard
* @since 1.1
*/
@SuppressWarnings("rawtypes")
public class SparkStreamingPlugin extends AbstractStreamPlugin {
/**
* Logger.
*/
private static final Logger logger = LoggerFactory.getLogger(SparkStreamingPlugin.class);
private static final String REDIS_CONNECTION_PROPERTY_PREFIX = "spring.redis";
private static final String RABBIT_CONNECTION_PROPERTY_PREFIX = "spring.rabbitmq";
private static final String MESSAGE_BUS_PROPERTY_PREFIX = "xd.messagebus.";
private PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
private Map<Module, JavaStreamingContext> streamingContexts = new HashMap<>();
@Autowired
public SparkStreamingPlugin(MessageBus messageBus) {
super(messageBus);
}
@Override
public boolean supports(Module module) {
String moduleExecutionFramework = module.getProperties().getProperty(ModuleFactory.MODULE_EXECUTION_FRAMEWORK_KEY);
return (SparkStreamingSupport.MODULE_EXECUTION_FRAMEWORK.equals(moduleExecutionFramework));
}
@Override
public void postProcessModule(Module module) {
ConfigurableApplicationContext moduleContext = module.getApplicationContext();
ConfigurableEnvironment env = moduleContext.getEnvironment();
String transport = env.getProperty("XD_TRANSPORT");
Properties messageBusProperties = getMessageBusProperties(module);
Properties inboundModuleProperties = this.extractConsumerProducerProperties(module)[0];
Properties outboundModuleProperties = this.extractConsumerProducerProperties(module)[1];
String defaultStorageLevel = env.getProperty(SparkStreamingSupport.SPARK_STORAGE_LEVEL_PROP);
StorageLevel configuredStorageLevel = StorageLevel.fromString(StringUtils.hasText(defaultStorageLevel) ?
defaultStorageLevel : SparkStreamingSupport.SPARK_DEFAULT_STORAGE_LEVEL);
String storageLevelFromModule = module.getProperties().getProperty(SparkStreamingSupport.SPARK_STORAGE_LEVEL_MODULE_OPTION);
StorageLevel storageLevel = StringUtils.hasText(storageLevelFromModule) ?
StorageLevel.fromString(storageLevelFromModule) : configuredStorageLevel;
MessageBusReceiver receiver = null;
if (transport.equals("local")) {
SparkStreamingSupport processor;
Properties sparkConfigs = null;
try {
processor = module.getComponent(SparkStreamingSupport.class);
Assert.notNull(processor, "Problem getting the spark streaming module. Is the module context active?");
sparkConfigs = getSparkModuleProperties(processor);
}
catch (NoSuchBeanDefinitionException e) {
throw new IllegalStateException("Either java or scala module should be present.");
}
String sparkMasterUrl = env.getProperty(SparkStreamingSupport.SPARK_MASTER_URL_PROP);
if (sparkConfigs != null &&
StringUtils.hasText(sparkConfigs.getProperty(SparkStreamingSupport.SPARK_MASTER_URL_PROP))) {
sparkMasterUrl = sparkConfigs.getProperty(SparkStreamingSupport.SPARK_MASTER_URL_PROP);
}
Assert.notNull(sparkMasterUrl, "Spark Master URL must be set.");
if (!sparkMasterUrl.startsWith("local")) {
throw new IllegalStateException("Spark cluster mode must be 'local' for 'local' XD transport.");
}
LocalMessageBusHolder messageBusHolder = new LocalMessageBusHolder();
LocalMessageBusHolder.set(module.getComponent(MessageBus.class));
receiver = new MessageBusReceiver(messageBusHolder, storageLevel, messageBusProperties,
inboundModuleProperties, ModuleTypeConversionSupport.getInputMimeType(module));
if (module.getType().equals(ModuleType.processor)) {
MessageBusSender sender = new MessageBusSender(messageBusHolder, getOutputChannelName(module),
buildTapChannelName(module), messageBusProperties, outboundModuleProperties,
ModuleTypeConversionSupport.getOutputMimeType(module), module.getProperties());
ConfigurableBeanFactory beanFactory = module.getApplicationContext().getBeanFactory();
beanFactory.registerSingleton("messageBusSender", sender);
}
}
else {
receiver = new MessageBusReceiver(storageLevel, messageBusProperties, inboundModuleProperties,
ModuleTypeConversionSupport.getInputMimeType(module));
if (module.getType().equals(ModuleType.processor)) {
ConfigurableBeanFactory beanFactory = module.getApplicationContext().getBeanFactory();
MessageBusSender sender = new MessageBusSender(getOutputChannelName(module), buildTapChannelName(module),
messageBusProperties, outboundModuleProperties,
ModuleTypeConversionSupport.getOutputMimeType(module), module.getProperties());
beanFactory.registerSingleton("messageBusSender", sender);
}
}
registerMessageBusReceiver(receiver, module);
// This used to be in SSDModule.start
try {
SparkStreamingSupport processor = module.getComponent(SparkStreamingSupport.class);
Assert.notNull(processor, "Problem getting the spark streaming module. Is the module context active?");
Properties sparkConfigs = getSparkModuleProperties(processor);
startSparkStreamingContext(sparkConfigs, processor, module);
}
catch (NoSuchBeanDefinitionException e) {
throw new IllegalStateException("Either java or scala module should be present.");
}
}
@Override
public void beforeShutdown(Module module) {
super.beforeShutdown(module);
logger.info("stopping SparkDriver");
try {
try {
streamingContexts.get(module).stop(true, false);
}
catch(Exception e) {
logger.warn("Error while stopping streaming context "+ e);
}
}
catch (Exception e) {
logger.warn("Exception when stopping the spark module " + e);
}
}
/**
* Get the configured message bus properties for the given transport.
* @param module
* @return the message bus properties for the spark streaming module.
*/
private Properties getMessageBusProperties(Module module) {
ConfigurableEnvironment env = module.getApplicationContext().getEnvironment();
Properties busProperties = new Properties();
busProperties.put("XD_TRANSPORT", env.getProperty("XD_TRANSPORT"));
Iterator<PropertySource<?>> i = env.getPropertySources().iterator();
while (i.hasNext()) {
PropertySource<?> p = i.next();
if (p instanceof EnumerablePropertySource) {
for (String name : ((EnumerablePropertySource) p).getPropertyNames()) {
if ((name.startsWith(REDIS_CONNECTION_PROPERTY_PREFIX)) ||
name.startsWith(RABBIT_CONNECTION_PROPERTY_PREFIX) ||
name.startsWith(MESSAGE_BUS_PROPERTY_PREFIX)) {
busProperties.put(name, env.getProperty(name));
}
}
}
}
return busProperties;
}
/**
* Register the messsage bus receiver.
* @param receiver the message bus receiver
* @param module the spark streaming module
*/
private void registerMessageBusReceiver(MessageBusReceiver receiver, Module module) {
receiver.setInputChannelName(getInputChannelName(module));
ConfigurableBeanFactory beanFactory = module.getApplicationContext().getBeanFactory();
beanFactory.registerSingleton("messageBusReceiver", receiver);
}
/**
* Retrieve spark configuration properties from the {@link org.springframework.xd.spark.streaming.java.Processor} implementation.
* This method uses {@link SparkConfig} annotation to derive the {@link Properties} returned
* from the annotated methods.
*
* @param processor the spark streaming processor
* @return the spark configuration properties (if defined) or empty properties
*/
private Properties getSparkModuleProperties(SparkStreamingSupport processor) {
Properties sparkConfigs = new Properties();
Method[] methods = processor.getClass().getDeclaredMethods();
for (Method method : methods) {
SparkConfig sparkConfig = method.getAnnotation(SparkConfig.class);
if (sparkConfig != null) {
try {
if (method.getReturnType().equals(Properties.class)) {
sparkConfigs.putAll((Properties) method.invoke(processor));
}
else {
logger.warn("@SparkConfig annotated method should return java.util.Properties type. " +
"Ignoring the method " + method.getName());
}
}
catch (InvocationTargetException ise) {
// ignore.
}
catch (IllegalAccessException ise) {
// ignore.
}
}
}
return sparkConfigs;
}
/**
* Start spark streaming context for the given streaming processor.
*
* @param sparkConfigs the spark configuration properties
* @param sparkStreamingSupport the underlying processor implementation
*/
private void startSparkStreamingContext(Properties sparkConfigs,
final SparkStreamingSupport sparkStreamingSupport,
final Module module) {
final Receiver receiver = module.getComponent(Receiver.class);
Environment env = this.getApplicationContext().getEnvironment();
String masterURL = env.getProperty(SparkStreamingSupport.SPARK_MASTER_URL_PROP,
SparkStreamingSupport.SPARK_DEFAULT_MASTER_URL);
final SparkConf sparkConf = setupSparkConf(module, masterURL, sparkConfigs);
final String batchInterval = env.getProperty(SparkStreamingSupport.SPARK_STREAMING_BATCH_INTERVAL_MODULE_OPTION,
env.getProperty(SparkStreamingSupport.SPARK_STREAMING_BATCH_INTERVAL_PROP,
SparkStreamingSupport.SPARK_STREAMING_DEFAULT_BATCH_INTERVAL));
final SparkStreamingListener streamingListener = new SparkStreamingListener();
final SparkMessageSender sender =
(module.getType()==ModuleType.processor) ? module.getComponent(SparkMessageSender.class) : null;
final StreamingContext streamingContext = new StreamingContext(sparkConf, new Duration(Long.valueOf(batchInterval)));
streamingContext.addStreamingListener(streamingListener);
Executors.newSingleThreadExecutor().execute(new Runnable() {
@Override
@SuppressWarnings("unchecked")
public void run() {
try {
JavaStreamingContext javaStreamingContext = new JavaStreamingContext(streamingContext);
streamingContexts.put(module, javaStreamingContext);
JavaReceiverInputDStream javaInputDStream = javaStreamingContext.receiverStream(receiver);
if (sparkStreamingSupport instanceof Processor) {
new ModuleExecutor().execute(javaInputDStream, (Processor) sparkStreamingSupport, sender);
}
if (sparkStreamingSupport instanceof org.springframework.xd.spark.streaming.scala.Processor) {
ReceiverInputDStream receiverInput = javaInputDStream.receiverInputDStream();
new org.springframework.xd.spark.streaming.scala.ModuleExecutor().execute(receiverInput,
(org.springframework.xd.spark.streaming.scala.Processor) sparkStreamingSupport, sender);
}
javaStreamingContext.start();
javaStreamingContext.awaitTermination();
}
catch (Exception e) {
throw new IllegalStateException("Exception when running Spark Streaming application.", e);
}
}
});
try {
boolean started = streamingListener.receiverStartLatch.await(30, TimeUnit.SECONDS);
if (!started) {
logger.warn("Deployment timed out when deploying Spark Streaming module " + sparkStreamingSupport);
}
if (!streamingListener.receiverStartSuccess.get()) {
throw new IllegalStateException("Failed to start Spark Streaming Receiver");
}
}
catch (InterruptedException ie) {
throw new RuntimeException(ie);
}
}
/**
* Setup {@link org.apache.spark.SparkConf} for the given spark configuration properties.
*
* @param masterURL the spark cluster master URL
* @param sparkConfigs the spark configuration properties
* @return SparkConf for this spark streaming module
*/
private SparkConf setupSparkConf(Module module, String masterURL, Properties sparkConfigs) {
SparkConf sparkConf = new SparkConf()
// Set spark UI port to random available port to support multiple spark modules on the same host.
.set("spark.ui.port", String.valueOf(SocketUtils.findAvailableTcpPort()))
// Set the cores max so that multiple (at least a few) spark modules can be deployed on the same host.
.set("spark.cores.max", "3")
.setMaster(masterURL)
.setAppName(module.getDescriptor().getGroup() + "-" + module.getDescriptor().getModuleLabel());
if (sparkConfigs != null) {
for (String property : sparkConfigs.stringPropertyNames()) {
sparkConf.set(property, sparkConfigs.getProperty(property));
}
}
List<String> sparkJars = new ArrayList<>();
// Add jars from spark.jars (if any) set from spark module.
try {
String jarsFromConf = sparkConf.get("spark.jars");
if (StringUtils.hasText(jarsFromConf)) {
sparkJars.addAll(Arrays.asList(jarsFromConf.split("\\s*,\\s*")));
}
}
catch (NoSuchElementException e) {
// should ignore
}
sparkJars.addAll(getApplicationJars(module));
sparkConf.setJars(sparkJars.toArray(new String[sparkJars.size()]));
return sparkConf;
}
/**
* Get the list of jars that this spark module requires.
*
* @return the list of spark application jars
*/
private List<String> getApplicationJars(Module module) {
// Get jars from module classpath
URLClassLoader classLoader = (URLClassLoader) ((SimpleModule)module).getClassLoader();
List<String> jars = new ArrayList<String>();
for (URL url : classLoader.getURLs()) {
String file = url.getFile().split("\\!", 2)[0];
if (file.endsWith(".jar")) {
jars.add(file);
}
}
// Get message bus libraries
Environment env = this.getApplicationContext().getEnvironment();
String jarsLocation = env.resolvePlaceholders(MessageBusClassLoaderFactory.MESSAGE_BUS_JARS_LOCATION);
try {
Resource[] resources = resolver.getResources(jarsLocation);
for (Resource resource : resources) {
URL url = resource.getURL();
jars.add(url.getFile());
}
}
catch (IOException ioe) {
throw new RuntimeException(ioe);
}
// Get necessary dependencies from XD DIRT.
URLClassLoader parentClassLoader = (URLClassLoader) classLoader.getParent();
URL[] urls = parentClassLoader.getURLs();
for (URL url : urls) {
String file = FilenameUtils.getName(url.getFile());
String fileToAdd = url.getFile().split("\\!", 2)[0];
if (file.endsWith(".jar") && (// Add spark jars
file.contains("spark") ||
// Add SpringXD dependencies
file.contains("spring-xd-") ||
// Add Spring dependencies
file.contains("spring-core") ||
file.contains("spring-integration-core") ||
file.contains("spring-beans") ||
file.contains("spring-context") ||
file.contains("spring-boot") ||
file.contains("spring-aop") ||
file.contains("spring-expression") ||
file.contains("spring-messaging") ||
file.contains("spring-retry") ||
file.contains("spring-tx") ||
file.contains("spring-data-commons") ||
file.contains("spring-data-redis") ||
file.contains("commons-pool") ||
file.contains("jedis") ||
// Add codec dependency
file.contains("kryo") ||
file.contains("gs-collections"))) {
jars.add(fileToAdd);
}
}
return jars;
}
/**
* StreamingListener that processes spark {@link org.apache.spark.streaming.scheduler.StreamingListener} events.
*/
private static class SparkStreamingListener implements StreamingListener {
private final CountDownLatch receiverStartLatch = new CountDownLatch(1);
private final AtomicBoolean receiverStartSuccess = new AtomicBoolean();
@Override
/** Called when a receiver has been started */
public void onReceiverStarted(StreamingListenerReceiverStarted started) {
logger.info("Spark streaming receiver started " + started.receiverInfo());
receiverStartSuccess.set(true);
receiverStartLatch.countDown();
}
@Override
/** Called when a receiver has reported an error */
public void onReceiverError(StreamingListenerReceiverError receiverError) {
logger.info("Error starting spark streaming receiver " + receiverError.receiverInfo());
receiverStartSuccess.set(false);
receiverStartLatch.countDown();
}
@Override
/** Called when a receiver has been stopped */
public void onReceiverStopped(StreamingListenerReceiverStopped receiverStopped) {
logger.info("Spark streaming receiver stopped " + receiverStopped.receiverInfo());
}
/** Called when a batch of jobs has been submitted for processing. */
public void onBatchSubmitted(StreamingListenerBatchSubmitted batchSubmitted) {
}
/** Called when processing of a batch of jobs has started. */
public void onBatchStarted(StreamingListenerBatchStarted batchStarted) {
}
/** Called when processing of a batch of jobs has completed. */
public void onBatchCompleted(StreamingListenerBatchCompleted batchCompleted) {
}
}
}