/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.mesos.runtime.clusterframework;
import akka.actor.ActorRef;
import akka.actor.ActorSystem;
import akka.actor.Address;
import akka.actor.Props;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.GlobalConfiguration;
import org.apache.flink.configuration.IllegalConfigurationException;
import org.apache.flink.configuration.JobManagerOptions;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.mesos.runtime.clusterframework.services.MesosServices;
import org.apache.flink.mesos.runtime.clusterframework.services.MesosServicesUtils;
import org.apache.flink.mesos.runtime.clusterframework.store.MesosWorkerStore;
import org.apache.flink.mesos.util.MesosArtifactServer;
import org.apache.flink.mesos.util.MesosConfiguration;
import org.apache.flink.runtime.akka.AkkaUtils;
import org.apache.flink.runtime.clusterframework.BootstrapTools;
import org.apache.flink.runtime.clusterframework.ContainerSpecification;
import org.apache.flink.runtime.clusterframework.overlays.CompositeContainerOverlay;
import org.apache.flink.runtime.clusterframework.overlays.FlinkDistributionOverlay;
import org.apache.flink.runtime.clusterframework.overlays.HadoopConfOverlay;
import org.apache.flink.runtime.clusterframework.overlays.HadoopUserOverlay;
import org.apache.flink.runtime.clusterframework.overlays.KeytabOverlay;
import org.apache.flink.runtime.clusterframework.overlays.Krb5ConfOverlay;
import org.apache.flink.runtime.clusterframework.overlays.SSLStoreOverlay;
import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
import org.apache.flink.runtime.highavailability.HighAvailabilityServicesUtils;
import org.apache.flink.runtime.jobmanager.JobManager;
import org.apache.flink.runtime.jobmanager.MemoryArchivist;
import org.apache.flink.runtime.jobmaster.JobMaster;
import org.apache.flink.runtime.process.ProcessReaper;
import org.apache.flink.runtime.security.SecurityUtils;
import org.apache.flink.runtime.util.EnvironmentInformation;
import org.apache.flink.runtime.util.ExecutorThreadFactory;
import org.apache.flink.runtime.util.Hardware;
import org.apache.flink.runtime.util.JvmShutdownSafeguard;
import org.apache.flink.runtime.util.SignalHandler;
import org.apache.flink.runtime.webmonitor.WebMonitor;
import org.apache.mesos.Protos;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Option;
import scala.concurrent.duration.Duration;
import scala.concurrent.duration.FiniteDuration;
import java.io.IOException;
import java.net.InetAddress;
import java.net.URL;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import static org.apache.flink.util.Preconditions.checkState;
/**
* This class is the executable entry point for the Mesos Application Master.
* It starts actor system and the actors for {@link JobManager}
* and {@link MesosFlinkResourceManager}.
*
* The JobManager handles Flink job execution, while the MesosFlinkResourceManager handles container
* allocation and failure detection.
*/
public class MesosApplicationMasterRunner {
/** Logger */
protected static final Logger LOG = LoggerFactory.getLogger(MesosApplicationMasterRunner.class);
/** The maximum time that TaskManagers may be waiting to register at the JobManager,
* before they quit */
private static final FiniteDuration TASKMANAGER_REGISTRATION_TIMEOUT = new FiniteDuration(5, TimeUnit.MINUTES);
/** The process environment variables */
private static final Map<String, String> ENV = System.getenv();
/** The exit code returned if the initialization of the application master failed */
private static final int INIT_ERROR_EXIT_CODE = 31;
/** The exit code returned if the process exits because a critical actor died */
private static final int ACTOR_DIED_EXIT_CODE = 32;
// ------------------------------------------------------------------------
// Command-line options
// ------------------------------------------------------------------------
private static final Options ALL_OPTIONS;
static {
ALL_OPTIONS =
new Options()
.addOption(BootstrapTools.newDynamicPropertiesOption());
}
// ------------------------------------------------------------------------
// Program entry point
// ------------------------------------------------------------------------
/**
* The entry point for the Mesos AppMaster.
*
* @param args The command line arguments.
*/
public static void main(String[] args) {
EnvironmentInformation.logEnvironmentInfo(LOG, "Mesos AppMaster", args);
SignalHandler.register(LOG);
JvmShutdownSafeguard.installAsShutdownHook(LOG);
// run and exit with the proper return code
int returnCode = new MesosApplicationMasterRunner().run(args);
System.exit(returnCode);
}
/**
* The instance entry point for the Mesos AppMaster. Obtains user group
* information and calls the main work method {@link #runPrivileged(Configuration,Configuration)} as a
* privileged action.
*
* @param args The command line arguments.
* @return The process exit code.
*/
protected int run(final String[] args) {
try {
LOG.debug("All environment variables: {}", ENV);
// loading all config values here has the advantage that the program fails fast, if any
// configuration problem occurs
CommandLineParser parser = new PosixParser();
CommandLine cmd = parser.parse(ALL_OPTIONS, args);
final Configuration dynamicProperties = BootstrapTools.parseDynamicProperties(cmd);
GlobalConfiguration.setDynamicProperties(dynamicProperties);
final Configuration config = GlobalConfiguration.loadConfiguration();
// configure the default filesystem
try {
FileSystem.setDefaultScheme(config);
} catch (IOException e) {
throw new IOException("Error while setting the default " +
"filesystem scheme from configuration.", e);
}
// configure security
SecurityUtils.SecurityConfiguration sc = new SecurityUtils.SecurityConfiguration(config);
SecurityUtils.install(sc);
// run the actual work in the installed security context
return SecurityUtils.getInstalledContext().runSecured(new Callable<Integer>() {
@Override
public Integer call() throws Exception {
return runPrivileged(config, dynamicProperties);
}
});
}
catch (Throwable t) {
// make sure that everything whatever ends up in the log
LOG.error("Mesos AppMaster initialization failed", t);
return INIT_ERROR_EXIT_CODE;
}
}
// ------------------------------------------------------------------------
// Core work method
// ------------------------------------------------------------------------
/**
* The main work method, must run as a privileged action.
*
* @return The return code for the Java process.
*/
protected int runPrivileged(Configuration config, Configuration dynamicProperties) {
ActorSystem actorSystem = null;
WebMonitor webMonitor = null;
MesosArtifactServer artifactServer = null;
ScheduledExecutorService futureExecutor = null;
ExecutorService ioExecutor = null;
MesosServices mesosServices = null;
HighAvailabilityServices highAvailabilityServices = null;
try {
// ------- (1) load and parse / validate all configurations -------
final String appMasterHostname = config.getString(
JobManagerOptions.ADDRESS,
InetAddress.getLocalHost().getHostName());
LOG.info("App Master Hostname to use: {}", appMasterHostname);
// Mesos configuration
final MesosConfiguration mesosConfig = createMesosConfig(config, appMasterHostname);
// JM configuration
int numberProcessors = Hardware.getNumberCPUCores();
futureExecutor = Executors.newScheduledThreadPool(
numberProcessors,
new ExecutorThreadFactory("mesos-jobmanager-future"));
ioExecutor = Executors.newFixedThreadPool(
numberProcessors,
new ExecutorThreadFactory("mesos-jobmanager-io"));
mesosServices = MesosServicesUtils.createMesosServices(config);
// TM configuration
final MesosTaskManagerParameters taskManagerParameters = MesosTaskManagerParameters.create(config);
LOG.info("TaskManagers will be created with {} task slots",
taskManagerParameters.containeredParameters().numSlots());
LOG.info("TaskManagers will be started with container size {} MB, JVM heap size {} MB, " +
"JVM direct memory limit {} MB, {} cpus",
taskManagerParameters.containeredParameters().taskManagerTotalMemoryMB(),
taskManagerParameters.containeredParameters().taskManagerHeapSizeMB(),
taskManagerParameters.containeredParameters().taskManagerDirectMemoryLimitMB(),
taskManagerParameters.cpus());
// JM endpoint, which should be explicitly configured based on acquired net resources
final int listeningPort = config.getInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY,
ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT);
checkState(listeningPort >= 0 && listeningPort <= 65536, "Config parameter \"" +
ConfigConstants.JOB_MANAGER_IPC_PORT_KEY + "\" is invalid, it must be between 0 and 65536");
// ----------------- (2) start the actor system -------------------
// try to start the actor system, JobManager and JobManager actor system
// using the configured address and ports
actorSystem = BootstrapTools.startActorSystem(config, appMasterHostname, listeningPort, LOG);
Address address = AkkaUtils.getAddress(actorSystem);
final String akkaHostname = address.host().get();
final int akkaPort = (Integer) address.port().get();
LOG.info("Actor system bound to hostname {}.", akkaHostname);
// try to start the artifact server
LOG.debug("Starting Artifact Server");
final int artifactServerPort = config.getInteger(ConfigConstants.MESOS_ARTIFACT_SERVER_PORT_KEY,
ConfigConstants.DEFAULT_MESOS_ARTIFACT_SERVER_PORT);
final String artifactServerPrefix = UUID.randomUUID().toString();
artifactServer = new MesosArtifactServer(artifactServerPrefix, akkaHostname, artifactServerPort, config);
// ----------------- (3) Generate the configuration for the TaskManagers -------------------
// generate a container spec which conveys the artifacts/vars needed to launch a TM
ContainerSpecification taskManagerContainerSpec = new ContainerSpecification();
// propagate the AM dynamic configuration to the TM
taskManagerContainerSpec.getDynamicConfiguration().addAll(dynamicProperties);
// propagate newly-generated configuration elements
final Configuration taskManagerConfig = BootstrapTools.generateTaskManagerConfiguration(
new Configuration(), akkaHostname, akkaPort, taskManagerParameters.containeredParameters().numSlots(),
TASKMANAGER_REGISTRATION_TIMEOUT);
taskManagerContainerSpec.getDynamicConfiguration().addAll(taskManagerConfig);
// apply the overlays
applyOverlays(config, taskManagerContainerSpec);
// configure the artifact server to serve the specified artifacts
configureArtifactServer(artifactServer, taskManagerContainerSpec);
// ----------------- (4) start the actors -------------------
// 1) JobManager & Archive (in non-HA case, the leader service takes this)
// 2) Web Monitor (we need its port to register)
// 3) Resource Master for Mesos
// 4) Process reapers for the JobManager and Resource Master
// 0: Start the JobManager services
highAvailabilityServices = HighAvailabilityServicesUtils.createHighAvailabilityServices(
config,
ioExecutor,
HighAvailabilityServicesUtils.AddressResolution.NO_ADDRESS_RESOLUTION);
// 1: the JobManager
LOG.debug("Starting JobManager actor");
// we start the JobManager with its standard name
ActorRef jobManager = JobManager.startJobManagerActors(
config,
actorSystem,
futureExecutor,
ioExecutor,
highAvailabilityServices,
Option.apply(JobMaster.JOB_MANAGER_NAME),
Option.apply(JobMaster.ARCHIVE_NAME),
getJobManagerClass(),
getArchivistClass())._1();
// 2: the web monitor
LOG.debug("Starting Web Frontend");
webMonitor = BootstrapTools.startWebMonitorIfConfigured(
config,
highAvailabilityServices,
actorSystem,
jobManager,
LOG);
if(webMonitor != null) {
final URL webMonitorURL = new URL("http", appMasterHostname, webMonitor.getServerPort(), "/");
mesosConfig.frameworkInfo().setWebuiUrl(webMonitorURL.toExternalForm());
}
// 3: Flink's Mesos ResourceManager
LOG.debug("Starting Mesos Flink Resource Manager");
// create the worker store to persist task information across restarts
MesosWorkerStore workerStore = mesosServices.createMesosWorkerStore(
config,
ioExecutor);
Props resourceMasterProps = MesosFlinkResourceManager.createActorProps(
getResourceManagerClass(),
config,
mesosConfig,
workerStore,
highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID),
taskManagerParameters,
taskManagerContainerSpec,
artifactServer,
LOG);
ActorRef resourceMaster = actorSystem.actorOf(resourceMasterProps, "Mesos_Resource_Master");
// 4: Process reapers
// The process reapers ensure that upon unexpected actor death, the process exits
// and does not stay lingering around unresponsive
LOG.debug("Starting process reapers for JobManager");
actorSystem.actorOf(
Props.create(ProcessReaper.class, resourceMaster, LOG, ACTOR_DIED_EXIT_CODE),
"Mesos_Resource_Master_Process_Reaper");
actorSystem.actorOf(
Props.create(ProcessReaper.class, jobManager, LOG, ACTOR_DIED_EXIT_CODE),
"JobManager_Process_Reaper");
}
catch (Throwable t) {
// make sure that everything whatever ends up in the log
LOG.error("Mesos JobManager initialization failed", t);
if (webMonitor != null) {
try {
webMonitor.stop();
} catch (Throwable ignored) {
LOG.warn("Failed to stop the web frontend", ignored);
}
}
if(artifactServer != null) {
try {
artifactServer.stop();
} catch (Throwable ignored) {
LOG.error("Failed to stop the artifact server", ignored);
}
}
if (actorSystem != null) {
try {
actorSystem.shutdown();
} catch (Throwable tt) {
LOG.error("Error shutting down actor system", tt);
}
}
if(futureExecutor != null) {
try {
futureExecutor.shutdownNow();
} catch (Throwable tt) {
LOG.error("Error shutting down future executor", tt);
}
}
if(ioExecutor != null) {
try {
ioExecutor.shutdownNow();
} catch (Throwable tt) {
LOG.error("Error shutting down io executor", tt);
}
}
if (mesosServices != null) {
try {
mesosServices.close(false);
} catch (Throwable tt) {
LOG.error("Error closing the mesos services.", tt);
}
}
return INIT_ERROR_EXIT_CODE;
}
// everything started, we can wait until all is done or the process is killed
LOG.info("Mesos JobManager started");
// wait until everything is done
actorSystem.awaitTermination();
// if we get here, everything work out jolly all right, and we even exited smoothly
if (webMonitor != null) {
try {
webMonitor.stop();
} catch (Throwable t) {
LOG.error("Failed to stop the web frontend", t);
}
}
try {
artifactServer.stop();
} catch (Throwable t) {
LOG.error("Failed to stop the artifact server", t);
}
if (highAvailabilityServices != null) {
try {
highAvailabilityServices.close();
} catch (Throwable t) {
LOG.error("Could not properly stop the high availability services.");
}
}
org.apache.flink.runtime.concurrent.Executors.gracefulShutdown(
AkkaUtils.getTimeout(config).toMillis(),
TimeUnit.MILLISECONDS,
futureExecutor,
ioExecutor);
try {
mesosServices.close(true);
} catch (Throwable t) {
LOG.error("Failed to clean up and close MesosServices.", t);
}
return 0;
}
// ------------------------------------------------------------------------
// For testing, this allows to override the actor classes used for
// JobManager and the archive of completed jobs
// ------------------------------------------------------------------------
protected Class<? extends MesosFlinkResourceManager> getResourceManagerClass() {
return MesosFlinkResourceManager.class;
}
protected Class<? extends JobManager> getJobManagerClass() {
return MesosJobManager.class;
}
protected Class<? extends MemoryArchivist> getArchivistClass() {
return MemoryArchivist.class;
}
/**
* Loads and validates the ResourceManager Mesos configuration from the given Flink configuration.
*/
public static MesosConfiguration createMesosConfig(Configuration flinkConfig, String hostname) {
Protos.FrameworkInfo.Builder frameworkInfo = Protos.FrameworkInfo.newBuilder()
.setHostname(hostname);
Protos.Credential.Builder credential = null;
if(!flinkConfig.containsKey(ConfigConstants.MESOS_MASTER_URL)) {
throw new IllegalConfigurationException(ConfigConstants.MESOS_MASTER_URL + " must be configured.");
}
String masterUrl = flinkConfig.getString(ConfigConstants.MESOS_MASTER_URL, null);
Duration failoverTimeout = FiniteDuration.apply(
flinkConfig.getInteger(
ConfigConstants.MESOS_FAILOVER_TIMEOUT_SECONDS,
ConfigConstants.DEFAULT_MESOS_FAILOVER_TIMEOUT_SECS),
TimeUnit.SECONDS);
frameworkInfo.setFailoverTimeout(failoverTimeout.toSeconds());
frameworkInfo.setName(flinkConfig.getString(
ConfigConstants.MESOS_RESOURCEMANAGER_FRAMEWORK_NAME,
ConfigConstants.DEFAULT_MESOS_RESOURCEMANAGER_FRAMEWORK_NAME));
frameworkInfo.setRole(flinkConfig.getString(
ConfigConstants.MESOS_RESOURCEMANAGER_FRAMEWORK_ROLE,
ConfigConstants.DEFAULT_MESOS_RESOURCEMANAGER_FRAMEWORK_ROLE));
frameworkInfo.setUser(flinkConfig.getString(
ConfigConstants.MESOS_RESOURCEMANAGER_FRAMEWORK_USER,
ConfigConstants.DEFAULT_MESOS_RESOURCEMANAGER_FRAMEWORK_USER));
if(flinkConfig.containsKey(ConfigConstants.MESOS_RESOURCEMANAGER_FRAMEWORK_PRINCIPAL)) {
frameworkInfo.setPrincipal(flinkConfig.getString(
ConfigConstants.MESOS_RESOURCEMANAGER_FRAMEWORK_PRINCIPAL, null));
credential = Protos.Credential.newBuilder();
credential.setPrincipal(frameworkInfo.getPrincipal());
// some environments use a side-channel to communicate the secret to Mesos,
// and thus don't set the 'secret' configuration setting
if(flinkConfig.containsKey(ConfigConstants.MESOS_RESOURCEMANAGER_FRAMEWORK_SECRET)) {
credential.setSecret(flinkConfig.getString(
ConfigConstants.MESOS_RESOURCEMANAGER_FRAMEWORK_SECRET, null));
}
}
MesosConfiguration mesos =
new MesosConfiguration(masterUrl, frameworkInfo, scala.Option.apply(credential));
return mesos;
}
/**
* Generate a container specification as a TaskManager template.
*
* <p>This code is extremely Mesos-specific and registers all the artifacts that the TaskManager
* needs (such as JAR file, config file, ...) and all environment variables into a container specification.
* The Mesos fetcher then ensures that those artifacts will be copied into the task's sandbox directory.
* A lightweight HTTP server serves the artifacts to the fetcher.
*/
private static void applyOverlays(
Configuration globalConfiguration, ContainerSpecification containerSpec) throws IOException {
// create the overlays that will produce the specification
CompositeContainerOverlay overlay = new CompositeContainerOverlay(
FlinkDistributionOverlay.newBuilder().fromEnvironment(globalConfiguration).build(),
HadoopConfOverlay.newBuilder().fromEnvironment(globalConfiguration).build(),
HadoopUserOverlay.newBuilder().fromEnvironment(globalConfiguration).build(),
KeytabOverlay.newBuilder().fromEnvironment(globalConfiguration).build(),
Krb5ConfOverlay.newBuilder().fromEnvironment(globalConfiguration).build(),
SSLStoreOverlay.newBuilder().fromEnvironment(globalConfiguration).build()
);
// apply the overlays
overlay.configure(containerSpec);
}
private static void configureArtifactServer(MesosArtifactServer server, ContainerSpecification container) throws IOException {
// serve the artifacts associated with the container environment
for(ContainerSpecification.Artifact artifact : container.getArtifacts()) {
server.addPath(artifact.source, artifact.dest);
}
}
}