/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.aurora.scheduler.app;
import java.net.InetSocketAddress;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;
import javax.inject.Inject;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.net.HostAndPort;
import com.google.inject.AbstractModule;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Module;
import com.google.inject.util.Modules;
import org.apache.aurora.GuavaUtils.ServiceManagerIface;
import org.apache.aurora.common.application.Lifecycle;
import org.apache.aurora.common.args.Arg;
import org.apache.aurora.common.args.ArgScanner;
import org.apache.aurora.common.args.ArgScanner.ArgScanException;
import org.apache.aurora.common.args.CmdLine;
import org.apache.aurora.common.args.constraints.NotEmpty;
import org.apache.aurora.common.args.constraints.NotNull;
import org.apache.aurora.common.inject.Bindings;
import org.apache.aurora.common.stats.Stats;
import org.apache.aurora.common.zookeeper.SingletonService;
import org.apache.aurora.common.zookeeper.SingletonService.LeadershipListener;
import org.apache.aurora.gen.ServerInfo;
import org.apache.aurora.scheduler.AppStartup;
import org.apache.aurora.scheduler.SchedulerLifecycle;
import org.apache.aurora.scheduler.TierModule;
import org.apache.aurora.scheduler.configuration.executor.ExecutorModule;
import org.apache.aurora.scheduler.cron.quartz.CronModule;
import org.apache.aurora.scheduler.discovery.FlaggedZooKeeperConfig;
import org.apache.aurora.scheduler.discovery.ServiceDiscoveryModule;
import org.apache.aurora.scheduler.events.WebhookModule;
import org.apache.aurora.scheduler.http.HttpService;
import org.apache.aurora.scheduler.log.mesos.MesosLogStreamModule;
import org.apache.aurora.scheduler.mesos.CommandLineDriverSettingsModule;
import org.apache.aurora.scheduler.mesos.FrameworkInfoFactory.FrameworkInfoFactoryImpl.SchedulerProtocol;
import org.apache.aurora.scheduler.mesos.LibMesosLoadingModule;
import org.apache.aurora.scheduler.stats.StatsModule;
import org.apache.aurora.scheduler.storage.Storage;
import org.apache.aurora.scheduler.storage.backup.BackupModule;
import org.apache.aurora.scheduler.storage.db.DbModule;
import org.apache.aurora.scheduler.storage.entities.IServerInfo;
import org.apache.aurora.scheduler.storage.log.LogStorageModule;
import org.apache.aurora.scheduler.storage.log.SnapshotStoreImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Launcher for the aurora scheduler.
 *
 * <p>{@link #main(String...)} applies the command line arguments, assembles the environment
 * modules and delegates to {@link #flagConfiguredMain(Module)}, which creates the injector,
 * injects a {@code SchedulerMain} instance and invokes its {@code run()} method.
 */
public class SchedulerMain {
  private static final Logger LOG = LoggerFactory.getLogger(SchedulerMain.class);

  @NotNull
  @CmdLine(name = "cluster_name", help = "Name to identify the cluster being served.")
  private static final Arg<String> CLUSTER_NAME = Arg.create();

  @NotNull
  @NotEmpty
  @CmdLine(name = "serverset_path", help = "ZooKeeper ServerSet path to register at.")
  private static final Arg<String> SERVERSET_PATH = Arg.create();

  // TODO(zmanji): Consider making this an enum of HTTP or HTTPS.
  @CmdLine(name = "serverset_endpoint_name",
      help = "Name of the scheduler endpoint published in ZooKeeper.")
  private static final Arg<String> SERVERSET_ENDPOINT_NAME = Arg.create("http");

  // TODO(Suman Karumuri): Rename viz_job_url_prefix to stats_job_url_prefix for consistency.
  @CmdLine(name = "viz_job_url_prefix", help = "URL prefix for job container stats.")
  private static final Arg<String> STATS_URL_PREFIX = Arg.create("");

  @CmdLine(name = "allow_gpu_resource", help = "Allow jobs to request Mesos GPU resource.")
  private static final Arg<Boolean> ALLOW_GPU_RESOURCE = Arg.create(false);

  /**
   * The Mesos driver implementations selectable through the {@code mesos_driver} flag.
   */
  public enum DriverKind {
    // TODO(zmanji): Remove this option once V0_DRIVER has been proven out in production.
    // This is the original driver that libmesos shipped with. Uses unversioned protobufs, and has
    // minimal backwards compatibility guarantees.
    SCHEDULER_DRIVER,
    // These are the new drivers that libmesos ships with. They use versioned (V1) protobufs for
    // the Java API.
    // V0 Driver offers the V1 API over the old Scheduler Driver. It does not fully support
    // the V1 API (ie mesos maintenance).
    V0_DRIVER,
    // V1 Driver offers the V1 API over a full HTTP API implementation. It allows for maintenance
    // primitives and other new features.
    V1_DRIVER,
  }

  @CmdLine(name = "mesos_driver", help = "Which Mesos Driver to use")
  private static final Arg<DriverKind> DRIVER_IMPL = Arg.create(DriverKind.SCHEDULER_DRIVER);

  /**
   * Binds the value of the {@code serverset_endpoint_name} flag as the {@link String}
   * annotated with {@link SchedulerProtocol}.
   */
  public static class ProtocolModule extends AbstractModule {
    @Override
    protected void configure() {
      bind(String.class)
          .annotatedWith(SchedulerProtocol.class)
          .toInstance(SERVERSET_ENDPOINT_NAME.get());
    }
  }

  // Collaborators populated by Injector#injectMembers in flagConfiguredMain.
  @Inject private SingletonService schedulerService;
  @Inject private HttpService httpService;
  @Inject private SchedulerLifecycle schedulerLifecycle;
  @Inject private Lifecycle appLifecycle;
  @Inject
  @AppStartup
  private ServiceManagerIface startupServices;

  /**
   * Stops the startup services, waiting at most five seconds for them to stop, and then shuts
   * down the application lifecycle. Invoked both from the JVM shutdown hook registered in
   * {@code run()} and at the end of {@code run()} itself.
   */
  private void stop() {
    LOG.info("Stopping scheduler services.");
    try {
      startupServices.stopAsync().awaitStopped(5L, TimeUnit.SECONDS);
    } catch (TimeoutException e) {
      // Best effort: services that are slow to stop must not block the lifecycle shutdown below.
      LOG.info("Shutdown did not complete in time: " + e);
    }
    appLifecycle.shutdown();
  }

  /**
   * Starts the startup services, registers a JVM shutdown hook, joins the scheduler service
   * group as a leader candidate advertising the HTTP endpoint, and then blocks until the
   * application lifecycle is shut down, at which point services are stopped.
   *
   * @throws IllegalStateException if leading the service fails, or if the calling thread is
   *     interrupted while joining the service group (the interrupt status is restored first).
   */
  void run() {
    startupServices.startAsync();
    Runtime.getRuntime().addShutdownHook(new Thread(SchedulerMain.this::stop, "ShutdownHook"));
    startupServices.awaitHealthy();

    LeadershipListener leaderListener = schedulerLifecycle.prepare();

    HostAndPort httpAddress = httpService.getAddress();
    InetSocketAddress httpSocketAddress =
        InetSocketAddress.createUnresolved(httpAddress.getHost(), httpAddress.getPort());
    try {
      schedulerService.lead(
          httpSocketAddress,
          ImmutableMap.of(SERVERSET_ENDPOINT_NAME.get(), httpSocketAddress),
          leaderListener);
    } catch (SingletonService.LeadException e) {
      throw new IllegalStateException("Failed to lead service.", e);
    } catch (InterruptedException e) {
      // Restore the interrupt status so that code further up the stack can still observe the
      // interruption after it has been translated into an unchecked exception.
      Thread.currentThread().interrupt();
      throw new IllegalStateException("Interrupted while joining scheduler service group.", e);
    }

    appLifecycle.awaitShutdown();
    stop();
  }

  /**
   * Combines the modules that are installed regardless of the execution environment.
   *
   * <p>Reads the {@code allow_gpu_resource} and {@code mesos_driver} flag values, so arguments
   * must have been applied before this is called.
   *
   * @return The combined module.
   */
  @VisibleForTesting
  static Module getUniversalModule() {
    return Modules.combine(
        new ProtocolModule(),
        new LifecycleModule(),
        new StatsModule(),
        new AppModule(ALLOW_GPU_RESOURCE.get(), DRIVER_IMPL.get()),
        new CronModule(),
        new DbModule.MigrationManagerModule(),
        DbModule.productionModule(Bindings.annotatedKeyFactory(Storage.Volatile.class)),
        new DbModule.GarbageCollectorModule());
  }

  /**
   * Runs the scheduler by including modules configured from command line arguments in
   * addition to the provided environment-specific module.
   *
   * <p>Also installs a default uncaught exception handler that increments the
   * {@code uncaught_exceptions} stat and logs the error. The application {@link Lifecycle} is
   * shut down when this method exits, whether normally or exceptionally, provided it was
   * obtained from the injector.
   *
   * @param appEnvironmentModule Additional modules based on the execution environment.
   */
  @VisibleForTesting
  public static void flagConfiguredMain(Module appEnvironmentModule) {
    AtomicLong uncaughtExceptions = Stats.exportLong("uncaught_exceptions");
    Thread.setDefaultUncaughtExceptionHandler((t, e) -> {
      uncaughtExceptions.incrementAndGet();
      LOG.error("Uncaught exception from " + t + ":" + e, e);
    });

    Module module = Modules.combine(
        appEnvironmentModule,
        getUniversalModule(),
        new ServiceDiscoveryModule(FlaggedZooKeeperConfig.create(), SERVERSET_PATH.get()),
        new BackupModule(SnapshotStoreImpl.class),
        new ExecutorModule(),
        new AbstractModule() {
          @Override
          protected void configure() {
            bind(IServerInfo.class).toInstance(
                IServerInfo.build(
                    new ServerInfo()
                        .setClusterName(CLUSTER_NAME.get())
                        .setStatsUrlPrefix(STATS_URL_PREFIX.get())));
          }
        });

    // Stays null if injector creation or the Lifecycle lookup fails, in which case there is
    // nothing to shut down in the finally block.
    Lifecycle lifecycle = null;
    try {
      Injector injector = Guice.createInjector(module);
      lifecycle = injector.getInstance(Lifecycle.class);
      SchedulerMain scheduler = new SchedulerMain();
      injector.injectMembers(scheduler);
      try {
        scheduler.run();
      } finally {
        LOG.info("Application run() exited.");
      }
    } finally {
      if (lifecycle != null) {
        lifecycle.shutdown();
      }
    }
  }

  /**
   * Process entry point: applies the command line arguments, builds the environment modules
   * and runs the scheduler via {@link #flagConfiguredMain(Module)}.
   *
   * @param args Command line arguments.
   */
  public static void main(String... args) {
    // Must happen first: the module constructors below read flag values.
    applyStaticArgumentValues(args);

    List<Module> modules = ImmutableList.<Module>builder()
        .add(
            new CommandLineDriverSettingsModule(ALLOW_GPU_RESOURCE.get()),
            new LibMesosLoadingModule(DRIVER_IMPL.get()),
            new MesosLogStreamModule(FlaggedZooKeeperConfig.create()),
            new LogStorageModule(),
            new TierModule(),
            new WebhookModule()
        )
        .build();
    flagConfiguredMain(Modules.combine(modules));
  }

  /**
   * Logs the message together with the error and terminates the JVM with exit status 1.
   *
   * @param message Description of the failure.
   * @param error The exception that caused the failure.
   */
  private static void exit(String message, Exception error) {
    LOG.error(message + "\n" + error, error);
    System.exit(1);
  }

  /**
   * Applies {@link CmdLine} arg values throughout the classpath. This must be invoked before
   * attempting to read any argument values in the system.
   *
   * <p>Terminates the JVM with status 0 when the scanner's {@code parse} call returns
   * {@code false}, and with status 1 when scanning or applying the arguments fails.
   *
   * @param args Command line arguments.
   */
  @VisibleForTesting
  public static void applyStaticArgumentValues(String... args) {
    try {
      // NOTE(review): a false return presumably means usage/help was printed rather than an
      // error — confirm against ArgScanner.
      if (!new ArgScanner().parse(Arrays.asList(args))) {
        System.exit(0);
      }
    } catch (ArgScanException e) {
      exit("Failed to scan arguments", e);
    } catch (IllegalArgumentException e) {
      exit("Failed to apply arguments", e);
    }
  }
}