/** * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.aurora.scheduler.app; import java.net.InetSocketAddress; import java.util.Arrays; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicLong; import javax.inject.Inject; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.net.HostAndPort; import com.google.inject.AbstractModule; import com.google.inject.Guice; import com.google.inject.Injector; import com.google.inject.Module; import com.google.inject.util.Modules; import org.apache.aurora.GuavaUtils.ServiceManagerIface; import org.apache.aurora.common.application.Lifecycle; import org.apache.aurora.common.args.Arg; import org.apache.aurora.common.args.ArgScanner; import org.apache.aurora.common.args.ArgScanner.ArgScanException; import org.apache.aurora.common.args.CmdLine; import org.apache.aurora.common.args.constraints.NotEmpty; import org.apache.aurora.common.args.constraints.NotNull; import org.apache.aurora.common.inject.Bindings; import org.apache.aurora.common.stats.Stats; import org.apache.aurora.common.zookeeper.SingletonService; import org.apache.aurora.common.zookeeper.SingletonService.LeadershipListener; import org.apache.aurora.gen.ServerInfo; import org.apache.aurora.scheduler.AppStartup; import org.apache.aurora.scheduler.SchedulerLifecycle; import org.apache.aurora.scheduler.TierModule; import org.apache.aurora.scheduler.configuration.executor.ExecutorModule; import org.apache.aurora.scheduler.cron.quartz.CronModule; import org.apache.aurora.scheduler.discovery.FlaggedZooKeeperConfig; import org.apache.aurora.scheduler.discovery.ServiceDiscoveryModule; import org.apache.aurora.scheduler.events.WebhookModule; import org.apache.aurora.scheduler.http.HttpService; import org.apache.aurora.scheduler.log.mesos.MesosLogStreamModule; import org.apache.aurora.scheduler.mesos.CommandLineDriverSettingsModule; import org.apache.aurora.scheduler.mesos.FrameworkInfoFactory.FrameworkInfoFactoryImpl.SchedulerProtocol; import org.apache.aurora.scheduler.mesos.LibMesosLoadingModule; import org.apache.aurora.scheduler.stats.StatsModule; import org.apache.aurora.scheduler.storage.Storage; import org.apache.aurora.scheduler.storage.backup.BackupModule; import org.apache.aurora.scheduler.storage.db.DbModule; import org.apache.aurora.scheduler.storage.entities.IServerInfo; import org.apache.aurora.scheduler.storage.log.LogStorageModule; import org.apache.aurora.scheduler.storage.log.SnapshotStoreImpl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Launcher for the aurora scheduler. */ public class SchedulerMain { private static final Logger LOG = LoggerFactory.getLogger(SchedulerMain.class); @NotNull @CmdLine(name = "cluster_name", help = "Name to identify the cluster being served.") private static final Arg<String> CLUSTER_NAME = Arg.create(); @NotNull @NotEmpty @CmdLine(name = "serverset_path", help = "ZooKeeper ServerSet path to register at.") private static final Arg<String> SERVERSET_PATH = Arg.create(); // TODO(zmanji): Consider making this an enum of HTTP or HTTPS. @CmdLine(name = "serverset_endpoint_name", help = "Name of the scheduler endpoint published in ZooKeeper.") private static final Arg<String> SERVERSET_ENDPOINT_NAME = Arg.create("http"); // TODO(Suman Karumuri): Rename viz_job_url_prefix to stats_job_url_prefix for consistency. @CmdLine(name = "viz_job_url_prefix", help = "URL prefix for job container stats.") private static final Arg<String> STATS_URL_PREFIX = Arg.create(""); @CmdLine(name = "allow_gpu_resource", help = "Allow jobs to request Mesos GPU resource.") private static final Arg<Boolean> ALLOW_GPU_RESOURCE = Arg.create(false); public enum DriverKind { // TODO(zmanji): Remove this option once V0_DRIVER has been proven out in production. // This is the original driver that libmesos shipped with. Uses unversioned protobufs, and has // minimal backwards compatability guarantees. SCHEDULER_DRIVER, // These are the new drivers that libmesos ships with. They use versioned (V1) protobufs for // the Java API. // V0 Driver offers the V1 API over the old Scheduler Driver. It does not fully support // the V1 API (ie mesos maintenance). V0_DRIVER, // V1 Driver offers the V1 API over a full HTTP API implementation. It allows for maintenance // primatives and other new features. V1_DRIVER, } @CmdLine(name = "mesos_driver", help = "Which Mesos Driver to use") private static final Arg<DriverKind> DRIVER_IMPL = Arg.create(DriverKind.SCHEDULER_DRIVER); public static class ProtocolModule extends AbstractModule { @Override protected void configure() { bind(String.class) .annotatedWith(SchedulerProtocol.class) .toInstance(SERVERSET_ENDPOINT_NAME.get()); } } @Inject private SingletonService schedulerService; @Inject private HttpService httpService; @Inject private SchedulerLifecycle schedulerLifecycle; @Inject private Lifecycle appLifecycle; @Inject @AppStartup private ServiceManagerIface startupServices; private void stop() { LOG.info("Stopping scheduler services."); try { startupServices.stopAsync().awaitStopped(5L, TimeUnit.SECONDS); } catch (TimeoutException e) { LOG.info("Shutdown did not complete in time: " + e); } appLifecycle.shutdown(); } void run() { startupServices.startAsync(); Runtime.getRuntime().addShutdownHook(new Thread(SchedulerMain.this::stop, "ShutdownHook")); startupServices.awaitHealthy(); LeadershipListener leaderListener = schedulerLifecycle.prepare(); HostAndPort httpAddress = httpService.getAddress(); InetSocketAddress httpSocketAddress = InetSocketAddress.createUnresolved(httpAddress.getHost(), httpAddress.getPort()); try { schedulerService.lead( httpSocketAddress, ImmutableMap.of(SERVERSET_ENDPOINT_NAME.get(), httpSocketAddress), leaderListener); } catch (SingletonService.LeadException e) { throw new IllegalStateException("Failed to lead service.", e); } catch (InterruptedException e) { throw new IllegalStateException("Interrupted while joining scheduler service group.", e); } appLifecycle.awaitShutdown(); stop(); } @VisibleForTesting static Module getUniversalModule() { return Modules.combine( new ProtocolModule(), new LifecycleModule(), new StatsModule(), new AppModule(ALLOW_GPU_RESOURCE.get(), DRIVER_IMPL.get()), new CronModule(), new DbModule.MigrationManagerModule(), DbModule.productionModule(Bindings.annotatedKeyFactory(Storage.Volatile.class)), new DbModule.GarbageCollectorModule()); } /** * Runs the scheduler by including modules configured from command line arguments in * addition to the provided environment-specific module. * * @param appEnvironmentModule Additional modules based on the execution environment. */ @VisibleForTesting public static void flagConfiguredMain(Module appEnvironmentModule) { AtomicLong uncaughtExceptions = Stats.exportLong("uncaught_exceptions"); Thread.setDefaultUncaughtExceptionHandler((t, e) -> { uncaughtExceptions.incrementAndGet(); LOG.error("Uncaught exception from " + t + ":" + e, e); }); Module module = Modules.combine( appEnvironmentModule, getUniversalModule(), new ServiceDiscoveryModule(FlaggedZooKeeperConfig.create(), SERVERSET_PATH.get()), new BackupModule(SnapshotStoreImpl.class), new ExecutorModule(), new AbstractModule() { @Override protected void configure() { bind(IServerInfo.class).toInstance( IServerInfo.build( new ServerInfo() .setClusterName(CLUSTER_NAME.get()) .setStatsUrlPrefix(STATS_URL_PREFIX.get()))); } }); Lifecycle lifecycle = null; try { Injector injector = Guice.createInjector(module); lifecycle = injector.getInstance(Lifecycle.class); SchedulerMain scheduler = new SchedulerMain(); injector.injectMembers(scheduler); try { scheduler.run(); } finally { LOG.info("Application run() exited."); } } finally { if (lifecycle != null) { lifecycle.shutdown(); } } } public static void main(String... args) { applyStaticArgumentValues(args); List<Module> modules = ImmutableList.<Module>builder() .add( new CommandLineDriverSettingsModule(ALLOW_GPU_RESOURCE.get()), new LibMesosLoadingModule(DRIVER_IMPL.get()), new MesosLogStreamModule(FlaggedZooKeeperConfig.create()), new LogStorageModule(), new TierModule(), new WebhookModule() ) .build(); flagConfiguredMain(Modules.combine(modules)); } private static void exit(String message, Exception error) { LOG.error(message + "\n" + error, error); System.exit(1); } /** * Applies {@link CmdLine} arg values throughout the classpath. This must be invoked before * attempting to read any argument values in the system. * * @param args Command line arguments. */ @VisibleForTesting public static void applyStaticArgumentValues(String... args) { try { if (!new ArgScanner().parse(Arrays.asList(args))) { System.exit(0); } } catch (ArgScanException e) { exit("Failed to scan arguments", e); } catch (IllegalArgumentException e) { exit("Failed to apply arguments", e); } } }