/** * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.aurora.scheduler.mesos; import java.util.Collection; import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicLong; import javax.inject.Inject; import com.google.common.base.Optional; import com.google.common.collect.Collections2; import com.google.common.collect.ImmutableList; import com.google.common.util.concurrent.AbstractIdleService; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.SettableFuture; import org.apache.aurora.common.stats.Stats; import org.apache.aurora.scheduler.storage.Storage; import org.apache.mesos.Protos.Filters; import org.apache.mesos.Protos.Offer.Operation; import org.apache.mesos.Protos.OfferID; import org.apache.mesos.Protos.Status; import org.apache.mesos.Protos.TaskID; import org.apache.mesos.Protos.TaskStatus; import org.apache.mesos.Scheduler; import org.apache.mesos.SchedulerDriver; import org.apache.mesos.v1.Protos; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static java.util.Objects.requireNonNull; import static com.google.common.base.Preconditions.checkState; import static org.apache.mesos.Protos.Status.DRIVER_RUNNING; /** * Manages the lifecycle of the scheduler driver, and provides a more constrained API to use it. */ class SchedulerDriverService extends AbstractIdleService implements Driver { private static final Logger LOG = LoggerFactory.getLogger(SchedulerDriverService.class); private final AtomicLong killFailures = Stats.exportLong("scheduler_driver_kill_failures"); private final DriverFactory driverFactory; private final FrameworkInfoFactory infoFactory; private final Scheduler scheduler; private final Storage storage; private final DriverSettings driverSettings; private final SettableFuture<SchedulerDriver> driverFuture = SettableFuture.create(); @Inject SchedulerDriverService( Scheduler scheduler, Storage storage, DriverSettings driverSettings, DriverFactory driverFactory, FrameworkInfoFactory infoFactory) { this.scheduler = requireNonNull(scheduler); this.storage = requireNonNull(storage); this.driverSettings = requireNonNull(driverSettings); this.driverFactory = requireNonNull(driverFactory); this.infoFactory = requireNonNull(infoFactory); } @Override protected void startUp() { Optional<String> frameworkId = storage.read( storeProvider -> storeProvider.getSchedulerStore().fetchFrameworkId()); LOG.info("Connecting to mesos master: " + driverSettings.getMasterUri()); if (!driverSettings.getCredentials().isPresent()) { LOG.warn("Connecting to master without authentication!"); } Protos.FrameworkInfo.Builder frameworkBuilder = infoFactory.getFrameworkInfo().toBuilder(); if (frameworkId.isPresent()) { LOG.info("Found persisted framework ID: " + frameworkId); frameworkBuilder.setId(Protos.FrameworkID.newBuilder().setValue(frameworkId.get())); } else { LOG.warn("Did not find a persisted framework ID, connecting as a new framework."); } SchedulerDriver schedulerDriver = driverFactory.create( scheduler, driverSettings.getCredentials(), frameworkBuilder.build(), driverSettings.getMasterUri()); Status status = schedulerDriver.start(); LOG.info("Driver started with code " + status); driverFuture.set(schedulerDriver); } @Override public void blockUntilStopped() { Futures.getUnchecked(driverFuture).join(); } @Override protected void shutDown() throws ExecutionException, InterruptedException { // WARNING: stop() and stop(false) are dangerous, avoid at all costs. See the docs for // SchedulerDriver for more details. driverFuture.get().stop(true /* failover */); } @Override public void abort() { Futures.getUnchecked(driverFuture).abort(); } @Override public void acceptOffers( Protos.OfferID offerId, Collection<Protos.Offer.Operation> operations, Protos.Filters filter) { ensureRunning(); OfferID convertedOfferId = ProtosConversion.convert(offerId); Collection<Operation> convertedOperations = Collections2.transform(operations, ProtosConversion::convert); Filters convertedFilter = ProtosConversion.convert(filter); Futures.getUnchecked(driverFuture) .acceptOffers(ImmutableList.of(convertedOfferId), convertedOperations, convertedFilter); } @Override public void acceptInverseOffer(Protos.OfferID offerID, Protos.Filters filter) { throw new UnsupportedOperationException("SchedulerDriver does not support inverse offers"); } @Override public void declineOffer(Protos.OfferID offerId, Protos.Filters filter) { ensureRunning(); OfferID convertedOfferId = ProtosConversion.convert(offerId); Filters convertedFilter = ProtosConversion.convert(filter); Futures.getUnchecked(driverFuture).declineOffer(convertedOfferId, convertedFilter); } @Override public void killTask(String taskId) { ensureRunning(); Status status = Futures.getUnchecked(driverFuture).killTask( TaskID.newBuilder().setValue(taskId).build()); if (status != DRIVER_RUNNING) { LOG.error("Attempt to kill task {} failed with code {}", taskId, status); killFailures.incrementAndGet(); } } @Override public void acknowledgeStatusUpdate(Protos.TaskStatus status) { ensureRunning(); TaskStatus convertedStatus = ProtosConversion.convert(status); Futures.getUnchecked(driverFuture).acknowledgeStatusUpdate(convertedStatus); } @Override public void reconcileTasks(Collection<Protos.TaskStatus> statuses) { ensureRunning(); Collection<TaskStatus> convertedStatuses = Collections2.transform(statuses, ProtosConversion::convert); Futures.getUnchecked(driverFuture).reconcileTasks(convertedStatuses); } private void ensureRunning() { checkState(isRunning(), "Driver is not running."); } }