/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.aurora.scheduler.offers;
import java.time.Instant;
import java.util.Comparator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.atomic.AtomicLong;
import javax.inject.Inject;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.eventbus.Subscribe;
import org.apache.aurora.common.inject.TimedInterceptor.Timed;
import org.apache.aurora.common.quantity.Time;
import org.apache.aurora.common.stats.StatsProvider;
import org.apache.aurora.scheduler.HostOffer;
import org.apache.aurora.scheduler.async.AsyncModule.AsyncExecutor;
import org.apache.aurora.scheduler.async.DelayExecutor;
import org.apache.aurora.scheduler.base.TaskGroupKey;
import org.apache.aurora.scheduler.events.PubsubEvent.DriverDisconnected;
import org.apache.aurora.scheduler.events.PubsubEvent.EventSubscriber;
import org.apache.aurora.scheduler.mesos.Driver;
import org.apache.aurora.scheduler.storage.entities.IHostAttributes;
import org.apache.mesos.v1.Protos;
import org.apache.mesos.v1.Protos.AgentID;
import org.apache.mesos.v1.Protos.Offer.Operation;
import org.apache.mesos.v1.Protos.OfferID;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.util.Objects.requireNonNull;
import static org.apache.aurora.gen.MaintenanceMode.DRAINED;
import static org.apache.aurora.gen.MaintenanceMode.DRAINING;
import static org.apache.aurora.gen.MaintenanceMode.NONE;
import static org.apache.aurora.gen.MaintenanceMode.SCHEDULED;
import static org.apache.aurora.scheduler.events.PubsubEvent.HostAttributesChanged;
/**
* Tracks the Offers currently known by the scheduler.
*/
public interface OfferManager extends EventSubscriber {
/**
* Notifies the scheduler of a new resource offer.
*
* @param offer Newly-available resource offer.
*/
void addOffer(HostOffer offer);
/**
* Invalidates an offer. This indicates that the scheduler should not attempt to match any
* tasks against the offer.
*
* @param offerId Canceled offer.
*/
void cancelOffer(OfferID offerId);
/**
* Exclude an offer that results in a static mismatch from further attempts to match against all
* tasks from the same group.
*
* @param offerId Offer ID to exclude for the given {@code groupKey}.
* @param groupKey Task group key to exclude.
*/
void banOffer(OfferID offerId, TaskGroupKey groupKey);
/**
* Launches the task matched against the offer.
*
* @param offerId Matched offer ID.
* @param task Matched task info.
* @throws LaunchException If there was an error launching the task.
*/
void launchTask(OfferID offerId, Protos.TaskInfo task) throws LaunchException;
/**
* Notifies the offer queue that a host's attributes have changed.
*
* @param change State change notification.
*/
void hostAttributesChanged(HostAttributesChanged change);
/**
* Gets the offers that the scheduler is holding.
*
* @return A snapshot of the offers that the scheduler is currently holding.
*/
Iterable<HostOffer> getOffers();
/**
* Gets all offers that are not statically banned for the given {@code groupKey}.
*
* @param groupKey Task group key to check offers for.
* @return A snapshot of all offers eligible for the given {@code groupKey}.
*/
Iterable<HostOffer> getOffers(TaskGroupKey groupKey);
/**
* Gets an offer for the given slave ID.
*
* @param slaveId Slave ID to get offer for.
* @return An offer for the slave ID.
*/
Optional<HostOffer> getOffer(AgentID slaveId);
/**
* Thrown when there was an unexpected failure trying to launch a task.
*/
class LaunchException extends Exception {
@VisibleForTesting
public LaunchException(String msg) {
super(msg);
}
LaunchException(String msg, Throwable cause) {
super(msg, cause);
}
}
class OfferManagerImpl implements OfferManager {
@VisibleForTesting
static final Logger LOG = LoggerFactory.getLogger(OfferManagerImpl.class);
@VisibleForTesting
static final String OFFER_ACCEPT_RACES = "offer_accept_races";
@VisibleForTesting
static final String OUTSTANDING_OFFERS = "outstanding_offers";
@VisibleForTesting
static final String STATICALLY_BANNED_OFFERS = "statically_banned_offers_size";
private final HostOffers hostOffers;
private final AtomicLong offerRaces;
private final Driver driver;
private final OfferSettings offerSettings;
private final DelayExecutor executor;
@Inject
@VisibleForTesting
public OfferManagerImpl(
Driver driver,
OfferSettings offerSettings,
StatsProvider statsProvider,
@AsyncExecutor DelayExecutor executor) {
this.driver = requireNonNull(driver);
this.offerSettings = requireNonNull(offerSettings);
this.executor = requireNonNull(executor);
this.hostOffers = new HostOffers(statsProvider);
this.offerRaces = statsProvider.makeCounter(OFFER_ACCEPT_RACES);
}
@Override
public void addOffer(final HostOffer offer) {
// We run a slight risk of a race here, which is acceptable. The worst case is that we
// temporarily hold two offers for the same host, which should be corrected when we return
// them after the return delay.
// There's also a chance that we return an offer for compaction ~simultaneously with the
// same-host offer being canceled/returned. This is also fine.
Optional<HostOffer> sameSlave = hostOffers.get(offer.getOffer().getAgentId());
if (sameSlave.isPresent()) {
// If there are existing offers for the slave, decline all of them so the master can
// compact all of those offers into a single offer and send them back.
LOG.info("Returning offers for " + offer.getOffer().getAgentId().getValue()
+ " for compaction.");
decline(offer.getOffer().getId());
removeAndDecline(sameSlave.get().getOffer().getId());
} else {
hostOffers.add(offer);
executor.execute(
() -> removeAndDecline(offer.getOffer().getId()),
offerSettings.getOfferReturnDelay());
}
}
void removeAndDecline(OfferID id) {
if (removeFromHostOffers(id)) {
decline(id);
}
}
void decline(OfferID id) {
LOG.debug("Declining offer {}", id);
driver.declineOffer(id, getOfferFilter());
}
private Protos.Filters getOfferFilter() {
return Protos.Filters.newBuilder()
.setRefuseSeconds(offerSettings.getOfferFilterDuration().as(Time.SECONDS))
.build();
}
@Override
public void cancelOffer(final OfferID offerId) {
removeFromHostOffers(offerId);
}
private boolean removeFromHostOffers(final OfferID offerId) {
requireNonNull(offerId);
// The small risk of inconsistency is acceptable here - if we have an accept/remove race
// on an offer, the master will mark the task as LOST and it will be retried.
return hostOffers.remove(offerId);
}
@Override
public Iterable<HostOffer> getOffers() {
return hostOffers.getOffers();
}
@Override
public Iterable<HostOffer> getOffers(TaskGroupKey groupKey) {
return hostOffers.getWeaklyConsistentOffers(groupKey);
}
@Override
public Optional<HostOffer> getOffer(AgentID slaveId) {
return hostOffers.get(slaveId);
}
/**
* Updates the preference of a host's offers.
*
* @param change Host change notification.
*/
@Subscribe
public void hostAttributesChanged(HostAttributesChanged change) {
hostOffers.updateHostAttributes(change.getAttributes());
}
/**
* Notifies the queue that the driver is disconnected, and all the stored offers are now
* invalid.
* <p>
* The queue takes this as a signal to flush its queue.
*
* @param event Disconnected event.
*/
@Subscribe
public void driverDisconnected(DriverDisconnected event) {
LOG.info("Clearing stale offers since the driver is disconnected.");
hostOffers.clear();
}
/**
* A container for the data structures used by this class, to make it easier to reason about
* the different indices used and their consistency.
*/
private static class HostOffers {
private static final Ordering<HostOffer> AURORA_MAINTENANCE_COMPARATOR =
Ordering.explicit(NONE, SCHEDULED, DRAINING, DRAINED)
.onResultOf(offer -> offer.getAttributes().getMode());
// We should not prefer offers from agents that are scheduled to become unavailable.
// We should also sort the unavailability start to prefer agents that are starting
// maintenance later.
private static final Ordering<HostOffer> MESOS_MAINTENANCE_COMPARATOR =
Ordering
.natural()
.reverse()
.onResultOf(o -> o.getUnavailabilityStart().or(Instant.MAX));
private static final Comparator<HostOffer> PREFERENCE_COMPARATOR =
// Currently, the only preference is based on host maintenance status.
AURORA_MAINTENANCE_COMPARATOR
.compound(MESOS_MAINTENANCE_COMPARATOR)
.compound(Ordering.arbitrary());
private final Set<HostOffer> offers = new ConcurrentSkipListSet<>(PREFERENCE_COMPARATOR);
private final Map<OfferID, HostOffer> offersById = Maps.newHashMap();
private final Map<AgentID, HostOffer> offersBySlave = Maps.newHashMap();
private final Map<String, HostOffer> offersByHost = Maps.newHashMap();
// TODO(maxim): Expose via a debug endpoint. AURORA-1136.
// Keep track of offer->groupKey mappings that will never be matched to avoid redundant
// scheduling attempts. See VetoGroup for more details on static ban.
private final Multimap<OfferID, TaskGroupKey> staticallyBannedOffers = HashMultimap.create();
HostOffers(StatsProvider statsProvider) {
// Potential gotcha - since this is a ConcurrentSkipListSet, size() is more expensive.
// Could track this separately if it turns out to pose problems.
statsProvider.exportSize(OUTSTANDING_OFFERS, offers);
statsProvider.makeGauge(STATICALLY_BANNED_OFFERS, () -> staticallyBannedOffers.size());
}
synchronized Optional<HostOffer> get(AgentID slaveId) {
return Optional.fromNullable(offersBySlave.get(slaveId));
}
synchronized void add(HostOffer offer) {
offers.add(offer);
offersById.put(offer.getOffer().getId(), offer);
offersBySlave.put(offer.getOffer().getAgentId(), offer);
offersByHost.put(offer.getOffer().getHostname(), offer);
}
synchronized boolean remove(OfferID id) {
HostOffer removed = offersById.remove(id);
if (removed != null) {
offers.remove(removed);
offersBySlave.remove(removed.getOffer().getAgentId());
offersByHost.remove(removed.getOffer().getHostname());
staticallyBannedOffers.removeAll(id);
}
return removed != null;
}
synchronized void updateHostAttributes(IHostAttributes attributes) {
HostOffer offer = offersByHost.remove(attributes.getHost());
if (offer != null) {
// Remove and re-add a host's offer to re-sort based on its new hostStatus
remove(offer.getOffer().getId());
add(new HostOffer(offer.getOffer(), attributes));
}
}
synchronized Iterable<HostOffer> getOffers() {
return ImmutableSet.copyOf(offers);
}
synchronized Iterable<HostOffer> getWeaklyConsistentOffers(TaskGroupKey groupKey) {
return Iterables.unmodifiableIterable(FluentIterable.from(offers).filter(
e -> !staticallyBannedOffers.containsEntry(e.getOffer().getId(), groupKey)));
}
synchronized void addStaticGroupBan(OfferID offerId, TaskGroupKey groupKey) {
if (offersById.containsKey(offerId)) {
staticallyBannedOffers.put(offerId, groupKey);
}
}
synchronized void clear() {
offers.clear();
offersById.clear();
offersBySlave.clear();
offersByHost.clear();
staticallyBannedOffers.clear();
}
}
@Override
public void banOffer(OfferID offerId, TaskGroupKey groupKey) {
hostOffers.addStaticGroupBan(offerId, groupKey);
}
@Timed("offer_manager_launch_task")
@Override
public void launchTask(OfferID offerId, Protos.TaskInfo task) throws LaunchException {
// Guard against an offer being removed after we grabbed it from the iterator.
// If that happens, the offer will not exist in hostOffers, and we can immediately
// send it back to LOST for quick reschedule.
// Removing while iterating counts on the use of a weakly-consistent iterator being used,
// which is a feature of ConcurrentSkipListSet.
if (hostOffers.remove(offerId)) {
try {
Operation launch = Operation.newBuilder()
.setType(Operation.Type.LAUNCH)
.setLaunch(Operation.Launch.newBuilder().addTaskInfos(task))
.build();
driver.acceptOffers(offerId, ImmutableList.of(launch), getOfferFilter());
} catch (IllegalStateException e) {
// TODO(William Farner): Catch only the checked exception produced by Driver
// once it changes from throwing IllegalStateException when the driver is not yet
// registered.
throw new LaunchException("Failed to launch task.", e);
}
} else {
offerRaces.incrementAndGet();
throw new LaunchException("Offer no longer exists in offer queue, likely data race.");
}
}
}
}