/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.aurora.scheduler.mesos;
import java.util.concurrent.atomic.AtomicLong;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.eventbus.Subscribe;
import com.google.inject.Inject;
import org.apache.aurora.common.stats.StatsProvider;
import org.apache.aurora.common.stats.StatsProvider.RequestTimer;
import org.apache.aurora.common.util.Clock;
import org.apache.aurora.scheduler.events.PubsubEvent.EventSubscriber;
import org.apache.aurora.scheduler.events.PubsubEvent.TaskStatusReceived;
import org.apache.mesos.v1.Protos;
import org.apache.mesos.v1.Protos.TaskStatus.Reason;
import org.apache.mesos.v1.Protos.TaskStatus.Source;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.util.Objects.requireNonNull;
/**
 * A status event listener that exports statistics about the contents of status updates.
 *
 * <p>For every {@link TaskStatusReceived} event it may update up to three kinds of stats, all of
 * which are obtained from the injected {@link StatsProvider}:
 * <ul>
 *   <li>a counter per {@link Source} for updates whose state is {@code TASK_LOST},</li>
 *   <li>a counter per {@link Reason} for updates that carry a reason,</li>
 *   <li>a request timer per {@link Source} fed with the difference between the local clock and
 *       the timestamp carried by the update.</li>
 * </ul>
 *
 * <p>Each stat is requested from the {@link StatsProvider} the first time its key is looked up
 * and is then served from a cache.
 */
class TaskStatusStats implements EventSubscriber {

  private static final Logger LOG = LoggerFactory.getLogger(TaskStatusStats.class);

  /** Conversion factor from the clock's milliseconds to the microseconds used by events. */
  private static final long MICROS_PER_MILLI = 1000;

  private final Clock clock;

  /** Counters of {@code TASK_LOST} updates, keyed by the source that reported them. */
  private final LoadingCache<Source, AtomicLong> lostSourceCounters;

  /** Counters of updates carrying a reason, keyed by that reason. */
  private final LoadingCache<Reason, AtomicLong> reasonCounters;

  /** Timers recording the observed delivery delay, keyed by the source of the update. */
  private final LoadingCache<Source, RequestTimer> latencyTimers;

  /**
   * Creates the listener and the caches that hand out its stats.
   *
   * @param statsProvider Factory used to create the counters and timers on first use.
   * @param clock Clock used to obtain the current time when computing delivery delay.
   * @throws NullPointerException If either argument is {@code null}.
   */
  @Inject
  TaskStatusStats(final StatsProvider statsProvider, Clock clock) {
    requireNonNull(statsProvider);
    this.clock = requireNonNull(clock);

    lostSourceCounters = CacheBuilder.newBuilder()
        .build(new CacheLoader<Source, AtomicLong>() {
          @Override
          public AtomicLong load(Source source) {
            return statsProvider.makeCounter(lostCounterName(source));
          }
        });

    reasonCounters = CacheBuilder.newBuilder()
        .build(new CacheLoader<Reason, AtomicLong>() {
          @Override
          public AtomicLong load(Reason reason) {
            return statsProvider.makeCounter(reasonCounterName(reason));
          }
        });

    latencyTimers = CacheBuilder.newBuilder()
        .build(new CacheLoader<Source, RequestTimer>() {
          @Override
          public RequestTimer load(Source source) {
            return statsProvider.makeRequestTimer(latencyTimerName(source));
          }
        });
  }

  /**
   * Builds the name of the counter tracking lost tasks reported by {@code source}.
   *
   * @param source Source of the status update.
   * @return {@code "task_lost_"} followed by the string form of {@code source}.
   */
  @VisibleForTesting
  static String lostCounterName(Source source) {
    return "task_lost_" + source;
  }

  /**
   * Builds the name of the counter tracking status updates that carry {@code reason}.
   *
   * @param reason Reason attached to the status update.
   * @return {@code "task_exit_"} followed by the string form of {@code reason}.
   */
  @VisibleForTesting
  static String reasonCounterName(Reason reason) {
    return "task_exit_" + reason;
  }

  /**
   * Builds the name of the timer tracking delivery delay of updates from {@code source}.
   *
   * @param source Source of the status update.
   * @return {@code "task_delivery_delay_"} followed by the string form of {@code source}.
   */
  @VisibleForTesting
  static String latencyTimerName(Source source) {
    return "task_delivery_delay_" + source;
  }

  /**
   * Updates the exported stats with the contents of a received status update.
   *
   * <p>The three stats are updated independently of each other; an event lacking the fields
   * needed by one of them still contributes to the others.
   *
   * @param event The status update that was received.
   */
  @Subscribe
  public void accumulate(TaskStatusReceived event) {
    // Lost tasks are counted per reporting source.
    if (event.getState() == Protos.TaskState.TASK_LOST && event.getSource().isPresent()) {
      lostSourceCounters.getUnchecked(event.getSource().get()).incrementAndGet();
    }

    // Any update that carries a reason is counted under that reason, whatever its state.
    if (event.getReason().isPresent()) {
      reasonCounters.getUnchecked(event.getReason().get()).incrementAndGet();
    }

    // Delivery delay needs both a source (to pick the timer) and a timestamp (to measure from).
    if (event.getSource().isPresent() && event.getEpochTimestampMicros().isPresent()) {
      long nowMicros = clock.nowMillis() * MICROS_PER_MILLI;
      long sentMicros = event.getEpochTimestampMicros().get();

      // Avoid distorting stats by recording zero or negative values. This can result if delivery
      // is faster than the clock resolution (1 ms) or there is clock skew between the systems.
      // In reality, this value is likely to be inaccurate, especially at the resolution of millis.
      if (sentMicros < nowMicros) {
        latencyTimers.getUnchecked(event.getSource().get())
            .requestComplete(nowMicros - sentMicros);
      } else {
        // Reached only when the update's timestamp is at or ahead of the local clock.
        LOG.debug("Not recording stats for status update with timestamp >= now");
      }
    }
  }
}