/** * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.aurora.scheduler.sla; import java.util.Collection; import java.util.List; import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import javax.inject.Inject; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Predicate; import com.google.common.base.Predicates; import com.google.common.base.Supplier; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; import com.google.common.collect.FluentIterable; import com.google.common.collect.ImmutableMultimap; import com.google.common.collect.Multimap; import com.google.common.collect.Range; import org.apache.aurora.common.inject.TimedInterceptor.Timed; import org.apache.aurora.common.stats.StatsProvider; import org.apache.aurora.common.util.Clock; import org.apache.aurora.scheduler.base.Query; import org.apache.aurora.scheduler.base.Tasks; import org.apache.aurora.scheduler.sla.SlaAlgorithm.AlgorithmType; import org.apache.aurora.scheduler.sla.SlaGroup.GroupType; import org.apache.aurora.scheduler.storage.Storage; import org.apache.aurora.scheduler.storage.entities.IScheduledTask; import org.apache.aurora.scheduler.storage.entities.ITaskConfig; import static java.util.Objects.requireNonNull; import static org.apache.aurora.scheduler.sla.SlaAlgorithm.AlgorithmType.AGGREGATE_PLATFORM_UPTIME; import static org.apache.aurora.scheduler.sla.SlaAlgorithm.AlgorithmType.JOB_UPTIME_50; import static org.apache.aurora.scheduler.sla.SlaAlgorithm.AlgorithmType.JOB_UPTIME_75; import static org.apache.aurora.scheduler.sla.SlaAlgorithm.AlgorithmType.JOB_UPTIME_90; import static org.apache.aurora.scheduler.sla.SlaAlgorithm.AlgorithmType.JOB_UPTIME_95; import static org.apache.aurora.scheduler.sla.SlaAlgorithm.AlgorithmType.JOB_UPTIME_99; import static org.apache.aurora.scheduler.sla.SlaAlgorithm.AlgorithmType.MEDIAN_TIME_TO_ASSIGNED; import static org.apache.aurora.scheduler.sla.SlaAlgorithm.AlgorithmType.MEDIAN_TIME_TO_RUNNING; import static org.apache.aurora.scheduler.sla.SlaAlgorithm.AlgorithmType.MEDIAN_TIME_TO_STARTING; import static org.apache.aurora.scheduler.sla.SlaGroup.GroupType.CLUSTER; import static org.apache.aurora.scheduler.sla.SlaGroup.GroupType.JOB; import static org.apache.aurora.scheduler.sla.SlaGroup.GroupType.RESOURCE_CPU; import static org.apache.aurora.scheduler.sla.SlaGroup.GroupType.RESOURCE_DISK; import static org.apache.aurora.scheduler.sla.SlaGroup.GroupType.RESOURCE_RAM; /** * Responsible for calculating and exporting SLA metrics. */ class MetricCalculator implements Runnable { @VisibleForTesting static final String NAME_QUALIFIER_PROD = ""; @VisibleForTesting static final String NAME_QUALIFIER_NON_PROD = "_nonprod"; /** * Pre-configured categories of metrics. */ enum MetricCategory { JOB_UPTIMES(ImmutableMultimap.<AlgorithmType, GroupType>builder() .put(JOB_UPTIME_50, JOB) .put(JOB_UPTIME_75, JOB) .put(JOB_UPTIME_90, JOB) .put(JOB_UPTIME_95, JOB) .put(JOB_UPTIME_99, JOB) .build()), PLATFORM_UPTIME(ImmutableMultimap.<AlgorithmType, GroupType>builder() .putAll(AGGREGATE_PLATFORM_UPTIME, JOB, CLUSTER) .build()), MEDIANS(ImmutableMultimap.<AlgorithmType, GroupType>builder() .putAll(MEDIAN_TIME_TO_ASSIGNED, JOB, CLUSTER, RESOURCE_CPU, RESOURCE_RAM, RESOURCE_DISK) .putAll(MEDIAN_TIME_TO_STARTING, JOB, CLUSTER, RESOURCE_CPU, RESOURCE_RAM, RESOURCE_DISK) .putAll(MEDIAN_TIME_TO_RUNNING, JOB, CLUSTER, RESOURCE_CPU, RESOURCE_RAM, RESOURCE_DISK) .build()); private final Multimap<AlgorithmType, GroupType> metrics; MetricCategory(Multimap<AlgorithmType, GroupType> metrics) { this.metrics = metrics; } Multimap<AlgorithmType, GroupType> getMetrics() { return metrics; } } private static final Predicate<ITaskConfig> IS_SERVICE = ITaskConfig::isIsService; private final LoadingCache<String, Counter> metricCache; private final Storage storage; private final Clock clock; private final MetricCalculatorSettings settings; static class MetricCalculatorSettings { private final long refreshRateMs; private final Set<MetricCategory> prodMetrics; private final Set<MetricCategory> nonProdMetrics; MetricCalculatorSettings( long refreshRateMs, Set<MetricCategory> prodMetrics, Set<MetricCategory> nonProdMetrics) { this.refreshRateMs = refreshRateMs; this.prodMetrics = requireNonNull(prodMetrics); this.nonProdMetrics = requireNonNull(nonProdMetrics); } long getRefreshRateMs() { return refreshRateMs; } } private static class Counter implements Supplier<Number> { private final AtomicReference<Number> value = new AtomicReference<>((Number) 0); private final StatsProvider statsProvider; private boolean exported; Counter(StatsProvider statsProvider) { this.statsProvider = statsProvider; } @Override public Number get() { return value.get(); } private void set(String name, Number newValue) { if (!exported) { statsProvider.makeGauge(name, this); exported = true; } value.set(newValue); } } @Inject MetricCalculator( Storage storage, Clock clock, MetricCalculatorSettings settings, final StatsProvider statsProvider) { this.storage = requireNonNull(storage); this.clock = requireNonNull(clock); this.settings = requireNonNull(settings); requireNonNull(statsProvider); this.metricCache = CacheBuilder.newBuilder().build( new CacheLoader<String, Counter>() { public Counter load(String key) { return new Counter(statsProvider.untracked()); } }); } @Timed("sla_stats_computation") @Override public void run() { FluentIterable<IScheduledTask> tasks = FluentIterable.from(Storage.Util.fetchTasks(storage, Query.unscoped())); List<IScheduledTask> prodTasks = tasks.filter(Predicates.compose( Predicates.and(ITaskConfig::isProduction, IS_SERVICE), Tasks::getConfig)).toList(); List<IScheduledTask> nonProdTasks = tasks.filter(Predicates.compose( Predicates.and(Predicates.not(ITaskConfig::isProduction), IS_SERVICE), Tasks::getConfig)).toList(); long nowMs = clock.nowMillis(); Range<Long> timeRange = Range.closedOpen(nowMs - settings.refreshRateMs, nowMs); runAlgorithms(prodTasks, settings.prodMetrics, timeRange, NAME_QUALIFIER_PROD); runAlgorithms(nonProdTasks, settings.nonProdMetrics, timeRange, NAME_QUALIFIER_NON_PROD); } private void runAlgorithms( List<IScheduledTask> tasks, Set<MetricCategory> categories, Range<Long> timeRange, String nameQualifier) { for (MetricCategory category : categories) { for (Entry<AlgorithmType, GroupType> slaMetric : category.getMetrics().entries()) { for (Entry<String, Collection<IScheduledTask>> namedGroup : slaMetric.getValue().getSlaGroup().createNamedGroups(tasks).asMap().entrySet()) { AlgorithmType algoType = slaMetric.getKey(); String metricName = namedGroup.getKey() + algoType.getAlgorithmName() + nameQualifier; metricCache.getUnchecked(metricName) .set(metricName, algoType.getAlgorithm().calculate(namedGroup.getValue(), timeRange)); } } } } }