/*
* Copyright 2015-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package com.facebook.buck.slb;
import com.facebook.buck.event.BuckEventBus;
import com.facebook.buck.log.Logger;
import com.facebook.buck.model.Pair;
import com.facebook.buck.timing.Clock;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
/**
 * Tracks per-server health signals (ping latency, request errors and successes) for a fixed pool
 * of servers and picks the healthy server with the lowest reported latency.
 *
 * <p>The result of the best-server calculation is cached for {@link #CACHE_TIME_MS} milliseconds
 * and is recomputed eagerly whenever a request error or an unacceptably high ping latency is
 * reported.
 */
public class ServerHealthManager {
  private static final Logger LOG = Logger.get(ServerHealthManager.class);

  /** How long, in milliseconds, a computed best server stays cached after it was written. */
  public static final int CACHE_TIME_MS = 1000;

  /**
   * Orders (server, latency) pairs by ascending latency.
   *
   * <p>Uses {@link Long#compare(long, long)} instead of subtracting and narrowing to {@code int}:
   * the narrowing cast discards the high bits, so large differences could compare as equal or
   * with the wrong sign.
   */
  private static final Comparator<Pair<URI, Long>> LATENCY_COMPARATOR =
      (o1, o2) -> Long.compare(o1.getSecond(), o2.getSecond());

  // TODO(ruibm): It could be useful to preserve this state across runs in the local fs.
  private final ConcurrentHashMap<URI, ServerHealthState> servers;
  private final int maxAcceptableLatencyMillis;
  private final int latencyCheckTimeRangeMillis;
  private final float maxErrorPercentage;
  private final int errorCheckTimeRangeMillis;
  private final BuckEventBus eventBus;
  // Single-entry cache; it is always queried and refreshed with `this` as the key.
  private final LoadingCache<Object, Optional<URI>> getBestServerCache;
  private final Clock clock;

  /**
   * @param servers the pool of servers to track; reports for any other server are rejected
   * @param errorCheckTimeRangeMillis time range passed to the per-server error percentage query
   * @param maxErrorPercentage highest error percentage at which a server still counts as healthy
   * @param latencyCheckTimeRangeMillis time range passed to the per-server ping latency query
   * @param maxAcceptableLatencyMillis highest latency at which a server still counts as healthy
   * @param eventBus bus that receives a {@link ServerHealthManagerEvent} after each calculation
   * @param clock source of the current time for all reports and queries
   */
  public ServerHealthManager(
      ImmutableList<URI> servers,
      int errorCheckTimeRangeMillis,
      float maxErrorPercentage,
      int latencyCheckTimeRangeMillis,
      int maxAcceptableLatencyMillis,
      BuckEventBus eventBus,
      Clock clock) {
    // No-op reference to LOG; presumably keeps the otherwise unused logger from being flagged.
    LOG.getClass();
    this.errorCheckTimeRangeMillis = errorCheckTimeRangeMillis;
    this.maxErrorPercentage = maxErrorPercentage;
    this.latencyCheckTimeRangeMillis = latencyCheckTimeRangeMillis;
    this.maxAcceptableLatencyMillis = maxAcceptableLatencyMillis;
    this.clock = clock;
    this.servers = new ConcurrentHashMap<>();
    for (URI server : servers) {
      this.servers.put(server, new ServerHealthState(server));
    }
    this.eventBus = eventBus;
    this.getBestServerCache =
        CacheBuilder.newBuilder()
            .expireAfterWrite(CACHE_TIME_MS, TimeUnit.MILLISECONDS)
            .build(
                new CacheLoader<Object, Optional<URI>>() {
                  @Override
                  public Optional<URI> load(Object key) throws Exception {
                    return calculateBestServer();
                  }
                });
  }

  /**
   * Records a ping latency sample for {@code server}. The cached best server is refreshed only
   * when the sample exceeds the maximum acceptable latency.
   *
   * @throws IllegalStateException if {@code server} is not part of the pool
   */
  public void reportPingLatency(URI server, long latencyMillis) {
    getKnownServerState(server).reportPingLatency(clock.currentTimeMillis(), latencyMillis);
    if (latencyMillis > maxAcceptableLatencyMillis) {
      getBestServerCache.refresh(this);
    }
  }

  /**
   * Records a failed request against {@code server} and refreshes the cached best server.
   *
   * @throws IllegalStateException if {@code server} is not part of the pool
   */
  public void reportRequestError(URI server) {
    // Invalidate the best server on any error.
    getKnownServerState(server).reportRequestError(clock.currentTimeMillis());
    getBestServerCache.refresh(this);
  }

  /**
   * Records a successful request against {@code server}. The cached best server is left as is.
   *
   * @throws IllegalStateException if {@code server} is not part of the pool
   */
  public void reportRequestSuccess(URI server) {
    getKnownServerState(server).reportRequestSuccess(clock.currentTimeMillis());
  }

  /**
   * Returns the currently cached best server, computing it first if the cache entry is missing or
   * expired.
   *
   * @throws NoHealthyServersException if the calculation found no healthy server
   * @throws RuntimeException wrapping the {@link ExecutionException} if the cache load failed
   */
  public URI getBestServer() throws NoHealthyServersException {
    try {
      Optional<URI> server = getBestServerCache.get(this);
      if (server.isPresent()) {
        return server.get();
      }
      throw new NoHealthyServersException(
          String.format(
              "No servers available. Too many errors reported by all servers in the pool: [%s]",
              Joiner.on(", ")
                  .join(FluentIterable.from(servers.keySet()).transform(Object::toString))));
    } catch (ExecutionException e) {
      throw new RuntimeException(e);
    }
  }

  /** Looks up the health state for {@code server}, rejecting servers outside the pool. */
  private ServerHealthState getKnownServerState(URI server) {
    ServerHealthState state = servers.get(server);
    Preconditions.checkState(state != null, "Unknown server [%s]", server);
    return state;
  }

  /**
   * Evaluates every server against the error and latency thresholds and returns the healthy one
   * with the lowest latency, or empty when none qualifies. A {@link ServerHealthManagerEvent}
   * describing the outcome for every server is always posted to the event bus.
   */
  private Optional<URI> calculateBestServer() {
    ServerHealthManagerEventData.Builder data = ServerHealthManagerEventData.builder();
    Map<URI, PerServerData.Builder> allPerServerData = new HashMap<>();
    try {
      long epochMillis = clock.currentTimeMillis();
      List<Pair<URI, Long>> serverLatencies = new ArrayList<>();
      for (ServerHealthState state : servers.values()) {
        URI server = state.getServer();
        PerServerData.Builder perServerData = PerServerData.builder().setServer(server);
        allPerServerData.put(server, perServerData);
        float errorPercentage = state.getErrorPercentage(epochMillis, errorCheckTimeRangeMillis);
        long latencyMillis = state.getPingLatencyMillis(epochMillis, latencyCheckTimeRangeMillis);
        if (errorPercentage <= maxErrorPercentage && latencyMillis <= maxAcceptableLatencyMillis) {
          serverLatencies.add(new Pair<>(server, latencyMillis));
        } else {
          perServerData.setServerUnhealthy(true);
        }
      }
      if (serverLatencies.isEmpty()) {
        data.setNoHealthyServersAvailable(true);
        return Optional.empty();
      }
      // Only the minimum is needed; on ties the first candidate in iteration order wins.
      URI bestServer = Collections.min(serverLatencies, LATENCY_COMPARATOR).getFirst();
      Preconditions.checkNotNull(allPerServerData.get(bestServer)).setBestServer(true);
      return Optional.of(bestServer);
    } finally {
      // Runs on every exit path so the event is posted even when no server is healthy.
      for (PerServerData.Builder builder : allPerServerData.values()) {
        data.addPerServerData(builder.build());
      }
      eventBus.post(new ServerHealthManagerEvent(data.build()));
    }
  }

  /** Returns a multi-line dump with one line per tracked server. */
  @Override
  public String toString() {
    StringBuilder builder = new StringBuilder("ServerHealthManager{\n");
    for (ServerHealthState server : servers.values()) {
      builder.append(
          String.format(
              " %s\n", server.toString(clock.currentTimeMillis(), latencyCheckTimeRangeMillis)));
    }
    builder.append("}");
    return builder.toString();
  }
}