/*
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license
* agreements. See the NOTICE file distributed with this work for additional information regarding
* copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License. You may obtain a
* copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.geode.cache.client.internal;
import org.apache.geode.cache.client.internal.PoolImpl.PoolTask;
import org.apache.geode.distributed.internal.DistributionConfig;
import org.apache.geode.distributed.internal.ServerLocation;
import org.apache.geode.internal.logging.LogService;
import org.apache.logging.log4j.Logger;
import java.util.*;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
/**
* This class is designed to prevent the client from spinning and reconnecting to the same failed
* server over and over. We've removed the old dead server monitor code because the locator is
* supposed to keep track of what servers are alive or dead. However, there is still the possibility
* that the locator may tell us a server is alive but we are unable to reach it.
*
* This class keeps track of the number of consecutive failures that happen on each server. If
* the number of failures reaches the limit, the server is added to a blacklist for a certain period
* of time. After the time has expired, the server comes off the blacklist, but the next failure will
* put the server back on the list for a longer period of time.
*
*
*/
public class ServerBlackList {

  private static final Logger logger = LogService.getLogger();

  /** Failure trackers keyed by server; every access synchronizes on the map itself. */
  private final Map<ServerLocation, FailureTracker> failureTrackerMap =
      new HashMap<ServerLocation, FailureTracker>();

  /** Servers that are currently blacklisted. */
  protected final Set<ServerLocation> blacklist = new CopyOnWriteArraySet<ServerLocation>();

  /** Read-only view over {@link #blacklist}, handed out by {@link #getBadServers()}. */
  private final Set<ServerLocation> unmodifiableBlacklist =
      Collections.unmodifiableSet(blacklist);

  /** Executor used to schedule blacklist expiry; assigned by {@link #start}. */
  protected ScheduledExecutorService background;

  /** Fans blacklist changes out to every registered listener. */
  protected final ListenerBroadcaster broadcaster = new ListenerBroadcaster();

  // not final for tests.
  /**
   * Number of consecutive failures at which a server is blacklisted. Read once from the
   * {@code ServerBlackList.THRESHOLD} system property (with the GemFire prefix); defaults to 3.
   */
  static int THRESHOLD = Integer
      .getInteger(DistributionConfig.GEMFIRE_PREFIX + "ServerBlackList.THRESHOLD", 3).intValue();

  /** Delay, in milliseconds, between blacklisting a server and taking it off the list again. */
  protected final long pingInterval;

  /**
   * @param pingInterval how long, in milliseconds, a server stays on the blacklist
   */
  public ServerBlackList(long pingInterval) {
    this.pingInterval = pingInterval;
  }

  /**
   * Supplies the executor on which expiry tasks are scheduled. {@link FailureTracker#addFailure()}
   * dereferences this executor when it blacklists a server, so this must be called first.
   *
   * @param background executor used to schedule removal of servers from the blacklist
   */
  public void start(ScheduledExecutorService background) {
    this.background = background;
  }

  /**
   * Returns the failure tracker for the given server, creating and registering one on first use.
   *
   * @param location the server whose tracker is wanted
   * @return the single tracker associated with {@code location}
   */
  FailureTracker getFailureTracker(ServerLocation location) {
    synchronized (failureTrackerMap) {
      FailureTracker failureTracker = failureTrackerMap.get(location);
      if (failureTracker == null) {
        failureTracker = new FailureTracker(location);
        failureTrackerMap.put(location, failureTracker);
      }
      return failureTracker;
    }
  }

  /**
   * @return an unmodifiable view of the servers that are currently blacklisted
   */
  public Set<ServerLocation> getBadServers() {
    return unmodifiableBlacklist;
  }

  /**
   * Registers a listener to be told when servers are added to or removed from the blacklist.
   *
   * @param blackListListener the listener to add
   */
  public void addListener(BlackListListener blackListListener) {
    broadcaster.listeners.add(blackListListener);
  }

  /**
   * Unregisters a listener previously passed to {@link #addListener}.
   *
   * @param blackListListener the listener to remove
   */
  public void removeListener(BlackListListener blackListListener) {
    broadcaster.listeners.remove(blackListListener);
  }

  /**
   * Counts consecutive failures for one server and blacklists it once {@link #THRESHOLD} is
   * reached.
   */
  public class FailureTracker {
    private final AtomicInteger consecutiveFailures = new AtomicInteger();
    private final ServerLocation location;

    /**
     * @param location the server this tracker counts failures for
     */
    public FailureTracker(ServerLocation location) {
      this.location = location;
    }

    /** Clears the consecutive-failure count. */
    public void reset() {
      consecutiveFailures.set(0);
    }

    /**
     * Records one failure. When the count reaches {@link #THRESHOLD} the server is added to the
     * blacklist, listeners are notified, and a task is scheduled to remove it again after
     * {@code pingInterval} milliseconds. Failures reported while the server is already
     * blacklisted are not counted.
     */
    public void addFailure() {
      if (blacklist.contains(location)) {
        // A second failure must have happened before we added
        // this server to the blacklist. Don't count that failure.
        return;
      }
      int failures = consecutiveFailures.incrementAndGet();
      if (failures < THRESHOLD) {
        return;
      }
      // add() reports whether this call actually inserted the location. Only the caller that
      // wins the insert notifies listeners and schedules the expiry, so concurrent failures
      // cannot produce duplicate notifications or duplicate expiry tasks.
      if (!blacklist.add(location)) {
        return;
      }
      if (logger.isDebugEnabled()) {
        logger.debug("Blacklisting server {} for {}ms because it had {} consecutive failures",
            location, pingInterval, failures);
      }
      try {
        broadcaster.serverAdded(location);
      } finally {
        // Schedule the expiry even if a listener threw; otherwise the server would never
        // come off the blacklist, because later failures return early above.
        scheduleExpiration();
      }
    }

    /** Schedules removal of this tracker's server from the blacklist after the ping interval. */
    private void scheduleExpiration() {
      try {
        background.schedule(new ExpireBlackListTask(location), pingInterval,
            TimeUnit.MILLISECONDS);
      } catch (RejectedExecutionException ignored) {
        // ignore, the timer has been cancelled, which means we're shutting down.
      }
    }
  }

  /** Scheduled task that takes a server off the blacklist and notifies listeners. */
  private class ExpireBlackListTask extends PoolTask {
    private final ServerLocation location;

    public ExpireBlackListTask(ServerLocation location) {
      this.location = location;
    }

    @Override
    public void run2() {
      if (logger.isDebugEnabled()) {
        logger.debug("{} is no longer blacklisted", location);
      }
      blacklist.remove(location);
      broadcaster.serverRemoved(location);
    }
  }

  /** Callback interface for changes to the blacklist. */
  public static interface BlackListListener {
    /**
     * Called after a server has been put on the blacklist.
     *
     * @param location the blacklisted server
     */
    public void serverAdded(ServerLocation location);

    /**
     * Called after a server's blacklist entry has expired.
     *
     * @param location the server taken off the blacklist
     */
    public void serverRemoved(ServerLocation location);
  }

  /** No-op {@link BlackListListener}; subclasses override only the callbacks they need. */
  public static class BlackListListenerAdapter implements BlackListListener {
    @Override
    public void serverAdded(ServerLocation location) {
      // do nothing
    }

    @Override
    public void serverRemoved(ServerLocation location) {
      // do nothing
    }
  }

  /** {@link BlackListListener} that forwards each event to every registered listener. */
  protected static class ListenerBroadcaster implements BlackListListener {
    protected Set<BlackListListener> listeners = new CopyOnWriteArraySet<BlackListListener>();

    @Override
    public void serverAdded(ServerLocation location) {
      for (BlackListListener listener : listeners) {
        listener.serverAdded(location);
      }
    }

    @Override
    public void serverRemoved(ServerLocation location) {
      for (BlackListListener listener : listeners) {
        listener.serverRemoved(location);
      }
    }
  }
}