/** * Copyright 2016 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ package com.github.ambry.clustermap; import com.github.ambry.utils.Time; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * FixedBackoffResourceStatePolicy marks a resource as unavailable for retryBackoff milliseconds if the number of * consecutive errors the resource encountered is greater than failureCountThreshold. */ class FixedBackoffResourceStatePolicy implements ResourceStatePolicy { private final Object resource; private final AtomicBoolean hardDown; private final AtomicInteger failureCount; private final int failureCountThreshold; private final long retryBackoffMs; private final AtomicLong downUntil; private final Time time; private final Logger logger = LoggerFactory.getLogger(getClass()); FixedBackoffResourceStatePolicy(Object resource, boolean hardDown, int failureCountThreshold, long retryBackoffMs, Time time) { this.resource = resource; this.hardDown = new AtomicBoolean(hardDown); this.failureCountThreshold = failureCountThreshold; this.retryBackoffMs = retryBackoffMs; this.downUntil = new AtomicLong(0); this.failureCount = new AtomicInteger(0); this.time = time; } /** * On an error, if the failureCount is greater than the threshold, mark the node as down. */ @Override public void onError() { int count = failureCount.incrementAndGet(); if (count >= failureCountThreshold) { if (count == failureCountThreshold) { logger.info("Resource {} has gone down", resource); } logger.trace("Resource {} remains in down state at time {}; adding downtime of {} ms", resource, time.milliseconds(), retryBackoffMs); downUntil.set(time.milliseconds() + retryBackoffMs); } } /** * Called when it is known externally that the resource has gone down. An immediate call to {@link #isDown()} will * return true. */ @Override public void onHardDown() { logger.trace("Marking resource {} as Hard down", resource); hardDown.set(true); } /** * Called when it is known externally that the resource is up. An immediate call to {@link #isDown()} will return * false. */ @Override public void onHardUp() { logger.trace("Marking resource {} as Hard up", resource); hardDown.set(false); onSuccess(); } /** * A single response resets the count. */ @Override public void onSuccess() { if (failureCount.getAndSet(0) >= failureCountThreshold) { logger.info("Resource {} is back up", resource); } } /** * If the number of failures are above the threshold, the resource will be counted as down unless downUntil is in * the past. * Note how failureCount is not reset to 0 here. This is so that the node is marked as down if the first request after * marking a node back up, also times out. We only reset failureCount on an actual response, so down nodes get a * 'chance' to prove they are back up every retryBackoffMs - they do not get 'fully up' status until they are actually * responsive. */ @Override public boolean isDown() { boolean down = false; if (hardDown.get()) { down = true; } else if (failureCount.get() >= failureCountThreshold) { if (time.milliseconds() < downUntil.get()) { down = true; } } if (down) { logger.trace( "Resource {} is down; failureCount: {}; failureCountThreshold: {}; remaining time: {}; hard down: {}", resource, failureCount.get(), failureCountThreshold, downUntil.get() - time.milliseconds(), hardDown); } else { logger.trace("Resource {} is not down; failureCount: {}; failureCountThreshold: {}", resource, failureCount.get(), failureCountThreshold); } return down; } @Override public boolean isHardDown() { return hardDown.get(); } }