package org.radargun.stages.cache.background;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.radargun.logging.Log;
import org.radargun.logging.LogFactory;
import org.radargun.utils.TimeService;
/**
* Manager class that holds counters of failures occurring during a test run.
*
* @author Matej Cimbora
* @author Radim Vansa
*/
public class FailureManager {
    private static final Log log = LogFactory.getLog(FailureManager.class);

    private final BackgroundOpsManager manager;

    // Failure counters. They are incremented through the report*() methods, which are not
    // synchronized, hence the atomic types.
    private final AtomicLong missingOperations = new AtomicLong();
    private final AtomicLong missingNotifications = new AtomicLong();
    private final AtomicLong staleReads = new AtomicLong();
    private final AtomicLong failedTransactionAttempts = new AtomicLong();
    private final AtomicLong delayedRemovesErrors = new AtomicLong();

    /**
     * @param manager manager used to look up configuration, lifecycle, stressor threads and the
     *                stressor record pool when {@link #getError(boolean)} is evaluated
     */
    public FailureManager(BackgroundOpsManager manager) {
        this.manager = manager;
    }

    /** @return current number of reported missing operations */
    public long getMissingOperations() {
        return missingOperations.get();
    }

    /** @return current number of reported missing notifications */
    public long getMissingNotifications() {
        return missingNotifications.get();
    }

    /** @return current number of reported stale reads */
    public long getStaleReads() {
        return staleReads.get();
    }

    /** @return current number of reported failed transaction attempts */
    public long getFailedTransactionAttempts() {
        return failedTransactionAttempts.get();
    }

    /** @return current number of reported delayed removes errors */
    public long getDelayedRemovesErrors() {
        return delayedRemovesErrors.get();
    }

    /** Increments the missing operations counter by one. */
    public void reportMissingOperation() {
        missingOperations.incrementAndGet();
    }

    /** Increments the missing notifications counter by one. */
    public void reportMissingNotification() {
        missingNotifications.incrementAndGet();
    }

    /** Increments the stale reads counter by one. */
    public void reportStaleRead() {
        staleReads.incrementAndGet();
    }

    /** Increments the failed transaction attempts counter by one. */
    public void reportFailedTransactionAttempt() {
        failedTransactionAttempts.incrementAndGet();
    }

    /** Increments the delayed removes errors counter by one. */
    public void reportDelayedRemoveError() {
        delayedRemovesErrors.incrementAndGet();
    }

    /**
     * Builds an error message describing the failures observed so far.
     * <p>
     * The evaluation proceeds in this order:
     * <ol>
     * <li>if the log logic configuration is not enabled, {@code null} is returned;</li>
     * <li>if any failure counter is positive, a message listing all counters is returned;</li>
     * <li>if {@code failuresOnly} is set, {@code null} is returned;</li>
     * <li>if the service is not running, {@code null} is returned;</li>
     * <li>otherwise stressor statuses are logged and, when a stressor record pool exists, each
     * available record is checked for a successful check within {@code noProgressTimeout}.
     * If any checked record is past the timeout, a dump of stressor and other threads is logged
     * and a "no progress" message is returned.</li>
     * </ol>
     *
     * @param failuresOnly when {@code true}, only the failure counters are inspected and the
     *                     checker progress verification is skipped
     * @return the error message, or {@code null} when no error was detected
     */
    public synchronized String getError(boolean failuresOnly) {
        if (!manager.getLogLogicConfiguration().isEnabled()) {
            return null;
        }
        // Read every counter exactly once: the report*() methods may run concurrently, and the
        // values printed in the message should be the same ones the condition was evaluated on.
        long missingOps = getMissingOperations();
        long missingNotifs = getMissingNotifications();
        long stale = getStaleReads();
        long failedTxAttempts = getFailedTransactionAttempts();
        long delayedRemoves = getDelayedRemovesErrors();
        if (missingOps > 0 || missingNotifs > 0 || stale > 0 || delayedRemoves > 0 || failedTxAttempts > 0) {
            return String.format("Background stressors report %d missing operations, %d missing notifications, %d stale reads, " +
                    "%d failed transaction attempts and %d delayed removes errors",
                missingOps, missingNotifs, stale, failedTxAttempts, delayedRemoves);
        }
        if (failuresOnly) {
            return null;
        }
        if (!manager.getLifecycle().isRunning()) {
            // As stressorRecordPool survives service restarts, we might get a false suspicion of
            // checkers showing no progress when the service is stopped for longer than
            // logLogicConfiguration.noProgressTimeout.
            log.debug("Service is not running, skipping verification of checker progress");
            return null;
        }
        Stressor[] stressorThreads = manager.getThreadManager().getStressorThreads();
        logStressorStatuses(stressorThreads);
        if (manager.getStressorRecordPool() == null || checkersMakeProgress()) {
            return null;
        }
        // No checking progress detected, print extended information about stressors/other running threads.
        log.error(describeThreads(stressorThreads));
        return "No progress in checkers!";
    }

    /** Logs the id and logic status of each stressor thread at debug level; a null array is ignored. */
    private static void logStressorStatuses(Stressor[] stressorThreads) {
        if (stressorThreads == null) {
            return;
        }
        for (Stressor stressor : stressorThreads) {
            log.debugf("Stressor: threadId=%d, status=%s", stressor.id, stressor.getLogic().getStatus());
        }
    }

    /**
     * Checks every available stressor record for a successful check within the configured timeout.
     * All records are visited (no early exit) so that each stalled record gets its own error log entry.
     *
     * @return {@code false} if at least one checked record exceeded the no-progress timeout
     */
    private boolean checkersMakeProgress() {
        boolean progress = true;
        long now = TimeService.currentTimeMillis();
        for (StressorRecord record : manager.getStressorRecordPool().getAvailableRecords()) {
            log.debugf("Record: status=%s.", record.getStatus());
            // Especially with elasticity tests a node can be dead for a long time period. Check for progress
            // may need to be skipped as stressors on this node can't perform any operations.
            if (manager.getLogLogicConfiguration().ignoreDeadCheckers
                && !manager.isSlaveAlive(record.getThreadId() / manager.getGeneralConfiguration().getNumThreads())) {
                log.tracef("Node where stressor for this record resides is dead, skipping check");
                continue;
            }
            long sinceLastCheck = now - record.getLastSuccessfulCheckTimestamp();
            if (sinceLastCheck > manager.getLogLogicConfiguration().noProgressTimeout) {
                log.errorf("No progress in this record for %d ms", sinceLastCheck);
                progress = false;
            }
        }
        return progress;
    }

    /** Renders status and stack traces of the stressors, followed by stack traces of all other live threads. */
    private static String describeThreads(Stressor[] stressorThreads) {
        StringBuilder sb = new StringBuilder(1000);
        if (stressorThreads != null) {
            sb.append("Current stressors info:\n");
            for (Stressor stressor : stressorThreads) {
                sb.append(stressor.getStatus()).append(", stacktrace:\n");
                appendStackTrace(sb, stressor.getStackTrace());
            }
        } else {
            sb.append("No stressors are running, ");
        }
        sb.append("Other threads:\n");
        for (Map.Entry<Thread, StackTraceElement[]> entry : Thread.getAllStackTraces().entrySet()) {
            Thread thread = entry.getKey();
            if (thread.getName().startsWith("StressorThread")) {
                continue;
            }
            sb.append(thread.getName()).append(" (").append(thread.getState()).append("):\n");
            // Use the trace captured by getAllStackTraces() instead of capturing a second one per thread.
            appendStackTrace(sb, entry.getValue());
        }
        return sb.toString();
    }

    /** Appends one line per stack frame to the given builder. */
    private static void appendStackTrace(StringBuilder sb, StackTraceElement[] stackTrace) {
        for (StackTraceElement ste : stackTrace) {
            sb.append(ste).append("\n");
        }
    }
}