package org.radargun.stages.cache.background;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReferenceArray;
import org.radargun.logging.Log;
import org.radargun.logging.LogFactory;
import org.radargun.utils.TimeService;
/**
* Component responsible for starting/stopping background stressors and checkers.
*
* @author Matej Cimbora
* @author Radim Vansa
*/
public class ThreadManager {
private static final Log log = LogFactory.getLog(ThreadManager.class);
private BackgroundOpsManager manager;
// Stressors and checkers
private volatile Stressor[] stressorThreads;
private LogChecker[] checkerThreads;
private boolean stressorsPaused;
private boolean checkersPaused;
// Keep alive runner
private ScheduledExecutorService keepAliveExecutor = Executors.newScheduledThreadPool(1);
private ScheduledFuture keepAliveTask;
// Configuration fields
private GeneralConfiguration generalConfiguration;
private BackgroundStressorLogicConfiguration backgroundStressorLogicConfiguration;
private LogLogicConfiguration logLogicConfiguration;
public ThreadManager(BackgroundOpsManager manager) {
this.manager = manager;
}
public void initConfiguration() {
this.generalConfiguration = manager.getGeneralConfiguration();
this.backgroundStressorLogicConfiguration = manager.getBackgroundStressorLogicConfiguration();
this.logLogicConfiguration = manager.getLogLogicConfiguration();
}
public synchronized void startBackgroundThreads() {
if (backgroundStressorLogicConfiguration.isNoLoading()) {
manager.setLoaded(true);
}
if (backgroundStressorLogicConfiguration.loadDataOnSlaves != null
&& !backgroundStressorLogicConfiguration.loadDataOnSlaves.isEmpty()
&& !backgroundStressorLogicConfiguration.loadDataOnSlaves.contains(manager.getSlaveState().getSlaveIndex())) {
log.info("This slave is not loading any data");
return;
}
if (stressorThreads != null) {
log.warn("Can't start stressors, they're already running");
return;
}
if (manager.getLifecycle() != null && !manager.getLifecycle().isRunning()) {
log.warn("Can't start stressors, service is not running");
return;
}
startStressorThreads();
if (logLogicConfiguration.enabled) {
startCheckerThreads();
if (logLogicConfiguration.ignoreDeadCheckers) {
keepAliveTask = keepAliveExecutor.scheduleAtFixedRate(new KeepAliveTask(), 0, 1000, TimeUnit.MILLISECONDS);
}
}
if (backgroundStressorLogicConfiguration.waitUntilLoaded) {
log.info("Waiting until all stressor threads load data");
try {
waitUntilLoaded();
} catch (InterruptedException e) {
log.error("Waiting for loading interrupted", e);
}
}
manager.setLoaded(true);
}
private synchronized void startStressorThreads() {
if (stressorsPaused) {
log.info("Not starting stressors, paused");
return;
}
stressorThreads = new Stressor[generalConfiguration.numThreads];
if (generalConfiguration.numThreads <= 0) {
log.warn("Stressor thread number set to 0!");
return;
}
for (int i = 0; i < stressorThreads.length; i++) {
stressorThreads[i] = new Stressor(manager, manager.createLogic(i), i);
stressorThreads[i].start();
}
}
private synchronized void startCheckerThreads() {
if (checkersPaused) {
log.info("Checkers are paused, not starting");
return;
}
if (logLogicConfiguration.checkingThreads <= 0) {
log.error("LogValue checker set to 0!");
} else if (checkerThreads != null) {
throw new IllegalStateException("Log checkers are started");
} else {
checkerThreads = new LogChecker[logLogicConfiguration.checkingThreads];
for (int i = 0; i < logLogicConfiguration.checkingThreads; ++i) {
if (generalConfiguration.sharedKeys) {
checkerThreads[i] = new SharedLogChecker(i, manager);
} else {
checkerThreads[i] = new PrivateLogChecker(i, manager);
}
checkerThreads[i].start();
}
}
}
/**
* Stops the stressors, call this before stopping CacheWrapper.
*/
public synchronized void stopBackgroundThreads() {
stopBackgroundThreads(true, true, true);
}
private synchronized void stopBackgroundThreads(boolean stressors, boolean checkers, boolean keepAlive) {
// interrupt all threads
log.debug("Stopping stressors");
if (stressors && stressorThreads != null) {
for (int i = 0; i < stressorThreads.length; i++) {
stressorThreads[i].requestTerminate();
}
}
if (checkers && checkerThreads != null) {
for (int i = 0; i < checkerThreads.length; ++i) {
checkerThreads[i].requestTerminate();
}
}
if (keepAlive && keepAliveTask != null) {
keepAliveTask.cancel(true);
keepAliveTask = null;
}
// give the threads a second to terminate
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
log.error("Thread has been interrupted", e);
Thread.currentThread().interrupt();
}
log.debug("Interrupting stressors");
if (stressors && stressorThreads != null) {
for (int i = 0; i < stressorThreads.length; i++) {
stressorThreads[i].interrupt();
}
}
if (checkers && checkerThreads != null) {
for (int i = 0; i < checkerThreads.length; ++i) {
checkerThreads[i].interrupt();
}
}
log.debug("Waiting until all threads join");
// then wait for them to finish
try {
if (stressors && stressorThreads != null) {
for (int i = 0; i < stressorThreads.length; i++) {
stressorThreads[i].join();
}
}
if (checkers && checkerThreads != null) {
for (int i = 0; i < checkerThreads.length; ++i) {
checkerThreads[i].join();
}
}
log.debug("All threads have joined");
} catch (InterruptedException e1) {
log.error("interrupted while waiting for sizeThread and stressorThreads to stop");
}
if (stressors) stressorThreads = null;
if (checkers) checkerThreads = null;
}
public synchronized void waitUntilLoaded() throws InterruptedException {
if (logLogicConfiguration.isEnabled()) {
log.warn("Not waiting as log logic does not preload data");
return;
}
if (stressorThreads == null) {
log.info("Not loading, no stressors alive");
return;
}
boolean loaded = false;
while (!loaded) {
loaded = true;
for (Stressor st : stressorThreads) {
if ((st.getLogic() instanceof BackgroundStressorLogic)) {
boolean isLoaded = ((BackgroundStressorLogic) st.getLogic()).isLoaded();
loaded = loaded && isLoaded;
} else {
log.warnf("Thread %s has logic %s", st.getName(), st.getLogic());
}
}
if (!loaded) {
Thread.sleep(100);
}
}
}
public String waitUntilChecked() {
if (manager.getStressorRecordPool() == null || checkerThreads == null) {
log.warn("No log checker pool or active checkers");
return null;
}
Stressor[] stressors = stressorThreads;
if (stressors != null) {
stopBackgroundThreads(true, false, false);
}
String error = waitUntilChecked(logLogicConfiguration.noProgressTimeout);
if (error != null) {
return error;
}
stopBackgroundThreads(false, true, false);
stressorsPaused = true;
checkersPaused = true;
return null;
}
public String waitUntilChecked(long timeout) {
AtomicReferenceArray<StressorRecord> allRecords = manager.getStressorRecordPool().getAllRecords();
int totalThreads = manager.getStressorRecordPool().getTotalThreads();
for (int i = 0; i < totalThreads; ++i) {
StressorRecord record = allRecords.get(i);
if (record == null) continue;
try {
// as the pool survives service restarts, we have to always grab actual cache
LogChecker.LastOperation lastOperation = (LogChecker.LastOperation) manager.getBasicCache().get(LogChecker.lastOperationKey(record.getThreadId()));
if (lastOperation == null) {
log.tracef("Thread %d has no recorded operation", record.getThreadId());
} else {
record.addConfirmation(lastOperation.getOperationId(), lastOperation.getTimestamp());
}
} catch (Exception e) {
log.errorf(e, "Failed to read last operation key for thread %d", record.getThreadId(), e);
}
}
for (; ; ) {
boolean allChecked = true;
long now = TimeService.currentTimeMillis();
for (int i = 0; i < totalThreads; ++i) {
StressorRecord record = allRecords.get(i);
if (record == null) continue;
long confirmationTimestamp = record.getCurrentConfirmationTimestamp();
if (confirmationTimestamp > 0) {
if (log.isTraceEnabled()) {
log.trace(record.getStatus());
}
allChecked = false;
if (record.getLastSuccessfulCheckTimestamp() + timeout < now) {
String error = String.format("Waiting for checker for record %s timed out after %d ms", record.getStatus(), now - record.getLastSuccessfulCheckTimestamp());
log.error(error);
return error;
}
break;
}
}
if (allChecked) {
StringBuilder sb = new StringBuilder("All checks OK: ");
for (int i = 0; i < totalThreads; ++i) {
StressorRecord record = allRecords.get(i);
if (record == null) continue;
sb.append(record.getThreadId()).append("# ")
.append(record.getOperationId()).append(" (")
.append(record.getLastConfirmedOperationId()).append("), ");
}
log.debug(sb.toString());
return null;
}
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
log.error("Interrupted waiting for checkers", e);
return e.toString();
}
}
}
public boolean waitForProgress() {
Stressor[] stressors = stressorThreads;
if (stressors == null) {
log.error("Stressors are not running!");
return false;
}
Map<Stressor, Long> confirmed = new HashMap<>(stressors.length);
for (Stressor stressor : stressors) {
Logic logic = stressor.getLogic();
if (logic instanceof AbstractLogLogic) {
long operationId = ((AbstractLogLogic) logic).getLastConfirmedOperation();
confirmed.put(stressor, operationId);
} else {
log.warnf("Cannot wait for stressor %d as it does not implement LogLogic", stressor.id);
}
}
long deadline = TimeService.currentTimeMillis() + logLogicConfiguration.getNoProgressTimeout();
while (!confirmed.isEmpty()) {
for (Iterator<Map.Entry<Stressor, Long>> iterator = confirmed.entrySet().iterator(); iterator.hasNext(); ) {
Map.Entry<Stressor, Long> entry = iterator.next();
AbstractLogLogic logic = (AbstractLogLogic) entry.getKey().getLogic();
long operationId = logic.getLastConfirmedOperation();
if (operationId != entry.getValue()) {
log.tracef("Operation change detected %d -> %d for stressor %s", operationId, entry.getValue(), entry.getKey());
iterator.remove();
}
}
if (TimeService.currentTimeMillis() >= deadline) {
log.error("No progress in stressors within timeout");
return false;
}
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
log.error("Interrupted when waiting for progress", e);
Thread.currentThread().interrupt();
return false;
}
}
return true;
}
public void resumeAfterChecked() {
stressorsPaused = false;
checkersPaused = false;
if (stressorThreads == null) {
startStressorThreads();
} else {
log.error("Stressors already started");
}
startCheckerThreads();
}
public Stressor[] getStressorThreads() {
return stressorThreads;
}
private class KeepAliveTask implements Runnable {
@Override
public void run() {
try {
manager.getBasicCache().put("__keepAlive_" + manager.getSlaveState().getIndexInGroup(), TimeService.currentTimeMillis());
} catch (Exception e) {
log.error("Failed to place keep alive timestamp", e);
}
}
}
}