/**
* Copyright (C) 2012 - present by OpenGamma Inc. and the OpenGamma group of companies
*
* Please see distribution for license.
*/
package com.opengamma.engine.calcnode;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.opengamma.util.ArgumentChecker;
/**
* Watchdog for dealing with job items that run for too long. Detected at this level, recovery actions are limited.
*/
public class MaximumJobItemExecutionWatchdog {

  private static final Logger s_logger = LoggerFactory.getLogger(MaximumJobItemExecutionWatchdog.class);

  /**
   * Callback for the action to take when the watchdog is triggered.
   */
  public interface Action {

    /**
     * The time limit for job item execution has been exceeded.
     *
     * @param jobItem the job item involved
     * @param thread the thread that is running the job item
     */
    void jobItemExecutionLimitExceeded(CalculationJobItem jobItem, Thread thread);

  }

  /**
   * State held for each watched thread: the job item it is currently executing (or null if idle), the {@link System#nanoTime} at which that state was last set, and a count of how many times the
   * watchdog has found the current job item over the limit.
   * <p>
   * Instances are written by the worker thread and read by the thread running the periodic check, so every accessor is synchronized.
   */
  private static final class ThreadInfo {

    private long _startTime;
    private CalculationJobItem _jobItem;
    private int _fault;

    public ThreadInfo(final CalculationJobItem jobItem) {
      setJobItem(jobItem);
    }

    /**
     * Returns the time between the last call to {@link #setJobItem} and the given timestamp.
     *
     * @param timeNow a {@link System#nanoTime} reading
     * @return the difference in nanoseconds, may be negative if the state was updated after {@code timeNow} was taken
     */
    public synchronized long getElapsed(final long timeNow) {
      return timeNow - _startTime;
    }

    public synchronized CalculationJobItem getJobItem() {
      return _jobItem;
    }

    /**
     * Records a new job item (or null for "idle"), restarts the clock and clears the fault count.
     *
     * @param jobItem the job item now being executed, null if none
     */
    public synchronized void setJobItem(final CalculationJobItem jobItem) {
      _startTime = System.nanoTime();
      _jobItem = jobItem;
      _fault = 0;
    }

    public synchronized int getFault() {
      return _fault;
    }

    public synchronized void incrementFault() {
      _fault++;
    }

  }

  /**
   * The watched threads. Entries are added by {@link #jobExecutionStarted} and removed by the periodic check once the thread is no longer alive.
   */
  private final ConcurrentMap<Thread, ThreadInfo> _state = new ConcurrentHashMap<Thread, ThreadInfo>();
  /**
   * The execution limit in nanoseconds.
   */
  private long _maxExecutionTime;
  /**
   * The timeout action. The default logs an error and interrupts the offending thread.
   */
  private Action _action = new Action() {
    @Override
    public void jobItemExecutionLimitExceeded(final CalculationJobItem jobItem, final Thread thread) {
      s_logger.error("Job item execution limit exceeded on {} by {}", jobItem, thread);
      thread.interrupt();
    }
  };
  private ScheduledExecutorService _scheduler;
  /**
   * Handle to the scheduled periodic check, null when no check is scheduled. Transitions are made while holding the monitor of this watchdog instance.
   */
  private volatile Future<?> _task;

  /**
   * Sets the maximum time a job item may execute for. The value is stored internally in nanoseconds. While the value is not positive, {@link #jobExecutionStarted} and {@link #jobExecutionStopped}
   * do nothing.
   *
   * @param milliseconds the limit in milliseconds
   */
  public void setMaxJobItemExecutionTime(final long milliseconds) {
    _maxExecutionTime = milliseconds * 1000000L;
  }

  /**
   * Returns the maximum time a job item may execute for.
   *
   * @return the limit in milliseconds
   */
  public long getMaxJobItemExecutionTime() {
    return _maxExecutionTime / 1000000L;
  }

  /**
   * Sets the action to take when a job item exceeds the limit.
   *
   * @param action the action, not null
   */
  public void setTimeoutAction(final Action action) {
    ArgumentChecker.notNull(action, "action");
    _action = action;
  }

  /**
   * Returns the action taken when a job item exceeds the limit.
   *
   * @return the action
   */
  public Action getTimeoutAction() {
    return _action;
  }

  /**
   * Sets the scheduler used to run the periodic check. If none has been set when the check first needs scheduling, a single threaded scheduled executor is created.
   *
   * @param scheduler the scheduler to use
   */
  public void setScheduler(final ScheduledExecutorService scheduler) {
    _scheduler = scheduler;
  }

  /**
   * Returns the scheduler used to run the periodic check.
   *
   * @return the scheduler, null if none has been set or created yet
   */
  public ScheduledExecutorService getScheduler() {
    return _scheduler;
  }

  /**
   * The periodic check. Dead threads are dropped from the watch list, live threads that have been on the same job item for longer than the limit are reported to the timeout action, and the check
   * cancels itself when there is nothing left to watch.
   */
  private final class CheckThreads implements Runnable {

    @Override
    public void run() {
      final long time = System.nanoTime();
      final long limit = _maxExecutionTime;
      final Iterator<Map.Entry<Thread, ThreadInfo>> itr = _state.entrySet().iterator();
      while (itr.hasNext()) {
        final Map.Entry<Thread, ThreadInfo> entry = itr.next();
        final Thread thread = entry.getKey();
        final ThreadInfo info = entry.getValue();
        if (thread.isAlive()) {
          // Read the job item once; the worker may finish (and null it) at any moment
          final CalculationJobItem jobItem = info.getJobItem();
          if (jobItem == null) {
            s_logger.debug("Thread {} alive but not executing any job items", thread);
          } else {
            final long elapsed = info.getElapsed(time);
            if (elapsed > limit) {
              s_logger.warn("Thread {} has been executing {} for {}ms", new Object[] {thread, jobItem, (double) elapsed / 1e6 });
              info.incrementFault();
              try {
                getTimeoutAction().jobItemExecutionLimitExceeded(jobItem, thread);
              } catch (RuntimeException e) {
                // An exception escaping run() would suppress all further scheduled executions, silently disabling the watchdog
                s_logger.error("Timeout action failed for thread " + thread, e);
              }
            } else {
              s_logger.debug("Thread {} within job limit", thread);
            }
          }
        } else {
          s_logger.info("Removed terminated thread {} from watchlist", thread);
          itr.remove();
        }
      }
      // Must lock the enclosing watchdog, not this Runnable, to exclude jobExecutionStarted while deciding whether to stop
      synchronized (MaximumJobItemExecutionWatchdog.this) {
        final Future<?> task = _task;
        if (_state.isEmpty() && (task != null)) {
          task.cancel(false);
          _task = null;
        }
      }
    }

  }

  /**
   * The calling thread is about to start executing the job item. This call must be paired with a call to {@link #jobExecutionStopped} when the thread has finished, before the time limit elapses, to
   * avoid the watchdog triggering.
   * <p>
   * The first call from a given thread adds it to the watch list and, if no periodic check is currently scheduled, schedules one with a period equal to the execution limit.
   *
   * @param jobItem the item
   */
  protected void jobExecutionStarted(final CalculationJobItem jobItem) {
    if (getMaxJobItemExecutionTime() > 0) {
      final Thread t = Thread.currentThread();
      final ThreadInfo info = _state.get(t);
      if (info != null) {
        // Already watched; only dead threads are ever removed so the entry can't vanish underneath the caller
        info.setJobItem(jobItem);
        return;
      }
      // Register and (re)schedule under the same lock the periodic check holds when it decides to cancel itself,
      // otherwise the check could cancel just after this thread saw a non-null task and leave the thread unwatched
      synchronized (this) {
        _state.put(t, new ThreadInfo(jobItem));
        if (_task == null) {
          if (getScheduler() == null) {
            setScheduler(Executors.newSingleThreadScheduledExecutor());
          }
          _task = getScheduler().scheduleWithFixedDelay(new CheckThreads(), getMaxJobItemExecutionTime(), getMaxJobItemExecutionTime(), TimeUnit.MILLISECONDS);
        }
      }
    }
  }

  /**
   * The calling thread has finished executing the job item from the previous call to {@link #jobExecutionStarted}.
   */
  protected void jobExecutionStopped() {
    if (getMaxJobItemExecutionTime() > 0) {
      final ThreadInfo info = _state.get(Thread.currentThread());
      if (info != null) {
        info.setJobItem(null);
      }
    }
  }

  /**
   * Tests whether any watched thread is still alive and has not been flagged by the watchdog since its state was last updated by {@link #jobExecutionStarted} or {@link #jobExecutionStopped}.
   *
   * @return true if at least one such thread exists, false otherwise
   */
  public boolean areThreadsAlive() {
    for (Map.Entry<Thread, ThreadInfo> thread : _state.entrySet()) {
      if ((thread.getValue().getFault() == 0) && thread.getKey().isAlive()) {
        return true;
      }
    }
    return false;
  }

}