/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.threads;
import java.lang.Thread.UncaughtExceptionHandler;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Deque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableMap;
import java.util.UUID;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collectors;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.inject.Inject;
import org.diqube.config.Config;
import org.diqube.config.ConfigKey;
import org.diqube.context.AutoInstatiate;
import org.diqube.queries.QueryRegistry;
import org.diqube.queries.QueryUuid;
import org.diqube.queries.QueryUuid.QueryUuidThreadState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
* Manages {@link ExecutorService}s for diqube.
*
* @author Bastian Gloeckle
*/
@AutoInstatiate
public class ExecutorManager {
private static final Logger logger = LoggerFactory.getLogger(ExecutorManager.class);
/**
* Map from query UUID to execution UUID to list of executors registered for it. Usually there should be one Executor
* for a queryUuid/executorUuid combination.
*/
private Map<UUID, Map<UUID, List<DiqubeFixedThreadPoolExecutor>>> queryExecutors = new HashMap<>();
private ShutdownThread shutdownThread = new ShutdownThread();
private TimeoutThread timeoutThread = new TimeoutThread();
@Inject
private QueryRegistry queryRegistry;
@Config(ConfigKey.QUERY_EXECUTION_TIMEOUT_SECONDS)
private int queryExecutionTimeoutSeconds;
@PostConstruct
public void initialize() {
shutdownThread.start();
timeoutThread.start();
}
@PreDestroy
public void cleanup() {
// in a production environment it would actually not matter if we really shutdown everything (because the JVM is
// exiting anyway). In tests though, it is meaningful to shutdown explicitly!
shutdownEverythingOfAllQueries();
shutdownThread.interrupt();
timeoutThread.interrupt();
}
/**
* Create a new cached thread pool, see {@link Executors#newCachedThreadPool()}.
*
* @param nameFormat
* a {@link String#format(String, Object...)}-compatible format String, to which a unique integer (0, 1,
* etc.) will be supplied as the single parameter. This integer will be unique to the built instance of the
* ThreadFactory and will be assigned sequentially. For example, {@code "rpc-pool-%d"} will generate thread
* names like {@code "rpc-pool-0"}, {@code "rpc-pool-1"}, {@code "rpc-pool-2"}, etc.
* @param uncaughtExceptionHandler
* This will be called in case any of the threads of the ExecutorService ends because an exception was
* thrown.
*
* @return The new cached thread pool.
*/
public ExecutorService newCachedThreadPool(String nameFormat, UncaughtExceptionHandler uncaughtExceptionHandler) {
ThreadFactoryBuilder threadFactoryBuilder = new ThreadFactoryBuilder();
threadFactoryBuilder.setNameFormat(nameFormat);
threadFactoryBuilder.setUncaughtExceptionHandler(uncaughtExceptionHandler);
return Executors.newCachedThreadPool(threadFactoryBuilder.build());
}
/**
* Create a new {@link ExecutorService} that does create threads as needed, but contains a maxmimum number of threads.
*
* @param nameFormat
* a {@link String#format(String, Object...)}-compatible format String, to which a unique integer (0, 1,
* etc.) will be supplied as the single parameter. This integer will be unique to the built instance of the
* ThreadFactory and will be assigned sequentially. For example, {@code "rpc-pool-%d"} will generate thread
* names like {@code "rpc-pool-0"}, {@code "rpc-pool-1"}, {@code "rpc-pool-2"}, etc.
* @param uncaughtExceptionHandler
* This will be called in case any of the threads of the ExecutorService ends because an exception was
* thrown.
* @param maxPoolSize
* Maximum number of threads.
* @return The new {@link ExecutorService}.
*/
public ExecutorService newCachedThreadPoolWithMax(String nameFormat,
UncaughtExceptionHandler uncaughtExceptionHandler, int maxPoolSize) {
ThreadFactoryBuilder threadFactoryBuilder = new ThreadFactoryBuilder();
threadFactoryBuilder.setNameFormat(nameFormat);
threadFactoryBuilder.setUncaughtExceptionHandler(uncaughtExceptionHandler);
return new ThreadPoolExecutor(0, maxPoolSize, 10, TimeUnit.SECONDS, new LinkedBlockingQueue<>(),
threadFactoryBuilder.build());
}
/**
* Create a new thread pool with a fixed set of threads, see {@link Executors#newFixedThreadPool(int)}. The returned
* {@link ExecutorService} should be used for executing a specific diql query, with having the correct
* {@link QueryUuidThreadState} set.
*
* <p>
* All threads that are used by the returned {@link Executor} will be "bound" to the specific diql query: That means
* that in case there is an uncaught exception thrown by one of those threads, the {@link QueryRegistry} will be
* informed about this and a potentially installed exception handler for that query will be called.
*
* <p>
* In addition to that, the returned {@link Executor} will also be affected when any of the following methods are
* called with the specific query ID:
*
* <ul>
* <li>{@link #findQueryUuidOfExecutorService(ExecutorService)}
* <li>{@link #findAllExecutorServicesOfQueryExecution(UUID)}
* <li>{@link #shutdownEverythingOfQueryExecution(UUID)}
* <li>{@link #shutdownEverythingOfAllQueries()}
* </ul>
*
* <p>
* The returned executor will be automatically terminated after {@link ConfigKey#QUERY_EXECUTION_TIMEOUT_SECONDS}
* seconds.
*
* @param numberOfThreads
* Number of threads the thread pool should have
* @param nameFormat
* a {@link String#format(String, Object...)}-compatible format String, to which a unique integer (0, 1,
* etc.) will be supplied as the single parameter. This integer will be unique to the built instance of the
* ThreadFactory and will be assigned sequentially. For example, {@code "rpc-pool-%d"} will generate thread
* names like {@code "rpc-pool-0"}, {@code "rpc-pool-1"}, {@code "rpc-pool-2"}, etc.
* @param queryUuid
* The UUID to whose execution the returned {@link Executor} belongs to. For a description of query
* UUID/executor UUID, see {@link QueryUuid} and ExecutablePlan.
* @param executionUuid
* The UUID of the execution the returned {@link Executor} belongs to. For a description of query
* UUID/executor UUID, see {@link QueryUuid} and ExecutablePlan.
*
* @return The new thread pool. It is not a {@link ExecutorService}, but only an {@link Executor} returned, because
* ONLY the {@link Executor#execute(Runnable)} method must be run, because then the exception forwarding which
* is described above will work correctly. This does not work when the method
* {@link ExecutorService#submit(java.util.concurrent.Callable)} etc. are called, becuase the ExecutorService
* won't forward the exception in that case, but encapsulate it in the corresponding {@link Future}.
*/
public synchronized Executor newQueryFixedThreadPoolWithTimeout(int numberOfThreads, String nameFormat,
UUID queryUuid, UUID executionUuid) {
ThreadFactoryBuilder baseThreadFactoryBuilder = new ThreadFactoryBuilder();
baseThreadFactoryBuilder.setNameFormat(nameFormat);
// Use our ThreadFactory as facette in order to install our exception handling and enable the publication of the
// query & execution UUID in QueryUuid when any thread of the query starts running.
ThreadFactory threadFactory =
new QueryThreadFactory(baseThreadFactoryBuilder.build(), queryUuid, executionUuid, queryRegistry);
DiqubeFixedThreadPoolExecutor res =
new DiqubeFixedThreadPoolExecutor(numberOfThreads, threadFactory, queryUuid, executionUuid);
res.setThreadNameFormatForToString(nameFormat);
synchronized (queryExecutors) {
if (!queryExecutors.containsKey(queryUuid))
queryExecutors.put(queryUuid, new HashMap<>());
if (!queryExecutors.get(queryUuid).containsKey(executionUuid))
queryExecutors.get(queryUuid).put(executionUuid, new ArrayList<>());
queryExecutors.get(queryUuid).get(executionUuid).add(res);
}
timeoutThread.registerTimeout((System.nanoTime() / (long) 1e6) + (queryExecutionTimeoutSeconds * 1000), res);
return res;
}
/**
* @return The Query UUID of the given {@link Executor} that was created by
* {@link #newQueryFixedThreadPool(int, String, UUID)}. <code>null</code> if not available.
*/
public UUID findQueryUuidOfExecutorService(ExecutorService service) {
if (!(service instanceof DiqubeFixedThreadPoolExecutor))
return null;
return ((DiqubeFixedThreadPoolExecutor) service).getQueryUuid();
}
/**
* @return All {@link Executor}s that are registered as executing some work for the given query.
*/
public List<DiqubeFixedThreadPoolExecutor> findAllExecutorServicesOfQueryExecution(UUID queryUuid,
UUID executionUuid) {
synchronized (queryExecutors) {
if (!queryExecutors.containsKey(queryUuid) || !queryExecutors.get(queryUuid).containsKey(executionUuid))
return null;
return new ArrayList<>(queryExecutors.get(queryUuid).get(executionUuid));
}
}
/**
* Calls {@link ExecutorService#shutdownNow()} on all Executors that are registered for the given query execution and
* unregisters those executors. The executors have to have been created using
* {@link #newQueryFixedThreadPool(int, String, UUID)}.
*/
public synchronized void shutdownEverythingOfQueryExecution(UUID queryUuid, UUID executionUuid) {
Collection<ExecutorService> shutdownExecutors = new ArrayList<>();
List<DiqubeFixedThreadPoolExecutor> executors = findAllExecutorServicesOfQueryExecution(queryUuid, executionUuid);
if (executors != null) {
logger.trace("Shutting down {} executors of query {} execution {}: {}", executors.size(), queryUuid,
executionUuid, executors);
for (DiqubeFixedThreadPoolExecutor executor : executors)
shutdownExecutors.add(executor);
}
synchronized (queryExecutors) {
if (queryExecutors.get(queryUuid) != null) {
queryExecutors.get(queryUuid).remove(executionUuid);
if (queryExecutors.get(queryUuid).isEmpty())
queryExecutors.remove(queryUuid);
}
}
synchronized (shutdownThread.shutdownExecutors) {
shutdownThread.shutdownExecutors.addAll(shutdownExecutors);
shutdownThread.numberOfServicesToShutdown.addAndGet(shutdownExecutors.size());
synchronized (shutdownThread.sync) {
shutdownThread.sync.notifyAll();
}
}
}
/**
* Calls {@link ExecutorService#shutdownNow()} on all Executors that are registered for the given query and
* unregisters those executors.
*/
public synchronized void shutdownEverythingOfQuery(UUID queryUuid) {
Collection<ExecutorService> shutdownExecutors = new ArrayList<>();
Map<UUID, List<DiqubeFixedThreadPoolExecutor>> executorsByExecutionUuid;
synchronized (queryExecutors) {
executorsByExecutionUuid = queryExecutors.remove(queryUuid);
}
if (executorsByExecutionUuid == null) {
logger.trace("Nothing is being executed for query {}, so there's nothing to shut down.", queryUuid);
return;
}
shutdownExecutors =
executorsByExecutionUuid.values().stream().flatMap(l -> l.stream()).collect(Collectors.toList());
logger.trace("Shutting down {} executors of query {} (all executions): {}", shutdownExecutors.size(), queryUuid,
shutdownExecutors);
synchronized (shutdownThread.shutdownExecutors) {
shutdownThread.shutdownExecutors.addAll(shutdownExecutors);
shutdownThread.numberOfServicesToShutdown.addAndGet(shutdownExecutors.size());
synchronized (shutdownThread.sync) {
shutdownThread.sync.notifyAll();
}
}
}
/**
* Calls {@link ExecutorService#shutdownNow()} on all Executors that have been created using
* {@link #newQueryFixedThreadPool(int, String, UUID)} and which are still active.
*/
public synchronized void shutdownEverythingOfAllQueries() {
Collection<ExecutorService> shutdownExecutors = new ArrayList<>();
synchronized (queryExecutors) {
for (Iterator<Entry<UUID, Map<UUID, List<DiqubeFixedThreadPoolExecutor>>>> it =
queryExecutors.entrySet().iterator(); it.hasNext();) {
Entry<UUID, Map<UUID, List<DiqubeFixedThreadPoolExecutor>>> e = it.next();
for (Entry<UUID, List<DiqubeFixedThreadPoolExecutor>> e2 : e.getValue().entrySet())
for (DiqubeFixedThreadPoolExecutor executor : e2.getValue())
shutdownExecutors.add(executor);
it.remove();
}
}
synchronized (shutdownThread.shutdownExecutors) {
shutdownThread.shutdownExecutors.addAll(shutdownExecutors);
shutdownThread.numberOfServicesToShutdown.addAndGet(shutdownExecutors.size());
synchronized (shutdownThread.sync) {
shutdownThread.sync.notifyAll();
}
}
}
/**
* Thread that is used to shut down any created {@link Executor}s.
*
* This needs to be done in a separate thread, in case the
* {@link ExecutorManager#shutdownEverythingOfQueryExecution(UUID, Collection)} or
* {@link ExecutorManager#shutdownEverythingOfAllQueries()} is called within a thread that belongs to one of the
* Executors that will be shutdown. In that case we might end up not shutting down some, because the thread executing
* the shutdowns was killed before.
*
* <p>
* Communicating with this thread:
* <ul>
* <li>Start sync on {@link #shutdownExecutors}.
* <li>Add the executors that should be shutdown to the end of {@link #shutdownExecutors}.
* <li>Increase {@link #numberOfServicesToShutdown} by the number of executors you added to {@link #shutdownExecutors}
* <li>Start sync on {@link #sync}
* <li>call {@link #sync#notifyAll()}.
* <li>Leave sync on {@link #sync}
* <li>Leave sync on {@link #shutdownExecutors}.
* </ul>
*/
private static class ShutdownThread extends Thread {
private Deque<ExecutorService> shutdownExecutors = new ConcurrentLinkedDeque<>();
private Object sync = new Object();
private AtomicInteger numberOfServicesToShutdown = new AtomicInteger(0);
public ShutdownThread() {
super("ExecutorManager-shutdown");
setUncaughtExceptionHandler(new UncaughtExceptionHandler() {
@Override
public void uncaughtException(Thread t, Throwable e) {
logger.error("Uncaught exception in ExecutorManagers shurdownThread. Restart the server.", e);
}
});
}
@Override
public void run() {
boolean interrupted = false;
while (!interrupted) {
synchronized (sync) {
try {
sync.wait(1000);
} catch (InterruptedException e) {
// we were interrupted, work on remaining threads, then exit!
interrupted = true;
}
}
if (numberOfServicesToShutdown.get() > 0) {
synchronized (shutdownExecutors) {
// If the "shutdown" request was sent from a thread that would itself be shutdown, we give it a grace period
// of 100ms to finish its job, before sending the shutdown (and interrupting those threads, which might
// perhaps lead to exceptions in the logs, although they should not be serious, because the threads should
// have completed their work already)
try {
if (!interrupted)
Thread.sleep(100);
} catch (InterruptedException e) {
// we were interrupted, work on remaining threads, then exit!
interrupted = true;
}
while (numberOfServicesToShutdown.get() > 0) {
ExecutorService executor = shutdownExecutors.poll();
String executorString = executor.toString();
logger.trace("Shutting down {}", executorString);
executor.shutdownNow();
try {
if (!interrupted && !executor.awaitTermination(100, TimeUnit.MILLISECONDS))
logger.warn("Could not shutdown all threads of executor within 100ms: {}", executorString);
} catch (InterruptedException e) {
// we were interrupted, work on remaining threads, then exit!
interrupted = true;
}
numberOfServicesToShutdown.decrementAndGet();
}
}
}
}
logger.trace("Thread exiting.");
}
}
/**
* Thread that handles the timeouts of all executors created with
* {@link ExecutorManager#newQueryFixedThreadPoolWithTimeout(int, String, UUID, UUID)}.
*/
private class TimeoutThread extends Thread {
private Object wait = new Object();
private NavigableMap<Long, DiqubeFixedThreadPoolExecutor> timeouts = new ConcurrentSkipListMap<>();
private ReentrantReadWriteLock timeoutLock = new ReentrantReadWriteLock();
public TimeoutThread() {
super("ExecutorManager-timeout");
}
public void registerTimeout(long timeoutAtMillis, DiqubeFixedThreadPoolExecutor executor) {
timeoutLock.readLock().lock();
try {
while (timeouts.putIfAbsent(timeoutAtMillis++, executor) != null)
;
} finally {
timeoutLock.readLock().unlock();
}
}
@Override
public void run() {
while (true) {
synchronized (wait) {
try {
wait.wait(1000);
} catch (InterruptedException e) {
// exit quietly.
return;
}
}
List<DiqubeFixedThreadPoolExecutor> servicesToShutdown = new ArrayList<>();
long nowMillis = System.nanoTime() / (long) 1e6;
timeoutLock.writeLock().lock();
try {
NavigableMap<Long, DiqubeFixedThreadPoolExecutor> activeTimeouts = timeouts.headMap(nowMillis, true);
servicesToShutdown.addAll(activeTimeouts.values());
activeTimeouts.clear();
} finally {
timeoutLock.writeLock().unlock();
}
for (DiqubeFixedThreadPoolExecutor shutdownService : servicesToShutdown) {
if (!shutdownService.isTerminated()) {
UUID queryUuid = shutdownService.getQueryUuid();
UUID executionUuid = shutdownService.getExecutionUuid();
logger.info(
"An executor of query {} execution {} timed out after {} seconds. Terminating that query execution.",
queryUuid, executionUuid, queryExecutionTimeoutSeconds);
// issue an exception to give feedback to user.
queryRegistry.handleException(queryUuid, executionUuid,
new QueryTimeoutException("Query timeout after " + queryExecutionTimeoutSeconds + " seconds."));
// although the exception handler probably already shuts down everything, we want to really make sure that
// everything terminates. Note that the exception handler has to work synchronously! If it is asynchronous,
// the following line might be executed before the exception handler actually handled the exception and
// therefore that thread might be terminated before!
ExecutorManager.this.shutdownEverythingOfQueryExecution(queryUuid, executionUuid);
}
}
}
}
}
/**
* The execution of a query timed out.
*
* Configured via {@link ConfigKey#QUERY_EXECUTION_TIMEOUT_SECONDS}.
*/
public static class QueryTimeoutException extends RuntimeException {
private static final long serialVersionUID = 1L;
public QueryTimeoutException(String msg) {
super(msg);
}
}
}