package dk.statsbiblioteket.medieplatform.autonomous;
import com.netflix.curator.framework.CuratorFramework;
import com.netflix.curator.framework.recipes.locks.InterProcessLock;
import com.netflix.curator.framework.recipes.locks.InterProcessSemaphoreMutex;
import org.slf4j.Logger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
/**
 * This is the Autonomous Component main class. It contains the harnessing that allows a
 * {@link RunnableComponent} to work in the autonomous mindset: it takes a component-wide lock, queries for
 * triggered items, locks as many of them as the work queue allows, runs one {@link AutonomousWorker} per locked
 * item on a thread pool, and finally releases every lock again.
 *
 * <p>The {@code paused} and {@code stopped} flags are written through {@link #setPaused(boolean)} and
 * {@link #setStopped(boolean)} and polled by the thread executing {@link #call()}. They are {@code volatile} so
 * that a write made from another thread (presumably the lock client's connection state listener thread) is
 * visible to the polling thread.
 *
 * @param <T> the type of item this component works on
 */
public class AutonomousComponent<T extends Item> implements Callable<CallResult<T>> {
    private static final Logger log = org.slf4j.LoggerFactory.getLogger(AutonomousComponent.class);

    private final CuratorFramework lockClient;
    private final List<String> oldEvents;
    private final List<String> itemTypes;
    private final long timeoutSBOI;
    private final long timeoutBatch;
    private final RunnableComponent<T> runnable;
    /** Milliseconds to sleep between polls of the worker futures and of the paused/stopped flags. */
    private final long pollTime = 1000;
    private final ConcurrencyConnectionStateListener concurrencyConnectionStateListener;
    private final long workerTimout;
    private final int simultaneousProcesses;
    private final int workQueueMaxLength;
    private final List<String> pastSuccessfulEvents;
    private final List<String> futureEvents;
    // volatile: set via the public setters, read in the polling loops of call().
    private volatile boolean paused = false;
    private volatile boolean stopped = false;
    private final Integer maxResults;
    private final EventTrigger<T> eventTrigger;
    private final EventStorer<T> eventStorer;

    /**
     * Create the component and register a connection state listener on the lock client.
     *
     * @param runnable              the component doing the actual work on each item
     * @param lockClient            the client used for creating locks
     * @param simultaneousProcesses the number of threads in the worker pool
     * @param workQueueMaxLength    the maximum number of items to lock in one call; if null,
     *                              {@code simultaneousProcesses} is used
     * @param pastSuccessfulEvents  added to the query's past successful events, may be null
     * @param futureEvents          added to the query's future events, may be null
     * @param oldEvents             added to the query's old events, may be null
     * @param itemTypes             added to the query's types, may be null
     * @param timeoutSBOI           milliseconds to wait for the SBOI lock
     * @param timeoutBatch          milliseconds to wait for each item lock
     * @param workerTimout          milliseconds the workers are allowed to run before they are cancelled
     * @param maxResults            passed on to each worker's ResultCollector, may be null
     * @param eventTrigger          the source of triggered items
     * @param eventStorer           passed on to each worker
     */
    public AutonomousComponent(RunnableComponent<T> runnable, CuratorFramework lockClient, int simultaneousProcesses,
                               Integer workQueueMaxLength, List<String> pastSuccessfulEvents,
                               List<String> futureEvents, List<String> oldEvents, List<String> itemTypes,
                               long timeoutSBOI, long timeoutBatch, long workerTimout, Integer maxResults,
                               EventTrigger<T> eventTrigger, EventStorer<T> eventStorer) {
        this.lockClient = lockClient;
        this.oldEvents = oldEvents;
        this.itemTypes = itemTypes;
        this.timeoutSBOI = timeoutSBOI;
        this.timeoutBatch = timeoutBatch;
        this.runnable = runnable;
        this.workerTimout = workerTimout;
        this.simultaneousProcesses = simultaneousProcesses;
        this.workQueueMaxLength = workQueueMaxLength == null ? simultaneousProcesses : workQueueMaxLength;
        this.pastSuccessfulEvents = pastSuccessfulEvents;
        this.futureEvents = futureEvents;
        this.eventTrigger = eventTrigger;
        this.eventStorer = eventStorer;
        this.maxResults = maxResults;
        // Hand out 'this' only after every other field has been assigned.
        concurrencyConnectionStateListener = new ConcurrencyConnectionStateListener(this);
        this.lockClient.getConnectionStateListenable().addListener(concurrencyConnectionStateListener);
    }

    /**
     * Utility method to release locks, ignoring any errors being thrown. Will keep releasing the lock until
     * the lock reports, by way of an IllegalStateException, that there is nothing left to release. Any other
     * exception is logged and ends the attempt.
     *
     * @param lock the lock to release; null is accepted and ignored
     */
    protected static void releaseQuietly(InterProcessLock lock) {
        if (lock == null) {
            // Nothing was ever created, so there is nothing to release.
            return;
        }
        while (true) {
            try {
                lock.release();
            } catch (IllegalStateException e) {
                // The lock is no longer held, we are done.
                return;
            } catch (Exception e) {
                log.warn("Caught exception while trying to release lock", e);
                return;
            }
        }
    }

    /**
     * Attempt to acquire the lock, waiting at most the given time.
     *
     * @param lock    the lock to acquire
     * @param timeout the maximum time to wait, in milliseconds
     *
     * @return the value returned by the lock's acquire method
     * @throws LockingException wrapping any exception thrown while acquiring
     */
    protected static boolean acquireQuietly(InterProcessLock lock, long timeout) throws LockingException {
        try {
            return lock.acquire(timeout, TimeUnit.MILLISECONDS);
        } catch (Exception e) {
            throw new LockingException("Failed to acquire lock", e);
        }
    }

    /**
     * Get the zookeeper lockpath for the SBOI instance for this component
     *
     * @param runnable the component whose name is used in the path
     *
     * @return the lock path
     */
    private static <T extends Item> String getSBOILockpath(RunnableComponent<T> runnable) {
        return "/SBOI/" + runnable.getComponentName();
    }

    /**
     * Get the lock path for this item for this component
     *
     * @param runnable the component whose name is used in the path
     * @param item     the item to lock
     *
     * @return the zookeeper lock path
     */
    private static <T extends Item> String getBatchLockPath(RunnableComponent<T> runnable, T item) {
        return "/" + runnable.getComponentName() + "/" + item.getFullID();
    }

    /**
     * Parse the propertyValue as a long, and if failing, return the default value
     *
     * @param propertyValue the string to parse
     * @param defaultValue  the default value
     *
     * @return the long value
     */
    private long parseLong(String propertyValue, long defaultValue) {
        try {
            return Long.parseLong(propertyValue);
        } catch (Exception e) {
            return defaultValue;
        }
    }

    /**
     * The primary method of the autonomous components. This method does the following
     *
     * <ul>
     * <li> Locks the SBOI for this component</li>
     * <li> queries for the triggered items</li>
     * <li> attempts to lock each item, skipping those that cannot be locked within the item timeout</li>
     * <li> stops looking for items when the work queue is full</li>
     * <li> does the work on the locked items and stores the result collector of each</li>
     * <li> when all work is completed, or an exception is thrown, unlocks all the items and the SBOI</li>
     * </ul>
     *
     * @return the result, holding one result collector per item worked on. It is empty if no item could be locked.
     * @throws CouldNotGetLockException if the SBOI lock could not be acquired within the SBOI timeout
     * @throws LockingException         if the locking framework fails
     * @throws CommunicationException   if the connection to the lock server is marked as lost
     */
    @Override
    public CallResult<T> call() throws LockingException, CouldNotGetLockException, CommunicationException {
        InterProcessLock sboiLock = null;
        CallResult<T> result = new CallResult<>();
        Map<AutonomousWorker<T>, InterProcessLock> workers = new HashMap<>();
        try {
            log.info("Starting {}", runnable.getComponentName());

            //lock SBOI for this component name
            sboiLock = new InterProcessSemaphoreMutex(lockClient, getSBOILockpath(runnable));
            if (!acquireQuietly(sboiLock, timeoutSBOI)) {
                throw new CouldNotGetLockException("Could not get lock of SBOI, so returning");
            }
            log.debug("SBOI locked, quering for items");

            EventTrigger.Query<T> query = makeQuery();
            Iterator<T> items = eventTrigger.getTriggeredItems(query);
            while (items.hasNext()) {
                T item = items.next();
                log.info("Found item {}", item.getFullID());

                InterProcessLock batchLock
                        = new InterProcessSemaphoreMutex(lockClient, getBatchLockPath(runnable, item));
                if (!acquireQuietly(batchLock, timeoutBatch)) {
                    log.info("Item {} already locked, so ignoring.", item.getFullID());
                    continue;
                }

                // From here on the item lock is held. It must either end up in the workers map, which the
                // outer finally cleans up, or be released right here if the worker cannot be created.
                boolean registered = false;
                try {
                    log.info("Item {} locked, creating a worker", item.getFullID());
                    if (maxResults != null) {
                        log.debug("Worker will report a maximum of {} results.", maxResults);
                    }
                    AutonomousWorker<T> worker = new AutonomousWorker<>(
                            runnable,
                            new ResultCollector(runnable.getComponentName(), runnable.getComponentVersion(), maxResults),
                            item, eventStorer);
                    workers.put(worker, batchLock);
                    registered = true;
                } finally {
                    if (!registered) {
                        releaseQuietly(batchLock);
                    }
                }

                if (workers.size() >= workQueueMaxLength) {
                    log.debug("We now have sufficient workers, look for no more items");
                    break;
                }
            }

            if (workers.isEmpty()) { //Nothing more to do
                log.info("No Items locked, so nothing further to do");
                return result;
            }

            checkLockServerConnectionState();
            ExecutorService pool = Executors.newFixedThreadPool(simultaneousProcesses);
            try {
                List<Future<?>> futures = new ArrayList<>(workers.size());
                for (AutonomousWorker<T> autonomousWorker : workers.keySet()) {
                    log.info("Submitting worker for Item {}", autonomousWorker.getItem().getFullID());
                    concurrencyConnectionStateListener.add(autonomousWorker);
                    Future<?> future = pool.submit(autonomousWorker);
                    futures.add(future);
                }
                log.debug("Shutting down the pool, and waiting for the workers to terminate");
                pool.shutdown();

                waitForWorkers(pool, futures);

                log.info("All is now done, all workers have completed");
                for (AutonomousWorker<T> autonomousWorker : workers.keySet()) {
                    result.addResult(autonomousWorker.getItem(), autonomousWorker.getResultCollector());
                }
            } finally {
                // Make sure no pool threads outlive this call when we leave by way of an exception.
                if (!pool.isTerminated()) {
                    pool.shutdownNow();
                }
            }
        } finally {
            // This runs on every exit path, so no separate cleanup is needed further up.
            for (InterProcessLock interProcessLock : workers.values()) {
                releaseQuietly(interProcessLock);
            }
            releaseQuietly(sboiLock);
        }
        return result;
    }

    /**
     * Poll the futures until all of them are done. Between polls the lock server connection state is checked,
     * and if the worker timeout is exceeded the pool is shut down and all futures are cancelled.
     *
     * NOTE(review): a cancelled future reports done at once, so after a timeout this method returns on the next
     * poll, even if a worker that does not react to interruption is still running. Confirm that this is intended,
     * as the caller releases the item locks afterwards.
     *
     * @param pool    the pool executing the workers
     * @param futures the futures of the submitted workers
     *
     * @throws CommunicationException if the connection to the lock server is marked as lost
     */
    private void waitForWorkers(ExecutorService pool, List<Future<?>> futures) throws CommunicationException {
        long start = System.currentTimeMillis();
        while (true) {
            log.trace("Waiting to terminate");
            boolean allDone = true;
            for (Future<?> future : futures) {
                if (!future.isDone()) {
                    allDone = false;
                    break;
                }
            }
            checkLockServerConnectionState(pool);
            if (allDone) {
                // Return at once, rather than sleeping and possibly reporting a timeout for finished work.
                return;
            }
            try {
                Thread.sleep(pollTime);
            } catch (InterruptedException ignored) {
                // Deliberately ignored: we keep waiting for the workers. The interrupt flag is not restored,
                // as that would make every following sleep return immediately.
            }
            if (System.currentTimeMillis() - start > workerTimout) {
                log.error("Worker timeout exceeded ({}ms), shutting down all threads. We still need to wait for them"
                          + " to terminate, however.", workerTimout);
                pool.shutdownNow();
                for (Future<?> future : futures) {
                    future.cancel(true);
                }
            }
        }
    }

    /**
     * Build the query for triggered items from the event and type lists given to the constructor. Lists that
     * are null are skipped.
     *
     * @return the query
     */
    private EventTrigger.Query<T> makeQuery() {
        EventTrigger.Query<T> query = new EventTrigger.Query<T>();
        if (pastSuccessfulEvents != null) {
            query.getPastSuccessfulEvents().addAll(pastSuccessfulEvents);
        }
        if (futureEvents != null) {
            query.getFutureEvents().addAll(futureEvents);
        }
        if (oldEvents != null) {
            query.getOldEvents().addAll(oldEvents);
        }
        if (itemTypes != null) {
            query.getTypes().addAll(itemTypes);
        }
        return query;
    }

    /**
     * Check the lock server connection state. If the connection is lost, all our locks are dirty, so the execution
     * should stop. A CommunicationException is thrown in this case. If the connection is suspended, enter into a
     * potentially infinite loop waiting for the connection to either be restored or lost.
     *
     * @throws CommunicationException if the connection was lost
     */
    private void checkLockServerConnectionState() throws CommunicationException {
        checkLockServerConnectionState(null);
    }

    /**
     * Check the lock server connection state. If the connection is lost, all our locks are dirty, so the execution
     * should stop. A CommunicationException is thrown in this case. If the connection is suspended, enter into a
     * potentially infinite loop waiting for the connection to either be restored or lost.
     *
     * @param pool this is the pool of executing threads. The threads will be stopped as best as the system is able,
     *             if the connection is lost. May be null.
     *
     * @throws CommunicationException if the connection was lost
     */
    private void checkLockServerConnectionState(ExecutorService pool) throws CommunicationException {
        checkStopped(pool);
        while (paused && !stopped) {
            try {
                Thread.sleep(pollTime);
            } catch (InterruptedException ignored) {
                // Deliberately ignored: keep waiting until the paused or stopped flag changes.
            }
        }
        checkStopped(pool);
    }

    /**
     * Check if the stopped flag is set. If set, and pool is non-null, shut down the pool
     *
     * @param pool the pool of worker threads, may be null
     *
     * @throws CommunicationException if the stopped flag is set
     */
    private void checkStopped(ExecutorService pool) throws CommunicationException {
        if (!stopped) {
            return;
        }
        if (pool != null) {
            pool.shutdownNow();
        }
        throw new CommunicationException("Lost connection to lock server");
    }

    /**
     * Mark the connection to the lock server as suspended or not
     *
     * @param paused true if the connection is suspended
     */
    public void setPaused(boolean paused) {
        this.paused = paused;
    }

    /**
     * Mark the connection to the lock server as lost
     *
     * @param stopped if true, the connection is lost
     *
     * @see #checkLockServerConnectionState()
     * @see #checkStopped(java.util.concurrent.ExecutorService)
     */
    public void setStopped(boolean stopped) {
        this.stopped = stopped;
    }
}