/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.harvest;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.CollectionService;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Context;
import org.dspace.eperson.EPerson;
import org.dspace.eperson.factory.EPersonServiceFactory;
import org.dspace.harvest.factory.HarvestServiceFactory;
import org.dspace.harvest.service.HarvestedCollectionService;
import java.io.IOException;
import java.sql.SQLException;
import java.util.*;
/**
 * The class responsible for scheduling harvesting cycles at regular intervals.
 *
 * <p>An instance is a {@link Runnable}; {@link #run()} enters {@link #scheduleLoop()}, which
 * repeats three stages: queue every collection that is ready for harvest, start the queued
 * {@link HarvestThread}s (at most {@code maxActiveThreads} at a time), then sleep until the
 * next cycle. Other threads steer the loop through the static {@code setInterrupt} methods
 * and observe it through {@link #hasStatus(int)} / {@link #getStatus()}.
 *
 * <p>Almost all scheduler state is static, so it is shared by every instance of this class.
 *
 * @author alexey
 */
public class HarvestScheduler implements Runnable
{
    protected static Logger log = Logger.getLogger(HarvestScheduler.class);

    /**
     * EPerson set as the current user of the context in {@link #addThread}. Looked up by e-mail
     * from the {@code harvester.eperson} property of the {@code oai} module; stays {@code null}
     * when that property is unset or empty.
     */
    protected static EPerson harvestAdmin;

    /** Context of the current scheduling iteration; replaced at the start of every iteration. */
    protected Context mainContext;

    /**
     * Monitor the scheduler waits on between cycles. Presumably callers that set an interrupt
     * notify it to wake the scheduler early -- verify against the callers.
     */
    public static final Object lock = new Object();

    /** Harvest threads that have been queued by {@link #addThread} but not started yet. */
    protected static Stack<HarvestThread> harvestThreads;

    /** Maximum number of harvest threads allowed to run at once (defaults to 3). */
    protected static Integer maxActiveThreads;

    /**
     * Number of harvest threads currently counted as running. This class only increments it;
     * presumably each {@link HarvestThread} decrements it when done -- verify in HarvestThread.
     */
    protected static volatile Integer activeThreads = 0;

    public static final int HARVESTER_STATUS_RUNNING = 1;
    public static final int HARVESTER_STATUS_SLEEPING = 2;
    public static final int HARVESTER_STATUS_PAUSED = 3;
    public static final int HARVESTER_STATUS_STOPPED = 4;

    public static final int HARVESTER_INTERRUPT_NONE = 0;
    public static final int HARVESTER_INTERRUPT_PAUSE = 1;
    public static final int HARVESTER_INTERRUPT_STOP = 2;
    public static final int HARVESTER_INTERRUPT_RESUME = 3;
    public static final int HARVESTER_INTERRUPT_INSERT_THREAD = 4;
    public static final int HARVESTER_INTERRUPT_KILL_THREAD = 5;

    // Both fields are polled by threads that do not hold any monitor (the pause loop,
    // hasStatus(), getStatus(), getInterrupt()), so they must be volatile: Thread.sleep()
    // gives no visibility guarantee for plain fields.
    protected static volatile int status = HARVESTER_STATUS_STOPPED;
    private static volatile int interrupt = HARVESTER_INTERRUPT_NONE;

    /** Collection ID that accompanies a {@link #HARVESTER_INTERRUPT_INSERT_THREAD} interrupt. */
    protected static UUID interruptValue = null;

    /** Lower bound, in milliseconds, of the pause between cycles (defaults to 30 seconds). */
    protected static long minHeartbeat;

    /** Upper bound, in milliseconds, of the pause between cycles (defaults to one hour). */
    protected static long maxHeartbeat;

    private static final CollectionService collectionService = ContentServiceFactory.getInstance().getCollectionService();
    private static final HarvestedCollectionService harvestedCollectionService = HarvestServiceFactory.getInstance().getHarvestedCollectionService();

    /**
     * Tells whether the scheduler is currently in the given state.
     *
     * @param statusToCheck
     *     one of the {@code HARVESTER_STATUS_*} constants
     * @return true if the current status equals {@code statusToCheck}
     */
    public static boolean hasStatus(int statusToCheck) {
        return status == statusToCheck;
    }

    /**
     * Posts an interrupt for the scheduler loop, replacing any interrupt that is still pending.
     *
     * @param newInterrupt
     *     one of the {@code HARVESTER_INTERRUPT_*} constants
     */
    public static synchronized void setInterrupt(int newInterrupt) {
        interrupt = newInterrupt;
    }

    /**
     * Posts an interrupt together with its argument, replacing any interrupt still pending.
     *
     * @param newInterrupt
     *     one of the {@code HARVESTER_INTERRUPT_*} constants
     * @param newInterruptValue
     *     argument of the interrupt; for {@link #HARVESTER_INTERRUPT_INSERT_THREAD} it is the
     *     ID of the collection to queue
     */
    public static synchronized void setInterrupt(int newInterrupt, UUID newInterruptValue) {
        interrupt = newInterrupt;
        interruptValue = newInterruptValue;
    }

    /**
     * @return the interrupt that is currently pending, or {@link #HARVESTER_INTERRUPT_NONE}
     */
    public static int getInterrupt() {
        return interrupt;
    }

    /**
     * Describes the scheduler state in a human-readable sentence. While running, a pending
     * pause or stop interrupt takes precedence over the plain "actively harvesting" message.
     *
     * @return status message (always ends with a single space)
     */
    public static String getStatus() {
        switch (status) {
            case HARVESTER_STATUS_RUNNING:
                switch (interrupt) {
                    case HARVESTER_INTERRUPT_PAUSE:
                        return("The scheduler is finishing active harvests before pausing. ");
                    case HARVESTER_INTERRUPT_STOP:
                        return("The scheduler is shutting down. ");
                    default:
                        return("The scheduler is actively harvesting collections. ");
                }
            case HARVESTER_STATUS_SLEEPING:
                return("The scheduler is waiting for collections to harvest. ");
            case HARVESTER_STATUS_PAUSED:
                return("The scheduler is paused. ");
            default:
                return("Automatic harvesting is not active. ");
        }
    }

    /**
     * Creates a scheduler and (re)initialises the shared static configuration from the
     * {@code oai} configuration module: the harvest admin, the thread limit and both heartbeats.
     * A value of 0 for a numeric property selects the built-in default.
     *
     * @throws SQLException
     *     An exception that provides information on a database access error or other errors.
     * @throws AuthorizeException
     *     Exception indicating the current user of the context does not have permission
     *     to perform a particular action.
     */
    public HarvestScheduler() throws SQLException, AuthorizeException {
        mainContext = new Context();

        String harvestAdminParam = ConfigurationManager.getProperty("oai", "harvester.eperson");
        harvestAdmin = null;
        if (harvestAdminParam != null && harvestAdminParam.length() > 0)
        {
            harvestAdmin = EPersonServiceFactory.getInstance().getEPersonService().findByEmail(mainContext, harvestAdminParam);
        }

        harvestThreads = new Stack<HarvestThread>();

        maxActiveThreads = ConfigurationManager.getIntProperty("oai", "harvester.maxThreads");
        if (maxActiveThreads == 0)
        {
            maxActiveThreads = 3;
        }

        // Multiply as long: the configured value is in seconds and "int * 1000" would
        // silently overflow for large settings before being widened.
        minHeartbeat = ConfigurationManager.getIntProperty("oai", "harvester.minHeartbeat") * 1000L;
        if (minHeartbeat == 0)
        {
            minHeartbeat = 30000;
        }

        maxHeartbeat = ConfigurationManager.getIntProperty("oai", "harvester.maxHeartbeat") * 1000L;
        if (maxHeartbeat == 0)
        {
            maxHeartbeat = 3600000;
        }
    }

    /**
     * Runs the scheduling loop until a stop interrupt is processed.
     */
    @Override
    public void run() {
        scheduleLoop();
    }

    /**
     * The scheduling loop. Each iteration processes a pending interrupt, honours a pause,
     * queues and starts the harvests that are due, waits for them to finish, commits the
     * iteration's context and finally sleeps until the next cycle. Returns only when a
     * {@link #HARVESTER_INTERRUPT_STOP} interrupt is processed.
     */
    protected void scheduleLoop() {
        long i = 0;
        while (true)
        {
            Context iterationContext = null;
            try
            {
                iterationContext = new Context();
                mainContext = iterationContext;

                if (handlePendingInterrupt(iterationContext)) {
                    return;
                }
                if (status == HARVESTER_STATUS_PAUSED && waitWhilePaused()) {
                    return;
                }
                status = HARVESTER_STATUS_RUNNING;

                // Stage #1: if something is ready for harvest, push it onto the ready stack, mark it as "queued"
                List<HarvestedCollection> readyCollections = harvestedCollectionService.findReady(iterationContext);
                log.info("Collections ready for immediate harvest: " + readyCollections.toString());
                for (HarvestedCollection readyCollection : readyCollections) {
                    addThread(iterationContext, readyCollection);
                }

                // Stage #2: start up all the threads currently in the queue up to the maximum number
                startQueuedThreads();

                // Finally, wait for the last few remaining threads to finish
                // TODO: this step might be unnecessary. Theoretically a single very long harvest process
                // could then lock out all the other ones from starting on their next iteration.
                // FIXME: also, this might lead to a situation when a single thread getting stuck without
                // throwing an exception would shut down the whole scheduler
                while (activeThreads != 0) {
                    Thread.sleep(1000);
                }

                // Commit everything
                try {
                    iterationContext.complete();
                    log.info("Done with iteration " + i);
                } catch (SQLException e) {
                    log.error("Could not commit iteration " + i, e);
                    iterationContext.abort();
                }
            }
            catch (Exception e) {
                log.error("Exception on iteration: " + i, e);
            }
            finally {
                // Covers the stop-return and every failure path; a context that was
                // completed or aborted above is no longer valid and is left alone.
                abortIfOpen(iterationContext);
            }

            // Stage #3: figure out how long until the next iteration and wait
            sleepUntilNextCycle();
            i++;
        }
    }

    /**
     * Consumes the pending interrupt, if any, under the same monitor the static
     * {@code setInterrupt} methods use. Returns true when the scheduler must stop.
     */
    private boolean handlePendingInterrupt(Context context) throws SQLException, IOException, AuthorizeException {
        synchronized (HarvestScheduler.class) {
            switch (interrupt) {
                case HARVESTER_INTERRUPT_INSERT_THREAD:
                    interrupt = HARVESTER_INTERRUPT_NONE;
                    UUID requestedId = interruptValue;
                    interruptValue = null;
                    HarvestedCollection requested =
                        harvestedCollectionService.find(context, collectionService.find(context, requestedId));
                    if (requested == null) {
                        // Nothing to queue; without this guard addThread would fail with an NPE
                        log.warn("No harvested collection found for requested ID: " + requestedId);
                    } else {
                        addThread(context, requested);
                    }
                    return false;
                case HARVESTER_INTERRUPT_PAUSE:
                    interrupt = HARVESTER_INTERRUPT_NONE;
                    status = HARVESTER_STATUS_PAUSED;
                    return false;
                case HARVESTER_INTERRUPT_STOP:
                    interrupt = HARVESTER_INTERRUPT_NONE;
                    status = HARVESTER_STATUS_STOPPED;
                    return true;
                default:
                    return false;
            }
        }
    }

    /**
     * Blocks while the scheduler is paused, polling once a second for a resume or stop
     * interrupt. Returns true when a stop was requested (status is then already STOPPED),
     * false when harvesting should resume.
     */
    private boolean waitWhilePaused() throws InterruptedException {
        while (interrupt != HARVESTER_INTERRUPT_RESUME && interrupt != HARVESTER_INTERRUPT_STOP) {
            Thread.sleep(1000);
        }
        synchronized (HarvestScheduler.class) {
            if (interrupt == HARVESTER_INTERRUPT_STOP) {
                interrupt = HARVESTER_INTERRUPT_NONE;
                status = HARVESTER_STATUS_STOPPED;
                return true;
            }
            if (interrupt == HARVESTER_INTERRUPT_RESUME) {
                // The resume request has been honoured; clear it so it is not seen again
                interrupt = HARVESTER_INTERRUPT_NONE;
            }
        }
        return false;
    }

    /**
     * Starts every queued harvest thread, holding back whenever the number of active
     * threads has reached {@code maxActiveThreads}.
     */
    private void startQueuedThreads() throws InterruptedException {
        while (!harvestThreads.isEmpty()) {
            synchronized (HarvestScheduler.class) {
                activeThreads++;
            }
            Thread activeThread = new Thread(harvestThreads.pop());
            activeThread.start();
            log.info("Thread started: " + activeThread.toString());

            /* Wait while the number of threads running is greater than or equal to max */
            while (activeThreads >= maxActiveThreads) {
                Thread.sleep(1000);
            }
        }
    }

    /**
     * Marks the scheduler as sleeping and waits on {@link #lock} until the oldest harvest
     * becomes due again. The wait is clamped to [minHeartbeat, maxHeartbeat] plus one second;
     * when no previous harvest date is available it is minHeartbeat plus one second.
     */
    private void sleepUntilNextCycle() {
        Context tempContext = null;
        try {
            tempContext = new Context();
            HarvestedCollection oldest = harvestedCollectionService.findOldestHarvest(tempContext);

            // Interval between two harvests of the same collection, in minutes
            int harvestInterval = ConfigurationManager.getIntProperty("oai", "harvester.harvestFrequency");
            if (harvestInterval == 0)
            {
                harvestInterval = 720;
            }

            long nextHarvest = 0;
            if (oldest != null && oldest.getHarvestDate() != null) {
                Calendar calendar = Calendar.getInstance();
                calendar.setTime(oldest.getHarvestDate());
                calendar.add(Calendar.MINUTE, harvestInterval);
                // Milliseconds from now until the oldest harvest is due; negative when overdue
                nextHarvest = calendar.getTimeInMillis() - System.currentTimeMillis();
            }

            long upperBound = Math.min(nextHarvest, maxHeartbeat);
            long delay = Math.max(upperBound, minHeartbeat) + 1000;

            tempContext.complete();

            status = HARVESTER_STATUS_SLEEPING;
            synchronized (lock) {
                lock.wait(delay);
            }
        }
        catch (InterruptedException ie) {
            log.warn("Interrupt: " + ie.getMessage());
        }
        catch (SQLException e) {
            log.error("Could not determine the time of the next harvest", e);
        }
        finally {
            abortIfOpen(tempContext);
        }
    }

    /**
     * Aborts the given context if it exists and has not been completed or aborted yet.
     */
    private static void abortIfOpen(Context context) {
        if (context != null && context.isValid()) {
            context.abort();
        }
    }

    /**
     * Adds a thread to the ready stack. Can also be called externally to queue up a collection
     * for harvesting before it is "due" for another cycle. This allows starting a harvest process
     * from the UI that still "plays nice" with these thread mechanics instead of making an
     * asynchronous call to runHarvest().
     *
     * <p>The collection is marked as queued and the context's events are dispatched, but the
     * context is not committed here.
     *
     * @param context
     *     The relevant DSpace Context.
     * @param harvestedCollection
     *     collection to be harvested
     * @throws IOException
     *     A general class of exceptions produced by failed or interrupted I/O operations.
     * @throws SQLException
     *     An exception that provides information on a database access error or other errors.
     * @throws AuthorizeException
     *     Exception indicating the current user of the context does not have permission
     *     to perform a particular action.
     */
    public void addThread(Context context, HarvestedCollection harvestedCollection) throws SQLException, IOException, AuthorizeException {
        log.debug("****** Entered the addThread method. Active threads: " + harvestThreads.toString());

        // Act as the configured harvest admin while flagging the collection as queued
        context.setCurrentUser(harvestAdmin);
        harvestedCollection.setHarvestStatus(HarvestedCollection.STATUS_QUEUED);
        harvestedCollectionService.update(context, harvestedCollection);
        context.dispatchEvents();

        HarvestThread ht = new HarvestThread(harvestedCollection.getCollection().getID());
        harvestThreads.push(ht);

        log.debug("****** Queued up a thread. Active threads: " + harvestThreads.toString());
        log.info("Thread queued up: " + ht.toString());
    }
}