/*
* Copyright 2012, CMM, University of Queensland.
*
* This file is part of Paul.
*
* Paul is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Paul is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Paul. If not, see <http://www.gnu.org/licenses/>.
*/
package au.edu.uq.cmm.paul.grabber;
import java.io.File;
import java.util.Date;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import javax.persistence.EntityManager;
import javax.persistence.EntityManagerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import au.edu.uq.cmm.aclslib.service.ServiceException;
import au.edu.uq.cmm.aclslib.service.SimpleService;
import au.edu.uq.cmm.paul.Paul;
import au.edu.uq.cmm.paul.PaulException;
import au.edu.uq.cmm.paul.queue.QueueManager;
import au.edu.uq.cmm.paul.queue.QueueManager.DateRange;
import au.edu.uq.cmm.paul.status.Facility;
import au.edu.uq.cmm.paul.status.FacilityStatus;
import au.edu.uq.cmm.paul.status.FacilityStatusManager;
import au.edu.uq.cmm.paul.status.FacilityStatusManager.Status;
/**
* A FileGrabber service is registered as a listener for FileWatcher events
* for a particular Facility. The first event for a file generates a WorkEntry
* object that is enqueued. Subsequent events are added to that WorkEntry.
* <p>
* The FileGrabber's thread pulls WorkEntry objects from the queue and processes
* them as follows:
* <ul>
* <li>It waits until the file events stop arriving.</li>
* <li>It optionally locks the file.</li>
* <li>It captures the user / account from the ACLS status manager.</li>
* <li>It copies the file to another directory.</li>
* <li>It records the file's administrative metadata.</li>
* <li>It releases the lock.</li>
* </ul>
* <p>
* The functionality is mostly implemented in the {@link WorkEntry} class.
*
* @author scrawley
*/
public class FileGrabber extends AbstractFileGrabber implements SimpleService {
    static final Logger LOG = LoggerFactory.getLogger(FileGrabber.class);

    /** Default file settling time: 2000 milliseconds, i.e. 2 seconds. */
    static final int DEFAULT_FILE_SETTLING_TIME = 2000; // 2 seconds

    /** Work queue shared with the executor; it can be paused while catchup events accumulate. */
    private final PausableQueue<Runnable> work = new PausableQueue<Runnable>();
    private final FacilityStatusManager statusManager;
    private final File safeDirectory;
    private final EntityManagerFactory entityManagerFactory;
    private final QueueManager queueManager;
    private final boolean testMode;

    /**
     * The executor that runs the work entries; {@code null} while the service is
     * not started. It is written by {@link #startup()} / {@link #shutdown()} and read
     * by {@link #enqueueWorkEntry(WorkEntry)}, which is presumably invoked from the
     * file watcher's thread, hence {@code volatile}.
     */
    private volatile ThreadPoolExecutor executor;

    /**
     * Creates a grabber for a facility in normal (non-test) mode.
     *
     * @param services the service container supplying the managers and configuration
     * @param facility the facility whose files are to be grabbed
     */
    public FileGrabber(Paul services, Facility facility) {
        this(services, facility, false);
    }

    /**
     * Creates a grabber for a facility.
     *
     * @param services the service container supplying the managers and configuration
     * @param facility the facility whose files are to be grabbed
     * @param testMode if true, {@link #startup()} only creates the executor and
     *        skips the HWM checks and catchup processing
     * @throws PaulException if the configured capture directory does not exist
     *         or is not a directory
     */
    public FileGrabber(Paul services, Facility facility, boolean testMode) {
        super(services, facility);
        this.testMode = testMode;
        statusManager = services.getFacilityStatusManager();
        queueManager = services.getQueueManager();
        entityManagerFactory = services.getEntityManagerFactory();
        safeDirectory = new File(
                services.getConfiguration().getCaptureDirectory());
        // File.isDirectory() is false for a non-existent path, so it covers both checks.
        if (!safeDirectory.isDirectory()) {
            throw new PaulException(
                    "The grabber's safe directory doesn't exist: " +
                    safeDirectory);
        }
        // Register with the facility status last, so that a grabber whose construction
        // fails (or is still incomplete) never becomes visible through the status object.
        statusManager.getStatus(facility).setFileGrabber(this);
    }

    /**
     * @return the directory that captured files are copied into
     */
    public File getSafeDirectory() {
        return safeDirectory;
    }

    /**
     * @return the facility status manager obtained from the service container
     */
    public FacilityStatusManager getStatusManager() {
        return statusManager;
    }

    /**
     * Shuts down the executor (if there is one) and waits for it to terminate.
     * The executor reference is cleared once the wait returns; if the wait is
     * interrupted the reference is left in place, as the executor has been told
     * to shut down but may not have terminated.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    @Override
    public void shutdown() throws InterruptedException {
        ThreadPoolExecutor current = executor;
        if (current == null) {
            return;
        }
        current.shutdown();
        if (current.awaitTermination(Integer.MAX_VALUE, TimeUnit.SECONDS)) {
            LOG.info("FileGrabber's executor shut down");
        } else {
            LOG.warn("FileGrabber's executor didn't shut down cleanly");
        }
        executor = null;
    }

    /**
     * Starts the grabber. In test mode this just creates the executor. Otherwise
     * the facility's grabber HWM (falling back to the LWM) is checked against the
     * queue's date range; if they are acceptable a "catchup" tree walk is performed
     * with the work queue paused, and the queue is then reordered and resumed.
     *
     * @throws ServiceException if there is no HWM / LWM, or the HWM is later than
     *         the end of the queue's date range; the facility status is set to
     *         {@code OFF} with an explanatory message before this is thrown
     */
    @Override
    public void startup() {
        if (testMode) {
            // In the unit tests we just want to start the executor and ignore
            // the niceties of the HWM and catchup processing.
            createExecutor();
            return;
        }
        FacilityStatus status = statusManager.getStatus(getFacility());
        DateRange range = queueManager.getQueueDateRange(getFacility());
        Date lwm = status.getGrabberLWMTimestamp();
        Date hwm = status.getGrabberHWMTimestamp();
        LOG.debug("Catchup from = " + range + ", lwm = " + lwm + ", hwm = " + hwm);
        if (hwm == null) {
            hwm = lwm;
        }
        // NOTE(review): startup is refused when hwm is strictly later than the end of
        // the queue's date range. Confirm that this is the intended direction of the
        // comparison.
        if (hwm == null || (range != null && hwm.getTime() > range.getToDate().getTime())) {
            LOG.error("HWM and queue date range are inconsistent: queue hwm is " + hwm +
                    ", queue date range is " + range);
            status.setStatus(Status.OFF);
            status.setMessage("Grabber LWM / HWM need attention (see log)");
            throw new ServiceException(status.getMessage());
        }

        createExecutor();
        // We do "catchup" event generation with the executor paused, so that the worker
        // thread doesn't jump the gun and start processing work entries before all events
        // have been accumulated.
        // Note: datasets grabbed in catchup will contain all files whose names match,
        // irrespective of the file timestamps. This is the best we can do in the circumstances.
        work.pause();
        try {
            // Given the check above, this is the end of the queue's date range when
            // there is one, and the hwm otherwise.
            long start = Math.max(hwm.getTime(),
                    range == null ? Long.MIN_VALUE : range.getToDate().getTime());
            LOG.info("Commencing catchup treewalk for " + status.getLocalDirectory());
            int count = analyseTree(status.getLocalDirectory(), start, Long.MAX_VALUE);
            LOG.info("Catchup treewalk found " + count + " files");
            // This ensures that the "caught-up" datasets get ingested in roughly the order
            // that the original files were saved rather than a seemingly random order, for
            // a better Mirage user experience ...
            reorderQueue(work);
        } finally {
            // Always resume: nothing else in this class resumes the queue, so a failure
            // in the tree walk or the reordering must not leave it paused.
            LOG.info("Resuming the worker thread");
            work.resume();
        }
    }

    /**
     * Creates the executor over the work queue: no core threads, at most one
     * worker thread, and a keep-alive time of 999 seconds.
     */
    private void createExecutor() {
        executor = new ThreadPoolExecutor(0, 1, 999, TimeUnit.SECONDS, work);
    }

    /**
     * @return true if there is currently no executor
     */
    @Override
    protected boolean isShutDown() {
        return executor == null;
    }

    /**
     * Submits a work entry to the executor. The entry is dropped (with a log
     * message) if there is no executor or the executor has been shut down.
     *
     * @param entry the work entry to be executed
     * @throws RejectedExecutionException if the executor rejects the entry for a
     *         reason other than having been shut down
     */
    @Override
    protected void enqueueWorkEntry(WorkEntry entry) {
        // Work on a snapshot of the field: shutdown() may null it out between the
        // check and the use, which would otherwise cause a NullPointerException.
        ThreadPoolExecutor current = executor;
        if (current == null) {
            LOG.info("Dropping work entry as there is currently no executor");
            return;
        }
        try {
            current.execute(entry);
            LOG.debug("Enqueued work entry");
        } catch (RejectedExecutionException ex) {
            if (!current.isShutdown()) {
                throw ex;
            }
            LOG.info("Dropping work entry as the executor is shut down");
        }
    }

    /**
     * Creates a new entity manager from the factory on every call.
     * Callers are presumably responsible for closing it (verify against callers).
     *
     * @return a newly created entity manager
     */
    public EntityManager getEntityManager() {
        return entityManagerFactory.createEntityManager();
    }
}