/*
Copyright 2011-2014 Red Hat, Inc
This file is part of PressGang CCMS.
PressGang CCMS is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
PressGang CCMS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with PressGang CCMS. If not, see <http://www.gnu.org/licenses/>.
*/
package org.jboss.pressgang.ccms.server.async;
import static com.google.common.base.Strings.isNullOrEmpty;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.annotation.Resource;
import javax.ejb.DependsOn;
import javax.ejb.Lock;
import javax.ejb.LockType;
import javax.ejb.Singleton;
import javax.ejb.Startup;
import javax.ejb.TransactionManagement;
import javax.ejb.TransactionManagementType;
import javax.enterprise.context.ApplicationScoped;
import javax.management.Notification;
import javax.management.NotificationListener;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.transaction.SystemException;
import javax.transaction.UserTransaction;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import org.jboss.pressgang.ccms.model.Process;
import org.jboss.pressgang.ccms.model.ProcessStatus;
import org.jboss.pressgang.ccms.model.config.ApplicationConfig;
import org.jboss.pressgang.ccms.server.async.process.PGProcess;
import org.jboss.pressgang.ccms.server.async.process.ProcessResults;
import org.jppf.JPPFException;
import org.jppf.client.JPPFClient;
import org.jppf.client.JPPFClientConnection;
import org.jppf.client.JPPFClientConnectionStatus;
import org.jppf.client.JPPFResultCollector;
import org.jppf.client.event.ClientConnectionStatusEvent;
import org.jppf.client.event.ClientConnectionStatusListener;
import org.jppf.client.event.ClientEvent;
import org.jppf.client.event.ClientListener;
import org.jppf.client.event.SubmissionStatusEvent;
import org.jppf.client.event.SubmissionStatusListener;
import org.jppf.client.persistence.DefaultFilePersistenceManager;
import org.jppf.client.persistence.JobPersistence;
import org.jppf.client.persistence.JobPersistenceException;
import org.jppf.client.submission.SubmissionStatus;
import org.jppf.job.JobEventType;
import org.jppf.job.JobNotification;
import org.jppf.node.protocol.Task;
import org.jppf.server.job.management.DriverJobManagementMBean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@DependsOn("StartUp")
@Singleton
@Startup
@ApplicationScoped
@TransactionManagement(TransactionManagementType.BEAN)
@Lock(LockType.READ)
public class ProcessManager {
private static final Logger LOG = LoggerFactory.getLogger(ProcessManager.class);
private static final String DEFAULT_JOB_PERSISTENCE_DIR = System.getProperty("user.home") + File.separator + "pressgang" + File.separator +
"processes";
private static final String PROCESS_PREFIX = "process-";
// Use a cache so that job results aren't held forever
private Cache<String, PGProcess> currentLiveProcesses = CacheBuilder.newBuilder().build();
private Cache<String, ProcessResults> processResults = CacheBuilder.newBuilder().softValues().expireAfterWrite(2,
TimeUnit.HOURS).build();
private AtomicInteger numRunningOrQueuedProcesses = new AtomicInteger(0);
private Map<String, JobEventType> jobStates = new ConcurrentHashMap<String, JobEventType>();
private ExtendedJMXDriverConnectionWrapper jmxConnection;
private JPPFClient client;
private JobPersistence<String> persistenceManager;
private AtomicBoolean resetRequired = new AtomicBoolean(true);
@PersistenceContext(unitName="PressGangCCMS")
private EntityManager em;
@Resource
private UserTransaction transactionManager;
/**
* A JMX Notification listener, that listens for changes in job states on the server. This is used to get a more accurate job state
* since we can tell if the job is queued on the server, or being processed in a node.
*/
private final NotificationListener listener = new NotificationListener() {
@Override
public void handleNotification(final Notification notification, Object o) {
if (notification instanceof JobNotification) {
final JobNotification jobNotification = (JobNotification) notification;
final String uuid = jobNotification.getJobInformation().getJobUuid();
final JobEventType type = jobNotification.getEventType();
if (type == JobEventType.JOB_UPDATED || type == JobEventType.JOB_RETURNED) {
// ignore update and returned events
} else if (type.equals(JobEventType.JOB_ENDED)) {
jobStates.remove(uuid);
} else {
jobStates.put(uuid, type);
}
}
}
};
/**
* A JMX Notification Listener that listens to the connection and closes it when it has been disconnected
*/
private final NotificationListener connectionListener = new NotificationListener() {
@Override
public void handleNotification(final Notification notification, final Object o) {
if ("jmx.remote.connection.failed".equals(notification.getType())) {
resetRequired.set(true);
} else if ("jmx.remote.connection.closed".equals(notification.getType())) {
try {
// The connection will be lost at this point so just do basic clean up
closeJMXConnection(false);
} catch (Exception e) {
LOG.error("Failed to close the JMX connection to the DriverMBean", e);
}
}
}
};
/**
* The JPPF client doesn't handle disconnects properly with regards to the JMX connection to the DriverMBean, this lets us know we
* have got the connection back so that we can restart the JMX connection properly.
*/
private final ClientListener clientListener = new ClientListener() {
private final ClientConnectionStatusListener connectionListener = new ClientConnectionStatusListener() {
@Override
public void statusChanged(ClientConnectionStatusEvent event) {
final JPPFClientConnection conn = (JPPFClientConnection) event.getClientConnectionStatusHandler();
if (conn.getStatus() == JPPFClientConnectionStatus.ACTIVE) {
try {
initDriverNotifications();
} catch (Exception e) {
LOG.error("Failed to init the driver notification listener", e);
}
}
}
};
@Override
public void newConnection(ClientEvent event) {
event.getConnection().addClientConnectionStatusListener(connectionListener);
}
@Override
public void connectionFailed(ClientEvent event) {
event.getConnection().removeClientConnectionStatusListener(connectionListener);
}
};
@PostConstruct
protected void init() throws Exception {
LOG.info("Starting the Process Manager");
client = new JPPFClient(clientListener);
// Wait for the client to connect properly
while (!client.hasAvailableConnection()) {
Thread.sleep(1000L);
}
// Get the connection to the server
if (client.getConfig().getBoolean("jppf.remote.execution.enabled", true)) {
// Get the JMX Connection
final Boolean sslEnabled = client.getConfig().getBoolean("jppf.management.ssl.enabled", false);
final String host = client.getConfig().getString("jppf.management.host", "localhost");
final Integer port = client.getConfig().getInt("jppf.management.port", 11198);
jmxConnection = new ExtendedJMXDriverConnectionWrapper(host, port, sslEnabled);
initDriverNotifications();
} else {
resetRequired.set(false);
}
persistenceManager = createPersistenceManager();
// Load and start any saved processes
loadAndStartProcesses();
}
@PreDestroy
protected void destroy() throws Exception {
// Stop any queued processes and wait for running processes to complete
if (numRunningOrQueuedProcesses.get() > 0) {
LOG.info("Stopping and saving all queued or executing processes");
for (final Map.Entry<String, PGProcess> entry : currentLiveProcesses.asMap().entrySet()) {
/*
* Note: We don't wait for processes to try and finish, as by the time the destroy method is closed the DataSource will
* have been closed. This means that we won't be able to save the state so we might as well just kill it straight away.
*/
if (getProcessStatus(entry.getKey(), false).ordinal() < ProcessStatus.COMPLETED.ordinal()) {
saveAndKillProcess(entry.getKey(), persistenceManager);
}
}
}
persistenceManager.close();
// Kill the connection and cleanup
LOG.info("Destroying the Process Manager");
try {
closeJMXConnection(true);
} catch (Exception e) {
LOG.error("Failed to close the JMX connection to the DriverMBean", e);
}
jmxConnection = null;
client.close();
client = null;
}
protected JPPFClient getClient() {
return client;
}
@Lock(LockType.WRITE)
protected synchronized void initDriverNotifications() throws Exception {
// We only need to proceed if a reset is required (ie due to a dropout)
if (!resetRequired.get()) return;
if (jmxConnection != null && !jmxConnection.isConnected()) {
jmxConnection.connectAndWait(-1);
// subscribe to all notifications from the MBean
jmxConnection.addNotificationListener(DriverJobManagementMBean.MBEAN_NAME, listener);
jmxConnection.addConnectionNotificationListener(connectionListener);
resetRequired.set(false);
}
}
@Lock(LockType.WRITE)
protected synchronized void closeJMXConnection(boolean removeListeners) throws Exception {
if (jmxConnection != null && jmxConnection.isConnected()) {
if (removeListeners) {
jmxConnection.removeNotificationListener(DriverJobManagementMBean.MBEAN_NAME, listener);
jmxConnection.removeConnectionNotificationListener(connectionListener);
}
jmxConnection.close();
}
}
/**
* Loads any saved processes and starts them.
*/
protected void loadAndStartProcesses() {
try {
// Load each job that was stopped when the process manager was destroyed
final Collection<String> keys = persistenceManager.allKeys();
if (!keys.isEmpty()) {
LOG.info("Loading and starting saved processes");
// Load each process
for (final String key : keys) {
final PGProcess process = (PGProcess) persistenceManager.loadJob(key);
final Process entity = em.find(Process.class, process.getUuid());
if (entity != null) {
// Init the entity/process
process.setDbEntity(entity);
entity.setName(process.getName());
startProcessInternal(process);
} else {
// This shouldn't happen but incase it does print a warning
LOG.warn("No process exists in the database for " + process.getUuid());
}
}
}
} catch (Exception e) {
LOG.error("Failed to load saved processes", e);
}
persistenceManager.close();
}
protected JobPersistence<String> createPersistenceManager() {
// Load the directory from file
final String processDir = ApplicationConfig.getInstance().getProcessDirectory();
File dir;
if (!isNullOrEmpty(processDir)) {
final File customDir = new File(processDir);
customDir.mkdirs();
if (customDir.isDirectory()) {
dir = customDir;
} else {
dir = new File(DEFAULT_JOB_PERSISTENCE_DIR);
}
} else {
dir = new File(DEFAULT_JOB_PERSISTENCE_DIR);
}
LOG.info("Storing processes in " + dir.getAbsolutePath());
return new DefaultFilePersistenceManager(dir, PROCESS_PREFIX, DefaultFilePersistenceManager.DEFAULT_EXT);
}
/**
* Gets all the current processes that are stored in memory.
*
* @return A list of {@link PGProcess} objects which contain the information about the process.
*/
public Collection<PGProcess> getAllLiveProcesses() {
return Collections.unmodifiableCollection(currentLiveProcesses.asMap().values());
}
/**
* Gets all the current process ids that are stored in memory.
*
* @return A set of UUIDs for all the processes stored in memory.
*/
public Set<String> getAllLiveProcessIds() {
return Collections.unmodifiableSet(currentLiveProcesses.asMap().keySet());
}
/**
* Start a process by submitting it to the JPPF server for processing.
*
* @param process The process to be started.
* @return The UUID of the started process.
* @throws JPPFException Thrown when the process fails to be sent to the JPPF server.
*/
public String startProcess(final PGProcess process) throws JPPFException {
try {
// Save the job incase the server crashes
persistenceManager.storeJob(persistenceManager.computeKey(process), process, Collections.EMPTY_LIST);
// Set the start date on the process
if (process.getDBEntity().getStartTime() != null) {
process.getDBEntity().setStartTime(new Date());
}
startProcessInternal(process);
} catch (Exception e) {
LOG.error("Failed to start process " + process.getName(), e);
}
return process.getUuid();
}
protected void startProcessInternal(final PGProcess process) throws Exception {
final JPPFResultCollector resultCollector = new JPPFResultCollector(process);
process.setResultListener(resultCollector);
resultCollector.addSubmissionStatusListener(new SubmissionStatusListener() {
@Override
public void submissionStatusChanged(SubmissionStatusEvent event) {
// Ignore FAILED events as they probably mean there was a connection issue, so it will try to re-submit later
if (event.getStatus() == SubmissionStatus.COMPLETE) {
processEnded(process);
}
}
});
// Submit the job to the JPPF server
getClient().submitJob(process);
currentLiveProcesses.put(process.getUuid(), process);
numRunningOrQueuedProcesses.getAndIncrement();
}
/**
* Updates the processes database entity when the job has been completed.
*
* @param process The process to be marked as finished and saved to the database.
*/
@Lock(LockType.WRITE)
protected synchronized void processEnded(final PGProcess process) {
// Make sure the process is actually running
if (currentLiveProcesses.getIfPresent(process.getUuid()) == null) {
return;
}
try {
// Start a Transaction
transactionManager.begin();
// Join the transaction we just started
em.joinTransaction();
final Process entity = process.getDBEntity();
entity.setEndTime(new Date());
entity.setLogs(process.getLogs());
// Make sure we have the latest status
if (entity.getStatus() != ProcessStatus.CANCELLED) {
entity.setStatus(getProcessStatus(process));
}
em.merge(entity);
// Commit the changes
transactionManager.commit();
} catch (final Exception ex) {
LOG.error("Failed to end process " + process.getUuid(), ex);
try {
if (transactionManager != null) {
transactionManager.rollback();
}
} catch (final SystemException ex2) {
// nothing to do here
LOG.debug("Failed to rollback the transaction", ex2);
}
}
// Remove the process from the live processes
currentLiveProcesses.invalidate(process.getUuid());
// Decrease the number of running processes
numRunningOrQueuedProcesses.decrementAndGet();
// Cache the results
processResults.put(process.getUuid(), new ProcessResults(process.getResults()));
// Delete the job now that it has been completed
try {
persistenceManager.deleteJob(persistenceManager.computeKey(process));
} catch (JobPersistenceException e) {
LOG.debug("Failed to delete process " + process.getUuid() + " from the persistence store", e);
}
}
/**
* Cancel a process from running.
* <br /><br />
* Note: Once a job has been submitted to a node for processing there isn't any way to cancel it.
*
* @param processId The UUID of the process to be cancelled.
*/
public boolean cancelProcess(final String processId) {
return cancelProcess(processId, false);
}
/**
* Cancel a process from running.
* <br /><br />
* Note: Once a job has been submitted to a node for processing there isn't any way to cancel it.
*
* @param processId The UUID of the process to be cancelled.
* @param force If the process should be forced to cancel, even if it is currently executing.
*/
public boolean cancelProcess(final String processId, boolean force) {
final PGProcess process = currentLiveProcesses.getIfPresent(processId);
if (process != null) {
// Check that the job isn't currently executing, as killing it when it is executing could be very dangerous
final ProcessStatus status = getProcessStatus(processId, false);
if ((force && status == ProcessStatus.EXECUTING) || status == ProcessStatus.QUEUED || status == ProcessStatus.PENDING) {
try {
process.setStatus(ProcessStatus.CANCELLED);
if (jmxConnection != null && jmxConnection.isConnected()) {
jmxConnection.cancelJob(processId);
}
getClient().cancelJob(processId);
processEnded(process);
return true;
} catch (Exception e) {
LOG.error("Failed to cancel process " + processId, e);
}
} else {
throw new IllegalStateException("The process is not in a state that can be cancelled");
}
}
return false;
}
/**
* Get the live process for a specific uuid.
*
* @param processId The Processes UUID.
* @return Null if no process exists with the specified UUID, otherwise a {@link PGProcess} representing the live process will be
* returned.
*/
public PGProcess getLiveProcess(final String processId) {
PGProcess process = currentLiveProcesses.getIfPresent(processId);
if (process != null) {
// Update the state
if (process.getStatus() != ProcessStatus.CANCELLED) {
process.setStatus(getProcessStatus(process.getUuid(), false));
}
}
return process;
}
/**
* Get the process for a specific uuid.
*
* @param processId The Processes UUID.
* @return Null if no process exists with the specified UUID, otherwise a {@link Process} representing the process will be returned.
*/
public Process getProcess(final String processId) {
final PGProcess liveProcess = getLiveProcess(processId);
if (liveProcess != null) {
return liveProcess.getDBEntity();
} else {
return em.find(Process.class, processId);
}
}
/**
* Get the results of a live process.
* <br /><br />
* Note: Process Results are not persisted to the Database, so once a process has been removed from memory the results will be lost.
*
* @param processId The UUID of the process to get results for.
* @return A {@link ProcessResults} that contains all the information about the results of the process,
* or null if no process with the specified UUID exists.
*/
public ProcessResults getProcessResults(final String processId) {
return processResults.getIfPresent(processId);
}
/**
* Get the status of a process.
*
* @param processId The UUID of the process to get the status for.
* @return A {@link ProcessStatus} that contains the state of the process.
*/
public ProcessStatus getProcessStatus(final String processId) {
return getProcessStatus(processId, true);
}
/**
* Get the status of a process.
*
* @param processId The UUID of the process to get the status for.
* @param loadFromDB Whether the status should be loaded from a Database Entity (ie it doesn't exist in memory anymore).
* @return A {@link ProcessStatus} that contains the state of the process.
*/
public ProcessStatus getProcessStatus(final String processId, boolean loadFromDB) {
final PGProcess process = currentLiveProcesses.getIfPresent(processId);
if (process != null) {
return getProcessStatus(process);
} else if (loadFromDB) {
// The process isn't live, so fetch the status from memory
final Process entity = em.find(Process.class, processId);
if (entity == null) {
return null;
} else {
return entity.getStatus();
}
} else {
return null;
}
}
/**
* Get the status of a process.
*
* @param process The process to get the status for.
* @return A {@link ProcessStatus} that contains the state of the process.
*/
protected ProcessStatus getProcessStatus(final PGProcess process) {
final JPPFResultCollector resultCollector = (JPPFResultCollector) process.getResultListener();
final SubmissionStatus status = resultCollector.getStatus();
// If the status is executing then we need to check the job states (which come from the server) and see if it's actually
// executing on the server
if (status == SubmissionStatus.EXECUTING) {
if (jobStates.containsKey(process.getUuid())) {
final JobEventType eventType = jobStates.get(process.getUuid());
if (eventType == JobEventType.JOB_QUEUED) {
return ProcessStatus.QUEUED;
}
}
}
switch (status) {
case SUBMITTED:
return ProcessStatus.PENDING;
case PENDING:
return ProcessStatus.QUEUED;
case FAILED:
return ProcessStatus.FAILED;
case COMPLETE:
if (process.wasSuccessful()) {
return ProcessStatus.COMPLETED;
} else {
return ProcessStatus.FAILED;
}
case EXECUTING:
return ProcessStatus.EXECUTING;
default:
return null;
}
}
/**
* Stops a process and stores its state using the provided persistence manager.
*
* @param processId The UUID of the process to be cancelled.
* @param persistenceManager The process persistence manager, that stores process state.
*/
public boolean saveAndKillProcess(final String processId, final JobPersistence<String> persistenceManager) {
final PGProcess process = currentLiveProcesses.getIfPresent(processId);
if (process != null) {
if (getProcessStatus(processId, false).ordinal() < ProcessStatus.COMPLETED.ordinal()) {
process.setDbEntity(null);
try {
// Update the task with any completed results
final Collection<Task<?>> results = process.getResults().getAllResults();
persistenceManager.storeJob(persistenceManager.computeKey(process), process, new ArrayList<Task<?>>(results));
/*
* Kill the process
*
* Note: Ensure to remove the process from the live processes, first otherwise the processEnded() method will be
* called when we cancel the job.
*/
currentLiveProcesses.invalidate(processId);
numRunningOrQueuedProcesses.decrementAndGet();
if (jmxConnection != null && jmxConnection.isConnected()) {
jmxConnection.cancelJob(processId);
}
getClient().cancelJob(processId);
return true;
} catch (Exception e) {
LOG.error("Failed to save and kill process " + processId, e);
}
}
}
return false;
}
}