/* * Copyright (c) 2010-2013 Evolveum * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.evolveum.midpoint.task.quartzimpl.execution; import com.evolveum.midpoint.prism.PrismObject; import com.evolveum.midpoint.schema.result.OperationResult; import com.evolveum.midpoint.schema.result.OperationResultStatus; import com.evolveum.midpoint.task.api.*; import com.evolveum.midpoint.task.quartzimpl.*; import com.evolveum.midpoint.task.quartzimpl.cluster.ClusterStatusInformation; import com.evolveum.midpoint.util.exception.ObjectNotFoundException; import com.evolveum.midpoint.util.exception.SchemaException; import com.evolveum.midpoint.util.logging.LoggingUtils; import com.evolveum.midpoint.util.logging.Trace; import com.evolveum.midpoint.util.logging.TraceManager; import com.evolveum.midpoint.xml.ns._public.common.common_3.NodeType; import org.jetbrains.annotations.NotNull; import org.quartz.*; import java.util.*; /** * Manages task threads (clusterwide). Concerned mainly with stopping threads and querying their state. * * @author Pavol Mederly */ public class ExecutionManager { private static final transient Trace LOGGER = TraceManager.getTrace(ExecutionManager.class); private static final String DOT_CLASS = ExecutionManager.class.getName() + "."; // the following values would be (some day) part of TaskManagerConfiguration private static final long WAIT_FOR_COMPLETION_INITIAL = 100; // initial waiting time (for task or tasks to be finished); it is doubled at each step private static final long WAIT_FOR_COMPLETION_MAX = 1600; // max waiting time (in one step) for task(s) to be finished private static final long INTERRUPT_TASK_THREAD_AFTER = 5000; // how long to wait before interrupting task thread (if UseThreadInterrupt = 'whenNecessary') private static final long ALLOWED_CLUSTER_STATE_INFORMATION_AGE = 1500L; private TaskManagerQuartzImpl taskManager; private LocalNodeManager localNodeManager; private RemoteNodesManager remoteNodesManager; private TaskSynchronizer taskSynchronizer; private Scheduler quartzScheduler; private ClusterStatusInformation lastClusterStatusInformation = null; public ExecutionManager(TaskManagerQuartzImpl taskManager) { this.taskManager = taskManager; this.localNodeManager = new LocalNodeManager(taskManager); this.remoteNodesManager = new RemoteNodesManager(taskManager); this.taskSynchronizer = new TaskSynchronizer(taskManager); } /* * ==================== NODE-LEVEL METHODS (WITH EFFECTS) ==================== */ public void stopScheduler(String nodeIdentifier, OperationResult parentResult) { OperationResult result = parentResult.createSubresult(this.getClass().getName() + ".stopScheduler"); result.addParam("nodeIdentifier", nodeIdentifier); if (isCurrentNode(nodeIdentifier)) { localNodeManager.stopScheduler(result); } else { remoteNodesManager.stopRemoteScheduler(nodeIdentifier, result); } if (result.isUnknown()) { result.computeStatus(); } } public boolean stopSchedulersAndTasks(Collection<String> nodeIdentifiers, long timeToWait, OperationResult parentResult) { OperationResult result = parentResult.createSubresult(this.getClass().getName() + ".stopSchedulersAndTasks"); result.addCollectionOfSerializablesAsParam("nodeList", nodeIdentifiers); result.addParam("timeToWait", timeToWait); LOGGER.info("Stopping schedulers and tasks on nodes: {}, waiting {} ms for task(s) shutdown.", nodeIdentifiers, timeToWait); for (String nodeIdentifier : nodeIdentifiers) { stopScheduler(nodeIdentifier, result); } ClusterStatusInformation csi = getClusterStatusInformation(true, false, result); Set<ClusterStatusInformation.TaskInfo> taskInfoList = csi.getTasksOnNodes(nodeIdentifiers); LOGGER.debug("{} task(s) found on nodes that are going down, stopping them.", taskInfoList.size()); Set<Task> tasks = new HashSet<>(); for (ClusterStatusInformation.TaskInfo taskInfo : taskInfoList) { try { tasks.add(taskManager.getTask(taskInfo.getOid(), result)); } catch (ObjectNotFoundException e) { LoggingUtils.logException(LOGGER, "Task {} that was about to be stopped does not exist. Ignoring it.", e, taskInfo.getOid()); } catch (SchemaException e) { LoggingUtils.logUnexpectedException(LOGGER, "Task {} that was about to be stopped cannot be read due to schema problem. Ignoring it.", e, taskInfo.getOid()); } } boolean stopped = stopTasksRunAndWait(tasks, csi, timeToWait, true, result); LOGGER.trace("All tasks stopped = " + stopped); result.recordSuccessIfUnknown(); return stopped; } public void startScheduler(String nodeIdentifier, OperationResult parentResult) { OperationResult result = parentResult.createSubresult(this.getClass().getName() + ".startScheduler"); result.addParam("nodeIdentifier", nodeIdentifier); if (isCurrentNode(nodeIdentifier)) { localNodeManager.startScheduler(result); } else { remoteNodesManager.startRemoteScheduler(nodeIdentifier, result); } } public boolean isLocalNodeRunning() { return localNodeManager.isRunning(); } /* * ==================== NODE-LEVEL METHODS (QUERIES) ==================== */ public ClusterStatusInformation getClusterStatusInformation(boolean clusterwide, boolean allowCached, OperationResult parentResult) { OperationResult result = parentResult.createSubresult(ExecutionManager.class.getName() + ".getClusterStatusInformation"); result.addParam("clusterwide", clusterwide); if (allowCached && clusterwide && lastClusterStatusInformation != null && lastClusterStatusInformation.isFresh(ALLOWED_CLUSTER_STATE_INFORMATION_AGE)) { result.recordSuccess(); return lastClusterStatusInformation; } ClusterStatusInformation retval = new ClusterStatusInformation(); if (clusterwide) { for (PrismObject<NodeType> node : taskManager.getClusterManager().getAllNodes(result)) { addNodeAndTaskInformation(retval, node, result); } } else { addNodeAndTaskInformation(retval, taskManager.getClusterManager().getNodePrism(), result); } if (LOGGER.isDebugEnabled()) { LOGGER.debug("cluster state information = {}", retval.dump()); } if (clusterwide) { lastClusterStatusInformation = retval; } result.recomputeStatus(); return retval; } private void addNodeAndTaskInformation(ClusterStatusInformation info, PrismObject<NodeType> node, OperationResult parentResult) { OperationResult result = parentResult.createSubresult(ExecutionManager.class.getName() + ".addNodeAndTaskInformation"); result.addParam("node", node); if (isCurrentNode(node)) { LOGGER.trace("Getting node and task info from the current node ({})", node.asObjectable().getNodeIdentifier()); List<ClusterStatusInformation.TaskInfo> taskInfoList = new ArrayList<>(); Set<Task> tasks = localNodeManager.getLocallyRunningTasks(result); for (Task task : tasks) { taskInfoList.add(new ClusterStatusInformation.TaskInfo(task.getOid())); } node.asObjectable().setExecutionStatus(localNodeManager.getLocalNodeExecutionStatus()); node.asObjectable().setErrorStatus(taskManager.getLocalNodeErrorStatus()); info.addNodeAndTaskInfo(node.asObjectable(), taskInfoList); } else { // if remote LOGGER.debug("Getting running task info from remote node ({}, {})", node.asObjectable().getNodeIdentifier(), node.asObjectable().getHostname()); remoteNodesManager.addNodeStatusFromRemoteNode(info, node, result); } result.recordSuccessIfUnknown(); } /* * ==================== TASK-LEVEL METHODS ==================== * * ---------- STOP TASK AND WAIT METHODS ---------- */ /** * Signals all running tasks that they have to finish. Waits for their completion. * * Terminology: STOP TASK means "tell the task to stop" (using any appropriate means) * * @param timeToWait How long to wait (milliseconds); 0 means forever. * @return true if all the tasks finished within time allotted, false otherwise. */ public boolean stopAllTasksOnThisNodeAndWait(long timeToWait, OperationResult parentResult) { OperationResult result = parentResult.createSubresult(DOT_CLASS + "stopAllTasksOnThisNodeAndWait"); result.addParam("timeToWait", timeToWait); LOGGER.info("Stopping all tasks on local node"); Set<Task> tasks = localNodeManager.getLocallyRunningTasks(result); boolean retval = stopTasksRunAndWait(tasks, null, timeToWait, false, result); result.computeStatus(); return retval; } /** * Stops given set of tasks and waits for their completion. * * @param tasks Tasks to stop. * @param csi Cluster status information. Must be relatively current, i.e. got AFTER a moment preventing new tasks * to be scheduled (e.g. when suspending tasks, CSI has to be taken after tasks have been unscheduled; * when stopping schedulers, CSI has to be taken after schedulers were stopped). May be null; in that case * the method will query nodes themselves. * @param waitTime How long to wait for task stop. Value less than zero means no wait will be performed. * @param clusterwide If false, only tasks running on local node will be stopped. * @return * * Note: does not throw exceptions: it tries hard to stop the tasks, if something breaks, it just return 'false' */ public boolean stopTasksRunAndWait(Collection<Task> tasks, ClusterStatusInformation csi, long waitTime, boolean clusterwide, OperationResult parentResult) { OperationResult result = parentResult.createSubresult(DOT_CLASS + "stopTasksRunAndWait"); result.addArbitraryCollectionAsParam("tasks", tasks); result.addParam("waitTime", waitTime); result.addParam("clusterwide", clusterwide); if (tasks.isEmpty()) { result.recordSuccess(); return true; } LOGGER.trace("Stopping tasks " + tasks + " (waiting " + waitTime + " msec); clusterwide = " + clusterwide); if (clusterwide && csi == null) { csi = getClusterStatusInformation(true, false, result); } for (Task task : tasks) stopTaskRun(task, csi, clusterwide, result); boolean stopped = false; if (waitTime >= 0) { stopped = waitForTaskRunCompletion(tasks, waitTime, clusterwide, result); } result.recordSuccessIfUnknown(); return stopped; } // boolean stopTaskAndWait(Task task, long waitTime, boolean clusterwide) { // ArrayList<Task> list = new ArrayList<Task>(1); // list.add(task); // return stopTasksRunAndWait(list, waitTime, clusterwide); // } // returns true if tasks are down private boolean waitForTaskRunCompletion(Collection<Task> tasks, long maxWaitTime, boolean clusterwide, OperationResult parentResult) { OperationResult result = parentResult.createSubresult(ExecutionManager.class.getName() + ".waitForTaskRunCompletion"); result.addArbitraryCollectionAsParam("tasks", tasks); result.addParam("maxWaitTime", maxWaitTime); result.addParam("clusterwide", clusterwide); boolean interruptExecuted = false; LOGGER.trace("Waiting for task(s) " + tasks + " to complete, at most for " + maxWaitTime + " ms."); Set<String> oids = new HashSet<>(); for (Task t : tasks) if (t.getOid() != null) oids.add(t.getOid()); long singleWait = WAIT_FOR_COMPLETION_INITIAL; long started = System.currentTimeMillis(); for(;;) { boolean isAnythingExecuting = false; ClusterStatusInformation rtinfo = getClusterStatusInformation(clusterwide, false, result); for (String oid : oids) { if (rtinfo.findNodeInfoForTask(oid) != null) { isAnythingExecuting = true; break; } } if (!isAnythingExecuting) { String message = "The task(s), for which we have been waiting for, have finished."; LOGGER.trace(message); result.recordStatus(OperationResultStatus.SUCCESS, message); return true; } if (maxWaitTime > 0 && System.currentTimeMillis() - started >= maxWaitTime) { String message = "Wait time has elapsed without (some of) tasks being stopped. Finishing waiting for task(s) completion."; LOGGER.trace(message); result.recordWarning(message); return false; } if (getConfiguration().getUseThreadInterrupt() == UseThreadInterrupt.WHEN_NECESSARY && !interruptExecuted && System.currentTimeMillis() - started >= INTERRUPT_TASK_THREAD_AFTER) { LOGGER.info("Some tasks have not completed yet, sending their threads the 'interrupt' signal (if running locally)."); for (String oid : oids) { localNodeManager.interruptLocalTaskThread(oid); } interruptExecuted = true; } LOGGER.trace("Some tasks have not completed yet, waiting for " + singleWait + " ms (max: " + maxWaitTime + ")"); try { Thread.sleep(singleWait); } catch (InterruptedException e) { LOGGER.trace("Waiting interrupted" + e); } if (singleWait < WAIT_FOR_COMPLETION_MAX) singleWait *= 2; } } // if clusterwide, csi must not be null // on entry we do not know if the task is really running private void stopTaskRun(Task task, ClusterStatusInformation csi, boolean clusterwide, OperationResult parentResult) { String oid = task.getOid(); LOGGER.trace("stopTaskRun: task = {}, csi = {}, clusterwide = {}", task, csi, clusterwide); if (!clusterwide) { stopLocalTaskIfRunning(oid, parentResult); } else { NodeType node = csi.findNodeInfoForTask(task.getOid()); if (node != null) { if (taskManager.getClusterManager().isCurrentNode(node.getNodeIdentifier())) { stopLocalTaskIfRunning(oid, parentResult); } else { remoteNodesManager.stopRemoteTaskRun(task.getOid(), node, parentResult); } } } } private void stopLocalTaskIfRunning(String oid, OperationResult parentResult) { if (localNodeManager.isTaskThreadActiveLocally(oid)) { localNodeManager.stopLocalTaskRun(oid, parentResult); } } /* * ---------- TASK SCHEDULING METHODS ---------- */ public void unscheduleTask(Task task, OperationResult parentResult) { OperationResult result = parentResult.createSubresult(DOT_CLASS + "unscheduleTask"); //TriggerKey triggerKey = TaskQuartzImplUtil.createTriggerKeyForTask(task); JobKey jobKey = TaskQuartzImplUtil.createJobKeyForTask(task); try { for (Trigger trigger : quartzScheduler.getTriggersOfJob(jobKey)) { quartzScheduler.unscheduleJob(trigger.getKey()); } result.recordSuccess(); } catch (SchedulerException e) { LoggingUtils.logUnexpectedException(LOGGER, "Cannot unschedule task {}", e, task); result.recordFatalError("Cannot unschedule task " + task, e); } } /** * Removes task from quartz. On error, creates a subresult in parent OperationResult. (On success, does nothing to keep ORs from becoming huge.) * * @param oid Task OID * @return true if the job was successfully removed. */ public boolean removeTaskFromQuartz(String oid, OperationResult parentResult) { JobKey jobKey = TaskQuartzImplUtil.createJobKeyForTaskOid(oid); try { quartzScheduler.deleteJob(jobKey); return true; } catch (SchedulerException e) { String message = "Cannot delete task " + oid + " from Quartz job store"; LoggingUtils.logUnexpectedException(LOGGER, message, e); parentResult.createSubresult(DOT_CLASS + "removeTaskFromQuartz").recordFatalError(message, e); return false; } } /* * ==================== THREAD QUERY METHODS ==================== */ // boolean isTaskThreadActiveClusterwide(String oid) { // ClusterStatusInformation info = getClusterStatusInformation(true); // return info.findNodeInfoForTask(oid) != null; // } /* * Various auxiliary methods */ // private OperationResult createOperationResult(String methodName) { // return new OperationResult(ExecutionManager.class.getName() + "." + methodName); // } // // private ClusterManager getClusterManager() { // return taskManager.getClusterManager(); // } void setQuartzScheduler(Scheduler quartzScheduler) { this.quartzScheduler = quartzScheduler; } public Scheduler getQuartzScheduler() { return quartzScheduler; } private boolean isCurrentNode(String nodeIdentifier) { return taskManager.getClusterManager().isCurrentNode(nodeIdentifier); } private boolean isCurrentNode(PrismObject<NodeType> node) { return taskManager.isCurrentNode(node); } private TaskManagerConfiguration getConfiguration() { return taskManager.getConfiguration(); } public void shutdownLocalScheduler() throws TaskManagerException { localNodeManager.shutdownScheduler(); } /** * Robust version of 'shutdownScheduler', ignores exceptions, shuts down the scheduler only if not shutdown already. * Used for emergency situations, e.g. node error. */ public void shutdownLocalSchedulerChecked() { try { localNodeManager.shutdownScheduler(); } catch (TaskManagerException e) { LoggingUtils.logUnexpectedException(LOGGER, "Cannot shutdown scheduler.", e); } } public boolean stopSchedulerAndTasksLocally(long timeToWait, OperationResult result) { return localNodeManager.stopSchedulerAndTasks(timeToWait, result); } public void synchronizeTask(TaskQuartzImpl task, OperationResult result) { taskSynchronizer.synchronizeTask(task, result); } public TaskManagerQuartzImpl.NextStartTimes getNextStartTimes(@NotNull String oid, boolean retrieveNextRunStartTime, boolean retrieveRetryTime, OperationResult result) { try { if (retrieveNextRunStartTime && !retrieveRetryTime) { Trigger standardTrigger = quartzScheduler.getTrigger(TaskQuartzImplUtil.createTriggerKeyForTaskOid(oid)); result.recordSuccess(); return new TaskManagerQuartzImpl.NextStartTimes(standardTrigger, null); } else if (retrieveNextRunStartTime || retrieveRetryTime) { List<? extends Trigger> triggers = quartzScheduler .getTriggersOfJob(TaskQuartzImplUtil.createJobKeyForTaskOid(oid)); Trigger standardTrigger = null; Trigger nextRetryTrigger = null; for (Trigger trigger : triggers) { if (oid.equals(trigger.getKey().getName())) { standardTrigger = trigger; } else { if (willOccur(trigger) && (nextRetryTrigger == null || isBefore(trigger, nextRetryTrigger))) { nextRetryTrigger = trigger; } } } result.recordSuccess(); return new TaskManagerQuartzImpl.NextStartTimes( retrieveNextRunStartTime ? standardTrigger : null, nextRetryTrigger); // retrieveRetryTime is always true here } else { return new TaskManagerQuartzImpl.NextStartTimes(null, null); // shouldn't occur } } catch (SchedulerException e) { String message = "Cannot determine next start times for task with OID " + oid; LoggingUtils.logUnexpectedException(LOGGER, message, e); result.recordFatalError(message, e); return null; } } // null means "never" private boolean isBefore(Trigger t1, Trigger t2) { Date date1 = t1.getNextFireTime(); Date date2 = t2.getNextFireTime(); return date1 != null && (date2 == null || date1.getTime() < date2.getTime()); } private boolean willOccur(Trigger t) { return t.getNextFireTime() != null && t.getNextFireTime().getTime() >= System.currentTimeMillis(); } public boolean synchronizeJobStores(OperationResult result) { return taskSynchronizer.synchronizeJobStores(result); } public Set<Task> getLocallyRunningTasks(OperationResult parentResult) { return localNodeManager.getLocallyRunningTasks(parentResult); } public void initializeLocalScheduler() throws TaskManagerInitializationException { localNodeManager.initializeScheduler(); } public void reRunClosedTask(Task task, OperationResult parentResult) throws SchemaException, ObjectNotFoundException { OperationResult result = parentResult.createSubresult(DOT_CLASS + "reRunClosedTask"); if (task.getExecutionStatus() != TaskExecutionStatus.CLOSED) { String message = "Task " + task + " cannot be re-run, because it is not in CLOSED state."; result.recordFatalError(message); LOGGER.error(message); return; } if (!task.isSingle()) { String message = "Closed recurring task " + task + " cannot be re-run, because this operation is not available for recurring tasks. Please use RESUME instead."; result.recordWarning(message); LOGGER.warn(message); return; } taskSynchronizer.synchronizeTask((TaskQuartzImpl) task, result); // this should remove any triggers ((TaskQuartzImpl) task).setRecreateQuartzTrigger(true); ((TaskQuartzImpl) task).setExecutionStatusImmediate(TaskExecutionStatus.RUNNABLE, result); // this will create the trigger result.recordSuccess(); // note that if scheduling (not executes before/after) prevents the task from running, it will not run! } public void scheduleRunnableTaskNow(Task task, OperationResult parentResult) { OperationResult result = parentResult.createSubresult(DOT_CLASS + "scheduleRunnableTaskNow"); if (task.getExecutionStatus() != TaskExecutionStatus.RUNNABLE) { String message = "Task " + task + " cannot be scheduled, because it is not in RUNNABLE state."; result.recordFatalError(message); LOGGER.error(message); return; } // for loosely-bound, recurring, interval-based tasks we reschedule the task in order to start immediately // and then continue after specified interval (i.e. NOT continue according to original schedule) - MID-1410 if (!getConfiguration().isRunNowKeepsOriginalSchedule() && task.isLooselyBound() && task.isCycle() && task.getSchedule() != null && task.getSchedule().getInterval() != null && task.getSchedule().getInterval() != 0) { LOGGER.trace("'Run now' for task invoked: unscheduling and rescheduling it; task = {}", task); unscheduleTask(task, result); ((TaskQuartzImpl) task).setRecreateQuartzTrigger(true); synchronizeTask((TaskQuartzImpl) task, result); result.computeStatus(); return; } // otherwise, we simply add another trigger to this task Trigger now = TaskQuartzImplUtil.createTriggerNowForTask(task); try { quartzScheduler.scheduleJob(now); result.recordSuccess(); } catch (SchedulerException e) { String message = "Task " + task + " cannot be scheduled: " + e.getMessage(); result.recordFatalError(message, e); LOGGER.error(message); } } // nodeId should not be the current node void redirectTaskToNode(@NotNull Task task, @NotNull NodeType node, @NotNull OperationResult result) { remoteNodesManager.redirectTaskToNode(task, node, result); } public void pauseTaskJob(Task task, OperationResult parentResult) { OperationResult result = parentResult.createSubresult(DOT_CLASS + "pauseTaskJob"); JobKey jobKey = TaskQuartzImplUtil.createJobKeyForTask(task); TriggerKey standardTriggerKey = TaskQuartzImplUtil.createTriggerKeyForTask(task); try { for (Trigger trigger : quartzScheduler.getTriggersOfJob(jobKey)) { if (standardTriggerKey.equals(trigger.getKey())) { LOGGER.trace("Suspending {}: pausing standard trigger {}", task, trigger); quartzScheduler.pauseTrigger(trigger.getKey()); } else { LOGGER.trace("Suspending {}: deleting non-standard trigger {}", task, trigger); quartzScheduler.unscheduleJob(trigger.getKey()); } } result.recordSuccess(); } catch (SchedulerException e) { LoggingUtils.logUnexpectedException(LOGGER, "Cannot pause job for task {}", e, task); result.recordFatalError("Cannot pause job for task " + task, e); } } }