/* * RHQ Management Platform * Copyright (C) 2005-2014 Red Hat, Inc. * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation version 2 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ package org.rhq.core.pc.operation; import java.util.EnumSet; import java.util.Set; import java.util.Timer; import java.util.TimerTask; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.rhq.core.clientapi.agent.PluginContainerException; import org.rhq.core.clientapi.agent.operation.CancelResults; import org.rhq.core.clientapi.agent.operation.CancelResults.InterruptedState; import org.rhq.core.clientapi.agent.operation.OperationAgentService; import org.rhq.core.clientapi.server.operation.OperationServerService; import org.rhq.core.domain.configuration.Configuration; import org.rhq.core.domain.configuration.ConfigurationUtility; import org.rhq.core.domain.configuration.PropertySimple; import org.rhq.core.domain.operation.OperationDefinition; import org.rhq.core.domain.resource.ResourceType; import org.rhq.core.pc.ContainerService; import org.rhq.core.pc.PluginContainerConfiguration; import org.rhq.core.pc.agent.AgentService; import org.rhq.core.pc.agent.AgentServiceStreamRemoter; import org.rhq.core.pc.operation.OperationInvocation.Status; import org.rhq.core.pc.util.ComponentUtil; import org.rhq.core.pc.util.FacetLockType; import org.rhq.core.pc.util.LoggingThreadFactory; import org.rhq.core.pluginapi.operation.OperationFacet; import org.rhq.core.util.exception.WrappedRemotingException; /** * Manages the scheduling and invocation of operations for all resources across all plugins. * * <p>This is an agent service; its interface is made remotely accessible if this is deployed within the agent.</p> * * @author Ian Springer * @author John Mazzitelli */ public class OperationManager extends AgentService implements OperationAgentService, ContainerService { private static final String SENDER_THREAD_POOL_NAME = "OperationManager.invoker"; private static final Log log = LogFactory.getLog(OperationManager.class); private final PluginContainerConfiguration configuration; private final Timer timer; private final OperationThreadPoolGateway operationGateway; public OperationManager(PluginContainerConfiguration configuration, AgentServiceStreamRemoter streamRemoter) { super(OperationAgentService.class, streamRemoter); this.configuration = configuration; timer = new Timer(SENDER_THREAD_POOL_NAME + ".timeout-timer"); // read the javadoc on ThreadPoolExecutor and how max pool size is affected when using LinkedBlockingQueue LinkedBlockingQueue<Runnable> queue = new LinkedBlockingQueue<Runnable>(10000); LoggingThreadFactory threadFactory = new LoggingThreadFactory(SENDER_THREAD_POOL_NAME, true); int maxPoolSize = configuration.getOperationInvokerThreadPoolSize(); ThreadPoolExecutor threadPool = new ThreadPoolExecutor(maxPoolSize, maxPoolSize, 1000, TimeUnit.MILLISECONDS, queue, threadFactory); operationGateway = new OperationThreadPoolGateway(threadPool); } /** * This will shutdown the operation thread pool and attempt to cancel operations already in progress. Note that when * this method returns, you are not guaranteed that all operations are finished. If there were one or more * long-lived operations that do not want to be canceled (that is, they ignore the thread interrupt they will * receive and do not terminate promptly), those operations will still be running when this returns. * * @see ContainerService#shutdown() */ public void shutdown() { timer.cancel(); operationGateway.shutdown(); } public void invokeOperation(@NotNull final String jobId, final int resourceId, @NotNull final String operationName, @Nullable final Configuration parameterConfig) throws PluginContainerException { invokeOperation(jobId, resourceId, operationName, parameterConfig, getOperationServerService()); } /** * Not tying this call to a particular {@link OperationServerService} implementation allows other internal classes * to call this method and receive the results, rather than having them sent to the server. */ protected void invokeOperation(final String jobId, final int resourceId, final String operationName, final Configuration parameterConfig, final OperationServerService operationServerService) throws PluginContainerException { try { final OperationInvocation[] theJob = new OperationInvocation[1]; // need array so we can use it in the timer task final long invocationTime = System.currentTimeMillis(); OperationDefinition operationDefinition = getOperationDefinition(resourceId, operationName); if (operationDefinition != null && operationDefinition.getParametersConfigurationDefinition() != null && parameterConfig != null) { ConfigurationUtility.normalizeConfiguration(parameterConfig, operationDefinition.getParametersConfigurationDefinition(), true, true); } // create our timer task that will force the operation invocation to time out if it takes too long to complete final long operationTimeout = getOperationTimeout(operationDefinition, parameterConfig); // ensure the facet method timeout is comfortably longer than the operation timeout long facetMethodTimeout = operationTimeout + (10 * 1000L); final OperationFacet operationComponent = getOperationFacet(resourceId, facetMethodTimeout); final TimerTask timerTask = new TimerTask() { // TIMER TASK THREAD - waits until the timeout time expires - if this is not canceled before the timeout hits, // the operation invocation thread is interrupted and the server will be told the operation has timed out. @Override public void run() { if (theJob[0] != null) { theJob[0].markAsTimedOut(); } } }; timer.schedule(timerTask, operationTimeout); theJob[0] = new OperationInvocation(resourceId, invocationTime, timerTask, parameterConfig, jobId, operationName, operationComponent, operationServerService, operationGateway, operationDefinition); operationGateway.submit(theJob[0]); } catch (Exception e) { log.warn("Failed to submit operation invocation request", e); throw new PluginContainerException("Failed to submit invocation request. resource=[" + resourceId + "], operation=[" + operationName + "], jobId=[" + jobId + "]", new WrappedRemotingException(e)); } return; } public CancelResults cancelOperation(String jobId) { OperationInvocation operation = operationGateway.getOperationInvocation(jobId); /* * there is a small window of time during which the user sees the operation as INPROGRESS (and thus available * for cancellation). the user click on the cancel button, but by the time the request makes it down to the * agent the operation has completed (or failed). in this case, asking for the operation from the gateway will * fail with a NullPointerException. here, we assume the job has finished and return that to the caller. */ if (operation == null) { return new CancelResults(InterruptedState.FINISHED); } EnumSet<Status> interruptedStatus = operation.markAsCanceled(); // tell the caller what state the operation was in when it was canceled if (interruptedStatus.contains(Status.FINISHED)) { return new CancelResults(InterruptedState.FINISHED); } else if (interruptedStatus.contains(Status.QUEUED)) { return new CancelResults(InterruptedState.QUEUED); } else if (interruptedStatus.contains(Status.RUNNING)) { return new CancelResults(InterruptedState.RUNNING); } return new CancelResults(InterruptedState.UNKNOWN); } /** * Given a resource, this obtains that resource's OperationFacet interface. If it does not support the operation * facet, an exception is thrown. The resource does *not* need to be in the STARTED (i.e. connected) state. * * @param resourceId identifies the resource that is to have the operation invoked on it * * @return the resource's operation facet component * * @throws PluginContainerException on error */ protected OperationFacet getOperationFacet(int resourceId, long facetMethodTimeout) throws PluginContainerException { return ComponentUtil.getComponent(resourceId, OperationFacet.class, FacetLockType.WRITE, facetMethodTimeout, false, false, true); } /** * Given a resource ID, this obtains that resource's type. * * @param resourceId identifies the resource whose type is to be returned * * @return the resource's type, if known * * @throws PluginContainerException if cannot determine the resource's type */ protected ResourceType getResourceType(int resourceId) throws PluginContainerException { return ComponentUtil.getResourceType(resourceId); } /** * If the invocation passed in a {@link OperationDefinition#TIMEOUT_PARAM_NAME timeout property} in the * configuration, it is used. If that is not set, but the operation metadata defines a timeout, it is used. If * neither of those are set, the plugin container's default timeout is used. The timeouts are always specified in * seconds. * * @param paramConfig * * @return the timeout to use * * @throws PluginContainerException if the timeout found was invalid */ private long getOperationTimeout(OperationDefinition operationDefinition, Configuration paramConfig) throws PluginContainerException { // see if this particular invocation has overridden all timeout defaults with its own if (paramConfig != null) { PropertySimple timeoutProperty = paramConfig.getSimple(OperationDefinition.TIMEOUT_PARAM_NAME); if (timeoutProperty != null) { try { paramConfig.remove(timeoutProperty.getName()); // we have to remove it since ResourceComponents are not expecting it Long timeout = timeoutProperty.getLongValue(); if (timeout != null) { return timeout * 1000L; } } catch (Exception e) { throw new PluginContainerException("The timeout specified in the configuration was invalid: " + timeoutProperty); } } } // see if the operation metadata defines the timeout if ((operationDefinition != null) && (operationDefinition.getTimeout() != null)) { return operationDefinition.getTimeout().longValue() * 1000L; } // use the PC's default since we can't find it anywhere else return configuration.getOperationInvocationTimeout() * 1000L; } /** * Returns the operation definition for the operation with the specified name on the * {@link org.rhq.core.domain.resource.Resource} with the specified id. * * @param resourceId a <code>Resource</code> id * @param operationName an operation name * * @return the operation definition for the operation with the specified name on the <code>Resource</code> with the * specified id * * @throws PluginContainerException if the resource type could not be determined for the specified <code> * Resource</code> id */ @Nullable private OperationDefinition getOperationDefinition(int resourceId, String operationName) throws PluginContainerException { ResourceType resourceType = getResourceType(resourceId); Set<OperationDefinition> operationDefinitions = resourceType.getOperationDefinitions(); if (operationDefinitions != null) { for (OperationDefinition operationDefinition : operationDefinitions) { if (operationDefinition.getName().equals(operationName)) { return operationDefinition; } } } return null; } /** * If this manager can talk to a server-side {@link OperationServerService}, a proxy to that service is returned. * * @return the server-side proxy; <code>null</code> if this manager doesn't have a server to talk to */ private OperationServerService getOperationServerService() { if (configuration.getServerServices() != null) { return configuration.getServerServices().getOperationServerService(); } return null; } }