/* * ProActive Parallel Suite(TM): * The Open Source library for parallel and distributed * Workflows & Scheduling, Orchestration, Cloud Automation * and Big Data Analysis on Enterprise Grids & Clouds. * * Copyright (c) 2007 - 2017 ActiveEon * Contact: contact@activeeon.com * * This library is free software: you can redistribute it and/or * modify it under the terms of the GNU Affero General Public License * as published by the Free Software Foundation: version 3 of * the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * If needed, contact us to obtain a release under GPL Version 2 or 3 * or a different license than the AGPL. */ package org.ow2.proactive.resourcemanager.nodesource; import java.security.Permission; import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.Map; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import org.apache.log4j.Logger; import org.objectweb.proactive.Body; import org.objectweb.proactive.InitActive; import org.objectweb.proactive.RunActive; import org.objectweb.proactive.Service; import org.objectweb.proactive.annotation.ImmediateService; import org.objectweb.proactive.api.PAActiveObject; import org.objectweb.proactive.api.PAFuture; import org.objectweb.proactive.core.node.Node; import org.objectweb.proactive.core.node.NodeFactory; import org.objectweb.proactive.core.util.wrapper.BooleanWrapper; import org.objectweb.proactive.core.util.wrapper.IntWrapper; import org.objectweb.proactive.extensions.annotation.ActiveObject; import org.ow2.proactive.authentication.principals.IdentityPrincipal; import org.ow2.proactive.authentication.principals.TokenPrincipal; import org.ow2.proactive.authentication.principals.UserNamePrincipal; import org.ow2.proactive.permissions.PrincipalPermission; import org.ow2.proactive.resourcemanager.authentication.Client; import org.ow2.proactive.resourcemanager.common.event.RMEventType; import org.ow2.proactive.resourcemanager.common.event.RMNodeEvent; import org.ow2.proactive.resourcemanager.common.event.RMNodeSourceEvent; import org.ow2.proactive.resourcemanager.core.RMCore; import org.ow2.proactive.resourcemanager.core.properties.PAResourceManagerProperties; import org.ow2.proactive.resourcemanager.exception.AddingNodesException; import org.ow2.proactive.resourcemanager.exception.RMException; import org.ow2.proactive.resourcemanager.frontend.RMMonitoringImpl; import org.ow2.proactive.resourcemanager.nodesource.infrastructure.InfrastructureManager; import org.ow2.proactive.resourcemanager.nodesource.policy.AccessType; import org.ow2.proactive.resourcemanager.nodesource.policy.NodeSourcePolicy; import org.ow2.proactive.resourcemanager.rmnode.AbstractRMNode; import org.ow2.proactive.resourcemanager.rmnode.RMDeployingNode; import org.ow2.proactive.resourcemanager.rmnode.RMNode; import org.ow2.proactive.resourcemanager.rmnode.RMNodeImpl; import org.ow2.proactive.resourcemanager.utils.RMNodeStarter; import com.google.common.annotations.VisibleForTesting; /** * Abstract class designed to manage a NodeSource. A NodeSource active object is * designed to manage acquisition, monitoring and removing of a set of * {@link Node} objects in the Resource Manager. Each node source consists * of two entities {@link InfrastructureManager} and {@link NodeSourcePolicy}. <BR> * * Each particular {@link InfrastructureManager} defines a specific infrastructure and ways * to manipulate nodes. <BR> * * The node source policy {@link NodeSourcePolicy} defines a strategy of nodes acquisition. * It can be static acquiring nodes once and forever or dynamic where nodes acquisition takes * into account different external factors such as time, scheduling state, etc. * */ @ActiveObject public class NodeSource implements InitActive, RunActive { private static Logger logger = Logger.getLogger(NodeSource.class); private int pingFrequency = PAResourceManagerProperties.RM_NODE_SOURCE_PING_FREQUENCY.getValueAsInt(); /** Default name for NS with local nodes started with the Scheduler by default */ public static final String DEFAULT_LOCAL_NODES_NODE_SOURCE_NAME = "LocalNodes"; public static final String DEFAULT = "Default"; public static final int INTERNAL_POOL = 0; public static final int EXTERNAL_POOL = 1; /** unique name of the source */ private final String name; private final InfrastructureManager infrastructureManager; private final NodeSourcePolicy nodeSourcePolicy; private final String description; private final RMCore rmcore; // The url used by spawn nodes to register themself private final String registrationURL; private boolean toShutdown = false; // all nodes except down private Map<String, Node> nodes; private Map<String, Node> downNodes; private static ThreadPoolHolder threadPoolHolder; private NodeSource stub; private final Client administrator; // to be able to emit rmdeployingnode related events private final transient RMMonitoringImpl monitoring; // admin can remove node source, add nodes to the node source, remove any node // it is a PrincipalPermission of the user who created this node source private final Permission adminPermission; // provider can add nodes to the node source, remove only its nodes // level is configured by ns admin at the moment of ns creation // NOTE: the administrator is always the provider because each provider is one of those: // ns creator, ns creator groups, all private final Permission providerPermission; // user can get nodes for running computations // level is configured by ns admin at the moment of ns creation private AccessType nodeUserAccessType; static { try { int maxThreads = PAResourceManagerProperties.RM_NODESOURCE_MAX_THREAD_NUMBER.getValueAsInt(); if (maxThreads < 2) { maxThreads = 2; } // executor service initialization NodeSource.threadPoolHolder = new ThreadPoolHolder(new int[] { maxThreads / 2, maxThreads / 2 }); } catch (Exception e) { logger.error("Could not initialize threadPoolHolder", e); } } /** * Creates a new instance of NodeSource. * This constructor is used by Proactive as one of requirements for active objects. */ public NodeSource() { registrationURL = null; name = null; infrastructureManager = null; nodeSourcePolicy = null; description = null; rmcore = null; administrator = null; adminPermission = null; providerPermission = null; monitoring = null; } /** * Creates a new instance of NodeSource. * * @param name node source name * @param registrationURL the url used by the spawn nodes to register * @param im underlying infrastructure manager * @param policy nodes acquisition policy * @param rmcore resource manager core */ public NodeSource(String registrationURL, String name, Client provider, InfrastructureManager im, NodeSourcePolicy policy, RMCore rmcore, RMMonitoringImpl monitor) { this.registrationURL = registrationURL; this.name = name; this.administrator = provider; this.infrastructureManager = im; this.nodeSourcePolicy = policy; this.rmcore = rmcore; this.monitoring = monitor; this.description = "Infrastructure: " + im + ", Policy: " + policy; this.nodes = Collections.synchronizedMap(new HashMap<String, Node>()); this.downNodes = Collections.synchronizedMap(new HashMap<String, Node>()); // node source admin permission // it's the PrincipalPermission of the user who created the node source this.adminPermission = new PrincipalPermission(provider.getName(), provider.getSubject().getPrincipals(UserNamePrincipal.class)); // creating node source provider permission // could be one of the following: PrincipalPermission (NS creator) or PrincipalPermission (NS creator groups) // or PrincipalPermission (anyone) this.providerPermission = new PrincipalPermission(provider.getName(), nodeSourcePolicy.getProviderAccessType() .getIdentityPrincipals(provider)); this.nodeUserAccessType = nodeSourcePolicy.getUserAccessType(); } /** * Initialization of node source. Creates and activates a pinger to monitor nodes. * * @param body active object body */ public void initActivity(Body body) { stub = (NodeSource) PAActiveObject.getStubOnThis(); infrastructureManager.setNodeSource(this); nodeSourcePolicy.setNodeSource((NodeSource) PAActiveObject.getStubOnThis()); Thread.currentThread().setName("Node Source \"" + name + "\""); } public void runActivity(Body body) { Service service = new Service(body); long timeStamp = System.currentTimeMillis(); long delta = 0; // recalculating nodes number only once per policy period while (body.isActive()) { try { service.blockingServeOldest(pingFrequency); delta += System.currentTimeMillis() - timeStamp; timeStamp = System.currentTimeMillis(); if (delta > pingFrequency) { logger.info("[" + name + "] Pinging alive nodes : " + getAliveNodes().size()); for (Node node : getAliveNodes()) { pingNode(node); } delta = 0; } } catch (InterruptedException e) { logger.warn("runActivity interrupted", e); } } } /** * Updates internal node source structures. */ @VisibleForTesting RMDeployingNode internalAddNode(Node node) throws RMException { String nodeUrl = node.getNodeInformation().getURL(); if (this.nodes.containsKey(nodeUrl)) { throw new RMException("The node " + nodeUrl + " already added to the node source " + name); } logger.info("[" + name + "] new node available : " + node.getNodeInformation().getURL()); RMDeployingNode rmDeployingNode = infrastructureManager.internalRegisterAcquiredNode(node); nodes.put(nodeUrl, node); return rmDeployingNode; } /** * Acquires the existing node with specific url. The node have to be up and running. * * @param nodeUrl the url of the node * @param provider */ public BooleanWrapper acquireNode(String nodeUrl, Client provider) { if (toShutdown) { throw new AddingNodesException("[" + name + "] node " + nodeUrl + " adding request discarded because node source is shutting down"); } // checking that client has a right to change this node source // if the provider is the administrator of the node source it always has this permission provider.checkPermission(providerPermission, provider + " is not authorized to add node " + nodeUrl + " to " + name); // lookup for a new Node int lookUpTimeout = PAResourceManagerProperties.RM_NODELOOKUP_TIMEOUT.getValueAsInt(); Node nodeToAdd = null; try { logger.info("Looking up the node " + nodeUrl + " with " + lookUpTimeout + " ms timeout"); nodeToAdd = lookupNode(nodeUrl, lookUpTimeout); logger.info("The node " + nodeUrl + " has been successfully looked up"); } catch (Exception e) { logger.warn("Cannot look up the node " + nodeUrl + " within " + lookUpTimeout + " ms due to " + e.getMessage(), e); throw new AddingNodesException(e); } // node should be not null at this point... if (nodeToAdd == null) { throw new AddingNodesException("Cannot lookup node for unknown reason : " + nodeUrl); } // the node with specified url was successfully looked up // now checking if this node has been registered before in the node source if (downNodes.containsKey(nodeUrl)) { // it was registered but detected as down node, // so basically the node was restarted. // adding a new node and removing old one from the down list logger.debug("Removing existing node from down nodes list"); BooleanWrapper result = rmcore.removeNodeFromCore(nodeUrl); if (result.getBooleanValue()) { if (logger.isDebugEnabled()) logger.debug("[" + name + "] successfully removed node " + nodeUrl + " from the core"); // just removing it from down nodes list removeNode(nodeUrl, provider); } } else if (nodes.containsKey(nodeUrl)) { // adding a node which exists in node source Node existingNode = nodes.get(nodeUrl); if (nodeToAdd.equals(existingNode)) { // adding the same node twice // don't do anything if (logger.isDebugEnabled()) logger.debug("An attempt to add the same node twice " + nodeUrl + " - ignoring"); return new BooleanWrapper(false); } else { // adding another node with the same url // replacing the old node by the new one logger.debug("Removing existing node from the RM without request propagation to the infrastructure manager"); BooleanWrapper result = rmcore.removeNodeFromCore(nodeUrl); if (result.getBooleanValue()) { if (logger.isDebugEnabled()) logger.debug("[" + name + "] successfully removed node " + nodeUrl + " from the core"); // removing it from the nodes list but don't propagate // the request the the infrastructure because the restarted node will be killed nodes.remove(nodeUrl); } } } // if any exception occurs in internalAddNode(node) do not add the node to the core RMDeployingNode deployingNode; try { deployingNode = internalAddNode(nodeToAdd); } catch (RMException e) { throw new AddingNodesException(e); } //we build the rmnode RMNode rmNode = buildRMNode(nodeToAdd, provider); if (deployingNode != null) { // inherit locking status from associated deploying node created before ((AbstractRMNode) rmNode).copyLockStatusFrom(deployingNode); } //we notify the configuration of the node to the rmcore //it then will be seen as "configuring" rmcore.internalRegisterConfiguringNode(rmNode); return new BooleanWrapper(true); } /** * Builds a RMNode from a raw Node * @param node the node object * @param provider the client of the request * @return the expected RMNode */ private RMNode buildRMNode(Node node, Client provider) { // creating a node access permission // it could be either PROVIDER/PROVIDER_GROUPS and in this case // the provider principals will be taken or // ME/MY_GROUPS (ns creator/ns creator groups) and in this case // creator's principals will be used Client permissionOwner = administrator; if (nodeUserAccessType.equals(AccessType.PROVIDER) || nodeUserAccessType.equals(AccessType.PROVIDER_GROUPS)) { permissionOwner = provider; } // now selecting the type (user or group) and construct the permission Set<IdentityPrincipal> principals = (Set<IdentityPrincipal>) nodeUserAccessType.getIdentityPrincipals(permissionOwner); boolean tokenInNode = false; boolean tokenInNodeSource = nodeUserAccessType.getTokens() != null && nodeUserAccessType.getTokens().length > 0; try { String nodeAccessToken = node.getProperty(RMNodeStarter.NODE_ACCESS_TOKEN); tokenInNode = nodeAccessToken != null && nodeAccessToken.length() > 0; if (tokenInNode) { logger.debug("Node " + node.getNodeInformation().getURL() + " is protected by access token " + nodeAccessToken); // it overrides all other principals principals.clear(); principals.add(new TokenPrincipal(nodeAccessToken)); } } catch (Exception e) { throw new AddingNodesException(e); } PrincipalPermission nodeAccessPermission = new PrincipalPermission(node.getNodeInformation().getURL(), principals); RMNodeImpl rmnode = new RMNodeImpl(node, stub, provider, nodeAccessPermission); rmnode.setProtectedByToken(tokenInNode || tokenInNodeSource); return rmnode; } public boolean setNodeAvailable(RMNode node) { Node proactiveProgrammingNode = node.getNode(); String proactiveProgrammingNodeUrl = proactiveProgrammingNode.getNodeInformation().getURL(); Node downNode = downNodes.remove(proactiveProgrammingNodeUrl); if (downNode != null) { logger.info("Setting node as available: " + proactiveProgrammingNodeUrl); nodes.put(proactiveProgrammingNodeUrl, proactiveProgrammingNode); infrastructureManager.onDownNodeReconnection(proactiveProgrammingNode); return true; } else { logger.info("Node state not changed since it is unknown: " + proactiveProgrammingNodeUrl); return false; } } public RMDeployingNode update(RMDeployingNode rmNode) { return infrastructureManager.update(rmNode); } public boolean setDeploying(RMDeployingNode deployingNode) { return rmcore.setDeploying(deployingNode); } /** * Looks up the node */ private class NodeLocator implements Callable<Node> { private String nodeUrl; public NodeLocator(String url) { nodeUrl = url; } public Node call() throws Exception { Node node = NodeFactory.getNode(nodeUrl); return node; } } /** * Lookups a node with specified timeout. * * @param nodeUrl a url of the node * @param timeout to wait in ms * @return node if it was successfully obtained, null otherwise * @throws Exception if node was not looked up */ private Node lookupNode(String nodeUrl, long timeout) throws Exception { Future<Node> futureNode = threadPoolHolder.submit(INTERNAL_POOL, new NodeLocator(nodeUrl)); return futureNode.get(timeout, TimeUnit.MILLISECONDS); } /** * Requests one node to be acquired from the underlying infrastructure. */ public void acquireNode() { if (toShutdown) { logger.warn("[" + name + "] acquireNode request discarded because node source is shutting down"); return; } infrastructureManager.acquireNode(); } public void acquireNodes(int n, Map<String, ?> nodeConfiguration) { infrastructureManager.acquireNodes(n, nodeConfiguration); } /** * Requests all nodes to be acquired from the infrastructure. */ public void acquireAllNodes() { if (toShutdown) { logger.warn("[" + name + "] acquireAllNodes request discarded because node source is shutting down"); return; } infrastructureManager.acquireAllNodes(); } public void acquireAllNodes(Map<String, ?> nodeConfiguration) { infrastructureManager.acquireAllNodes(nodeConfiguration); } /** * Removes the node from the node source. * * @param nodeUrl the url of the node to be released */ public BooleanWrapper removeNode(String nodeUrl, Client initiator) { //verifying if node is already in the list, //node could have fallen between remove request and the confirm if (this.nodes.containsKey(nodeUrl)) { logger.info("[" + name + "] removing node : " + nodeUrl); Node node = nodes.remove(nodeUrl); RMCore.topologyManager.removeNode(node); try { infrastructureManager.internalRemoveNode(node); } catch (RMException e) { logger.error(e.getCause().getMessage(), e); } } else { Node downNode = downNodes.remove(nodeUrl); if (downNode != null) { logger.info("[" + name + "] removing down node : " + nodeUrl); } else { logger.error("[" + name + "] removing node : " + nodeUrl + " which not belongs to this node source"); return new BooleanWrapper(false); } } if (toShutdown && nodes.size() == 0) { // shutdown all pending nodes shutdownNodeSourceServices(initiator); } return new BooleanWrapper(true); } /** * Shutdowns the node source and releases all its nodes. */ public void shutdown(Client initiator) { logger.info("[" + name + "] is shutting down by " + initiator); toShutdown = true; if (nodes.size() == 0) { shutdownNodeSourceServices(initiator); } } /** * To emit a deploying node event * @param event the deploying node event to emit */ @ImmediateService public void internalEmitDeployingNodeEvent(final RMNodeEvent event) { this.monitoring.nodeEvent(event); } /** * Removes the deploying node from the nodesource's infrastructure manager. * @param pnUrl the deploying url * @return true in case of succes, false otherwise */ public boolean removeDeployingNode(String pnUrl) { return this.infrastructureManager.internalRemoveDeployingNode(pnUrl); } /** * Gets the ping frequency. * @return ping frequency */ public IntWrapper getPingFrequency() { return new IntWrapper(pingFrequency); } /** * Sets the ping frequency (in ms) * @param frequency new value of monitoring period */ public void setPingFrequency(int frequency) { pingFrequency = frequency; } /** * Creates a node source string representation * @return string representation of the node source */ @ImmediateService public String getDescription() { return description; } /** * Gets name of the node source * @return name of the node source */ @ImmediateService public String getName() { return name; } /** * Activates a node source policy. */ public BooleanWrapper activate() { logger.info("[" + name + "] Activating the policy " + nodeSourcePolicy); return nodeSourcePolicy.activate(); } /** * Initiates node source services shutdown, such as pinger, policy, thread pool. * @param initiator */ protected void shutdownNodeSourceServices(Client initiator) { logger.info("[" + name + "] Shutdown finalization"); nodeSourcePolicy.shutdown(initiator); infrastructureManager.internalShutDown(); } /** * Terminates a node source active object when the policy is shutdown. */ public void finishNodeSourceShutdown(Client initiator) { PAFuture.waitFor(rmcore.nodeSourceUnregister(name, new RMNodeSourceEvent(RMEventType.NODESOURCE_REMOVED, initiator.getName(), this.getName(), this.getDescription(), this.getAdministrator().getName()))); PAActiveObject.terminateActiveObject(false); } /** * Retrieves a list of alive nodes * @return a list of alive nodes */ @ImmediateService public LinkedList<Node> getAliveNodes() { LinkedList<Node> nodes = new LinkedList<>(); nodes.addAll(this.nodes.values()); return nodes; } /** * Retrieves a list of down nodes * @return a list of down nodes */ @ImmediateService public LinkedList<Node> getDownNodes() { LinkedList<Node> downNodes = new LinkedList<>(); downNodes.addAll(this.downNodes.values()); return downNodes; } /** * Retrieves the list of deploying nodes handled by the infrastructure manager * @return the list of deploying nodes handled by the infrastructure manager */ @ImmediateService public LinkedList<RMDeployingNode> getDeployingNodes() { LinkedList<RMDeployingNode> result = new LinkedList<>(); result.addAll(this.infrastructureManager.getDeployingNodes()); return result; } /** * Returns the deploying node identified by the specified {@code nodeUrl}. * * @param nodeUrl the URL of the deploying node to lookup. * @return the deploying node found, or {@code null}. Since a node source * is an Active Object, the caller will receive a deep copy of the original object. */ @ImmediateService public RMDeployingNode getDeployingNode(String nodeUrl) { return infrastructureManager.getDeployingNode(nodeUrl); } /** * Gets the nodes size excluding down nodes. * @return the node size */ public int getNodesCount() { return this.nodes.values().size(); } /** * Marks node as down. Remove it from node source node set. It remains in rmcore nodes list until * user decides to remove them or node source is shutdown. * @see NodeSource#detectedPingedDownNode(String) */ public void detectedPingedDownNode(String nodeUrl) { if (toShutdown) { logger.warn("[" + name + "] detectedPingedDownNode request discarded because node source is shutting down"); return; } logger.warn("[" + name + "] Detected down node: " + nodeUrl); Node downNode = nodes.remove(nodeUrl); if (downNode != null) { downNodes.put(nodeUrl, downNode); try { RMCore.topologyManager.removeNode(downNode); infrastructureManager.internalNotifyDownNode(downNode); } catch (RMException e) { logger.error("Error while removing down node: " + nodeUrl, e); } } rmcore.setDownNode(nodeUrl); } /** * Gets resource manager core. Used by policies. * @return {@link RMCore} instance. */ @ImmediateService public RMCore getRMCore() { return rmcore; } /** * Executed command in parallel using thread pool. * @param task to execute */ @ImmediateService public void executeInParallel(Runnable task) { NodeSource.threadPoolHolder.execute(EXTERNAL_POOL, task); } /** * Pings the node with specified url. * If the node is dead sends the request to the node source. */ public void pingNode(final Node node) { executeInParallel(new Runnable() { public void run() { String nodeUrl = node.getNodeInformation().getURL(); try { node.getNumberOfActiveObjects(); if (logger.isDebugEnabled()) { logger.debug("Node " + nodeUrl + " is alive"); } } catch (Throwable t) { logger.warn("Error occurred when trying to ping node " + nodeUrl, t); stub.detectedPingedDownNode(nodeUrl); } } }); } /** * The provider of the node source is the resource manager client initiated * the node source creation. * * @return the node source provider */ @ImmediateService public Client getAdministrator() { return administrator; } /** * Returns the the node source stub */ public NodeSource getStub() { return stub; } /** * Returns the permission which administrator must have. * Administrator of the node source can remove it, add nodes to this node source and remove any node. */ @ImmediateService public Permission getAdminPermission() { return adminPermission; } /** * Returns the permission required to add/remove nodes to/from the node source. * Provider can remove only its one nodes. */ @ImmediateService public Permission getProviderPermission() { return providerPermission; } /** * Returns the registration url the node spawn by this nodesource * must use. * @return the registration url the node spawn by this nodesource * must use. */ @ImmediateService public String getRegistrationURL() { return this.registrationURL; } }