/* * ProActive Parallel Suite(TM): * The Open Source library for parallel and distributed * Workflows & Scheduling, Orchestration, Cloud Automation * and Big Data Analysis on Enterprise Grids & Clouds. * * Copyright (c) 2007 - 2017 ActiveEon * Contact: contact@activeeon.com * * This library is free software: you can redistribute it and/or * modify it under the terms of the GNU Affero General Public License * as published by the Free Software Foundation: version 3 of * the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * If needed, contact us to obtain a release under GPL Version 2 or 3 * or a different license than the AGPL. */ package org.ow2.proactive.resourcemanager.selection; import java.io.File; import java.io.Serializable; import java.security.Permission; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.CancellationException; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import org.apache.log4j.Logger; import org.apache.log4j.MDC; import org.objectweb.proactive.api.PAActiveObject; import org.objectweb.proactive.api.PAFuture; import org.objectweb.proactive.core.node.Node; import org.objectweb.proactive.utils.NamedThreadFactory; import org.ow2.proactive.authentication.principals.TokenPrincipal; import org.ow2.proactive.permissions.PrincipalPermission; import org.ow2.proactive.resourcemanager.authentication.Client; import org.ow2.proactive.resourcemanager.core.RMCore; import org.ow2.proactive.resourcemanager.core.properties.PAResourceManagerProperties; import org.ow2.proactive.resourcemanager.exception.NotConnectedException; import org.ow2.proactive.resourcemanager.rmnode.RMNode; import org.ow2.proactive.resourcemanager.selection.policies.ShufflePolicy; import org.ow2.proactive.resourcemanager.selection.topology.TopologyHandler; import org.ow2.proactive.scripting.Script; import org.ow2.proactive.scripting.ScriptException; import org.ow2.proactive.scripting.ScriptResult; import org.ow2.proactive.scripting.SelectionScript; import org.ow2.proactive.utils.Criteria; import org.ow2.proactive.utils.NodeSet; import org.ow2.proactive.utils.appenders.MultipleFileAppender; /** * An interface of selection manager which is responsible for * nodes selection from a pool of free nodes for further scripts execution. * */ public abstract class SelectionManager { private final static Logger logger = Logger.getLogger(SelectionManager.class); private RMCore rmcore; private static final long DEFAULT_AUTHORIZED_SCRIPT_LOAD_PERIOD = (long) 60 * 1000; private static long lastAuthorizedFolderLoadingTime = 0; private ExecutorService scriptExecutorThreadPool; private Set<String> inProgress; protected HashSet<String> authorizedSelectionScripts = null; // the policy for arranging nodes private SelectionPolicy selectionPolicy; public SelectionManager() { } public SelectionManager(RMCore rmcore) { this.rmcore = rmcore; this.scriptExecutorThreadPool = Executors.newFixedThreadPool(PAResourceManagerProperties.RM_SELECTION_MAX_THREAD_NUMBER.getValueAsInt(), new NamedThreadFactory("Selection manager threadpool")); this.inProgress = Collections.synchronizedSet(new HashSet<String>()); String policyClassName = PAResourceManagerProperties.RM_SELECTION_POLICY.getValueAsString(); try { Class<?> policyClass = Class.forName(policyClassName); selectionPolicy = (SelectionPolicy) policyClass.newInstance(); } catch (Exception e) { logger.error("Cannot use the specified policy class: " + policyClassName, e); logger.warn("Using the default class: " + ShufflePolicy.class.getName()); selectionPolicy = new ShufflePolicy(); } updateAuthorizedScriptsSignatures(); } /** * Loads authorized selection scripts. */ public void updateAuthorizedScriptsSignatures() { String dirName = PAResourceManagerProperties.RM_EXECUTE_SCRIPT_AUTHORIZED_DIR.getValueAsStringOrNull(); if (dirName != null && dirName.length() > 0) { dirName = PAResourceManagerProperties.getAbsolutePath(dirName); File folder = new File(dirName); if (folder.exists() && folder.isDirectory()) { logger.debug("The resource manager will accept only selection scripts from " + dirName); long currentTime = System.currentTimeMillis(); long configuredAuthorizedScriptLoadPeriod = getConfiguredAuthorizedScriptLoadPeriod(); if (currentTime - lastAuthorizedFolderLoadingTime > configuredAuthorizedScriptLoadPeriod) { lastAuthorizedFolderLoadingTime = currentTime; loadAuthorizedScriptsSignatures(folder); } } else { logger.error("Invalid dir name for authorized scripts " + dirName); throw new SecurityException("Invalid dir name for authorized scripts " + dirName); } } } private void loadAuthorizedScriptsSignatures(File folder) { authorizedSelectionScripts = new HashSet<>(); for (File file : folder.listFiles()) { if (file.isFile()) { try { String script = SelectionScript.readFile(file); logger.debug("Adding authorized selection script " + file.getAbsolutePath()); authorizedSelectionScripts.add(Script.digest(script.trim())); } catch (Exception e) { logger.error(e.getMessage(), e); throw new SecurityException("Error while reading authorized script file", e); } } } } private long getConfiguredAuthorizedScriptLoadPeriod() { long configuredAuthorizedScriptLoadPeriod = DEFAULT_AUTHORIZED_SCRIPT_LOAD_PERIOD; if (PAResourceManagerProperties.RM_EXECUTE_SCRIPT_AUTHORIZED_DIR_REFRESHPERIOD.isSet()) { configuredAuthorizedScriptLoadPeriod = PAResourceManagerProperties.RM_EXECUTE_SCRIPT_AUTHORIZED_DIR_REFRESHPERIOD.getValueAsLong(); } return configuredAuthorizedScriptLoadPeriod; } /** * Arranges nodes for script execution based on some criteria * for example previous execution statistics. * * @param nodes - nodes list for script execution * @param scripts - set of selection scripts * @return collection of arranged nodes */ public abstract List<RMNode> arrangeNodesForScriptExecution(final List<RMNode> nodes, List<SelectionScript> scripts); /** * Predicts script execution result. Allows to avoid duplicate script execution * on the same node. * * @param script - script to execute * @param rmnode - target node * @return true if script will pass on the node */ public abstract boolean isPassed(SelectionScript script, RMNode rmnode); /** * Processes script result and updates knowledge base of * selection manager at the same time. * * @param script - executed script * @param scriptResult - obtained script result * @param rmnode - node on which script has been executed * @return whether node is selected */ public abstract boolean processScriptResult(SelectionScript script, ScriptResult<Boolean> scriptResult, RMNode rmnode); public NodeSet selectNodes(Criteria criteria, Client client) { maybeSetLoggingContext(criteria); try { return doSelectNodes(criteria, client); } finally { unsetLoggingContext(); } } static void maybeSetLoggingContext(Criteria criteria) { if (criteria.getComputationDescriptors() != null) { // logging selection script execution into tasks logs MDC.put(MultipleFileAppender.FILE_NAMES, criteria.getComputationDescriptors()); } } static void unsetLoggingContext() { MDC.remove(MultipleFileAppender.FILE_NAMES); } private NodeSet doSelectNodes(Criteria criteria, Client client) { boolean hasScripts = criteria.getScripts() != null && criteria.getScripts().size() > 0; boolean loggerIsDebugEnabled = logger.isDebugEnabled(); if (loggerIsDebugEnabled) { logger.debug(client + " requested " + criteria.getSize() + " nodes with " + criteria.getTopology()); if (hasScripts) { logger.debug("Selection scripts:"); for (SelectionScript s : criteria.getScripts()) { logger.debug(s); } } if (criteria.getBlackList() != null && criteria.getBlackList().size() > 0) { logger.debug("Black list nodes:"); for (Node n : criteria.getBlackList()) { logger.debug(n); } } } // can throw Exception if topology is disabled TopologyHandler handler = RMCore.topologyManager.getHandler(criteria.getTopology()); List<RMNode> freeNodes = rmcore.getFreeNodes(); // filtering out the "free node list" // removing exclusion and checking permissions List<RMNode> filteredNodes = filterOut(freeNodes, criteria, client); if (filteredNodes.size() == 0) { if (loggerIsDebugEnabled) { logger.debug(client + " will get 0 nodes"); } return new NodeSet(); } // arranging nodes according to the selection policy // if could be shuffling or node source priorities List<RMNode> afterPolicyNodes = selectionPolicy.arrangeNodes(criteria.getSize(), filteredNodes, client); List<Node> matchedNodes; if (hasScripts) { // checking if all scripts are authorized checkAuthorizedScripts(criteria.getScripts()); // arranging nodes for script execution List<RMNode> arrangedNodes = arrangeNodesForScriptExecution(afterPolicyNodes, criteria.getScripts()); if (criteria.getTopology().isTopologyBased()) { // run scripts on all available nodes matchedNodes = runScripts(arrangedNodes, criteria); } else { // run scripts not on all nodes, but always on missing number of nodes // until required node set is found matchedNodes = new LinkedList<>(); while (matchedNodes.size() < criteria.getSize()) { int numberOfNodesForScriptExecution = criteria.getSize() - matchedNodes.size(); if (numberOfNodesForScriptExecution < PAResourceManagerProperties.RM_SELECTION_MAX_THREAD_NUMBER.getValueAsInt()) { // we can run "PAResourceManagerProperties.RM_SELECTION_MAX_THREAD_NUMBER.getValueAsInt()" scripts in parallel // in case when we need less nodes it still useful to // the full capacity of the thread pool to find nodes quicker // it is not important if we find more nodes than needed // subset will be selected later (topology handlers) numberOfNodesForScriptExecution = PAResourceManagerProperties.RM_SELECTION_MAX_THREAD_NUMBER.getValueAsInt(); } List<RMNode> subset = arrangedNodes.subList(0, Math.min(numberOfNodesForScriptExecution, arrangedNodes.size())); matchedNodes.addAll(runScripts(subset, criteria)); // removing subset of arrangedNodes subset.clear(); if (arrangedNodes.size() == 0) { break; } } } if (loggerIsDebugEnabled) { logger.debug(matchedNodes.size() + " nodes found after scripts execution for " + client); } } else { matchedNodes = new LinkedList<>(); for (RMNode node : afterPolicyNodes) { matchedNodes.add(node.getNode()); } } // now we have a list of nodes which match to selection scripts // selecting subset according to topology requirements // TopologyHandler handler = RMCore.topologyManager.getHandler(topologyDescriptor); if (criteria.getTopology().isTopologyBased() && loggerIsDebugEnabled) { logger.debug("Filtering nodes with topology " + criteria.getTopology()); } NodeSet selectedNodes = handler.select(criteria.getSize(), matchedNodes); if (selectedNodes.size() < criteria.getSize() && !criteria.isBestEffort()) { selectedNodes.clear(); if (selectedNodes.getExtraNodes() != null) { selectedNodes.getExtraNodes().clear(); } } // the nodes are selected, now mark them as busy. for (Node node : selectedNodes) { try { // Synchronous call rmcore.setBusyNode(node.getNodeInformation().getURL(), client); } catch (NotConnectedException e) { // client has disconnected during getNodes request logger.warn(e.getMessage(), e); return null; } } // marking extra selected nodes as busy if (selectedNodes.size() > 0 && selectedNodes.getExtraNodes() != null) { for (Node node : new LinkedList<>(selectedNodes.getExtraNodes())) { try { // synchronous call rmcore.setBusyNode(node.getNodeInformation().getURL(), client); } catch (NotConnectedException e) { // client has disconnected during getNodes request logger.warn(e.getMessage(), e); return null; } } } if (logger.isInfoEnabled()) { String extraNodes = selectedNodes.getExtraNodes() != null && selectedNodes.getExtraNodes().size() > 0 ? " and " + selectedNodes.getExtraNodes() .size() + " extra nodes" : ""; logger.info(client + " requested " + criteria.getSize() + " nodes with " + criteria.getTopology() + " and will get " + selectedNodes.size() + " nodes " + extraNodes + " [freeNodes:" + freeNodes.size() + ";filteredNodes:" + filteredNodes.size() + ";reordered after policy:" + afterPolicyNodes.size() + ";selection script present:" + hasScripts + ";nodes filtered by selection script:" + matchedNodes.size() + ";selectedNodes:" + selectedNodes.size() + "]"); } if (loggerIsDebugEnabled) { for (Node n : selectedNodes) { logger.debug(n.getNodeInformation().getURL()); } } return selectedNodes; } /** * Checks is all scripts are authorized. If not throws an exception. */ private void checkAuthorizedScripts(List<SelectionScript> scripts) { updateAuthorizedScriptsSignatures(); if (authorizedSelectionScripts == null || scripts == null) return; for (SelectionScript script : scripts) { if (!authorizedSelectionScripts.contains(Script.digest(script.getScript().trim()))) { // unauthorized selection script throw new SecurityException("Cannot execute unauthorized script: " + System.getProperty("line.separator") + script.getScript()); } } } /** * Runs scripts on given set of nodes and returns matched nodes. * It blocks until all results are obtained. * * @param candidates nodes to execute scripts on * @param criteria contains a set of scripts to execute on each node * @return nodes matched to all scripts */ private List<Node> runScripts(List<RMNode> candidates, Criteria criteria) { List<Node> matched = new LinkedList<>(); if (candidates.size() == 0) { return matched; } // creating script executors object to be run in dedicated thread pool List<Callable<Node>> scriptExecutors = new LinkedList<>(); synchronized (inProgress) { if (inProgress.size() > 0) { logger.warn(inProgress.size() + " nodes are in process of script execution"); for (String nodeName : inProgress) { logger.warn(nodeName); } logger.warn("Something is wrong on these nodes"); } for (RMNode node : candidates) { if (!inProgress.contains(node.getNodeURL())) { inProgress.add(node.getNodeURL()); scriptExecutors.add(new ScriptExecutor(node, criteria, this)); } } } try { // launching Collection<Future<Node>> matchedNodes = scriptExecutorThreadPool.invokeAll(scriptExecutors, PAResourceManagerProperties.RM_SELECT_SCRIPT_TIMEOUT.getValueAsLong(), TimeUnit.MILLISECONDS); int index = 0; // waiting for the results for (Future<Node> futureNode : matchedNodes) { if (!futureNode.isCancelled()) { Node node; try { node = futureNode.get(); if (node != null) { matched.add(node); } } catch (InterruptedException e) { logger.warn("Interrupting the selection manager"); return matched; } catch (ExecutionException e) { logger.warn("Ignoring exception in selection script: " + e.getMessage()); } } else { // no script result was obtained logger.warn("Timeout on " + scriptExecutors.get(index)); // in this case scriptExecutionFinished may not be called scriptExecutionFinished(((ScriptExecutor) scriptExecutors.get(index)).getRMNode().getNodeURL()); } index++; } } catch (InterruptedException e1) { logger.warn("Interrupting the selection manager"); } return matched; } /** * Removes exclusion nodes and nodes not accessible for the client */ private List<RMNode> filterOut(List<RMNode> freeNodes, Criteria criteria, Client client) { NodeSet exclusion = criteria.getBlackList(); Set<String> inclusion = criteria.getAcceptableNodesUrls(); boolean nodeWithTokenRequested = criteria.getNodeAccessToken() != null && criteria.getNodeAccessToken().length() > 0; TokenPrincipal tokenPrincipal = null; if (nodeWithTokenRequested) { logger.debug("Node access token specified " + criteria.getNodeAccessToken()); tokenPrincipal = new TokenPrincipal(criteria.getNodeAccessToken()); client.getSubject().getPrincipals().add(tokenPrincipal); } List<RMNode> filteredList = new ArrayList<>(); HashSet<Permission> clientPermissions = new HashSet<>(); for (RMNode node : freeNodes) { // checking the permission try { if (!clientPermissions.contains(node.getUserPermission())) { client.checkPermission(node.getUserPermission(), client + " is not authorized to get the node " + node.getNodeURL() + " from " + node.getNodeSource().getName()); clientPermissions.add(node.getUserPermission()); } } catch (SecurityException e) { // client does not have an access to this node logger.debug(e.getMessage()); continue; } // if the node access token is specified we filtered out all nodes // with other tokens but must also filter out nodes without tokens if (nodeWithTokenRequested && !node.isProtectedByToken()) { continue; } // if client has AllPermissions he still can get a node with any token // we will avoid it here if (nodeWithTokenRequested) { PrincipalPermission perm = (PrincipalPermission) node.getUserPermission(); // checking explicitly that node has this token identity if (!perm.hasPrincipal(tokenPrincipal)) { if (logger.isDebugEnabled()) { logger.debug(client + " does not have required token to get the node " + node.getNodeURL() + " from " + node.getNodeSource().getName()); } continue; } } if (!contains(exclusion, node) && ((inclusion != null) ? inclusion.contains(node.getNodeURL()) : true)) { filteredList.add(node); } } return filteredList; } public <T> List<ScriptResult<T>> executeScript(final Script<T> script, final Collection<RMNode> nodes, final Map<String, Serializable> bindings) { // TODO: add a specific timeout for script execution final long timeout = PAResourceManagerProperties.RM_EXECUTE_SCRIPT_TIMEOUT.getValueAsLong(); final ArrayList<Callable<ScriptResult<T>>> scriptExecutors = new ArrayList<>(nodes.size()); // Execute the script on each selected node for (final RMNode node : nodes) { scriptExecutors.add(new Callable<ScriptResult<T>>() { @Override public ScriptResult<T> call() throws Exception { // Execute with a timeout the script by the remote handler // and always async-unlock the node, exceptions will be treated as ExecutionException try { ScriptResult<T> res = node.executeScript(script, bindings); PAFuture.waitFor(res, timeout); return res; //return PAFuture.getFutureValue(res, timeout); } finally { // cleaning the node try { node.clean(); } catch (Throwable ex) { logger.error("Cannot clean the node " + node.getNodeURL(), ex); } SelectionManager.this.rmcore.unlockNodes(Collections.singleton(node.getNodeURL())); } } @Override public String toString() { return "executing script on " + node.getNodeURL(); } }); } // Invoke all Callables and get the list of futures List<Future<ScriptResult<T>>> futures = null; try { futures = this.scriptExecutorThreadPool.invokeAll(scriptExecutors, timeout, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { logger.warn("Interrupted while waiting, unable to execute all scripts", e); Thread.currentThread().interrupt(); } final List<ScriptResult<T>> results = new LinkedList<>(); int index = 0; // waiting for the results for (final Future<ScriptResult<T>> future : futures) { final String description = scriptExecutors.get(index++).toString(); ScriptResult<T> result = null; try { result = future.get(); } catch (CancellationException e) { result = new ScriptResult<>(new ScriptException("Cancelled due to timeout expiration when " + description, e)); } catch (InterruptedException e) { result = new ScriptResult<>(new ScriptException("Cancelled due to interruption when " + description)); } catch (ExecutionException e) { // Unwrap the root exception Throwable rex = e.getCause(); result = new ScriptResult<>(new ScriptException("Exception occured in script call when " + description, rex)); } results.add(result); } return results; } /** * Indicates that script execution is finished for the node with specified url. */ public void scriptExecutionFinished(String nodeUrl) { synchronized (inProgress) { inProgress.remove(nodeUrl); } } /** * Handles shut down of the selection manager */ public void shutdown() { // shutdown the thread pool without waiting for script execution completions scriptExecutorThreadPool.shutdownNow(); PAActiveObject.terminateActiveObject(false); } /** * Return true if node contains the node set. * * @param nodeset - a list of nodes to inspect * @param node - a node to find * @return true if node contains the node set. */ private boolean contains(NodeSet nodeset, RMNode node) { if (nodeset == null) return false; for (Node n : nodeset) { try { if (n.getNodeInformation().getURL().equals(node.getNodeURL())) { return true; } } catch (Exception e) { continue; } } return false; } }