/* * ProActive Parallel Suite(TM): * The Open Source library for parallel and distributed * Workflows & Scheduling, Orchestration, Cloud Automation * and Big Data Analysis on Enterprise Grids & Clouds. * * Copyright (c) 2007 - 2017 ActiveEon * Contact: contact@activeeon.com * * This library is free software: you can redistribute it and/or * modify it under the terms of the GNU Affero General Public License * as published by the Free Software Foundation: version 3 of * the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * If needed, contact us to obtain a release under GPL Version 2 or 3 * or a different license than the AGPL. */ package org.ow2.proactive.resourcemanager.nodesource.infrastructure; import java.io.IOException; import java.security.KeyException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import org.objectweb.proactive.core.config.CentralPAPropertyRepository; import org.objectweb.proactive.core.node.Node; import org.ow2.proactive.authentication.crypto.Credentials; import org.ow2.proactive.process.ProcessExecutor; import org.ow2.proactive.resourcemanager.core.properties.PAResourceManagerProperties; import org.ow2.proactive.resourcemanager.exception.RMException; import org.ow2.proactive.resourcemanager.nodesource.common.Configurable; import org.ow2.proactive.resourcemanager.utils.CommandLineBuilder; import org.ow2.proactive.resourcemanager.utils.OperatingSystem; import org.ow2.proactive.resourcemanager.utils.RMNodeStarter; import com.google.common.base.Joiner; import com.google.common.base.Throwables; public class LocalInfrastructure extends InfrastructureManager { public static final int DEFAULT_NODE_NUMBER = Math.max(2, Runtime.getRuntime().availableProcessors() - 1); public static final int DEFAULT_TIMEOUT = 30000; @Configurable(description = "Absolute path to credentials file\nused to add the node to the Resource Manager", credential = true) private Credentials credentials; @Configurable(description = "Maximum number of nodes to\nbe deployed on Resource Manager machine") private int maxNodes = DEFAULT_NODE_NUMBER; // number of nodes which can still be acquired private AtomicInteger acquiredNodes; private AtomicInteger lostNodes; private AtomicInteger handledNodes; private AtomicBoolean commandLineStarted; @Configurable(description = "in ms. After this timeout expired\nthe node is considered to be lost") private int nodeTimeout = DEFAULT_TIMEOUT; @Configurable(description = "Additional ProActive properties") private String paProperties = ""; private transient ProcessExecutor processExecutor; public LocalInfrastructure() { } @Override public String getDescription() { return "Deploys nodes on Resource Manager's machine"; } @Override public void acquireAllNodes() { this.acquireNode(); } @Override public void acquireNode() { if (this.commandLineStarted.compareAndSet(false, true)) { this.nodeSource.executeInParallel(new Runnable() { public void run() { LocalInfrastructure.this.startNodeProcess(); } }); } else { logger.debug("Cannot acquire more nodes"); } } private void startNodeProcess() { acquiredNodes.set(0); lostNodes.set(0); String baseNodeName = "local-" + this.nodeSource.getName(); OperatingSystem os = OperatingSystem.UNIX; // assuming no cygwin, windows or the "others"... if (System.getProperty("os.name").contains("Windows")) { os = OperatingSystem.WINDOWS; } String rmHome = PAResourceManagerProperties.RM_HOME.getValueAsString(); if (!rmHome.endsWith(os.fs)) { rmHome += os.fs; } CommandLineBuilder clb = this.getDefaultCommandLineBuilder(os); // RM_Home set in bin/unix/env script clb.setRmHome(rmHome); ArrayList<String> paPropList = new ArrayList<>(); if (!this.paProperties.contains(CentralPAPropertyRepository.JAVA_SECURITY_POLICY.getName())) { paPropList.add(CentralPAPropertyRepository.JAVA_SECURITY_POLICY.getCmdLine() + rmHome + "config" + os.fs + "security.java.policy-client"); } if (!this.paProperties.contains(CentralPAPropertyRepository.PA_CONFIGURATION_FILE.getName())) { paPropList.add(CentralPAPropertyRepository.PA_CONFIGURATION_FILE.getCmdLine() + rmHome + "config" + os.fs + "network" + os.fs + "node.ini"); } if (!this.paProperties.contains(PAResourceManagerProperties.RM_HOME.getKey())) { paPropList.add(PAResourceManagerProperties.RM_HOME.getCmdLine() + rmHome); } if (!this.paProperties.contains("java.library.path")) { paPropList.add("-Djava.library.path=" + System.getProperty("java.library.path")); } if (!paProperties.isEmpty()) { Collections.addAll(paPropList, this.paProperties.split(" ")); } clb.setPaProperties(paPropList); clb.setNodeName(baseNodeName); clb.setNumberOfNodes(handledNodes.get()); try { clb.setCredentialsValueAndNullOthers(new String(this.credentials.getBase64())); } catch (KeyException e) { createLostNodes(baseNodeName, "Cannot decrypt credentials value", e); return; } List<String> cmd; try { cmd = clb.buildCommandLineAsList(false); } catch (IOException e) { createLostNodes(baseNodeName, "Cannot build command line", e); return; } // The printed cmd with obfuscated credentials final String obfuscatedCmd = Joiner.on(' ').join(cmd); List<String> depNodeURLs = new ArrayList<>(handledNodes.get()); final List<String> createdNodeNames = RMNodeStarter.getWorkersNodeNames(baseNodeName, handledNodes.get()); try { depNodeURLs.addAll(addMultipleDeployingNodes(createdNodeNames, obfuscatedCmd, "Node launched locally", this.nodeTimeout)); // Deobfuscate the cred value Collections.replaceAll(cmd, CommandLineBuilder.OBFUSC, clb.getCredentialsValue()); processExecutor = new ProcessExecutor(baseNodeName, cmd, false, true); processExecutor.start(); Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() { @Override public void run() { if (processExecutor != null && !processExecutor.isProcessFinished()) { processExecutor.killProcess(); } } })); logger.info("Local Nodes command started : " + obfuscatedCmd); } catch (IOException e) { String lf = System.lineSeparator(); String mess = "Cannot launch rm node " + baseNodeName + lf + Throwables.getStackTraceAsString(e); multipleDeclareDeployingNodeLost(depNodeURLs, mess); if (processExecutor != null) { cleanProcess(); } return; } // watching process int threshold = 10; while (!allNodesAcquiredOrLost()) { if (processExecutor.isProcessFinished()) { int exit = processExecutor.getExitCode(); if (exit != 0) { String lf = System.lineSeparator(); String message = "RMNode exit code == " + exit + lf; message += "Command: " + obfuscatedCmd + lf; String out = Joiner.on('\n').join(processExecutor.getOutput()); String err = Joiner.on('\n').join(processExecutor.getErrorOutput()); message += "stdout: " + out + lf + "stderr: " + err; multipleDeclareDeployingNodeLost(depNodeURLs, message); } } else { logger.debug("Waiting for nodes " + baseNodeName + " acquisition"); } try { Thread.sleep(500); } catch (InterruptedException e) { logger.warn("Interrupted while waiting for local process status", e); threshold--; if (threshold <= 0) { break; } } } logger.debug("Local Infrastructure manager exits watching loop for nodes " + baseNodeName); logNodeOutput(baseNodeName + " stdout: ", processExecutor.getOutput()); logNodeOutput(baseNodeName + " stderr: ", processExecutor.getErrorOutput()); if (allNodesLost()) { // clean up the process cleanProcess(); } } private void logNodeOutput(final String prefix, List<String> nodeOutputLines) { if (nodeOutputLines != null) { for (String processOutputLine : nodeOutputLines) { logger.debug(prefix + processOutputLine); } } } /** * Creates a lost node. The deployment has failed while building the command * line * * @param message * a message * @param e * the cause */ private void createLostNodes(String baseName, String message, Throwable e) { List<String> createdNodeNames = RMNodeStarter.getWorkersNodeNames(baseName, handledNodes.get()); for (int nodeIndex = 0; nodeIndex < handledNodes.get(); nodeIndex++) { String name = createdNodeNames.get(nodeIndex); String lf = System.lineSeparator(); String url = super.addDeployingNode(name, "deployed as daemon", "Deploying a local infrastructure node", this.nodeTimeout); String st = Throwables.getStackTraceAsString(e); super.declareDeployingNodeLost(url, message + lf + st); } } private boolean allNodesAcquiredOrLost() { return (acquiredNodes.get() + lostNodes.get()) == handledNodes.get(); } private boolean allNodesLost() { return lostNodes.get() == handledNodes.get(); } private void cleanProcess() { if (processExecutor != null) { processExecutor.killProcess(); commandLineStarted.set(false); processExecutor = null; } } /** * args[0] = credentials args[1] = max nodes args[2] = timeout args[3] = pa * props */ @Override protected void configure(Object... args) { int index = 0; try { this.credentials = Credentials.getCredentialsBase64((byte[]) args[index++]); } catch (KeyException e1) { throw new IllegalArgumentException("Cannot decrypt credentials", e1); } try { this.maxNodes = Integer.parseInt(args[index++].toString()); } catch (Exception e) { throw new IllegalArgumentException("Cannot determine max node"); } this.acquiredNodes = new AtomicInteger(0); this.lostNodes = new AtomicInteger(0); this.commandLineStarted = new AtomicBoolean(false); this.handledNodes = new AtomicInteger(maxNodes); try { this.nodeTimeout = Integer.parseInt(args[index++].toString()); } catch (Exception e) { logger.warn("Cannot determine node timeout, using default:" + this.nodeTimeout, e); } this.paProperties = args[index++].toString(); } /** * {@inheritDoc} */ @Override protected void notifyDeployingNodeLost(String pnURL) { this.lostNodes.incrementAndGet(); } @Override protected void notifyAcquiredNode(Node arg0) throws RMException { this.acquiredNodes.incrementAndGet(); } @Override public void removeNode(Node node) throws RMException { logger.debug("Removing node " + node.getNodeInformation().getURL() + " from " + this.getClass().getSimpleName()); if (!this.nodeSource.getDownNodes().contains(node)) { // the node was manually removed handledNodes.decrementAndGet(); } int remainingNodesCount = this.acquiredNodes.decrementAndGet(); // If there is no remaining node, kill the JVM process if (remainingNodesCount == 0 && commandLineStarted.get()) { shutDown(); } } @Override public void onDownNodeReconnection(Node node) { acquiredNodes.incrementAndGet(); } @Override public void shutDown() { if (processExecutor != null) { processExecutor.killProcess(); } commandLineStarted.set(false); // do not set processExecutor to null here or NPE can appear in the startProcess method, running in a different thread. logger.info("Process associated with node source " + nodeSource.getName() + " destroyed"); } @Override public String toString() { return "Local Infrastructure"; } }