/*
* ProActive Parallel Suite(TM):
* The Open Source library for parallel and distributed
* Workflows & Scheduling, Orchestration, Cloud Automation
* and Big Data Analysis on Enterprise Grids & Clouds.
*
* Copyright (c) 2007 - 2017 ActiveEon
* Contact: contact@activeeon.com
*
* This library is free software: you can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License
* as published by the Free Software Foundation: version 3 of
* the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If needed, contact us to obtain a release under GPL Version 2 or 3
* or a different license than the AGPL.
*/
package org.ow2.proactive.resourcemanager.nodesource.infrastructure;
import static com.google.common.base.Throwables.getStackTraceAsString;
import java.io.File;
import java.io.IOException;
import java.net.InetAddress;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.objectweb.proactive.core.node.Node;
import org.objectweb.proactive.core.util.ProActiveCounter;
import org.ow2.proactive.resourcemanager.exception.RMException;
import org.ow2.proactive.resourcemanager.nodesource.common.Configurable;
import org.ow2.proactive.resourcemanager.utils.RMNodeStarter;
import org.ow2.proactive.utils.FileToBytesConverter;
/**
*
* An infrastructure manager that operates custom scripts in order to
* deploy/remove nodes.
* <p>
* Deployment phase:
* <ul>
* <li>launch the script by providing host name, node name, node source name, rm
* url and the number of nodes to deploy</li>
* <li>if no node registers within the timeout, the script process is destroyed.</li>
* </ul>
* <p>
* Removal phase:
* <ul>
* <li>remove node from the resource manager</li>
* <li>launch removal script giving host name and node url.</li>
* </ul>
*/
public class CLIInfrastructure extends HostsFileBasedInfrastructureManager {
@Configurable(description = "An interpreter that executes the script")
protected String interpreter = "bash";
@Configurable(fileBrowser = true, description = "A script that deploys a node on host (parameters: host, node, ns names and rm url).")
protected File deploymentScript;
@Configurable(fileBrowser = true, description = "A script that removes a node (parameters: host name and node url")
protected File removalScript;
private final AtomicInteger numberOfRemovalThread = new AtomicInteger(0);
/**
* Configures the Infrastructure
*
* @param parameters
* parameters[4] : An interpreter that launch the script
* parameters[5] : A script that deploys nodes on a single host
* parameters[6] : A script that removes a node
* @throws IllegalArgumentException
* configuration failed
*/
@Override
protected void configure(Object... parameters) {
super.configure(parameters);
int index = 4;
// TODO super admin rights check
if (parameters != null && parameters.length >= 7) {
this.interpreter = parameters[index++].toString();
try {
byte[] bytes = (byte[]) parameters[index++];
// putting .cmd as an extension so that it works on Windows
deploymentScript = File.createTempFile("deployment", ".cmd");
FileToBytesConverter.convertByteArrayToFile(bytes, deploymentScript);
// deploymentScript.setExecutable(true);
} catch (Exception e) {
throw new IllegalArgumentException("Could not read deployment script", e);
}
try {
byte[] bytes = (byte[]) parameters[index++];
// putting .cmd as an extension so that it works on Windows
removalScript = File.createTempFile("removal", ".cmd");
FileToBytesConverter.convertByteArrayToFile(bytes, removalScript);
// removalScript.setExecutable(true);
} catch (Exception e) {
throw new IllegalArgumentException("Could not read removal script file", e);
}
}
}
/**
* Internal node acquisition method
* <p>
* Starts a PA runtime on remote host using a custom script, register it
* manually in the nodesource.
*
* @param host The host on which one the node will be started
* @param nbNodes number of nodes to deploy
* @param depNodeURLs list of deploying or lost nodes urls created
* @throws RMException
* acquisition failed
*/
protected void startNodeImpl(InetAddress host, int nbNodes, final List<String> depNodeURLs) throws RMException {
final String nodeName = "SCR-" + this.nodeSource.getName() + "-" + ProActiveCounter.getUniqID();
final String commandLine = interpreter + " " + deploymentScript.getAbsolutePath() + " " + host.getHostName() +
" " + nodeName + " " + this.nodeSource.getName() + " " + rmUrl + " " + nbNodes;
final List<String> createdNodeNames = RMNodeStarter.getWorkersNodeNames(nodeName, nbNodes);
depNodeURLs.addAll(addMultipleDeployingNodes(createdNodeNames,
commandLine,
"Deploying node on host " + host,
this.nodeTimeOut));
addTimeouts(depNodeURLs);
Process p;
try {
logger.debug("Launching the command: " + commandLine);
p = Runtime.getRuntime().exec(commandLine);
} catch (IOException e1) {
multipleDeclareDeployingNodeLost(depNodeURLs,
"Cannot run command: " + commandLine +
" - \n The following exception occured: " +
getStackTraceAsString(e1));
throw new RMException("Cannot run command: " + commandLine, e1);
}
String lf = System.lineSeparator();
int circuitBreakerThreshold = 5;
while (!anyTimedOut(depNodeURLs) && circuitBreakerThreshold > 0) {
try {
int exitCode = p.exitValue();
if (exitCode != 0) {
logger.error("Child process at " + host.getHostName() + " exited abnormally (" + exitCode + ").");
} else {
logger.error("Launching node script has exited normally whereas it shouldn't.");
}
String pOutPut = Utils.extractProcessOutput(p);
String pErrPut = Utils.extractProcessErrput(p);
final String description = "Script failed to launch a node on host " + host.getHostName() + lf +
" >Error code: " + exitCode + lf + " >Errput: " + pErrPut +
" >Output: " + pOutPut;
logger.error(description);
if (super.checkNodeIsAcquiredAndDo(nodeName, null, new Runnable() {
public void run() {
multipleDeclareDeployingNodeLost(depNodeURLs, description);
}
})) {
return;
} else {
// there isn't any race regarding node registration
throw new RMException("A node " + nodeName + " is not expected anymore because of an error.");
}
} catch (IllegalThreadStateException e) {
logger.trace("IllegalThreadStateException while waiting for " + nodeName + " registration");
}
if (super.checkNodeIsAcquiredAndDo(nodeName, null, null)) {
// registration is ok, we destroy the process
logger.debug("Destroying the process: " + p);
p.destroy();
return;
}
try {
Thread.sleep(1000);
} catch (Exception e) {
circuitBreakerThreshold--;
logger.trace("An exception occurred while monitoring a child process", e);
}
}
// if we exit because of a timeout
if (this.anyTimedOut(depNodeURLs)) {
// we remove it
removeTimeouts(depNodeURLs);
// we destroy the process
p.destroy();
throw new RMException("Deploying Node " + nodeName + " not expected any more");
}
if (circuitBreakerThreshold <= 0) {
logger.error("Circuit breaker threshold reached while monitoring a child process.");
throw new RMException("Several exceptions occurred while monitoring a child process.");
}
}
/**
* {@inheritDoc}
*/
@Override
protected void killNodeImpl(Node node, InetAddress h) {
final Node n = node;
final InetAddress host = h;
numberOfRemovalThread.incrementAndGet();
this.nodeSource.executeInParallel(new Runnable() {
public void run() {
try {
final String commandLine = interpreter + " " + removalScript.getAbsolutePath() + " " +
host.getHostName() + " " + n.getNodeInformation().getURL();
Process p;
try {
logger.debug("Launching the command: " + commandLine);
p = Runtime.getRuntime().exec(commandLine);
// TODO add timeout behavior
int exitCode = p.waitFor();
String pOutPut = Utils.extractProcessOutput(p);
String pErrPut = Utils.extractProcessErrput(p);
String lf = System.lineSeparator();
final String description = "Removal script ouput" + lf + " >Error code: " + exitCode + lf +
" >Errput: " + pErrPut + " >Output: " + pOutPut;
if (exitCode != 0) {
logger.error("Child process at " + host.getHostName() + " exited abnormally (" + exitCode +
").");
logger.error(description);
} else {
logger.info("Removal node process has exited normally for " +
n.getNodeInformation().getURL());
logger.debug(description);
}
} catch (IOException e1) {
logger.error(e1);
}
} catch (Exception e) {
logger.trace("An exception occurred during node removal", e);
}
numberOfRemovalThread.decrementAndGet();
}
});
}
/**
* @return short description of the IM
*/
@Override
public String getDescription() {
return "Creates remote runtimes using custom scripts";
}
/**
* {@inheritDoc}
*/
@Override
public String toString() {
return "Script Infrastructure";
}
/**
* {@inheritDoc}
*/
@Override
public void shutDown() {
deploymentScript.delete();
// checking if we need to delete the removal script
if (this.numberOfRemovalThread.get() <= 0) {
removalScript.delete();
}
}
}