/* * ProActive Parallel Suite(TM): * The Open Source library for parallel and distributed * Workflows & Scheduling, Orchestration, Cloud Automation * and Big Data Analysis on Enterprise Grids & Clouds. * * Copyright (c) 2007 - 2017 ActiveEon * Contact: contact@activeeon.com * * This library is free software: you can redistribute it and/or * modify it under the terms of the GNU Affero General Public License * as published by the Free Software Foundation: version 3 of * the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * If needed, contact us to obtain a release under GPL Version 2 or 3 * or a different license than the AGPL. */ package org.ow2.proactive.resourcemanager.utils; import static com.google.common.base.Throwables.getStackTraceAsString; import static org.ow2.proactive.utils.ClasspathUtils.findSchedulerHome; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.net.InetAddress; import java.net.MalformedURLException; import java.net.URL; import java.security.KeyException; import java.security.Policy; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import javax.security.auth.login.LoginException; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import 
org.apache.commons.cli.ParseException; import org.apache.commons.io.FileUtils; import org.apache.log4j.BasicConfigurator; import org.apache.log4j.ConsoleAppender; import org.apache.log4j.Level; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.log4j.PatternLayout; import org.apache.log4j.PropertyConfigurator; import org.hyperic.sigar.Sigar; import org.hyperic.sigar.SigarLoader; import org.objectweb.proactive.ActiveObjectCreationException; import org.objectweb.proactive.api.PAActiveObject; import org.objectweb.proactive.api.PAFuture; import org.objectweb.proactive.core.ProActiveException; import org.objectweb.proactive.core.ProActiveRuntimeException; import org.objectweb.proactive.core.config.CentralPAPropertyRepository; import org.objectweb.proactive.core.node.Node; import org.objectweb.proactive.core.node.NodeException; import org.objectweb.proactive.core.node.NodeFactory; import org.objectweb.proactive.core.runtime.ProActiveRuntimeImpl; import org.objectweb.proactive.core.util.wrapper.BooleanWrapper; import org.objectweb.proactive.extensions.dataspaces.exceptions.NotConfiguredException; import org.objectweb.proactive.utils.JVMPropertiesPreloader; import org.ow2.proactive.authentication.crypto.Credentials; import org.ow2.proactive.jmx.PermissionChecker; import org.ow2.proactive.jmx.naming.JMXTransportProtocol; import org.ow2.proactive.resourcemanager.authentication.RMAuthentication; import org.ow2.proactive.resourcemanager.common.RMConstants; import org.ow2.proactive.resourcemanager.core.properties.PAResourceManagerProperties; import org.ow2.proactive.resourcemanager.exception.AddingNodesException; import org.ow2.proactive.resourcemanager.exception.NotConnectedException; import org.ow2.proactive.resourcemanager.frontend.RMConnection; import org.ow2.proactive.resourcemanager.frontend.ResourceManager; import org.ow2.proactive.resourcemanager.node.jmx.SigarExposer; import 
org.ow2.proactive.resourcemanager.nodesource.dataspace.DataSpaceNodeConfigurationAgent;
import org.ow2.proactive.utils.CookieBasedProcessTreeKiller;
import org.ow2.proactive.utils.Tools;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;


/**
 * This class is responsible for creating a local node. You can define different settings to
 * register the node to an appropriate Resource Manager, ping it...
 *
 * @author ProActive team
 */
public class RMNodeStarter {

    /** Credentials used to log into the Resource Manager. */
    protected Credentials credentials = null;

    /** URL of the Resource Manager to join; when left null the URL is looked up through broadcast discovery. */
    protected String rmURL = null;

    /** Name of the node source; when non-empty it is published as the {@link #NODESOURCE_PROP_NAME} system property. */
    protected String nodeSourceName = null;

    // While the logger is not configured and the log4j configuration is not set through
    // system properties, use a console logger
    static {
        if (System.getProperty(CentralPAPropertyRepository.LOG4J.getName()) == null) {
            Logger.getRootLogger().getLoggerRepository().resetConfiguration();
            BasicConfigurator.configure(new ConsoleAppender(new PatternLayout("%m%n")));
            Logger.getRootLogger().setLevel(Level.INFO);
        }
    }

    static final Logger logger = Logger.getLogger(RMNodeStarter.class);

    /** Prefix for temp files that store nodes URL */
    private static final String URL_TMPFILE_PREFIX = "PA-AGENT_URL";

    /** Name of the java property to set the rank */
    public final static String RANK_PROP_NAME = "proactive.agent.rank";

    /**
     * Name of the node property that holds the data spaces configuration status: "true" when the
     * configuration succeeded, the stack trace of the failure otherwise.
     */
    public final static String DATASPACES_STATUS_PROP_NAME = "proactive.dataspaces.status";

    /** Name of the node property that stores the Sigar JMX connection URL */
    public static final String JMX_URL = "proactive.node.jmx.sigar.";

    /**
     * If this property is added to the node properties then this node will be provided for
     * computations only if the criteria have the same access token.
     */
    public static final String NODE_ACCESS_TOKEN = "proactive.node.access.token";

    private static final int DEFAULT_NODE_AVAILABILITY_REPORT_TIMEOUT_DELAY = 5000; // in ms

    /** Name of the java property to set the node availability report timeout (in milliseconds) */
    private static final String NODE_AVAILABILITY_REPORT_TIMEOUT_DELAY_PROP_NAME = "proactive.node.availability.reporting.timeout";

    /**
     * The maximum time to wait in milliseconds for retrieving the answer
     * for the node availability report that is pushed periodically.
     */
    private int nodeAvailabilityReportTimeoutDelay = DEFAULT_NODE_AVAILABILITY_REPORT_TIMEOUT_DELAY;

    /**
     * The starter will try to connect to the Resource Manager before killing
     * itself, which means that it will try to connect during
     * WAIT_ON_JOIN_TIMEOUT_IN_MS milliseconds
     */
    private static int WAIT_ON_JOIN_TIMEOUT_IN_MS = 60000;

    /** to inform that the user supplied a value from the command line for the join rm timeout */
    private static boolean WAIT_ON_JOIN_TIMEOUT_IN_MS_USER_SUPPLIED = false;

    /** Name of the java property to set the timeout value used to join the resource manager */
    public final static String WAIT_ON_JOIN_PROP_NAME = "proactive.node.joinrm.timeout";

    /**
     * The delay, in milliseconds, between two pings of the Resource Manager; the node exits if
     * the Resource Manager is down
     */
    private static long PING_DELAY_IN_MS = 30000;

    /** to inform that the user supplied a value from the command line for the ping */
    private static boolean PING_DELAY_IN_MS_USER_SUPPLIED = false;

    /** Name of the java property to set the node -> rm ping frequency value */
    public final static String PING_DELAY_PROP_NAME = "proactive.node.ping.delay";

    /** The number of attempts to add the local node to the RM before quitting */
    private static int NB_OF_ADD_NODE_ATTEMPTS = 10;

    /** to inform that the user supplied a value from the command line for the number of "add" attempts */
    private static boolean NB_OF_ADD_NODE_ATTEMPTS_USER_SUPPLIED = false;

    /** Name of the java property to set the number of attempts performed to add a node to the resource manager */
    public final static String NB_OF_ADD_NODE_ATTEMPTS_PROP_NAME = "proactive.node.add.attempts";

    /** The number of attempts to reconnect the node to the RM before quitting (interval between each attempt is
     * given by {@link #PING_DELAY_IN_MS}) */
    protected static int NB_OF_RECONNECTION_ATTEMPTS = 2 * 5; // 10 attempts at the default 30 s ping delay, so 5 minutes by default

    // NOTE(review): the two property names below are not read anywhere in the visible part of this file
    public final static String SECONDS_TASK_CLEANUP_TIMEOUT_PROP_NAME = "proactive.node.task.cleanup.time";

    public final static String SECONDS_TASK_CLEANUP_TIMEOUT_PROP_NAME_PROACTIVE_PROGRAMMING = "proactive.process.builder.cleanup.time.seconds";

    /** Name of the java property to set the number of attempts performed to reconnect a node to the resource manager */
    public final static String NB_OF_RECONNECTION_ATTEMPTS_PROP_NAME = "proactive.node.reconnection.attempts";

    /** The delay, in millis, between two attempts to add a node */
    private static int ADD_NODE_ATTEMPTS_DELAY_IN_MS = 5000;

    /** to inform that the user supplied a value from the command line for the delay between two add attempts */
    private static boolean ADD_NODE_ATTEMPTS_DELAY_IN_MS_USER_SUPPLIED = false;

    /** Name of the java property to set the delay between two attempts performed to add a node to the resource manager */
    public final static String ADD_NODE_ATTEMPTS_DELAY_PROP_NAME = "proactive.node.add.delay";

    /** Name of the java property to set the node source name */
    public final static String NODESOURCE_PROP_NAME = "proactive.node.nodesource";

    /** Timeout, in milliseconds, given to the broadcast discovery of the Resource Manager URL */
    private int discoveryTimeoutInMs = 3 * 1000;

    /** Name of the java property to set the discovery timeout */
    public final static String DISCOVERY_TIMEOUT_IN_MS_NAME = "proactive.node.discovery.timeout";

    /** Port used by the broadcast discovery of the Resource Manager URL */
    private int discoveryPort = 64739;

    /** Name of the java property to set the discovery port */
    public final static String DISCOVERY_PORT_NAME = "proactive.node.discovery.port";

    /** Number of local worker nodes to create */
    private int workers = 1;

    /** Name of the java property to set the number of workers */
    public final static String NUMBER_OF_WORKERS_PROPERTY_NAME = "proactive.node.workers";

    // the rank of this node
    private int rank;

    // if true, previous nodes with different URLs are removed from the RM
    private boolean removePrevious;

    // set to true when the "disableMonitoring" command line option is present
    private boolean disabledMonitoring = false;

    // remaining reconnection attempts; reset to NB_OF_RECONNECTION_ATTEMPTS after a successful (re)connection
    private int numberOfReconnectionAttemptsLeft;

    // NOTE(review): the original comment said "seconds", but the value 3 * 1000 suggests
    // milliseconds (i.e. 3 seconds) - confirm against PAFuture.waitFor
    private static final long DATASPACE_CLOSE_TIMEOUT = 3 * 1000;

    // Short names of the command line options

    static final char OPTION_CREDENTIAL_FILE = 'f';

    static final char OPTION_CREDENTIAL_ENV = 'e';

    static final char OPTION_CREDENTIAL_VAL = 'v';

    static final char OPTION_RM_URL = 'r';

    static final char OPTION_NODE_NAME = 'n';

    static final char OPTION_SOURCE_NAME = 's';

    private static final char OPTION_PING_DELAY = 'p';

    private static final String OPTION_AVAILABILITY_REPORT_TIMEOUT = "art";

    private static final char OPTION_ADD_NODE_ATTEMPTS = 'a';

    private static final char OPTION_ADD_NODE_ATTEMPTS_DELAY = 'd';

    private static final String OPTION_WAIT_AND_JOIN_TIMEOUT = "wt";

    static final String OPTION_WORKERS = "w";

    private static final String OPTION_DISCOVERY_PORT = "dp";

    private static final String OPTION_DISCOVERY_TIMEOUT = "dt";

    private static final char OPTION_HELP = 'h';

    private static final String OPTION_DISABLE_MONITORING = "dm";

    /** Creates a starter with default settings; the settings are filled in later from the command line. */
    public RMNodeStarter() {
    }

    /**
     * Fills the command line options.
* @param options the options to fill */ protected void fillOptions(final Options options) { // The path to the file that contains the credential final Option credentialFile = new Option(Character.toString(OPTION_CREDENTIAL_FILE), "credentialFile", true, "path to file that contains the credential"); credentialFile.setRequired(false); credentialFile.setArgName("path"); options.addOption(credentialFile); // The credential passed as environment variable final Option credentialEnv = new Option(Character.toString(OPTION_CREDENTIAL_ENV), "credentialEnv", true, "name of the environment variable that contains the credential"); credentialEnv.setRequired(false); credentialEnv.setArgName("name"); options.addOption(credentialEnv); // The credential passed as value final Option credVal = new Option(Character.toString(OPTION_CREDENTIAL_VAL), "credentialVal", true, "explicit value of the credential"); credVal.setRequired(false); credVal.setArgName("credential"); options.addOption(credVal); // The url of the resource manager final Option rmURL = new Option(Character.toString(OPTION_RM_URL), "rmURL", true, "URL of the resource manager. 
If no URL is provided, the node won't register."); rmURL.setRequired(false); rmURL.setArgName("url"); options.addOption(rmURL); // The node name final Option nodeName = new Option(Character.toString(OPTION_NODE_NAME), "nodeName", true, "node name (default is hostname_pid)"); nodeName.setRequired(false); nodeName.setArgName("name"); options.addOption(nodeName); // The node source name final Option sourceName = new Option(Character.toString(OPTION_SOURCE_NAME), "sourceName", true, "node source name"); sourceName.setRequired(false); sourceName.setArgName("name"); options.addOption(sourceName); // The wait on join timeout in millis final Option waitOnJoinTimeout = new Option(OPTION_WAIT_AND_JOIN_TIMEOUT, "waitOnJoinTimeout", true, "wait on join the resource manager timeout in millis (default is " + WAIT_ON_JOIN_TIMEOUT_IN_MS + ")"); waitOnJoinTimeout.setRequired(false); waitOnJoinTimeout.setArgName("millis"); options.addOption(waitOnJoinTimeout); // The ping delay in millis final Option pingDelay = new Option(Character.toString(OPTION_PING_DELAY), "pingDelay", true, "ping delay in millis used by RMPinger thread that calls System.exit(1) if the resource manager is down (default is " + PING_DELAY_IN_MS + "). A null or negative frequency means no ping at all."); pingDelay.setRequired(false); pingDelay.setArgName("millis"); options.addOption(pingDelay); // The number of attempts option final Option addNodeAttempts = new Option(Character.toString(OPTION_ADD_NODE_ATTEMPTS), "addNodeAttempts", true, "number of attempts to add the node(s) to the resource manager. Default is " + NB_OF_ADD_NODE_ATTEMPTS + "). When 0 is specified node(s) remains alive without " + "trying to add itself to the RM. 
Otherwise the process is terminated when number " + "of attempts exceeded."); addNodeAttempts.setRequired(false); addNodeAttempts.setArgName("number"); options.addOption(addNodeAttempts); // The delay between attempts option final Option addNodeAttemptsDelay = new Option(Character.toString(OPTION_ADD_NODE_ATTEMPTS_DELAY), "addNodeAttemptsDelay", true, "delay in millis between attempts to add the node(s) to the resource manager (default is " + ADD_NODE_ATTEMPTS_DELAY_IN_MS + ")"); addNodeAttemptsDelay.setRequired(false); addNodeAttemptsDelay.setArgName("millis"); options.addOption(addNodeAttemptsDelay); // The discovery port final Option discoveryPort = new Option(OPTION_DISCOVERY_PORT, "discoveryPort", true, "port to use for RM discovery (default is " + this.discoveryPort + ")"); discoveryPort.setRequired(false); options.addOption(discoveryPort); // The discovery timeout final Option discoveryTimeout = new Option(OPTION_DISCOVERY_TIMEOUT, "discoveryTimeout", true, "timeout to use for RM discovery (default is " + discoveryTimeoutInMs + "ms)"); discoveryTimeout.setRequired(false); options.addOption(discoveryTimeout); // The number of workers final Option workers = new Option(OPTION_WORKERS, "workers", true, "Number of workers, i.e number of tasks that can be executed in parallel on this node (default is 1). 
If no value specified, number of cores."); workers.setRequired(false); workers.setOptionalArg(true); options.addOption(workers); final Option availabilityReportTimeout = new Option(OPTION_AVAILABILITY_REPORT_TIMEOUT, "availabilityReportTimeout", true, "The maximum time to wait in milliseconds for retrieving the answer for the node availability report that is pushed periodically."); availabilityReportTimeout.setRequired(false); availabilityReportTimeout.setArgName("timeInMilliseconds"); options.addOption(availabilityReportTimeout); // Displays the help final Option help = new Option(Character.toString(OPTION_HELP), "help", false, "to display this help"); help.setRequired(false); options.addOption(help); // Disable monitoring final Option monitorOption = new Option(OPTION_DISABLE_MONITORING, "disableMonitoring", false, "to disable JMX node monitoring functionality"); monitorOption.setRequired(false); options.addOption(monitorOption); } /** * Creates a new instance of this class and calls registersInRm method. 
* @param args The arguments needed to join the Resource Manager
*/
public static void main(String[] args) {
    try {
        args = JVMPropertiesPreloader.overrideJVMProperties(args);
        CookieBasedProcessTreeKiller.registerKillChildProcessesOnShutdown("node");
        RMNodeStarter passiveStarter = new RMNodeStarter();
        String baseNodeName = passiveStarter.configure(args);
        passiveStarter.createNodesAndConnect(baseNodeName);
    } catch (Throwable t) {
        System.err.println("A major problem occurred when trying to start a node and register it into the Resource Manager, see the stacktrace below");
        // Fix for SCHEDULING-1588
        if (t instanceof java.lang.NoClassDefFoundError) {
            System.err.println("Unable to load a class definition, maybe the classpath is not accessible");
        }
        t.printStackTrace(System.err);
        System.exit(-2);
    }
}

/**
 * Prepares the JVM (security policy, homes, ProActive configuration file, Sigar, logging,
 * network interface, rank) and parses the command line.
 *
 * @param args the command line arguments
 * @return the base node name returned by the command line parsing
 */
protected String configure(final String args[]) {
    configureSecurityManager();
    configureRMAndProActiveHomes();
    configureProActiveDefaultConfigurationFile();
    loadSigarIfRunningWithOneJar();

    String nodeName = parseCommandLine(args);

    // logging is configured after the parsing because the configuration depends on the node name
    configureLogging(nodeName);

    logger.info("Using ProActive configuration file : " +
                System.getProperty(CentralPAPropertyRepository.PA_CONFIGURATION_FILE.getName()));

    selectNetworkInterface();

    readAndSetTheRank();
    return nodeName;
}

/**
 * Creates the local worker nodes, then registers them in the Resource Manager and pings it.
 * When no Resource Manager URL is known, the URL is looked up through broadcast discovery.
 *
 * @param nodeName base name of the nodes to create
 * @return always true
 */
public boolean createNodesAndConnect(final String nodeName) {
    Map<String, Node> nodes = createNodes(nodeName);

    Tools.logAvailableScriptEngines(logger);

    if (nodeSourceName != null && nodeSourceName.length() > 0) {
        // setting system the property with node source name
        System.setProperty(NODESOURCE_PROP_NAME, nodeSourceName);
    }

    if (rmURL == null) {
        rmURL = tryBroadcastDiscoveryOrExit();
    }

    connectToResourceManager(nodeName, nodes);

    return true;
}

/**
 * Computes the names of the worker nodes. With a single worker the base name is used as is;
 * with several workers each name is the base name suffixed with "_" and the worker index.
 *
 * @param baseNodeName the base node name
 * @param nbWorkers the number of workers; a value lower than 1 yields an empty list
 * @return the node names, in worker index order
 */
public static List<String> getWorkersNodeNames(String baseNodeName, int nbWorkers) {
    // a negative capacity would make the ArrayList constructor throw
    List<String> createdNodeNames = new ArrayList<>(Math.max(nbWorkers, 0));
    for (int nodeIndex = 0; nodeIndex < nbWorkers; nodeIndex++) {
        String indexedNodeName = nbWorkers > 1 ? baseNodeName + "_" + nodeIndex : baseNodeName;
        createdNodeNames.add(indexedNodeName);
    }
    return createdNodeNames;
}

/**
 * Creates one local node per worker and configures each of them for data spaces.
 *
 * @param nodeName base name of the nodes
 * @return the created nodes, indexed by node URL
 */
private Map<String, Node> createNodes(String nodeName) {
    Map<String, Node> nodes = new HashMap<>(workers);
    List<String> createdNodeNames = getWorkersNodeNames(nodeName, workers);
    for (int nodeIndex = 0; nodeIndex < workers; nodeIndex++) {
        Node node = createLocalNode(createdNodeNames.get(nodeIndex));
        configureForDataSpace(node);
        String nodeUrl = node.getNodeInformation().getURL();
        nodes.put(nodeUrl, node);
        logger.info("URL of node " + nodeIndex + " " + nodeUrl);
    }
    return nodes;
}

/**
 * Looks up the Resource Manager URL through broadcast discovery. Exits the JVM when the
 * discovery fails.
 *
 * @return the discovered URL
 */
private String tryBroadcastDiscoveryOrExit() {
    try {
        return new BroadcastDiscoveryClient(discoveryPort).discover(discoveryTimeoutInMs);
    } catch (IOException e) {
        // the node is about to exit: report it as an error and keep the cause of the failure
        logger.error("No URL to connect to was specified and discovery failed, please specify a URL with -r parameter.",
                     e);
        System.exit(ExitStatus.RM_NO_PING.exitCode);
        // only reached if System.exit returns, needed to satisfy the compiler
        return null;
    }
}

/**
 * Registers the nodes in the Resource Manager, then starts pinging it.
 *
 * @param nodeName base name of the nodes
 * @param nodes the local nodes, indexed by node URL
 */
private void connectToResourceManager(String nodeName, Map<String, Node> nodes) {
    ResourceManager rm = this.registerInRM(credentials, rmURL, nodeName, nodes.values());
    resetReconnectionAttemptsLeft();
    pingAllNodes(nodes, rm);
}

/**
 * Reports the nodes to the Resource Manager periodically and tries to reconnect when the
 * connection is lost. Exits the JVM when no Resource Manager is given, or once the
 * reconnection attempts are exhausted. Returns without pinging when the ping delay or the
 * number of add node attempts is not positive.
 *
 * @param nodes the local nodes, indexed by node URL
 * @param rm the Resource Manager, or null if the registration did not provide one
 */
private void pingAllNodes(Map<String, Node> nodes, ResourceManager rm) {
    if (rm != null) {

        logger.info("Connected to the resource manager at " + rmURL);

        // NB_OF_ADD_NODE_ATTEMPTS is used here to disable pinging
        if (PING_DELAY_IN_MS > 0 && NB_OF_ADD_NODE_ATTEMPTS > 0) {

            while (numberOfReconnectionAttemptsLeft >= 0) {
                try {
                    pingAllNodesIndefinitely(nodes, rm);
                } catch (NotConnectedException e) {
                    logger.warn("Authentication issue, reconnecting to the Resource Manager");
                    rm = reconnectToResourceManager();
                } catch (ProActiveRuntimeException e) {
                    logger.warn("Node disconnected from the Resource Manager, reconnection in progress");
                    rm = reconnectToResourceManager();
                } catch (Throwable e) {
                    logger.error(ExitStatus.RM_NO_PING.description, e);
                } finally {
                    try {
                        logger.warn("Disconnected from the resource manager");
                        logger.warn("Node will try to reconnect in " + PING_DELAY_IN_MS + " ms");
                        logger.warn("Number of attempts left is " + numberOfReconnectionAttemptsLeft);

                        numberOfReconnectionAttemptsLeft--;

                        if (numberOfReconnectionAttemptsLeft != 0) {
                            Thread.sleep(PING_DELAY_IN_MS);
                        }
                    } catch (InterruptedException ignored) {
                        // deliberately not re-interrupting: the next attempt must still be able to sleep
                        logger.debug("Ignored interrupted exception", ignored);
                    }
                }
            }

            // if we are here it means we lost the connection. just exit..
            logger.error(ExitStatus.RM_IS_SHUTDOWN.description);
            System.exit(ExitStatus.RM_IS_SHUTDOWN.exitCode);
        }
    } else {
        // Force system exit to bypass daemon threads
        logger.error(ExitStatus.RMNODE_EXIT_FORCED.description);
        System.exit(ExitStatus.RMNODE_EXIT_FORCED.exitCode);
    }
}

/**
 * When a Resource Manager URL is known, selects the network interface to use for this URL
 * and sets it as the ProActive network interface. Failures are only logged at debug level.
 */
private void selectNetworkInterface() {
    if (rmURL != null) {
        try {
            logger.debug("Detecting a network interface to bind the node");
            String networkInterface = RMConnection.getNetworkInterfaceFor(rmURL);
            logger.info("Node will be bounded to the following network interface " + networkInterface);
            CentralPAPropertyRepository.PA_NET_INTERFACE.setValue(networkInterface);
        } catch (Exception e) {
            logger.debug("Unable to detect the network interface", e);
        }
    }
}

/**
 * Joins the Resource Manager again and logs in with the current credentials.
 *
 * @return the Resource Manager, or null if joining or logging in failed
 */
private ResourceManager reconnectToResourceManager() {
    try {
        // trying to reconnect to the resource manager
        RMAuthentication auth = RMConnection.waitAndJoin(rmURL, WAIT_ON_JOIN_TIMEOUT_IN_MS);
        return auth.login(credentials);
    } catch (Exception ex) {
        logger.error(ex.getMessage(), ex);
    }
    return null;
}

/**
 * Reports the nodes to the Resource Manager every PING_DELAY_IN_MS milliseconds, for as long
 * as the report succeeds. The number of reconnection attempts left is reset after a
 * successful report that follows a reconnection.
 *
 * @param nodes the local nodes, indexed by node URL
 * @param rm the Resource Manager to report to
 */
private void pingAllNodesIndefinitely(Map<String, Node> nodes, ResourceManager rm) {
    while (allNodesAreAvailable(nodes, rm)) {
        try {
            if (numberOfReconnectionAttemptsLeft < NB_OF_RECONNECTION_ATTEMPTS) {
                logger.info("Node successfully reconnected to the resource manager");
                resetReconnectionAttemptsLeft();
            }

            Thread.sleep(PING_DELAY_IN_MS);
        } catch (InterruptedException e) {
            logger.warn("Node ping activity is interrupted", e);
            Thread.currentThread().interrupt();
        }
    }
}

/** Restores the number of reconnection attempts left to its configured maximum. */
private void resetReconnectionAttemptsLeft() {
    numberOfReconnectionAttemptsLeft = NB_OF_RECONNECTION_ATTEMPTS;
}
private boolean allNodesAreAvailable(Map<String, Node> nodes, ResourceManager rm) { if (rm == null) { throw new NotConnectedException("No connection to RM"); } Set<String> unknownNodeUrls = PAFuture.getFutureValue(rm.setNodesAvailable(ImmutableSet.copyOf(nodes.keySet())), nodeAvailabilityReportTimeoutDelay); for (String unknownNodeUrl : unknownNodeUrls) { killWorkerNodeIfRemovedByUser(nodes, unknownNodeUrl); } int nodeCount = nodes.size(); if (logger.isDebugEnabled()) { logger.debug("Node count is equal to " + nodeCount); } return true; } private void killWorkerNodeIfRemovedByUser(Map<String, Node> nodes, String unknownNodeUrl) { Node node = nodes.get(unknownNodeUrl); if (node == null) { logger.warn("The RM has sent back an URL that was not published by the Node: " + unknownNodeUrl); } else { // The node URL which has been published to the RMCore is unknown. // It means the node has been removed by a user String nodeName = node.getNodeInformation().getName(); try { // Kill the local worker node node.getProActiveRuntime().killNode(nodeName); } catch (Exception e) { logger.warn("Killing the local node has failed: " + nodeName, e); } finally { nodes.remove(unknownNodeUrl); logger.info("Node " + unknownNodeUrl + " has been removed "); } } } private void configureRMAndProActiveHomes() { if (System.getProperty(PAResourceManagerProperties.RM_HOME.getKey()) == null) { System.setProperty(PAResourceManagerProperties.RM_HOME.getKey(), findSchedulerHome()); } if (System.getProperty(CentralPAPropertyRepository.PA_HOME.getName()) == null) { System.setProperty(CentralPAPropertyRepository.PA_HOME.getName(), System.getProperty(PAResourceManagerProperties.RM_HOME.getKey())); } } private void configureProActiveDefaultConfigurationFile() { if (System.getProperty(CentralPAPropertyRepository.PA_CONFIGURATION_FILE.getName()) == null) { File defaultProActiveConfiguration = new File(System.getProperty(PAResourceManagerProperties.RM_HOME.getKey()), "config/network/node.ini"); if 
(defaultProActiveConfiguration.exists()) { System.setProperty(CentralPAPropertyRepository.PA_CONFIGURATION_FILE.getName(), defaultProActiveConfiguration.getAbsolutePath()); } } } private void loadSigarIfRunningWithOneJar() { if (OneJar.isRunningWithOneJar()) { String nativeLibraryName = SigarLoader.getNativeLibraryName(); String nativeLibraryNameToLoad = nativeLibraryName.replace(SigarLoader.getLibraryExtension(), "") .replace(SigarLoader.getLibraryPrefix(), ""); System.loadLibrary(nativeLibraryNameToLoad); } } private void configureSecurityManager() { if (System.getProperty("java.security.policy") == null) { System.setProperty("java.security.policy", RMNodeStarter.class.getResource("/config/security.java.policy-client").toString()); Policy.getPolicy().refresh(); } } /* * Sets system properties "proactive.home" and "node.name" (used to parameterize the default * node.properties configuration file). Re-configures log4j for the new values of the properties * to * take effect. */ private static void configureLogging(String nodeName) { String proActiveHome = System.getProperty(CentralPAPropertyRepository.PA_HOME.getName()); if (proActiveHome == null) { try { proActiveHome = ProActiveRuntimeImpl.getProActiveRuntime().getProActiveHome(); } catch (ProActiveException e) { logger.debug("Cannot find proactive home using ProActiveRuntime, will use RM home as ProActive home."); proActiveHome = PAResourceManagerProperties.RM_HOME.getValueAsString(); } System.setProperty(CentralPAPropertyRepository.PA_HOME.getName(), proActiveHome); } System.setProperty("node.name", nodeName); LogManager.resetConfiguration(); String log4jConfigPropertyValue = System.getProperty(CentralPAPropertyRepository.LOG4J.getName()); // (re-)configure log4j so that system properties set above take effect if (log4jConfigPropertyValue != null) { // log4j.configuration property is set (to a URL), use its value URL url; try { url = new URL(log4jConfigPropertyValue); } catch (MalformedURLException e) { throw 
new RuntimeException("Malformed log4j.configuration value: " + log4jConfigPropertyValue, e); } PropertyConfigurator.configure(url); logger.info("Reconfigured log4j using " + log4jConfigPropertyValue); } else { // log4j.configuration property is not set, use default log4j configuration for node String log4jConfig = proActiveHome + File.separator + "config" + File.separator + "log" + File.separator + "node.properties"; // set log4j.configuration to stop ProActiveLogger#load from reconfiguring log4j once again if (new File(log4jConfig).exists()) { System.setProperty(CentralPAPropertyRepository.LOG4J.getName(), "file:" + log4jConfig); PropertyConfigurator.configure(log4jConfig); logger.info("Configured log4j using " + log4jConfig); } else { // use log4j config from JAR URL log4jConfigFromJar = RMNodeStarter.class.getResource("/config/log/node.properties"); System.setProperty(CentralPAPropertyRepository.LOG4J.getName(), log4jConfigFromJar.toString()); PropertyConfigurator.configure(log4jConfigFromJar); logger.info("Configured log4j using " + log4jConfigFromJar.toString()); } } } /** * Configure node for dataSpaces * * @param node the node to be configured */ private void configureForDataSpace(final Node node) { try { boolean dataspaceConfigured = RMNodeStarter.configureNodeForDataSpace(node); if (!dataspaceConfigured) { throw new NotConfiguredException("Failed to configure dataspaces, check the logs for more details"); } closeDataSpaceOnShutdown(node); node.setProperty(DATASPACES_STATUS_PROP_NAME, Boolean.TRUE.toString()); } catch (Throwable t) { logger.error("Cannot configure dataSpace", t); try { node.setProperty(DATASPACES_STATUS_PROP_NAME, getStackTraceAsString(t)); } catch (ProActiveException e) { logger.error("Cannot contact the node", e); } } } private void closeDataSpaceOnShutdown(final Node node) { Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() { @Override public void run() { try { DataSpaceNodeConfigurationAgent conf = 
(DataSpaceNodeConfigurationAgent) PAActiveObject.newActive(DataSpaceNodeConfigurationAgent.class.getName(), null, node); BooleanWrapper closeNodeConfiguration = conf.closeNodeConfiguration(); PAFuture.waitFor(closeNodeConfiguration, DATASPACE_CLOSE_TIMEOUT); if (closeNodeConfiguration.getBooleanValue()) { logger.debug("Dataspaces are successfully closed for node " + node.getNodeInformation().getURL()); } } catch (Throwable t) { logger.debug("Cannot close data spaces configuration", t); } } })); } public static boolean configureNodeForDataSpace(Node node) throws ActiveObjectCreationException, NodeException { DataSpaceNodeConfigurationAgent nodeConfigurationAgent = (DataSpaceNodeConfigurationAgent) PAActiveObject.newActive(DataSpaceNodeConfigurationAgent.class.getName(), null, node); boolean result = nodeConfigurationAgent.configureNode(); PAActiveObject.terminateActiveObject(nodeConfigurationAgent, false); return result; } protected String fillParameters(final CommandLine cl, final Options options) { boolean printHelp = false; try { // Optional rmURL option if (cl.hasOption(OPTION_RM_URL)) { rmURL = cl.getOptionValue(OPTION_RM_URL); } // if the user doesn't provide a rm URL, we don't care about the credentials // The path to the file that contains the credential if (cl.hasOption(OPTION_CREDENTIAL_FILE)) { try { credentials = Credentials.getCredentials(cl.getOptionValue(OPTION_CREDENTIAL_FILE)); } catch (KeyException ke) { logger.error(ExitStatus.CRED_UNREADABLE.description, ke); System.exit(ExitStatus.CRED_UNREADABLE.exitCode); } // The name of the env variable that contains } else if (cl.hasOption(OPTION_CREDENTIAL_ENV)) { final String variableName = cl.getOptionValue(OPTION_CREDENTIAL_ENV); final String value = System.getenv(variableName); if (value == null) { logger.error(ExitStatus.CRED_ENVIRONMENT.description); System.exit(ExitStatus.CRED_ENVIRONMENT.exitCode); } try { credentials = Credentials.getCredentialsBase64(value.getBytes()); } catch (KeyException ke) { 
logger.error(ExitStatus.CRED_DECODE.description, ke); System.exit(ExitStatus.CRED_DECODE.exitCode); } // Read the credentials directly from the command-line argument } else if (cl.hasOption(OPTION_CREDENTIAL_VAL)) { final String str = cl.getOptionValue(OPTION_CREDENTIAL_VAL); try { credentials = Credentials.getCredentialsBase64(str.getBytes()); } catch (KeyException ke) { logger.error(ExitStatus.CRED_DECODE.description, ke); System.exit(ExitStatus.CRED_DECODE.exitCode); } } else { credentials = getDefaultCredentials(); } String nodeName; // Optional node name if (cl.hasOption(OPTION_NODE_NAME)) { nodeName = cl.getOptionValue(OPTION_NODE_NAME); } else { nodeName = getDefaultNodeName(); } // Optional node source name if (cl.hasOption(OPTION_SOURCE_NAME)) { nodeSourceName = cl.getOptionValue(OPTION_SOURCE_NAME); } // Optional wait on join option if (cl.hasOption(OPTION_WAIT_AND_JOIN_TIMEOUT)) { RMNodeStarter.WAIT_ON_JOIN_TIMEOUT_IN_MS = Integer.valueOf(cl.getOptionValue(OPTION_WAIT_AND_JOIN_TIMEOUT)); RMNodeStarter.WAIT_ON_JOIN_TIMEOUT_IN_MS_USER_SUPPLIED = true; } // Optional ping delay if (cl.hasOption(OPTION_PING_DELAY)) { RMNodeStarter.PING_DELAY_IN_MS = Integer.valueOf(cl.getOptionValue(OPTION_PING_DELAY)); RMNodeStarter.PING_DELAY_IN_MS_USER_SUPPLIED = true; } // Optional number of add node attempts before quitting if (cl.hasOption(OPTION_ADD_NODE_ATTEMPTS)) { RMNodeStarter.NB_OF_ADD_NODE_ATTEMPTS = Integer.valueOf(cl.getOptionValue(OPTION_ADD_NODE_ATTEMPTS)); RMNodeStarter.NB_OF_ADD_NODE_ATTEMPTS_USER_SUPPLIED = true; } // Optional delay between add node attempts if (cl.hasOption(OPTION_ADD_NODE_ATTEMPTS_DELAY)) { RMNodeStarter.ADD_NODE_ATTEMPTS_DELAY_IN_MS = Integer.valueOf(cl.getOptionValue(OPTION_ADD_NODE_ATTEMPTS_DELAY)); RMNodeStarter.ADD_NODE_ATTEMPTS_DELAY_IN_MS_USER_SUPPLIED = true; } setNodeAvailabilityReportTimeoutDelay(cl); // Discovery if (cl.hasOption(OPTION_DISCOVERY_PORT)) { discoveryPort = 
Integer.valueOf(cl.getOptionValue(OPTION_DISCOVERY_PORT)); } else if (System.getProperty(DISCOVERY_PORT_NAME) != null) { discoveryPort = Integer.valueOf(System.getProperty(DISCOVERY_PORT_NAME)); } if (cl.hasOption(OPTION_DISCOVERY_TIMEOUT)) { discoveryTimeoutInMs = Integer.valueOf(cl.getOptionValue(OPTION_DISCOVERY_TIMEOUT)); } else if (System.getProperty(DISCOVERY_TIMEOUT_IN_MS_NAME) != null) { discoveryPort = Integer.valueOf(System.getProperty(DISCOVERY_TIMEOUT_IN_MS_NAME)); } readWorkersOption(cl); // Optional help option if (cl.hasOption(OPTION_HELP)) { printHelp = true; } // Optional help option if (cl.hasOption(OPTION_DISABLE_MONITORING)) { disabledMonitoring = true; } return nodeName; } catch (Throwable t) { printHelp = true; logger.info(t.getMessage()); t.printStackTrace(System.err); System.exit(ExitStatus.FAILED_TO_LAUNCH.exitCode); } finally { if (printHelp) { // Automatically generate the help statement HelpFormatter formatter = new HelpFormatter(); // Prints usage formatter.printHelp("java " + RMNodeStarter.class.getName(), options); System.exit(ExitStatus.OK.exitCode); } } return null; } @VisibleForTesting int getNodeAvailabilityReportTimeoutDelay() { return nodeAvailabilityReportTimeoutDelay; } @VisibleForTesting void setNodeAvailabilityReportTimeoutDelay(CommandLine cl) { String property = System.getProperty(NODE_AVAILABILITY_REPORT_TIMEOUT_DELAY_PROP_NAME); if (property != null) { try { nodeAvailabilityReportTimeoutDelay = Integer.parseInt(property); } catch (NumberFormatException e) { nodeAvailabilityReportTimeoutDelay = DEFAULT_NODE_AVAILABILITY_REPORT_TIMEOUT_DELAY; } } if (cl.hasOption(OPTION_AVAILABILITY_REPORT_TIMEOUT)) { nodeAvailabilityReportTimeoutDelay = Integer.valueOf(cl.getOptionValue(OPTION_AVAILABILITY_REPORT_TIMEOUT)); } if (logger.isTraceEnabled()) { logger.trace("Node availability report timeout delay set to " + nodeAvailabilityReportTimeoutDelay + " ms."); } } // positive integer, empty (number of available cores or 1 (default if 
nothing specified) private void readWorkersOption(CommandLine cl) throws Exception { try { if (cl.hasOption(OPTION_WORKERS)) { if (cl.getOptionValue(OPTION_WORKERS) == null) { workers = Runtime.getRuntime().availableProcessors(); } else { workers = Integer.valueOf(cl.getOptionValue(OPTION_WORKERS)); } } else if (System.getProperty(NUMBER_OF_WORKERS_PROPERTY_NAME) != null) { if ("".equals(System.getProperty(NUMBER_OF_WORKERS_PROPERTY_NAME))) { workers = Runtime.getRuntime().availableProcessors(); } else { workers = Integer.valueOf(System.getProperty(NUMBER_OF_WORKERS_PROPERTY_NAME)); } } else { workers = 1; } } catch (NumberFormatException e) { throw new Exception("Number of workers should be a positive integer", e); } if (workers <= 0) { throw new Exception("Number of workers should be at least 1, was " + workers); } } private String getDefaultNodeName() { try { return InetAddress.getLocalHost().getHostName().replace('.', '_') + "_" + new Sigar().getPid(); } catch (Throwable error) { logger.warn("Failed to retrieve hostname or pid to compute node name, will fallback to default value", error); return "PA-AGENT_NODE"; } } private Credentials getDefaultCredentials() { try { return Credentials.getCredentials(); } catch (KeyException fromDiskKeyException) { try { Credentials credentialsFromRMHome = Credentials.getCredentials(new File(PAResourceManagerProperties.RM_HOME.getValueAsStringOrNull(), "config/authentication/rm.cred").getAbsolutePath()); logger.info("Using default credentials from ProActive home, authenticating as user rm"); return credentialsFromRMHome; } catch (KeyException fromRMHomeKeyException) { try { Credentials credentialsFromJar = Credentials.getCredentials(RMNodeStarter.class.getResourceAsStream("/config/authentication/rm.cred")); logger.info("Using default credentials from ProActive jars, authenticating as user rm"); return credentialsFromJar; } catch (Exception fromJarKeyException) { logger.error("Failed to read credentials, from location obtained 
using system property, RM home or ProActive jars", fromJarKeyException); System.exit(ExitStatus.CRED_UNREADABLE.exitCode); } } } return null; } protected String parseCommandLine(String[] args) { final Options options = new Options(); //we fill int the options object, child classes can override this method //to add new options... fillOptions(options); final CommandLineParser parser = new DefaultParser(); CommandLine cl; try { cl = parser.parse(options, args); //now we update this object's fields given the options. String nodeName = fillParameters(cl, options); //check the user supplied values //performed after fillParameters to be able to override fillParameters in subclasses checkUserSuppliedParameters(); return nodeName; } catch (ParseException pe) { pe.printStackTrace(); System.exit(ExitStatus.RMNODE_PARSE_ERROR.exitCode); } return null; } /** * Checks that user has supplied parameters or override them with java properties values... */ private void checkUserSuppliedParameters() { //need an exhaustive list... //first, the number of add attempts if (!NB_OF_ADD_NODE_ATTEMPTS_USER_SUPPLIED) { String tmpNBAddString = System.getProperty(RMNodeStarter.NB_OF_ADD_NODE_ATTEMPTS_PROP_NAME); if (tmpNBAddString != null) { try { RMNodeStarter.NB_OF_ADD_NODE_ATTEMPTS = Integer.parseInt(tmpNBAddString); logger.debug("Number of add node attempts not supplied by user, using java property: " + RMNodeStarter.NB_OF_ADD_NODE_ATTEMPTS); } catch (Exception e) { logger.warn("Cannot use the value supplied by java property " + RMNodeStarter.NB_OF_ADD_NODE_ATTEMPTS_PROP_NAME + " : " + tmpNBAddString + ". 
Using default " + RMNodeStarter.NB_OF_ADD_NODE_ATTEMPTS); } } else { logger.debug("Using default value for the number of add node attempts: " + RMNodeStarter.NB_OF_ADD_NODE_ATTEMPTS); } } else { logger.debug("Using value supplied by user for the number of add node attempts: " + RMNodeStarter.NB_OF_ADD_NODE_ATTEMPTS); } String numberOfReconnection = System.getProperty(RMNodeStarter.NB_OF_RECONNECTION_ATTEMPTS_PROP_NAME); if (numberOfReconnection != null) { try { RMNodeStarter.NB_OF_RECONNECTION_ATTEMPTS = Integer.parseInt(numberOfReconnection); logger.debug("Number of attempts to reconnect a node to the resource manager when connection is lost: " + RMNodeStarter.NB_OF_RECONNECTION_ATTEMPTS); } catch (Exception e) { logger.warn("Cannot use the value supplied by java property " + RMNodeStarter.NB_OF_RECONNECTION_ATTEMPTS_PROP_NAME + " : " + numberOfReconnection + ". Using default " + RMNodeStarter.NB_OF_RECONNECTION_ATTEMPTS); } } else { logger.debug("Using default value for the number of reconnection attempts: " + RMNodeStarter.NB_OF_RECONNECTION_ATTEMPTS); } //the delay between two add node attempts if (!ADD_NODE_ATTEMPTS_DELAY_IN_MS_USER_SUPPLIED) { String tmpADDNodeDelay = System.getProperty(RMNodeStarter.ADD_NODE_ATTEMPTS_DELAY_PROP_NAME); if (tmpADDNodeDelay != null) { try { RMNodeStarter.ADD_NODE_ATTEMPTS_DELAY_IN_MS = Integer.parseInt(tmpADDNodeDelay); logger.debug("Add node attempts delay not supplied by user, using java property: " + RMNodeStarter.ADD_NODE_ATTEMPTS_DELAY_IN_MS); } catch (Exception e) { logger.warn("Cannot use the value supplied by java property " + RMNodeStarter.ADD_NODE_ATTEMPTS_DELAY_PROP_NAME + " : " + tmpADDNodeDelay + ". 
Using default " + RMNodeStarter.ADD_NODE_ATTEMPTS_DELAY_IN_MS); } } else { logger.debug("Using default value for the add node attempts delay: " + RMNodeStarter.ADD_NODE_ATTEMPTS_DELAY_IN_MS); } } else { logger.debug("Using value supplied by user for the number the add node attempts delay: " + RMNodeStarter.ADD_NODE_ATTEMPTS_DELAY_IN_MS); } //the delay of the node -> rm ping if (!PING_DELAY_IN_MS_USER_SUPPLIED) { String tmpPingDelay = System.getProperty(RMNodeStarter.PING_DELAY_PROP_NAME); if (tmpPingDelay != null) { try { RMNodeStarter.PING_DELAY_IN_MS = Integer.parseInt(tmpPingDelay); logger.debug("RM Ping delay not supplied by user, using java property: " + RMNodeStarter.PING_DELAY_IN_MS); } catch (Exception e) { logger.warn("Cannot use the value supplied by java property " + RMNodeStarter.PING_DELAY_PROP_NAME + " : " + tmpPingDelay + ". Using default " + RMNodeStarter.PING_DELAY_IN_MS); } } else { logger.debug("Using default value for the rm ping delay: " + RMNodeStarter.PING_DELAY_IN_MS); } } else { logger.debug("Using value supplied by user for the rm ping delay: " + RMNodeStarter.PING_DELAY_IN_MS); } //the "joinRM" timeout if (!WAIT_ON_JOIN_TIMEOUT_IN_MS_USER_SUPPLIED) { String tmpWait = System.getProperty(RMNodeStarter.WAIT_ON_JOIN_PROP_NAME); if (tmpWait != null) { try { RMNodeStarter.WAIT_ON_JOIN_TIMEOUT_IN_MS = Integer.parseInt(tmpWait); logger.debug("Wait on join not supplied by user, using java property: " + RMNodeStarter.WAIT_ON_JOIN_TIMEOUT_IN_MS); } catch (Exception e) { logger.warn("Cannot use the value supplied by java property " + RMNodeStarter.WAIT_ON_JOIN_PROP_NAME + " : " + tmpWait + ". 
Using default " + RMNodeStarter.WAIT_ON_JOIN_TIMEOUT_IN_MS); } } else { logger.debug("Using default value for the wait on join: " + RMNodeStarter.WAIT_ON_JOIN_TIMEOUT_IN_MS); } } else { logger.debug("Using value supplied by user for the wait on join timeout: " + RMNodeStarter.WAIT_ON_JOIN_TIMEOUT_IN_MS); } } private RMAuthentication joinResourceManager(String rmURL) { // Create the full url to contact the Resource Manager logger.info("Joining Resource Manager at " + rmURL); final String fullUrl = rmURL.endsWith("/") ? rmURL + RMConstants.NAME_ACTIVE_OBJECT_RMAUTHENTICATION : rmURL + "/" + RMConstants.NAME_ACTIVE_OBJECT_RMAUTHENTICATION; // Try to join the Resource Manager with a specified timeout try { RMAuthentication auth = RMConnection.waitAndJoin(fullUrl, WAIT_ON_JOIN_TIMEOUT_IN_MS); if (auth == null) { logger.error(ExitStatus.RMAUTHENTICATION_NULL.description); System.exit(ExitStatus.RMAUTHENTICATION_NULL.exitCode); } logger.info("Resource Manager joined."); return auth; } catch (Throwable t) { logger.error("Unable to join the Resource Manager at " + rmURL, t); System.exit(ExitStatus.RMNODE_ADD_ERROR.exitCode); } return null; } private ResourceManager loginToResourceManager(final Credentials credentials, final RMAuthentication auth) { try { ResourceManager rm = auth.login(credentials); if (rm == null) { logger.error(ExitStatus.RM_NULL.description); System.exit(ExitStatus.RM_NULL.exitCode); } return rm; } catch (Throwable t) { logger.error("Unable to log into the Resource Manager at " + rmURL, t); System.exit(ExitStatus.RMNODE_ADD_ERROR.exitCode); } return null; } /** * Tries to join to the Resource Manager with a specified timeout * at the given URL, logs with provided credentials and adds the local node to * the Resource Manager. Handles all errors/exceptions. 
*/ protected ResourceManager registerInRM(final Credentials credentials, final String rmURL, final String nodeName, final Collection<Node> nodes) { RMAuthentication auth = joinResourceManager(rmURL); final ResourceManager rm = loginToResourceManager(credentials, auth); SigarExposer sigarExposer = null; if (!disabledMonitoring) { // initializing JMX server with Sigar beans sigarExposer = new SigarExposer(nodeName); final RMAuthentication rmAuth = auth; sigarExposer.boot(auth, false, new PermissionChecker() { @Override public boolean checkPermission(Credentials cred) { ResourceManager rm = null; try { rm = rmAuth.login(cred); if (NB_OF_ADD_NODE_ATTEMPTS == 0) return true; boolean isAdmin = rm.isNodeAdmin(nodes.iterator().next().getNodeInformation().getURL()) .getBooleanValue(); if (!isAdmin) { throw new SecurityException("Permission denied"); } return true; } catch (LoginException e) { throw new SecurityException(e); } finally { if (rm != null) { rm.disconnect(); } } } }); } else { logger.info("JMX monitoring is disabled."); } for (final Node node : nodes) { nodeSetJmxUrl(sigarExposer, node); addNodeToResourceManager(rmURL, node, rm); } return rm; } private void nodeSetJmxUrl(SigarExposer sigarExposer, Node node) { try { if (!disabledMonitoring) { node.setProperty(JMX_URL + JMXTransportProtocol.RMI, sigarExposer.getAddress(JMXTransportProtocol.RMI).toString()); node.setProperty(JMX_URL + JMXTransportProtocol.RO, sigarExposer.getAddress(JMXTransportProtocol.RO).toString()); } } catch (Exception e) { logger.error("", e); } } private void addNodeToResourceManager(String rmURL, Node node, ResourceManager rm) { // Add the created node to the Resource Manager with a specified // number of attempts and a timeout between each attempt boolean isNodeAdded = false; int attempts = 0; String nodeUrl = node.getNodeInformation().getURL(); String nodeName = node.getNodeInformation().getName(); logger.info("Adding node " + nodeName + " to Resource Manager."); while ((!isNodeAdded) && 
(attempts < NB_OF_ADD_NODE_ATTEMPTS)) { attempts++; try { if (this.nodeSourceName != null) { isNodeAdded = rm.addNode(nodeUrl, this.nodeSourceName).getBooleanValue(); } else { isNodeAdded = rm.addNode(nodeUrl).getBooleanValue(); } } catch (AddingNodesException addException) { addException.printStackTrace(); System.exit(ExitStatus.RMNODE_ADD_ERROR.exitCode); } if (isNodeAdded) { if (removePrevious) { // try to remove previous URL if different... String previousURL = this.getAndDeleteNodeURL(nodeName, rank); if (previousURL != null && !previousURL.equals(nodeUrl)) { logger.info("Different previous URL registered by this agent has been found. Remove previous registration."); rm.removeNode(previousURL, true); } // store the node URL this.storeNodeURL(nodeName, rank, nodeUrl); logger.info("Node " + nodeUrl + " added. URL is stored in " + getNodeURLFilename(nodeName, rank)); } else { logger.info("Node " + nodeUrl + " added."); } } else { // not yet registered logger.info("Attempt number " + attempts + " out of " + NB_OF_ADD_NODE_ATTEMPTS + " to add the local node to the Resource Manager at " + rmURL + " has failed."); try { Thread.sleep(ADD_NODE_ATTEMPTS_DELAY_IN_MS); } catch (InterruptedException e) { logger.info("Interrupted", e); } } } // while if (!isNodeAdded) { // if not registered logger.error("The Resource Manager was unable to add the local node " + nodeUrl + " after " + NB_OF_ADD_NODE_ATTEMPTS + " attempts. The application will exit."); System.exit(ExitStatus.RMNODE_ADD_ERROR.exitCode); } } protected void readAndSetTheRank() { String rankAsString = System.getProperty(RANK_PROP_NAME); if (rankAsString == null) { logger.debug("Rank is not set. Previous URLs will not be stored"); this.removePrevious = false; } else { try { this.rank = Integer.parseInt(rankAsString); this.removePrevious = true; logger.info("Rank is " + this.rank); } catch (Throwable e) { logger.warn("Rank cannot be read due to " + e.getMessage() + ". 
Previous URLs will not be stored"); this.removePrevious = false; } } } /** * Creates the node with the name given as parameter and returns it. * @param nodeName The expected name of the node * @return the newly created node. */ protected Node createLocalNode(String nodeName) { Node localNode = null; try { localNode = NodeFactory.createLocalNode(nodeName, false, null, nodeName + "vnname"); if (localNode == null) { logger.error(ExitStatus.RMNODE_NULL.description); System.exit(ExitStatus.RMNODE_NULL.exitCode); } // setting system properties to node (they will be accessible remotely) for (Object key : System.getProperties().keySet()) { localNode.setProperty(key.toString(), System.getProperty(key.toString())); } } catch (Throwable t) { logger.error("Unable to create the local node " + nodeName, t); System.exit(ExitStatus.RMNODE_ADD_ERROR.exitCode); } return localNode; } /** * Store in a temp file the current URL of the node started by the agent * @param nodeName the name of the node * @param rank the rank of the node * @param nodeURL the URL of the node */ protected void storeNodeURL(String nodeName, int rank, String nodeURL) { try { File f = new File(getNodeURLFilename(nodeName, rank)); if (f.exists()) { logger.warn("NodeURL file already exists ; delete it."); FileUtils.forceDelete(f); } BufferedWriter out = new BufferedWriter(new FileWriter(f)); out.write(nodeURL); out.write(System.lineSeparator()); out.close(); } catch (IOException e) { logger.warn("NodeURL cannot be created.", e); } } /** * Return the previous URL of this node * @param nodeName the name of the node started by the Agent * @param rank the rank of the node * @return the previous URL of this node, null if none can be found */ protected String getAndDeleteNodeURL(String nodeName, int rank) { try { File f = new File(getNodeURLFilename(nodeName, rank)); if (f.exists()) { BufferedReader in = new BufferedReader(new FileReader(f)); String read = in.readLine(); in.close(); FileUtils.deleteQuietly(f); return 
read; } } catch (IOException e) { e.printStackTrace(); } return null; } /** * Create the name of the temp file for storing node URL. */ private String getNodeURLFilename(String nodeName, int rank) { final String tmpDir = System.getProperty("java.io.tmpdir"); return new File(tmpDir, URL_TMPFILE_PREFIX + "_" + nodeName + "-" + rank).getAbsolutePath(); } private enum ExitStatus { OK(0, "Exit success."), //mustn't be changed, return value set in the JVM itself JVM_ERROR(1, "Problem with the Java process itself ( classpath, main method... )."), RM_NO_PING(100, "Cannot ping the Resource Manager because of a Throwable."), RM_IS_SHUTDOWN(101, "The Resource Manager has been shutdown."), CRED_UNREADABLE(200, "Cannot read the submitted credential's key."), CRED_DECODE(201, "Cannot decode credential's key from base64."), CRED_ENVIRONMENT(202, "Environment variable not set for credential but it should be."), RMNODE_NULL(300, "NodeFactory returned null as RMNode."), RMAUTHENTICATION_NULL(301, "RMAuthentication instance is null."), RM_NULL(302, "Resource Manager instance is null."), RMNODE_ADD_ERROR(303, "Was not able to add RMNode to the Resource Manager."), RMNODE_PARSE_ERROR( 304, "Problem encountered while parsing " + RMNodeStarter.class.getName() + " command line."), RMNODE_EXIT_FORCED( 305, "Was not able to add RMNode to the Resource Manager. Force system to exit to bypass daemon threads."), FAILED_TO_LAUNCH(-1, RMNodeStarter.class.getSimpleName() + " process hasn't been started at all."), UNKNOWN(-2, "Cannot determine exit status."); public final int exitCode; public final String description; private ExitStatus(int exitCode, String description) { this.exitCode = exitCode; this.description = description; } public String getDescription() { return this.description; } } }