/*******************************************************************************
* ===========================================================
* Ankush : Big Data Cluster Management Solution
* ===========================================================
*
* (C) Copyright 2014, by Impetus Technologies
*
* This is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (LGPL v3) as
* published by the Free Software Foundation;
*
* This software is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this software; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
******************************************************************************/
package com.impetus.ankush2.agent;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Semaphore;
import net.neoremind.sshxcute.core.SSHExec;
import net.neoremind.sshxcute.exception.TaskExecFailException;
import net.neoremind.sshxcute.task.CustomTask;
import net.neoremind.sshxcute.task.impl.ExecCommand;
import com.impetus.ankush.AppStoreWrapper;
import com.impetus.ankush.common.config.ConfigurationReader;
import com.impetus.ankush.common.exception.AnkushException;
import com.impetus.ankush.common.scripting.AnkushTask;
import com.impetus.ankush.common.scripting.impl.Copy;
import com.impetus.ankush.common.scripting.impl.ExecSudoCommand;
import com.impetus.ankush.common.scripting.impl.Remove;
import com.impetus.ankush.common.scripting.impl.ReplaceText;
import com.impetus.ankush.common.scripting.impl.RunInBackground;
import com.impetus.ankush.common.utils.JmxMonitoringUtil;
import com.impetus.ankush2.constant.Constant;
import com.impetus.ankush2.framework.Serviceable;
import com.impetus.ankush2.framework.config.ClusterConfig;
import com.impetus.ankush2.framework.config.ComponentConfig;
import com.impetus.ankush2.framework.config.NodeConfig;
import com.impetus.ankush2.ganglia.GangliaConstants;
import com.impetus.ankush2.logger.AnkushLogger;
import com.impetus.ankush2.utils.AnkushUtils;
public class AgentServiceManager implements Serviceable {
/**
 * Builds the generic failure message used when an action on this component
 * cannot be completed.
 *
 * @param action
 *            verb describing the failed operation, e.g. "start" or "stop"
 * @return message naming the action and the component and pointing the reader
 *         at the server logs
 */
private String errMessage(String action) {
    StringBuilder text = new StringBuilder("Could not ");
    text.append(action).append(" ").append(getComponentName());
    text.append(" . Please view server logs for more details.");
    return text.toString();
}
/** Reader for the Ankush configuration, obtained from {@link AppStoreWrapper}. */
ConfigurationReader ankushConf = AppStoreWrapper.getAnkushConfReader();
/** Component configuration; not referenced by any code visible in this class. */
private ComponentConfig componentConfig;
/**
 * Class-level logger. Several methods of this class call
 * {@code logger.setCluster(clusterConfig)} before logging.
 */
private static AnkushLogger logger = new AnkushLogger(
AgentServiceManager.class);
/**
 * Name stored by {@link #setComponentName(String)} and passed as the
 * component argument in several log calls. It is not returned by
 * {@link #getComponentName()}.
 */
String componentName;
/** Creates a manager without attaching any cluster to the logger. */
public AgentServiceManager() {
}
/**
 * Creates a manager and attaches the given cluster to the class logger.
 *
 * @param clusterConfig
 *            cluster passed to {@code logger.setCluster}
 */
public AgentServiceManager(ClusterConfig clusterConfig) {
logger.setCluster(clusterConfig);
}
/**
 * Returns the agent component name constant. The value stored through
 * {@link #setComponentName(String)} is not consulted here.
 *
 * @return {@code Constant.Component.Name.AGENT}
 */
@Override
public String getComponentName() {
return Constant.Component.Name.AGENT;
}
/**
 * Stores the given name in the {@code componentName} field, which is used as
 * the component argument in several log calls of this class.
 *
 * @param componentName
 *            name to store
 */
@Override
public void setComponentName(String componentName) {
this.componentName = componentName;
}
/**
 * Starts the agent on a single host by delegating to
 * {@link #manageService} with the START action.
 *
 * @param clusterConfig
 *            cluster the host belongs to
 * @param host
 *            host on which the agent should be started
 * @param services
 *            forwarded unchanged to {@code manageService}
 * @return the result of {@code manageService}, or {@code false} when it
 *         throws (the exception is logged, not propagated)
 */
@Override
public boolean startServices(ClusterConfig clusterConfig, String host,
        Set<String> services) {
    boolean started = false;
    try {
        started = manageService(clusterConfig, host, services,
                Constant.ServiceAction.START);
    } catch (AnkushException ankushFailure) {
        // Domain failures carry their own message.
        logger.error(ankushFailure.getMessage(), componentName, host,
                ankushFailure);
    } catch (Exception unexpected) {
        logger.error(errMessage("start"), componentName, host, unexpected);
    }
    return started;
}
/**
 * Stops the agent on a single host by delegating to
 * {@link #manageService} with the STOP action.
 *
 * @param clusterConfig
 *            cluster the host belongs to
 * @param host
 *            host on which the agent should be stopped
 * @param services
 *            forwarded unchanged to {@code manageService}
 * @return the result of {@code manageService}, or {@code false} when it
 *         throws (the exception is logged, not propagated)
 */
@Override
public boolean stopServices(ClusterConfig clusterConfig, String host,
        Set<String> services) {
    try {
        return manageService(clusterConfig, host, services,
                Constant.ServiceAction.STOP);
    } catch (AnkushException e) {
        logger.error(e.getMessage(), componentName, host, e);
    } catch (Exception e) {
        // This is the stop path, so the message must say "stop"
        // (it previously reported a failed "start").
        logger.error(errMessage("stop"), componentName, host, e);
    }
    return false;
}
/**
 * Starts or stops the agent on one host, depending on {@code action}.
 *
 * @param clusterConfig
 *            cluster whose node map is searched for {@code host}
 * @param host
 *            key of the node in {@code clusterConfig.getNodes()}
 * @param services
 *            not used by this method
 * @param action
 *            START delegates to {@link #startAgent}, STOP to
 *            {@link #stopAgent}
 * @return the result of the delegated start/stop call
 * @throws AnkushException
 *             if the host is not a cluster node, the node has no
 *             connection, or the action is neither START nor STOP
 * @throws Exception
 *             any other failure raised while resolving the node
 */
public boolean manageService(ClusterConfig clusterConfig, String host,
        Set<String> services, Constant.ServiceAction action)
        throws AnkushException, Exception {
    // setting logger
    logger.setCluster(clusterConfig);

    // Resolve the node first: dereferencing the lookup result before
    // checking it would raise a NullPointerException for an unknown host
    // instead of the descriptive error below.
    NodeConfig node = clusterConfig.getNodes().get(host);
    if (node == null) {
        throw new AnkushException("Could not find " + host
                + " in cluster nodes. Please provide valid host.");
    }
    if (node.getConnection() == null) {
        throw new AnkushException(
                Constant.Strings.ExceptionsMessage.CONNECTION_NULL_STRING);
    }

    if (action == Constant.ServiceAction.START) {
        return startAgent(clusterConfig, node);
    }
    if (action == Constant.ServiceAction.STOP) {
        return stopAgent(clusterConfig, node);
    }
    throw new AnkushException(String.valueOf(action).toLowerCase()
            + " operation not supported.");
}
/**
 * Starts the agent on a node and then starts JmxTrans.
 * <p>
 * For SUDO installations the agent cron configuration file is first copied
 * from the agent home directory to its destination path; a failed copy is
 * only logged as a warning. The agent start script is then executed and
 * JmxTrans is started through {@link #startJmxTrans(ClusterConfig, NodeConfig)}.
 * JmxTrans is attempted even when the agent start script fails.
 *
 * @param clusterConfig
 *            cluster providing installation type, agent home and credentials
 * @param nodeConfig
 *            node whose connection is used to run the commands
 * @return {@code true} only when the agent start script exits with code 0
 *         and the JmxTrans start reports success
 * @throws AnkushException
 *             if a command cannot be executed or any other error occurs
 */
public boolean startAgent(ClusterConfig clusterConfig, NodeConfig nodeConfig)
        throws AnkushException {
    try {
        // Report the installation type through the logger rather than stdout.
        logger.info(
                "Installation type : "
                        + clusterConfig.getInstallationType(),
                Constant.Component.Name.AGENT, nodeConfig.getHost());

        // Add Agent cron entry for sudo installations only
        if (clusterConfig.getInstallationType() == Constant.Cluster.InstallationType.SUDO) {
            // Copying cron file
            String filePathCronConf = clusterConfig.getAgentHomeDir()
                    + AgentConstant.Relative_Path.CRON_CONF_FILE;
            String destinationCronConfPath = AgentConstant.Strings.ETC_PATH_AGENT_CRON_CONF;
            AnkushTask copyCronConf = new Copy(filePathCronConf,
                    destinationCronConfPath);
            AnkushTask execSudo = new ExecSudoCommand(clusterConfig
                    .getAuthConf().getPassword(), copyCronConf.getCommand());
            // A failed cron copy is best-effort: warn and keep going.
            if (nodeConfig.getConnection().exec(execSudo).rc != 0) {
                logger.warn(
                        "Could not copy agent cron configuration file from "
                                + filePathCronConf + " to "
                                + destinationCronConfPath,
                        Constant.Component.Name.AGENT, nodeConfig.getHost());
            }
        }

        // Agent start script command
        String startAgent = "sh " + clusterConfig.getAgentHomeDir()
                + AgentConstant.Relative_Path.START_SCRIPT;
        CustomTask task = new ExecCommand(startAgent);
        boolean agentStarted = nodeConfig.getConnection().exec(task).rc == 0;
        if (!agentStarted) {
            logger.warn("Could not start Agent",
                    Constant.Component.Name.AGENT, nodeConfig.getHost());
        }

        // Starting jmxtrans (attempted even if the agent script failed)
        boolean jmxTransStarted = startJmxTrans(clusterConfig, nodeConfig);

        // The outcome of the start script used to be discarded, so a failed
        // agent start was reported as success.
        return agentStarted && jmxTransStarted;
    } catch (AnkushException e) {
        throw e;
    } catch (TaskExecFailException e) {
        throw new AnkushException(
                "Either copying Agent cron file or starting Agent fail.", e);
    } catch (Exception e) {
        throw new AnkushException(errMessage("start"), e);
    }
}
/**
 * Stops the agent on a node on a best-effort basis.
 * <p>
 * For SUDO installations the agent cron configuration file is removed first.
 * JmxTrans is then stopped and finally the agent stop script is executed.
 * Every failure is logged and none is propagated.
 *
 * @param clusterConfig
 *            cluster providing installation type, agent home and credentials
 * @param nodeConfig
 *            node whose connection is used to run the commands
 * @return always {@code true}
 * @throws AnkushException
 *             declared for callers; every exception raised inside the
 *             method body is caught and logged
 */
public boolean stopAgent(ClusterConfig clusterConfig, NodeConfig nodeConfig)
        throws AnkushException {
    String etcCronConfPath = AgentConstant.Strings.ETC_PATH_AGENT_CRON_CONF;
    try {
        // Remove Agent cron entry for sudo installations only
        if (clusterConfig.getInstallationType() == Constant.Cluster.InstallationType.SUDO) {
            AnkushTask removeCronFile = new Remove(etcCronConfPath);
            AnkushTask execSudo = new ExecSudoCommand(clusterConfig
                    .getAuthConf().getPassword(),
                    removeCronFile.getCommand());
            if (nodeConfig.getConnection().exec(execSudo).rc != 0) {
                // Log with component and host like the other warnings here.
                logger.warn(
                        "Could not delete agent cron configuration file from "
                                + etcCronConfPath
                                + ". Please remove it manually.",
                        Constant.Component.Name.AGENT, nodeConfig.getHost());
            }
        }
        // stopping jmxTrans
        stopJmxTrans(clusterConfig, nodeConfig.getConnection(),
                nodeConfig.getHost());
        // Agent stop script command
        String stopAgent = "sh " + clusterConfig.getAgentHomeDir()
                + AgentConstant.Relative_Path.STOP_SCRIPT;
        CustomTask task = new ExecCommand(stopAgent);
        if (nodeConfig.getConnection().exec(task).rc != 0) {
            logger.warn("Could not stop Agent. Please stop it manually.",
                    Constant.Component.Name.AGENT, nodeConfig.getHost());
        }
    } catch (Exception e) {
        logger.error(errMessage("stop"), Constant.Component.Name.AGENT,
                nodeConfig.getHost(), e);
    }
    return true;
}
/**
 * (Re)starts JmxTrans on a node.
 * <p>
 * Runs {@code jps | grep ankush-jmxtrans-all.jar}; when that command exits
 * with code 0, JmxTrans is stopped first. JmxTrans is then started through
 * {@link #startJmxTrans(ClusterConfig, SSHExec, String)}.
 *
 * @param clusterConfig
 *            cluster configuration forwarded to the stop/start calls
 * @param nodeConfig
 *            node providing the connection and host name
 * @return the result of the start call, or {@code false} when any exception
 *         is raised (the exception is logged, not propagated)
 * @throws AnkushException
 *             declared for callers; exceptions raised inside the method body
 *             are caught and logged
 */
public boolean startJmxTrans(ClusterConfig clusterConfig,
        NodeConfig nodeConfig) throws AnkushException {
    try {
        // Validating jmxtrans jar
        String validateJmxtransCmd = "jps | grep ankush-jmxtrans-all.jar";
        CustomTask task = new ExecCommand(validateJmxtransCmd);
        if (nodeConfig.getConnection().exec(task).rc == 0) {
            // stopping jmxTrans; its result must not leak into the start
            // status returned below
            stopJmxTrans(clusterConfig, nodeConfig.getConnection(),
                    nodeConfig.getHost());
        }
        // starting jmxTrans
        return startJmxTrans(clusterConfig, nodeConfig.getConnection(),
                nodeConfig.getHost());
    } catch (AnkushException e) {
        logger.error(e.getMessage(), Constant.Component.Name.AGENT,
                nodeConfig.getHost(), e);
    } catch (Exception e) {
        logger.error("Could not start jmxtrans.",
                Constant.Component.Name.AGENT, nodeConfig.getHost(), e);
    }
    // Every failure path reports false. Previously a failure after a
    // successful stop could still return the stop's "true".
    return false;
}
/**
 * Stops JmxTrans on a node using the node's own connection and host name.
 *
 * @param clusterConfig
 *            cluster configuration forwarded to the stop call
 * @param nodeConfig
 *            node providing the connection and host name
 * @return the result of
 *         {@link #stopJmxTrans(ClusterConfig, SSHExec, String)}
 * @throws AnkushException
 *             if anything fails while resolving the node details or
 *             stopping; the original exception is attached as the cause
 */
public boolean stopJmxTrans(ClusterConfig clusterConfig,
        NodeConfig nodeConfig) throws AnkushException {
    try {
        // stopping jmxTrans
        return stopJmxTrans(clusterConfig, nodeConfig.getConnection(),
                nodeConfig.getHost());
    } catch (Exception e) {
        // Keep the cause so the real failure is visible in the stack trace.
        throw new AnkushException("Could not stop jmxtrans", e);
    }
}
/**
 * Verifies that a host can be operated on: the arguments are present, the
 * host is a node of the cluster and the node has a connection.
 *
 * @param clusterConfig
 *            cluster whose node map is searched for {@code host}
 * @param host
 *            key of the node in {@code clusterConfig.getNodes()}
 * @param action
 *            verb used in the generic error message, e.g. "start"
 * @throws AnkushException
 *             if an argument is missing, the host is not a cluster node, the
 *             node has no connection, or an unexpected error occurs (the
 *             unexpected error is attached as the cause)
 */
private void validateNode(ClusterConfig clusterConfig, String host,
        String action) throws AnkushException {
    try {
        if (clusterConfig == null || host == null || host.isEmpty()) {
            throw new AnkushException(
                    "Either clusterConfig or host is empty or undefined.");
        }
        logger.setCluster(clusterConfig);
        logger.info("Validating connection...", getComponentName(), host);

        // An unknown host gets a descriptive error instead of a
        // NullPointerException turned into the generic message.
        NodeConfig node = clusterConfig.getNodes().get(host);
        if (node == null) {
            throw new AnkushException("Could not find " + host
                    + " in cluster nodes. Please provide valid host.");
        }
        // get connection object
        if (node.getConnection() == null) {
            throw new AnkushException(
                    Constant.Strings.ExceptionsMessage.CONNECTION_NULL_STRING);
        }
    } catch (AnkushException e) {
        throw e;
    } catch (Exception e) {
        throw new AnkushException(errMessage(action), e);
    }
}
/**
 * Validates a host and starts the agent on it.
 * <p>
 * The agent roles registered for the host are looked up and handed to
 * {@link #manageService} together with the START action.
 *
 * @param clusterConfig
 *            cluster the host belongs to
 * @param host
 *            host on which the agent should be started
 * @return the result of {@code manageService}, or {@code false} when
 *         validation or the start raises an exception (logged, not
 *         propagated)
 */
@Override
public boolean startNode(ClusterConfig clusterConfig, String host) {
    boolean started = false;
    try {
        validateNode(clusterConfig, host, "start");

        final String component = getComponentName();
        logger.info("Starting " + component + "...", getComponentName(),
                host);
        // getting services list
        logger.info("Getting " + getComponentName()
                + " services on nodes.", getComponentName(), host);

        Set<String> agentRoles = clusterConfig.getNodes().get(host)
                .getRoles().get(Constant.Component.Name.AGENT);
        started = manageService(clusterConfig, host, agentRoles,
                Constant.ServiceAction.START);
    } catch (AnkushException ankushFailure) {
        logger.error(ankushFailure.getMessage(), getComponentName(), host,
                ankushFailure);
    } catch (Exception unexpected) {
        logger.error(errMessage("start"), componentName, host, unexpected);
    }
    return started;
}
/**
 * Validates a host and stops the agent on it.
 * <p>
 * The agent roles registered for the host are looked up and handed to
 * {@link #manageService} together with the STOP action.
 *
 * @param clusterConfig
 *            cluster the host belongs to
 * @param host
 *            host on which the agent should be stopped
 * @return the result of {@code manageService}, or {@code false} when
 *         validation or the stop raises an exception (logged, not
 *         propagated)
 */
@Override
public boolean stopNode(ClusterConfig clusterConfig, String host) {
    try {
        validateNode(clusterConfig, host, "stop");
        logger.info("Stopping " + getComponentName() + "...",
                getComponentName(), host);
        // getting services list
        logger.info(
                "Getting " + getComponentName() + " services on nodes.",
                getComponentName(), host);
        Set<String> services = clusterConfig.getNodes().get(host)
                .getRoles().get(Constant.Component.Name.AGENT);
        return manageService(clusterConfig, host, services,
                Constant.ServiceAction.STOP);
    } catch (AnkushException e) {
        logger.error(e.getMessage(), getComponentName(), host, e);
    } catch (Exception e) {
        // This is the stop path, so the message must say "stop"
        // (it previously reported a failed "start").
        logger.error(errMessage("stop"), componentName, host, e);
    }
    return false;
}
/**
 * Starts the agent on every node of the cluster.
 *
 * @param clusterConfig
 *            cluster whose nodes are started
 * @return the result of {@code manageClusterServices} for the START action,
 *         or {@code false} when it throws (logged, not propagated)
 */
@Override
public boolean start(ClusterConfig clusterConfig) {
    boolean started = false;
    try {
        started = manageClusterServices(clusterConfig,
                Constant.ServiceAction.START);
    } catch (AnkushException ankushFailure) {
        logger.error(ankushFailure.getMessage(), componentName,
                ankushFailure);
    } catch (Exception unexpected) {
        logger.error(errMessage("start"), componentName, unexpected);
    }
    return started;
}
/**
 * Stops the agent on every node of the cluster.
 *
 * @param clusterConfig
 *            cluster whose nodes are stopped
 * @return the result of {@code manageClusterServices} for the STOP action,
 *         or {@code false} when it throws (logged, not propagated)
 */
@Override
public boolean stop(ClusterConfig clusterConfig) {
    boolean stopped = false;
    try {
        stopped = manageClusterServices(clusterConfig,
                Constant.ServiceAction.STOP);
    } catch (AnkushException ankushFailure) {
        logger.error(ankushFailure.getMessage(), componentName,
                ankushFailure);
    } catch (Exception unexpected) {
        logger.error(errMessage("stop"), componentName, unexpected);
    }
    return stopped;
}
/**
 * Runs the start or stop action for every node of the cluster and waits
 * until all of them have finished.
 * <p>
 * One task per node is submitted to the executor obtained from
 * {@link AppStoreWrapper}; each task stores the outcome of
 * {@link #startNode} / {@link #stopNode} as the node's status. A semaphore
 * with one permit per node is used as the completion barrier.
 *
 * @param clusterConfig
 *            cluster whose nodes are processed
 * @param action
 *            START or STOP; any other value leaves node statuses untouched
 * @return the value of {@code AnkushUtils.getStatus} for the cluster nodes
 * @throws AnkushException
 *             if {@code clusterConfig} is {@code null}, the waiting thread is
 *             interrupted, or task submission fails (the original exception
 *             is attached as the cause)
 */
private boolean manageClusterServices(final ClusterConfig clusterConfig,
        final Constant.ServiceAction action) throws AnkushException {
    try {
        if (clusterConfig == null) {
            throw new AnkushException("Could not get clusterConfig for "
                    + getComponentName());
        }
        logger.setCluster(clusterConfig);

        // One permit per node: each task holds a permit while it runs.
        final int nodeCount = clusterConfig.getNodes().size();
        final Semaphore semaphore = new Semaphore(nodeCount);

        // running the action on each node in cluster
        for (final String host : clusterConfig.getNodes().keySet()) {
            semaphore.acquire();
            AppStoreWrapper.getExecutor().execute(new Runnable() {
                @Override
                public void run() {
                    try {
                        switch (action) {
                        case START:
                            clusterConfig.getNodes().get(host)
                                    .setStatus(startNode(clusterConfig, host));
                            break;
                        case STOP:
                            clusterConfig.getNodes().get(host)
                                    .setStatus(stopNode(clusterConfig, host));
                            break;
                        }
                    } finally {
                        // Always give the permit back; otherwise a runtime
                        // failure in this task would block the final
                        // acquire below forever.
                        semaphore.release();
                    }
                }
            });
        }
        // Blocks until every task has released its permit.
        semaphore.acquire(nodeCount);
    } catch (AnkushException e) {
        throw e;
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can observe the interruption.
        Thread.currentThread().interrupt();
        throw new AnkushException(errMessage(String.valueOf(action)
                .toLowerCase()), e);
    } catch (Exception e) {
        throw new AnkushException(errMessage(String.valueOf(action)
                .toLowerCase()), e);
    }
    return AnkushUtils.getStatus(clusterConfig.getNodes());
}
/**
 * Configures and starts JmxTrans on a host.
 * <p>
 * When the cluster has no Ganglia component nothing is done and {@code true}
 * is returned. Otherwise the configured JMX options replacement text is
 * filled in with the Ganglia gmetad host, the Ganglia port (falling back to
 * the {@code ganglia.port} configuration value when the component property
 * cannot be read) and the host name, written into the JmxTrans script file,
 * and the script is started in the background without sudo.
 *
 * @param clusterConfig
 *            cluster providing the component map and agent home directory
 * @param connection
 *            connection used to run the commands on the host
 * @param host
 *            host name substituted into the script and used for logging
 * @return {@code true} when JmxTrans is skipped or the start command
 *         succeeds; {@code false} when starting fails (the failure is
 *         logged, not propagated)
 * @throws AnkushException
 *             declared for callers; exceptions raised inside the try block
 *             are caught and logged
 */
public boolean startJmxTrans(ClusterConfig clusterConfig,
        SSHExec connection, String host) throws AnkushException {
    // Skipping to start JMXTrans, if Cluster doesn't contain Ganglia
    if (!clusterConfig.getComponents().containsKey(
            Constant.Component.Name.GANGLIA)) {
        return true;
    }
    try {
        logger.info("Starting JmxTrans", getComponentName(), host);
        final String targetText_JmxOpts = this.ankushConf
                .getStringValue("jmx.opts.targetText");
        String replacementText_JmxOpts = this.ankushConf
                .getStringValue("jmx.opts.replacementText");
        final String jmxTransScriptFilePath = getJmxTransScriptFilePath(clusterConfig);
        ComponentConfig gangliaConfig = getGangliaConfig(clusterConfig);

        replacementText_JmxOpts = replacementText_JmxOpts
                .replaceAll(
                        this.ankushConf
                                .getStringValue("jmxtrans.script.template.gangliamasterip"),
                        gangliaConfig
                                .getAdvanceConfStringProperty(GangliaConstants.ClusterProperties.GMETAD_HOST));

        int gangliaPort;
        try {
            gangliaPort = gangliaConfig
                    .getAdvanceConfIntegerProperty(GangliaConstants.ClusterProperties.GANGLIA_PORT);
        } catch (Exception e) {
            // Fall back to the configured default port when the component
            // property cannot be read.
            gangliaPort = AppStoreWrapper.getAnkushConfReader()
                    .getIntValue("ganglia.port");
        }
        replacementText_JmxOpts = replacementText_JmxOpts
                .replaceAll(
                        this.ankushConf
                                .getStringValue("jmxtrans.script.template.gangliaport"),
                        String.valueOf(gangliaPort));
        replacementText_JmxOpts = replacementText_JmxOpts
                .replaceAll(
                        this.ankushConf
                                .getStringValue("jmxtrans.script.template.privateip"),
                        host);
        replacementText_JmxOpts = replacementText_JmxOpts
                .replaceAll(
                        this.ankushConf
                                .getStringValue("jmxtrans.script.template.serveralias"),
                        host);

        final AnkushTask jmxTask = new ReplaceText(targetText_JmxOpts,
                replacementText_JmxOpts, jmxTransScriptFilePath, false,
                null);
        if (!connection.exec(jmxTask).isSuccess) {
            logger.warn(
                    "Unable to update JMXTRANS_OPTS in JmxTrans script file",
                    getComponentName(), host);
        }

        // Password is set to NULL to run the script command without sudo
        // option
        final String command = JmxMonitoringUtil.getJmxTransCommand(
                jmxTransScriptFilePath, null,
                Constant.JmxTransServiceAction.START);
        final AnkushTask task = new RunInBackground(command);
        if (!connection.exec(task).isSuccess) {
            throw new AnkushException(
                    "Could not start jmxtrans service for JMX monitoring.");
        }
        return true;
    } catch (AnkushException e) {
        logger.error(e.getMessage(), getComponentName(), host, e);
    } catch (Exception e) {
        logger.error("Could not start Jmxtrans.", getComponentName(), host,
                e);
    }
    // A failed start is reported as false. Previously the method returned
    // true after logging the failure.
    return false;
}
/**
 * Stops JmxTrans on a host on a best-effort basis.
 * <p>
 * The stop command is built for the JmxTrans script file (with a
 * {@code null} password argument) and run in the background. An
 * unsuccessful command is logged as a warning and any exception as an
 * error; neither is propagated.
 *
 * @param clusterConfig
 *            cluster used to resolve the JmxTrans script path
 * @param connection
 *            connection used to run the command on the host
 * @param host
 *            host name used for logging
 * @return always {@code true}
 * @throws AnkushException
 *             declared for callers; exceptions raised inside the method body
 *             are caught and logged
 */
public boolean stopJmxTrans(ClusterConfig clusterConfig,
        SSHExec connection, String host) throws AnkushException {
    try {
        final String scriptPath = getJmxTransScriptFilePath(clusterConfig);
        final String stopCommand = JmxMonitoringUtil.getJmxTransCommand(
                scriptPath, null, Constant.JmxTransServiceAction.STOP);
        final AnkushTask backgroundStop = new RunInBackground(stopCommand);

        final boolean stopped = connection.exec(backgroundStop).isSuccess;
        if (!stopped) {
            logger.warn(
                    "Could not stop jmxtrans service for JMX monitoring.",
                    Constant.Component.Name.AGENT, host);
        }
    } catch (Exception failure) {
        logger.error("Could not stop Jmxtrans",
                Constant.Component.Name.AGENT, host, failure);
    }
    return true;
}
/**
 * Builds the path of the JmxTrans script file by concatenating the agent
 * home directory, the configured {@code jmxtrans.installation.relative.path}
 * and the configured {@code jmx.script.file.name}.
 *
 * @param clusterConfig
 *            cluster providing the agent home directory
 * @return the concatenated script file path
 * @throws AnkushException
 *             if the path cannot be built; the original exception is
 *             attached as the cause
 */
private String getJmxTransScriptFilePath(ClusterConfig clusterConfig)
        throws AnkushException {
    try {
        String jmxInstallPath = clusterConfig.getAgentHomeDir()
                + ankushConf
                        .getStringValue("jmxtrans.installation.relative.path");
        return jmxInstallPath
                + ankushConf.getStringValue("jmx.script.file.name");
    } catch (Exception e) {
        // Keep the cause so the real failure is not lost.
        throw new AnkushException(
                "Could not get Jmxtrans script file path.", e);
    }
}
/**
 * Returns the Ganglia component configuration of the cluster.
 *
 * @param clusterConfig
 *            cluster whose component map is searched
 * @return the configuration stored under the Ganglia component name; never
 *         {@code null}
 * @throws AnkushException
 *             if the cluster has no Ganglia configuration or the lookup
 *             fails; a lookup failure is attached as the cause
 */
private ComponentConfig getGangliaConfig(ClusterConfig clusterConfig)
        throws AnkushException {
    String errMsg = "Could not get " + Constant.Component.Name.GANGLIA
            + " details for starting JmxTrans.";
    try {
        // A single lookup covers both "key absent" and "null value", so the
        // caller never receives a null configuration.
        ComponentConfig gangliaConfig = clusterConfig.getComponents().get(
                Constant.Component.Name.GANGLIA);
        if (gangliaConfig == null) {
            throw new AnkushException(errMsg);
        }
        return gangliaConfig;
    } catch (AnkushException e) {
        throw e;
    } catch (Exception e) {
        throw new AnkushException(errMsg, e);
    }
}
/**
 * Not implemented for the agent component.
 *
 * @param clusterConfig
 *            ignored
 * @return always {@code null}
 */
@Override
public Set<String> getServiceList(ClusterConfig clusterConfig) {
// TODO: not implemented; callers receive null rather than an empty set.
return null;
}
/**
 * Not implemented for the agent component.
 *
 * @param clusterConfig
 *            ignored
 * @param role
 *            ignored
 * @return always {@code false}
 */
@Override
public boolean startRole(ClusterConfig clusterConfig, String role) {
// TODO: not implemented; no role is started.
return false;
}
/**
 * Not implemented for the agent component.
 *
 * @param clusterConfig
 *            ignored
 * @param role
 *            ignored
 * @return always {@code false}
 */
@Override
public boolean stopRole(ClusterConfig clusterConfig, String role) {
// TODO: not implemented; no role is stopped.
return false;
}
/**
 * Not implemented for the agent component.
 *
 * @param clusterConfig
 *            ignored
 * @param host
 *            ignored
 * @param role
 *            ignored
 * @return always {@code null}
 */
@Override
public String getLogDirPath(ClusterConfig clusterConfig, String host,
String role) {
// TODO: not implemented; no log directory is resolved.
return null;
}
/**
 * Not implemented for the agent component.
 *
 * @param clusterConfig
 *            ignored
 * @param host
 *            ignored
 * @param role
 *            ignored
 * @param parameters
 *            ignored
 * @return always {@code null}
 */
@Override
public String getLogFilesRegex(ClusterConfig clusterConfig, String host,
String role, Map<String, Object> parameters) {
// TODO: not implemented; no log file pattern is produced.
return null;
}
}