/*
* RHQ Management Platform
* Copyright (C) 2005-2013 Red Hat, Inc.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
package org.rhq.plugins.cassandra;
import static java.util.Arrays.asList;
import static java.util.concurrent.TimeUnit.NANOSECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.rhq.core.system.OperatingSystemType.WINDOWS;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import javax.management.MBeanServerConnection;
import javax.management.ObjectName;
import javax.management.remote.JMXConnector;
import javax.management.remote.JMXConnectorFactory;
import javax.management.remote.JMXServiceURL;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.hyperic.sigar.OperatingSystem;
import org.hyperic.sigar.SigarException;
import org.mc4j.ems.connection.EmsConnection;
import org.mc4j.ems.connection.bean.EmsBean;
import org.mc4j.ems.connection.bean.operation.EmsOperation;
import org.yaml.snakeyaml.DumperOptions;
import org.yaml.snakeyaml.Yaml;
import org.rhq.core.domain.configuration.Configuration;
import org.rhq.core.domain.configuration.Property;
import org.rhq.core.domain.configuration.PropertyList;
import org.rhq.core.domain.configuration.PropertySimple;
import org.rhq.core.domain.measurement.AvailabilityType;
import org.rhq.core.pluginapi.inventory.ResourceComponent;
import org.rhq.core.pluginapi.inventory.ResourceContext;
import org.rhq.core.pluginapi.operation.OperationFacet;
import org.rhq.core.pluginapi.operation.OperationResult;
import org.rhq.core.pluginapi.util.ProcessExecutionUtility;
import org.rhq.core.system.ProcessExecution;
import org.rhq.core.system.ProcessExecutionResults;
import org.rhq.core.system.ProcessInfo;
import org.rhq.core.system.ProcessInfo.ProcessInfoSnapshot;
import org.rhq.core.system.SystemInfo;
import org.rhq.core.util.StringUtil;
import org.rhq.core.util.exception.ThrowableUtil;
import org.rhq.core.util.stream.StreamUtil;
import org.rhq.plugins.cassandra.util.KeyspaceService;
import org.rhq.plugins.cassandra.util.TakeSnapshotOperation;
import org.rhq.plugins.jmx.JMXServerComponent;
/**
 * Resource component for a Cassandra storage node. Reports availability by probing the
 * {@code StorageService} MBean over a freshly opened JMX connection, and implements the
 * node lifecycle operations (start, shutdown, restart), seeds-list updates in
 * cassandra.yaml, and keyspace snapshots.
 *
 * @author John Sanda
 */
public class CassandraNodeComponent extends JMXServerComponent<ResourceComponent<?>> implements OperationFacet {
    private static final Log log = LogFactory.getLog(CassandraNodeComponent.class);

    /** Host name taken from the plugin configuration; defaults to "localhost". */
    private String host;

    /** Native process handle for the Cassandra daemon; may be null when the process is not discovered. */
    private ProcessInfo processInfo;

    /** Resolved address of {@link #host}; stays null if resolution fails during {@link #start}. */
    private InetAddress hostAddress;

    /**
     * Caches the native process handle and resolves the configured host name. A failed
     * host resolution is logged but does not fail component startup.
     */
    @SuppressWarnings("rawtypes")
    @Override
    public void start(ResourceContext context) throws Exception {
        super.start(context);
        processInfo = context.getNativeProcess();
        host = context.getPluginConfiguration().getSimpleValue("host", "localhost");
        try {
            hostAddress = InetAddress.getByName(this.host);
        } catch (UnknownHostException e) {
            log.error("Unable to convert hostname[" + this.host + "] into IP address for " + context.getResourceKey(),
                e);
        }
    }

    /**
     * @return the resolved address of the configured host, or null if resolution failed
     */
    public InetAddress getHostAddress() {
        return hostAddress;
    }

    @Override
    public void stop() {
        processInfo = null;
        super.stop();
    }

    /**
     * Determines availability by attempting a JMX connection to the StorageService MBean.
     * Logs a warning when the check takes longer than five seconds.
     */
    @Override
    public AvailabilityType getAvailability() {
        long start = System.nanoTime();
        try {
            if (isStorageServiceReachable()) {
                return AvailabilityType.UP;
            }
            return AvailabilityType.DOWN;
        } finally {
            long totalTimeMillis = NANOSECONDS.toMillis(System.nanoTime() - start);
            if (log.isDebugEnabled()) {
                log.debug("Finished availability check in " + totalTimeMillis + " ms");
            }
            if (totalTimeMillis > SECONDS.toMillis(5)) {
                log.warn("Availability check exceeded five seconds. Total time was " + totalTimeMillis + " ms");
            }
        }
    }

    /**
     * Opens a short-lived JMX connection using the plugin's connectorAddress and reads an
     * attribute of the StorageService MBean to prove the node is actually responsive.
     * The connector is always closed before returning.
     *
     * @return true if the attribute read succeeds, false on any failure
     */
    private boolean isStorageServiceReachable() {
        JMXConnector connector = null;
        try {
            Configuration pluginConfig = getResourceContext().getPluginConfiguration();
            String url = pluginConfig.getSimpleValue("connectorAddress");
            JMXServiceURL serviceURL = new JMXServiceURL(url);
            connector = JMXConnectorFactory.connect(serviceURL, null);
            MBeanServerConnection serverConnection = connector.getMBeanServerConnection();
            ObjectName storageService = new ObjectName("org.apache.cassandra.db:type=StorageService");
            // query an attribute to make sure it is in fact available
            serverConnection.getAttribute(storageService, "NativeTransportRunning");
            return true;
        } catch (Exception e) {
            if (log.isDebugEnabled()) {
                log.debug("Failed to make JMX connection to StorageService", e);
            }
            return false;
        } finally {
            if (connector != null) {
                try {
                    connector.close();
                } catch (IOException e) {
                    if (log.isDebugEnabled()) {
                        log.debug("An error occurred closing the JMX connector", e);
                    }
                }
            }
        }
    }

    /**
     * Dispatches the named resource operation.
     *
     * @param name one of shutdown, start, restart, updateSeedsList, takeSnapshot
     * @param parameters operation-specific parameters
     * @return the operation result, or null for an unrecognized operation name
     */
    @Override
    public OperationResult invokeOperation(String name, Configuration parameters) throws Exception {
        if (name.equals("shutdown")) {
            OperationResult operationResult = shutdownNode();
            waitForNodeToGoDown();
            return operationResult;
        } else if (name.equals("start")) {
            return startNode();
        } else if (name.equals("restart")) {
            return restartNode();
        } else if (name.equals("updateSeedsList")) {
            return updateSeedsList(parameters);
        } else if (name.equals("takeSnapshot")) {
            if (isStorageServiceReachable()) {
                return new TakeSnapshotOperation(new KeyspaceService(getEmsConnection()), parameters).invoke();
            } else {
                OperationResult result = new OperationResult();
                // NOTE: fixed typo in error message ("snaphost" -> "snapshot")
                result.setErrorMessage("Unable to take snapshot, Storage Node is not available");
                return result;
            }
        }
        return null;
    }

    /**
     * Performs a clean shutdown: disables thrift (RPC) and gossip, drains the commit log,
     * then kills the daemon process via {@link #stopNode()}.
     */
    @SuppressWarnings("rawtypes")
    protected OperationResult shutdownNode() {
        ResourceContext<?> context = getResourceContext();

        if (log.isInfoEnabled()) {
            log.info("Starting shutdown operation on " + CassandraNodeComponent.class.getName() +
                " with resource key " + context.getResourceKey());
        }
        EmsConnection emsConnection = getEmsConnection();
        EmsBean storageService = emsConnection.getBean("org.apache.cassandra.db:type=StorageService");
        Class[] emptyParams = new Class[0];

        if (log.isDebugEnabled()) {
            log.debug("Disabling thrift...");
        }
        EmsOperation operation = storageService.getOperation("stopRPCServer", emptyParams);
        operation.invoke((Object[]) emptyParams);

        if (log.isDebugEnabled()) {
            log.debug("Disabling gossip...");
        }
        operation = storageService.getOperation("stopGossiping", emptyParams);
        operation.invoke((Object[]) emptyParams);

        if (log.isDebugEnabled()) {
            log.debug("Initiating drain...");
        }
        operation = storageService.getOperation("drain", emptyParams);
        operation.invoke((Object[]) emptyParams);

        return stopNode();
    }

    /**
     * Kills the native Cassandra process (SIGKILL) and removes the stale pid file.
     *
     * @return a success result carrying the pid, or a failure result if the kill failed
     *         or no process could be found
     */
    protected OperationResult stopNode() {
        ProcessInfo process = getResourceContext().getNativeProcess();

        if (process == null) {
            log.warn("Failed to obtain process info. It appears Cassandra is already shutdown.");
            return new OperationResult("Failed to obtain process info. It appears Cassandra is already shutdown.");
        }

        long pid = process.getPid();
        try {
            process.kill("KILL");

            // The process is already dead at this point; a missing baseDir must not turn
            // a successful kill into an NPE, so only clean up the pid file when we can
            // locate it.
            Configuration pluginConfig = getResourceContext().getPluginConfiguration();
            String baseDir = pluginConfig.getSimpleValue("baseDir");
            if (baseDir != null) {
                File binDir = new File(baseDir, "bin");
                File pidFile = new File(binDir, "cassandra.pid");
                pidFile.delete();
            }

            return new OperationResult("Successfully shut down Cassandra daemon with pid " + pid);
        } catch (SigarException e) {
            log.warn("Failed to shut down Cassandra node with pid " + pid, e);
            OperationResult failure = new OperationResult("Failed to shut down Cassandra node with pid " + pid);
            failure.setErrorMessage(ThrowableUtil.getAllMessages(e));
            return failure;
        }
    }

    /**
     * Polls the native process until it is gone, sleeping two seconds between checks.
     *
     * @throws InterruptedException if the operation is canceled or times out
     */
    protected void waitForNodeToGoDown() throws InterruptedException {
        if (OperatingSystem.getInstance().getName().equals(OperatingSystem.NAME_MACOSX)) {
            // See this thread on VMWare forum: http://communities.vmware.com/message/2187972#2187972
            // Unfortunately there is no work around for this failure on Mac OSX so the method will silently return on
            // this platform.
            return;
        }
        for (ProcessInfoSnapshot processInfoSnapshot = getProcessInfoSnapshot();; processInfoSnapshot = getProcessInfoSnapshot()) {
            if (processInfoSnapshot == null || !processInfoSnapshot.isRunning()) {
                // Process not found, so it died, that's fine
                // OR
                // Process info says process is no longer running, that's fine as well
                break;
            }
            if (getResourceContext().getComponentInvocationContext().isInterrupted()) {
                // Operation canceled or timed out
                throw new InterruptedException();
            }
            // Process is still running, wait two seconds and check again
            Thread.sleep(SECONDS.toMillis(2));
        }
    }

    /**
     * Takes a fresh snapshot of the cached process info, re-discovering the native
     * process if the cached handle is stale or missing.
     *
     * @return a snapshot, or null when no process can be found at all
     */
    private ProcessInfoSnapshot getProcessInfoSnapshot() {
        ProcessInfoSnapshot processInfoSnapshot = (processInfo == null) ? null : processInfo.freshSnapshot();
        if (processInfoSnapshot == null || !processInfoSnapshot.isRunning()) {
            processInfo = getResourceContext().getNativeProcess();
            // Safe to get prior snapshot here, we've just recreated the process info instance
            processInfoSnapshot = (processInfo == null) ? null : processInfo.priorSnaphot();
        }
        return processInfoSnapshot;
    }

    /**
     * Starts the Cassandra daemon by executing the platform-appropriate start script
     * from the node's bin directory.
     */
    protected OperationResult startNode() {
        Configuration pluginConfig = getResourceContext().getPluginConfiguration();
        String baseDir = pluginConfig.getSimpleValue("baseDir");
        File binDir = new File(baseDir, "bin");
        if (!startScriptExists(binDir)) {
            OperationResult failure = new OperationResult("Failed to start Cassandra daemon");
            // NOTE: fixed grammar in error message ("does not exists" -> "does not exist")
            failure.setErrorMessage("Start script does not exist");
            return failure;
        }
        ProcessExecution scriptExe = getProcessExecution(binDir);
        SystemInfo systemInfo = getResourceContext().getSystemInformation();
        ProcessExecutionResults results = systemInfo.executeProcess(scriptExe);
        if (results.getError() == null) {
            return new OperationResult("Successfully started Cassandra daemon");
        } else {
            OperationResult failure = new OperationResult("Failed to start Cassandra daemon");
            failure.setErrorMessage(ThrowableUtil.getAllMessages(results.getError()));
            return failure;
        }
    }

    /** @return true when the start script exists in binDir and is a regular file */
    private boolean startScriptExists(File binDir) {
        File file = new File(binDir, getStartScript());
        return file.exists() && !file.isDirectory();
    }

    /**
     * Builds the process execution for the start script. On non-Windows platforms the
     * script is invoked via a relative path (with binDir as the working directory) to
     * keep the command line short enough for later process discovery.
     */
    private ProcessExecution getProcessExecution(File binDir) {
        ProcessExecution scriptExe;
        if (OperatingSystem.getInstance().getName().equals(OperatingSystem.NAME_WIN32)) {
            File startScript = new File(binDir, getStartScript());
            scriptExe = ProcessExecutionUtility.createProcessExecution(startScript);
        } else {
            // On Linux, when Cassandra is started with an absolute path, the command line is too long and is truncated
            // in /proc/pid/cmdline (beacuse of a long CLASSPATH made of absolute paths)
            // This prevents the process from being later discovered because the process query argument criteria
            // expects org.apache.cassandra.service.CassandraDaemon to be found
            File startScript = new File("./" + getStartScript());
            scriptExe = ProcessExecutionUtility.createProcessExecution(startScript);
            scriptExe.setCheckExecutableExists(false);
        }
        scriptExe.setWorkingDirectory(binDir.getAbsolutePath());
        scriptExe.addArguments(asList("-p", "cassandra.pid"));
        return scriptExe;
    }

    /**
     * Shuts the node down and, only if the shutdown reported no error, starts it again.
     */
    protected OperationResult restartNode() {
        OperationResult result = shutdownNode();

        if (result.getErrorMessage() == null) {
            result = startNode();
        }

        return result;
    }

    /**
     * Operation entry point: extracts the seed addresses from the "seedsList" parameter
     * and rewrites cassandra.yaml. Failures are reported in the result's error message.
     */
    protected OperationResult updateSeedsList(Configuration params) {
        PropertyList list = params.getList("seedsList");
        List<String> addresses = getAddresses(list);

        OperationResult result = new OperationResult();
        try {
            updateSeedsList(addresses);
        }  catch (Exception e) {
            log.error("An error occurred while updating the seeds list property", e);
            Throwable rootCause = ThrowableUtil.getRootCause(e);
            result.setErrorMessage(ThrowableUtil.getStackAsString(rootCause));
        }
        return result;
    }

    /** Flattens a PropertyList of simple properties into their string values. */
    protected List<String> getAddresses(PropertyList seedsList) {
        List<String> addresses = new ArrayList<String>();
        for (Property property : seedsList.getList()) {
            PropertySimple simple = (PropertySimple) property;
            addresses.add(simple.getStringValue());
        }
        return addresses;
    }

    /**
     * Rewrites the seeds property in cassandra.yaml. A timestamped backup of the file is
     * taken first; on a successful write the backup is removed, and on a failed write the
     * backup is restored.
     *
     * @param seeds the new seed addresses
     * @throws IOException if the file cannot be replaced or a failed update cannot be rolled back
     * @throws IllegalStateException if the yamlConfiguration plugin property is unset or
     *         points at a non-existent file
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    protected void updateSeedsList(List<String> seeds) throws IOException {
        ResourceContext<?> context = getResourceContext();
        Configuration pluginConfig = context.getPluginConfiguration();

        String yamlProp = pluginConfig.getSimpleValue("yamlConfiguration");
        if (yamlProp == null || yamlProp.isEmpty()) {
            throw new IllegalStateException("Plugin configuration property [yamlConfiguration] is undefined. This " +
                "property must be set and specify the location of cassandra.yaml in order to complete " +
                "this operation");
        }

        File yamlFile = new File(yamlProp);
        if (!yamlFile.exists()) {
            throw new IllegalStateException("Plugin configuration property [yamlConfiguration] has as its value a " +
                "non-existent file.");
        }

        // Cassandra uses strong typing when reading and parsing cassandra.yaml. The
        // document is parsed into a org.apache.cassandra.config.Config object. I tried
        // using the config classes but ran into a couple different problems. When writing
        // the config back out to cassandra.yaml, the generated yaml is not correct for the
        // seed_provider property. The snakeyaml parser cannot even load the document
        // because the SeedProviderDef class cannot be instantiated since it does not define
        // a no-args constructor. Once I fixed that, I still was not able to get past the
        // problems with the yaml generated for the seed_provider provider. Subsequent reads
        // of cassandra.yaml would result in parsing errors. Given these problems, I decided
        // to go with the untyped approach for updating cassandra.yaml for now.
        //
        // jsanda

        DumperOptions options = new DumperOptions();
        options.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK);
        Yaml yaml = new Yaml(options);

        // Load the whole document as untyped maps/lists; close the stream once parsed to
        // avoid leaking a file handle (the subsequent delete of yamlFile can fail on
        // Windows while the file is still open).
        Map cassandraConfig;
        FileInputStream yamlInputStream = new FileInputStream(yamlFile);
        try {
            cassandraConfig = (Map) yaml.load(yamlInputStream);
        } finally {
            yamlInputStream.close();
        }

        List seedProviderList = (List) cassandraConfig.get("seed_provider");
        Map seedProvider = (Map) seedProviderList.get(0);
        List paramsList = (List) seedProvider.get("parameters");
        Map params = (Map) paramsList.get(0);
        params.put("seeds", StringUtil.listToString(seeds));

        // create a backup of the configuration file in preparation of writing out the changes
        File yamlFileBackup = new File(yamlProp + ".bak" + new Date().getTime());
        StreamUtil.copy(new FileInputStream(yamlFile), new FileOutputStream(yamlFileBackup), true);

        if (!yamlFile.delete()) {
            String msg = "Failed to delete [" + yamlFile + "] in preparation of writing updated configuration. The " +
                "changes will be aborted.";
            log.error(msg);
            deleteYamlBackupFile(yamlFileBackup);
            throw new IOException(msg);
        }

        FileWriter writer = new FileWriter(yamlFile);
        try {
            yaml.dump(cassandraConfig, writer);
            deleteYamlBackupFile(yamlFileBackup);
        } catch (Exception e) {
            log.error("An error occurred while trying to write the updated configuration back to " + yamlFile, e);
            log.error("Reverting changes to " + yamlFile);

            // Restore the pre-update file from the backup; if even that is impossible,
            // tell the operator where the good copy lives.
            if (yamlFile.delete()) {
                StreamUtil.copy(new FileInputStream(yamlFileBackup), new FileOutputStream(yamlFile));
                deleteYamlBackupFile(yamlFileBackup);
            } else {
                String msg = "Failed updates to " + yamlFile.getName() + " cannot be rolled back. The file cannot be " +
                    "deleted. " + yamlFile + " should be replaced by " + yamlFileBackup;
                log.error(msg);
                throw new IOException(msg);
            }
        } finally {
            writer.close();
        }
    }

    /** Best-effort removal of a cassandra.yaml backup file; failure is only logged. */
    private void deleteYamlBackupFile(File yamlBackup) {
        if (!yamlBackup.delete()) {
            log.warn("Failed to delete Cassandra configuration backup file [" + yamlBackup + "]. This file " +
                "should be deleted.");
        }
    }

    /** @return the platform-specific start script name ("cassandra.bat" on Windows) */
    private String getStartScript() {
        ResourceContext<?> context = getResourceContext();
        SystemInfo systemInfo = context.getSystemInformation();

        if (systemInfo.getOperatingSystemType() == WINDOWS) {
            return "cassandra.bat";
        } else {
            return "cassandra";
        }
    }

    /** @return the configured host name (defaults to "localhost") */
    public String getHost() {
        return host;
    }
}