/*
* RHQ Management Platform
* Copyright (C) 2005-2015 Red Hat, Inc.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
package org.rhq.enterprise.server.storage;
import static org.rhq.server.metrics.StorageClientConstants.DATA_CENTER;
import static org.rhq.server.metrics.StorageClientConstants.LOAD_BALANCING;
import static org.rhq.server.metrics.StorageClientConstants.REQUEST_TIMEOUT_DAMPENING;
import static org.rhq.server.metrics.StorageClientConstants.REQUEST_TOPOLOGY_CHANGE_DELTA;
import static org.rhq.server.metrics.StorageClientConstants.REQUEST_WARMUP_PERIOD;
import static org.rhq.server.metrics.StorageClientConstants.REQUEST_WARMUP_PERIOD_MAX_COUNTER;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import javax.ejb.ConcurrencyManagement;
import javax.ejb.ConcurrencyManagementType;
import javax.ejb.EJB;
import javax.ejb.LocalBean;
import javax.ejb.Singleton;
import javax.ejb.Timeout;
import javax.ejb.Timer;
import javax.ejb.TimerConfig;
import javax.ejb.TimerService;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;
import javax.management.ObjectName;
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.HostDistance;
import com.datastax.driver.core.Metrics;
import com.datastax.driver.core.PoolingOptions;
import com.datastax.driver.core.ProtocolOptions;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.exceptions.NoHostAvailableException;
import com.datastax.driver.core.exceptions.QueryTimeoutException;
import com.datastax.driver.core.policies.DCAwareRoundRobinPolicy;
import com.datastax.driver.core.policies.DefaultRetryPolicy;
import com.datastax.driver.core.policies.LoadBalancingPolicy;
import com.datastax.driver.core.policies.LoggingRetryPolicy;
import com.datastax.driver.core.policies.RoundRobinPolicy;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.rhq.cassandra.schema.SchemaManager;
import org.rhq.cassandra.util.ClusterBuilder;
import org.rhq.core.domain.cloud.StorageNode;
import org.rhq.core.domain.common.composite.SystemSetting;
import org.rhq.core.domain.common.composite.SystemSettings;
import org.rhq.core.util.ObjectNameFactory;
import org.rhq.core.util.PropertiesFileUpdate;
import org.rhq.core.util.exception.ThrowableUtil;
import org.rhq.enterprise.server.auth.SubjectManagerLocal;
import org.rhq.enterprise.server.cloud.StorageNodeManagerLocal;
import org.rhq.enterprise.server.core.CoreServer;
import org.rhq.enterprise.server.measurement.MeasurementScheduleManagerLocal;
import org.rhq.enterprise.server.system.SystemManagerLocal;
import org.rhq.enterprise.server.util.JMXUtil;
import org.rhq.server.metrics.DateTimeService;
import org.rhq.server.metrics.MetricsConfiguration;
import org.rhq.server.metrics.MetricsConstants;
import org.rhq.server.metrics.MetricsDAO;
import org.rhq.server.metrics.MetricsServer;
import org.rhq.server.metrics.StorageSession;
/**
* @author John Sanda
*/
@Singleton
@LocalBean
@ConcurrencyManagement(ConcurrencyManagementType.BEAN)
public class StorageClientManager implements StorageClientManagerMBean{
// JMX object name under which init() registers this bean and shutdown() unregisters it.
private static final ObjectName OBJECT_NAME = ObjectNameFactory.create("rhq:service=StorageClientManager");
private static final Log LOG = LogFactory.getLog(StorageClientManager.class);
// Keyspace the driver session connects to; also passed to the liveness check.
private static final String RHQ_KEYSPACE = "rhq";
@EJB
private SubjectManagerLocal subjectManager;
// Supplies the storage nodes used as contact points in createSession().
@EJB
private StorageNodeManagerLocal storageNodeManager;
// Source of the (obfuscated) storage username/password system settings.
@EJB
private SystemManagerLocal systemManager;
// Provides the install directory used to locate bin/rhq-server.properties.
@EJB
private CoreServer coreServer;
@EJB
private MeasurementScheduleManagerLocal measurementScheduleManager;
// Used to create and cancel the session maintenance interval timer.
@javax.annotation.Resource
private TimerService timerService;
// Driver cluster built by the most recent createSession() call.
private Cluster cluster;
// Wrapper around the driver session; created in init(), re-pointed in refreshSession().
private StorageSession session;
private MetricsConfiguration metricsConfiguration;
private MetricsDAO metricsDAO;
private MetricsServer metricsServer;
// Set to true at the end of a successful init(); reset to false by shutdown() and failed init().
private boolean initialized;
private StorageClusterMonitor storageClusterMonitor;
// Credentials read from system settings by the most recent createSession() call; compared
// against the current settings in refreshCredentialsAndSession().
private String cachedStorageUsername;
private String cachedStoragePassword;
// Metrics object of the current cluster; backs the driver-related JMX attributes.
private Metrics driverMetrics;
// Liveness checker thread started by init() and stopped by shutdown().
private SessionAliveChecker aliveChecker;
/**
 * (Re)schedules the storage session maintenance timer.
 *
 * Every timer currently returned by the timer service is cancelled first so that a webapp
 * reload does not leave duplicate jobs behind. A failure to cancel one timer is logged
 * (including its cause) and does not prevent the remaining timers from being processed.
 */
public void scheduleStorageSessionMaintenance() {
    // each time the webapp is reloaded, we don't want to create duplicate jobs
    for (Timer existingTimer : timerService.getTimers()) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Found timer - attempting to cancel: " + existingTimer);
        }
        try {
            existingTimer.cancel();
        } catch (Exception e) {
            // best effort: keep going, but record why the cancel failed
            LOG.warn("Failed in attempting to cancel timer: " + existingTimer, e);
        }
    }
    // timer that will trigger every 90 seconds after an initial wait of 30 seconds
    timerService.createIntervalTimer(30000L, 90000L, new TimerConfig(null, false));
}
/**
 * Timer callback for storage session upkeep.
 *
 * When the subsystem has not been initialized yet, an initialization attempt is made.
 * Otherwise the stored credentials are compared with the current system settings and the
 * session is refreshed if necessary; the outcome is logged.
 *
 * @param timer the timer that fired (not used)
 */
@Timeout
public void storageSessionMaintenance(Timer timer) {
    if (!initialized) {
        this.init();
        return;
    }

    if (this.refreshCredentialsAndSession()) {
        LOG.debug("Storage session credentials refreshed.");
    } else {
        LOG.error("Storage session credentials not succesfully refreshed!");
    }
}
/**
 * Initializes the storage client subsystem unless that has already happened.
 *
 * On success the session, cluster monitor, metrics configuration, DAO and metrics server are
 * created, this bean is registered in JMX and the session liveness checker thread is started.
 * On any failure the problem is logged, the cluster (if one was created) is shut down and the
 * subsystem stays uninitialized.
 *
 * @return {@code true} if the subsystem was already initialized or has been initialized by this
 *         call, {@code false} if initialization failed
 */
public synchronized boolean init() {
    if (initialized) {
        LOG.debug("Storage client subsystem is already initialized. Skipping initialization.");
        return true;
    }

    LOG.info("Initializing storage client subsystem");
    try {
        Session driverSession = createSession();
        session = new StorageSession(driverSession);

        storageClusterMonitor = new StorageClusterMonitor(session);
        session.addStorageStateListener(storageClusterMonitor);

        metricsConfiguration = new MetricsConfiguration();
        metricsDAO = new MetricsDAO(session, metricsConfiguration);
        initMetricsServer();

        JMXUtil.registerMBean(this, OBJECT_NAME);

        aliveChecker = new SessionAliveChecker(this);
        aliveChecker.setName("StorageNode SessionAliveChecker");
        aliveChecker.start();

        initialized = true;
        LOG.info("Storage client subsystem is now initialized");
    } catch (NoHostAvailableException noHost) {
        initialized = false;
        if (null != cluster) {
            cluster.shutdown();
        }
        LOG.warn("Storage client subsystem wasn't initialized because it wasn't possible to connect to the"
            + " storage cluster. The RHQ server is set to MAINTENANCE mode. Please start the storage cluster"
            + " as soon as possible.", noHost);
    } catch (Throwable failure) {
        initialized = false;
        if (null != cluster) {
            cluster.shutdown();
        }
        LOG.warn("Storage client subsystem wasn't initialized. The RHQ server will be set to MAINTENANCE mode. Please verify "
            + " that the storage cluster is operational.", failure);
    }

    return initialized;
}
/**
 * Compares the storage credentials in the system settings with the ones cached when the
 * current session was created.
 *
 * If neither differs, the current session is kept. If the username or the password has
 * changed (a {@code null} setting is never treated as a change), a new session is created
 * with the new credentials and registered with the session manager.
 *
 * @return {@code true} if no new session was needed or one was created successfully;
 *         {@code false} if the subsystem is not initialized or the refresh failed
 */
public synchronized boolean refreshCredentialsAndSession() {
    if (!initialized) {
        LOG.debug("Storage client subsystem not initialized. Skipping session refresh.");
        return false;
    }

    SystemSettings currentSettings = systemManager.getObfuscatedSystemSettings(true);
    String username = currentSettings.get(SystemSetting.STORAGE_USERNAME);
    String password = currentSettings.get(SystemSetting.STORAGE_PASSWORD);

    boolean usernameChanged = username != null && !username.equals(this.cachedStorageUsername);
    boolean passwordChanged = password != null && !password.equals(this.cachedStoragePassword);

    if (!usernameChanged && !passwordChanged) {
        return true;
    }
    return refreshSession();
}
/**
 * Creates a fresh driver session and swaps it into the existing {@code StorageSession}.
 *
 * After the swap a new cluster monitor is attached as storage state listener and the DAO's
 * prepared statements are re-initialized. If no storage host can be reached, the cluster is
 * shut down, a warning is logged and nothing is swapped.
 *
 * @return {@code true} on success, {@code false} if no storage host was available
 */
private synchronized boolean refreshSession() {
    Session replacement;
    try {
        replacement = createSession();
        initialized = true;
    } catch (NoHostAvailableException noHost) {
        if (null != cluster) {
            cluster.shutdown();
        }
        LOG.warn("Storage client subsystem wasn't initialized because it wasn't possible to connect to the"
            + " storage cluster. The RHQ server is set to MAINTENANCE mode. Please start the storage cluster"
            + " as soon as possible.", noHost);
        return false;
    }

    session.registerNewSession(replacement);

    storageClusterMonitor = new StorageClusterMonitor(session);
    session.addStorageStateListener(storageClusterMonitor);

    metricsDAO.initPreparedStatements();
    return true;
}
/**
 * Runs the schema compatibility check against the given storage nodes.
 *
 * The addresses of all nodes and the CQL port of the first node are handed to a
 * {@code SchemaManager}, which is always shut down afterwards.
 *
 * @param username storage cluster username
 * @param password storage cluster password
 * @param storageNodes storage nodes to check; the first element supplies the CQL port
 * @throws NoHostAvailableException rethrown unchanged when raised by the check
 * @throws RuntimeException wrapping any other exception raised by the check
 */
private void checkSchemaCompability(String username, String password, List<StorageNode> storageNodes) {
    String[] addresses = new String[storageNodes.size()];
    int position = 0;
    for (StorageNode node : storageNodes) {
        addresses[position++] = node.getAddress();
    }
    int cqlPort = storageNodes.get(0).getCqlPort();

    SchemaManager schemaManager = new SchemaManager(username, password, addresses, cqlPort);
    try {
        schemaManager.checkCompatibility();
    } catch (NoHostAvailableException noHost) {
        // callers handle this one specifically, so it must not be wrapped
        throw noHost;
    } catch (Exception other) {
        throw new RuntimeException(other.getMessage(), other);
    } finally {
        schemaManager.shutdown();
    }
}
/**
 * Shuts down the storage client subsystem and releases its resources.
 *
 * Stops the liveness checker thread, shuts down the metrics server and the cluster, clears the
 * session-related fields, unregisters the JMX bean and marks the subsystem as uninitialized.
 * Safe to call when {@link #init()} never completed successfully.
 */
public synchronized void shutdown() {
    LOG.info("Shutting down storage client subsystem");

    // The checker only exists after a successful init(); without this guard a shutdown of a
    // never-initialized subsystem would fail with a NullPointerException and skip all cleanup.
    if (aliveChecker != null) {
        aliveChecker.shutdown();
        aliveChecker = null;
    }

    if (metricsServer != null) {
        metricsServer.shutdown();
        metricsServer = null;
    }
    metricsDAO = null;

    try {
        if (cluster != null) {
            cluster.shutdown();
        }
    } catch (Exception e) {
        LOG.error("Failed to shutdown the cluster connection manager for the storage cluster.", e);
    }
    cluster = null;
    session = null;

    JMXUtil.unregisterMBeanQuietly(OBJECT_NAME);
    initialized = false;
}
/**
* @return the metrics DAO created by {@code init()}; {@code null} before a DAO was created and after {@code shutdown()}
*/
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public MetricsDAO getMetricsDAO() {
return metricsDAO;
}
/**
* @return the metrics server created during initialization; {@code null} before it was created and after {@code shutdown()}
*/
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public MetricsServer getMetricsServer() {
return metricsServer;
}
/**
* @return the storage session wrapper created by {@code init()}; {@code null} before a session was created and after {@code shutdown()}
*/
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public StorageSession getSession() {
return session;
}
/**
* @return the metrics configuration created by {@code init()}, or {@code null} if none was created yet
*/
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public MetricsConfiguration getMetricsConfiguration() {
return metricsConfiguration;
}
/**
* @return {@code true} if a cluster monitor exists and reports the cluster as available; {@code false} otherwise
*/
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public boolean isClusterAvailable() {
return storageClusterMonitor != null && storageClusterMonitor.isClusterAvailable();
}
/**
* @return the batch size currently set on the metrics server's aggregation manager
*/
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public int getAggregationBatchSize() {
return metricsServer.getAggregationManager().getBatchSize();
}
/**
* Applies the batch size to the aggregation manager and then persists it to the server properties file.
*
* @param batchSize the new aggregation batch size
*/
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public void setAggregationBatchSize(int batchSize) {
metricsServer.getAggregationManager().setBatchSize(batchSize);
persistStorageProperty(MetricsConstants.AGGREGATION_BATCH_SIZE, Integer.toString(batchSize));
}
/**
* @return the parallelism currently set on the metrics server's aggregation manager
*/
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public int getAggregationParallelism() {
return metricsServer.getAggregationManager().getParallelism();
}
/**
* Applies the parallelism to the aggregation manager and then persists it to the server properties file.
*
* @param parallelism the new aggregation parallelism
*/
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public void setAggregationParallelism(int parallelism) {
metricsServer.getAggregationManager().setParallelism(parallelism);
persistStorageProperty(MetricsConstants.AGGREGATION_PARALLELISM, Integer.toString(parallelism));
}
/**
* @return the number of workers currently set on the metrics server's aggregation manager
*/
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public int getAggregationWorkers() {
return metricsServer.getAggregationManager().getNumWorkers();
}
/**
* Applies the worker count to the aggregation manager and then persists it to the server properties file.
*
* @param numWorkers the new number of aggregation workers
*/
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public void setAggregationWorkers(int numWorkers) {
metricsServer.getAggregationManager().setNumWorkers(numWorkers);
persistStorageProperty(MetricsConstants.AGGREGATION_WORKERS, Integer.toString(numWorkers));
}
/**
* @return the raw data age limit currently set on the metrics server
*/
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public int getRawDataAgeLimit() {
return metricsServer.getRawDataAgeLimit();
}
/**
* Applies the raw data age limit to the metrics server and then persists it to the server properties file.
*
* @param ageLimit the new raw data age limit
*/
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public void setRawDataAgeLimit(int ageLimit) {
metricsServer.setRawDataAgeLimit(ageLimit);
persistStorageProperty(MetricsConstants.RAW_DATA_AGE_LIMIT, Integer.toString(ageLimit));
}
/**
* @return the request limit reported by the storage session
*/
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public double getRequestLimit() {
return session.getRequestLimit();
}
/**
* @return the topology delta currently set on the storage session
*/
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public double getRequestLimitTopologyDelta() {
return session.getTopologyDelta();
}
/**
* Sets the warm-up time period on the storage session and then persists it to the server properties file.
*
* @param requestWarmupPeriod the new warm-up period
*/
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public void setRequestWarmupPeriod(int requestWarmupPeriod) {
session.setWarmupTimePeriod(requestWarmupPeriod);
persistStorageProperty(REQUEST_WARMUP_PERIOD, Integer.toString(requestWarmupPeriod));
}
/**
* @return the warm-up time period currently set on the storage session
*/
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public int getRequestWarmupPeriod() {
return session.getWarmupTimePeriod();
}
/**
* Sets the maximum warm-up counter on the storage session and then persists it to the server properties file.
*
* @param requestWarmupCounterMaximum the new maximum warm-up counter
*/
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public void setRequestWarmupCounterMaximum(int requestWarmupCounterMaximum) {
session.setMaxWarmupCounter(requestWarmupCounterMaximum);
persistStorageProperty(REQUEST_WARMUP_PERIOD_MAX_COUNTER, Integer.toString(requestWarmupCounterMaximum));
}
/**
* @return the maximum warm-up counter currently set on the storage session
*/
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public int getRequestWarmupCounterMaximum() {
return session.getMaxWarmupCounter();
}
/**
* @return the value of the storage session's {@code getPreviousWarmupTime()}
*/
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public int getCurrentWarmupTime() {
return session.getPreviousWarmupTime();
}
/**
* Sets the topology delta on the storage session and then persists it to the server properties file.
*
* @param delta the new topology delta
*/
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public void setRequestLimitTopologyDelta(double delta) {
session.setTopologyDelta(delta);
persistStorageProperty(REQUEST_TOPOLOGY_CHANGE_DELTA, Double.toString(delta));
}
/**
* @return the timeout dampening value currently set on the storage session
*/
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public long getRequestTimeoutDampening() {
return session.getTimeoutDampening();
}
/**
* Sets the timeout dampening on the storage session and then persists it to the server properties file.
*
* @param requestTimeoutDampening the new timeout dampening value
*/
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public void setRequestTimeoutDampening(long requestTimeoutDampening) {
session.setTimeoutDampening(requestTimeoutDampening);
persistStorageProperty(REQUEST_TIMEOUT_DAMPENING, Long.toString(requestTimeoutDampening));
}
/**
* @return the read timeout count from the driver's error metrics
*/
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public long getReadRequestTimeouts() {
return driverMetrics.getErrorMetrics().getReadTimeouts().count();
}
/**
* @return the write timeout count from the driver's error metrics
*/
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public long getWriteRequestTimeouts() {
return driverMetrics.getErrorMetrics().getWriteTimeouts().count();
}
/**
* @return the count recorded by the driver's requests timer
*/
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public long getTotalRequests() {
return driverMetrics.getRequestsTimer().count();
}
/**
* @return the retry count from the driver's error metrics
*/
@Override
public long getRetries() {
return driverMetrics.getErrorMetrics().getRetries().count();
}
/**
* @return the connection error count from the driver's error metrics
*/
@Override
public long getConnectionErrors() {
return driverMetrics.getErrorMetrics().getConnectionErrors().count();
}
/**
 * Stores a single property in the server properties file.
 *
 * Nothing is written when the {@code running.itests-2} system property is {@code true}. An I/O
 * failure during the update is logged as a warning (with its root cause) and not propagated.
 *
 * @param key name of the property to write
 * @param value value to store for the property
 */
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public void persistStorageProperty(String key, String value) {
    // When running itests-2, there is no server props file, so avoid logging a confusing exception
    boolean runningItests = Boolean.getBoolean("running.itests-2");
    if (!runningItests) {
        String propsPath = getServerPropsFile().getAbsolutePath();
        PropertiesFileUpdate propsFile = new PropertiesFileUpdate(propsPath);
        try {
            propsFile.update(key, value);
        } catch (IOException ioFailure) {
            // TODO should we propagate the exception?
            LOG.warn("Failed to persist property " + key + " due to unexpected I/O error",
                ThrowableUtil.getRootCause(ioFailure));
        }
    }
}
/**
 * @return the {@code rhq-server.properties} file inside the {@code bin} directory of the
 *         server install directory
 */
private File getServerPropsFile() {
    return new File(new File(coreServer.getInstallDir(), "bin"), "rhq-server.properties");
}
/**
* Builds a new driver cluster from the storage nodes known to the server and opens a session on
* the {@code rhq} keyspace.
*
* Side effects: overwrites {@code cachedStorageUsername}, {@code cachedStoragePassword},
* {@code cluster} and {@code driverMetrics}. A cluster previously stored in {@code cluster} is
* replaced here without being shut down by this method.
*
* @return a driver session connected to the RHQ keyspace
* @throws IllegalStateException if no eligible storage node is found
* @throws NoHostAvailableException rethrown from the schema compatibility check; presumably also
* raised by the driver when connecting fails (callers catch it)
*/
private Session createSession() {
// Always read the credentials from the DB; system properties may not be up to date at install time.
// The code assumes the passwords are obfuscated, because they can also arrive that way from other
// sources (like property files). So let's make our lives easy and always use obfuscated passwords.
SystemSettings settings = systemManager.getObfuscatedSystemSettings(true);
this.cachedStorageUsername = settings.get(SystemSetting.STORAGE_USERNAME);
this.cachedStoragePassword = settings.get(SystemSetting.STORAGE_PASSWORD);
List<StorageNode> storageNodes = new ArrayList<StorageNode>();
for (StorageNode storageNode : storageNodeManager.getStorageNodes()) {
// We only want clustered nodes here because we won't be able to connect to a
// node that is not part of the cluster. The filtering on the operation mode
// is somewhat conservative because we could also include ADD_MAINTENANCE
// and REMOVE_MAINTENANCE, but this errs on the side of being safe. Lastly,
// if a storage node does not have a resource, then that means it was
// deployed prior to installing the server.
if (storageNode.getOperationMode() == StorageNode.OperationMode.NORMAL
|| storageNode.getOperationMode() == StorageNode.OperationMode.MAINTENANCE
|| storageNode.getResource() == null) {
storageNodes.add(storageNode);
}
}
if (storageNodes.isEmpty()) {
throw new IllegalStateException(
"There is no storage node metadata stored in the relational database. This may have happened as a "
+ "result of running dbsetup or deleting rows from rhq_storage_node table. Please re-install the "
+ "storage node to fix this issue.");
}
// Verify schema compatibility before building the cluster; at this point the cluster field
// still holds whatever a previous call assigned.
checkSchemaCompability(this.cachedStorageUsername, this.cachedStoragePassword, storageNodes);
LOG.debug("Initializing session to connect to storage node cluster");
List<String> hostNames = new ArrayList<String>();
for (StorageNode storageNode : storageNodes) {
hostNames.add(storageNode.getAddress());
}
// The CQL port of the first node is used for all contact points.
int port = storageNodes.get(0).getCqlPort();
cluster = new ClusterBuilder().addContactPoints(hostNames.toArray(new String[hostNames.size()]))
.withCredentialsObfuscated(this.cachedStorageUsername, this.cachedStoragePassword).withPort(port)
.withLoadBalancingPolicy(getLoadBalancingPolicy())
.withRetryPolicy(new LoggingRetryPolicy(DefaultRetryPolicy.INSTANCE)).withCompression(
ProtocolOptions.Compression.NONE).build();
driverMetrics = cluster.getMetrics();
// Connection pool sizes can be overridden through system properties; the defaults are the
// second argument of each getProperty call. A non-numeric value makes parseInt throw.
PoolingOptions poolingOptions = cluster.getConfiguration().getPoolingOptions();
poolingOptions.setCoreConnectionsPerHost(HostDistance.LOCAL, Integer.parseInt(
System.getProperty("rhq.storage.client.local-connections", "24")));
poolingOptions.setCoreConnectionsPerHost(HostDistance.REMOTE, Integer.parseInt(
System.getProperty("rhq.storage.client.remote-connections", "16")));
poolingOptions.setMaxConnectionsPerHost(HostDistance.LOCAL, Integer.parseInt(
System.getProperty("rhq.storage.client.max-local-connections", "32")));
poolingOptions.setMaxConnectionsPerHost(HostDistance.REMOTE, Integer.parseInt(
System.getProperty("rhq.storage.client.max-remote-connections", "24")));
return cluster.connect(RHQ_KEYSPACE);
}
/**
 * Selects the driver load balancing policy from system properties.
 *
 * The policy name is read from the {@code LOAD_BALANCING} system property:
 * unset or {@code RoundRobin} selects round robin; {@code DCAwareRoundRobin} selects the
 * data-center-aware policy using the {@code DATA_CENTER} system property. If the data center
 * is undefined or the policy name is not supported, a warning is logged and round robin is used.
 *
 * @return the load balancing policy to configure on the cluster; never {@code null}
 */
private LoadBalancingPolicy getLoadBalancingPolicy() {
    String policy = System.getProperty(LOAD_BALANCING);
    if (policy == null || policy.equals("RoundRobin")) {
        return new RoundRobinPolicy();
    }

    if (policy.equals("DCAwareRoundRobin")) {
        String dataCenter = System.getProperty(DATA_CENTER);
        if (dataCenter == null) {
            // the two sentences used to be concatenated without a separating space
            LOG.warn(policy + " was specified for " + LOAD_BALANCING + " but " + DATA_CENTER
                + " is undefined. Reverting to RoundRobin load balancing policy.");
            return new RoundRobinPolicy();
        }
        return new DCAwareRoundRobinPolicy(dataCenter);
    }

    LOG.warn(policy + " is not a supported load balancing policy. Reverting to RoundRobin load balancing policy.");
    return new RoundRobinPolicy();
}
/**
 * Creates the metrics server, wires it to the current DAO, the metrics configuration and a
 * date/time service that shares that configuration, and then initializes it.
 */
private void initMetricsServer() {
    if (LOG.isDebugEnabled()) {
        String serverClassName = MetricsServer.class.getName();
        LOG.debug("Initializing " + serverClassName);
    }

    metricsServer = new MetricsServer();
    metricsServer.setDAO(metricsDAO);
    metricsServer.setConfiguration(metricsConfiguration);

    DateTimeService timeService = new DateTimeService();
    timeService.setConfiguration(metricsConfiguration);
    metricsServer.setDateTimeService(timeService);

    metricsServer.init();
}
/**
* @return the current value of the driver's connected-to-hosts metric
*/
@Override
public int getConnectedToHosts() {
return driverMetrics.getConnectedToHosts().value();
}
/**
* @return the current value of the driver's known-hosts metric
*/
@Override
public int getKnownHosts() {
return driverMetrics.getKnownHosts().value();
}
/**
* @return the current value of the driver's open-connections metric
*/
@Override
public int getOpenConnections() {
return driverMetrics.getOpenConnections().value();
}
/**
* @return the one-minute rate of the driver's requests timer
*/
@Override
public double getOneMinuteAvgRate() {
return driverMetrics.getRequestsTimer().oneMinuteRate();
}
/**
* @return the five-minute rate of the driver's requests timer
*/
@Override
public double getFiveMinuteAvgRate() {
return driverMetrics.getRequestsTimer().fiveMinuteRate();
}
/**
* @return the fifteen-minute rate of the driver's requests timer
*/
@Override
public double getFifteenMinuteAvgRate() {
return driverMetrics.getRequestsTimer().fifteenMinuteRate();
}
/**
* @return the mean rate of the driver's requests timer
*/
@Override
public double getMeanRate() {
return driverMetrics.getRequestsTimer().meanRate();
}
/**
* @return the mean value recorded by the driver's requests timer
*/
@Override
public double getMeanLatency() {
return driverMetrics.getRequestsTimer().mean();
}
/**
* @return the available queue capacity reported by the metrics server
*/
@Override
public int getQueueAvailableCapacity() {
return metricsServer.getQueueAvailableCapacity();
}
/**
* A thread that checks for liveness of the given session.
*/
private static class SessionAliveChecker extends Thread {
private static long SLEEP_TIME = 5000L;
private static long EXTENDED_SLEEP = SLEEP_TIME * 5;
private static int ALLOWED_FAILS = 2;
private final StorageClientManager storageClientManager;
private boolean alive = true;
private int fails = 0;
public SessionAliveChecker(StorageClientManager manager) {
this.storageClientManager = manager;
}
@Override
public void run() {
while(alive) {
try {
Thread.sleep(SLEEP_TIME);
try {
storageClientManager.getMetricsDAO().checkLiveness(RHQ_KEYSPACE);
if(fails > 0) {
// Query succeeded, set fails to 0
fails = 0;
}
} catch(QueryTimeoutException e) {
LOG.error("Storage node connection check timed out");
} catch(NoHostAvailableException e) {
fails++;
if(fails >= ALLOWED_FAILS) {
LOG.error("Failed to contact the storage node for live check, recreating connection session");
// We have lost the connection to the storage node, refresh and try again..
storageClientManager.refreshSession();
Thread.sleep(EXTENDED_SLEEP); // Sleep for a longer time to allow storage node to restart
}
}
} catch (InterruptedException e) {
// Alive should be false in the next iteration if shutdown was called
} catch(Exception e) {
LOG.error("AliveCheck thread run into an unexpected exception: " + e.getLocalizedMessage());
}
}
}
public void shutdown() {
this.alive = false;
this.interrupt();
}
}
}