/* * RHQ Management Platform * Copyright (C) 2005-2013 Red Hat, Inc. * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation version 2 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ package org.rhq.cassandra; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.Map; import java.util.Queue; import java.util.Set; import javax.management.MBeanServerConnection; import javax.management.ObjectName; import javax.management.remote.JMXConnector; import javax.management.remote.JMXConnectorFactory; import javax.management.remote.JMXServiceURL; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * This class provides operations to ensure a cluster is initialized and in a consistent * state. It does not offer functionality for initializing a cluster but rather to make * sure that nodes have started up and are accepting client connections for example. * * @author John Sanda * @author Jirka Kremser */ public final class ClusterInitService { private final Log log = LogFactory.getLog(ClusterInitService.class); private static final String JMX_CONNECTION_STRING = "service:jmx:rmi:///jndi/rmi://%s:%s/jmxrmi"; /** * Sleep; if interrupted, throw a RuntimeException. */ private static void sleep(long time) { try { Thread.sleep(time); } catch (InterruptedException ex) { Thread.currentThread().interrupt(); throw new RuntimeException(ex); } } /** * Pings the storage nodes to verify if they are available and native transport * is running. * * @param storageNodes storage node addresses * @param jmxPorts JMX ports * @param numHosts minimum number of active hosts * * @return [true] cluster available with at least minimum number of hosts available, [false] otherwise */ public boolean ping(String[] storageNodes, int[] jmxPorts, int numHosts) { int connections = 0; long sleep = 100; for (int index = 0; index < jmxPorts.length; index++) { try { boolean isNativeTransportRunning = this.isNativeTransportRunning(storageNodes[index], jmxPorts[index]); if (isNativeTransportRunning) { ++connections; } if (connections == numHosts) { return true; } } catch (Exception e) { if (log.isDebugEnabled()) { log.debug("Unable to open JMX connection on port [" + jmxPorts[index] + "] to cassandra node [" + storageNodes[index] + "]", e); } return false; } sleep(sleep); } return true; } /** * This method attempts to establish a Thrift RPC connection to each host. If the * connection fails, the host is retried after going through the other, remaining * hosts. A runtime exception will be thrown after 10 failed retries. * <br/><br/> * After connecting to all nodes, this method will then sleep for a fixed delay. * See {@link #waitForClusterToStart(int, java.util.List, int)} for details. * @param storageNodes The cluster nodes to which a connection should be made * @param jmxPorts JMX port for each cluster node address */ public void waitForClusterToStart(String[] storageNodes, int jmxPorts[]) { waitForClusterToStart(storageNodes, jmxPorts, storageNodes.length, 10); } /** * This method attempts to establish a Thrift RPC connection to each host for the * number specified. In other words, if there are four hosts and <code>numHosts</code> * is 2, this method will block only until it can connect to two of the hosts. If the * connection fails, the host is retried after going through the other, remaining * hosts. * <br/><br/> * After connecting to all cluster nodes, this method will sleep for 10 seconds * before returning. This is to give the cluster a chance to create the system auth * schema and to create the cassandra super user. Cassandra has a hard-coded delay of * 10 sceonds before it creates the super user, which means the rhq schema cannot be * created before that. * @param numHosts The number of hosts to which a successful connection has to be made * before returning. * @param retries The number of times to retry connecting. A runtime exception will be * thrown when the number of failed connections exceeds this value. */ public void waitForClusterToStart(String[] storageNodes, int jmxPorts[], int numHosts, int retries) { waitForClusterToStart(storageNodes, jmxPorts, numHosts, 250, retries, 1); } /** * This method attempts to establish a Thrift RPC connection to each host for the * number specified. In other words, if there are four hosts and <code>numHosts</code> * is 2, this method will block only until it can connect to two of the hosts. If the * connection fails, the host is retried after going through the other, remaining * hosts. * <br/><br/> * After connecting to all cluster nodes, this method will sleep for 10 seconds * before returning. This is to give the cluster a chance to create the system auth * schema and to create the cassandra super user. Cassandra has a hard-coded delay of * 10 sceonds before it creates the super user, which means the rhq schema cannot be * created before that. * @param numHosts The number of hosts to which a successful connection has to be made * before returning. * @param delay The amount of time wait between attempts to make a connection * @param retries The number of times to retry connecting. A runtime exception will be * thrown when the number of failed connections exceeds this value. * @param initialWait The amount of seconds before first try. */ public void waitForClusterToStart(String[] storageNodes, int jmxPorts[], int numHosts, long delay, int retries, int initialWait) { if (initialWait > 0) { if (log.isDebugEnabled()) { log.debug("Waiting before JMX calls to the storage nodes for " + initialWait + " seconds..."); } sleep(initialWait * 1000); } int connections = 0; int failedConnections = 0; Queue<Integer> queue = new LinkedList<Integer>(); for (int index = 0; index < storageNodes.length; index++) { queue.add(index); } Integer storageNodeIndex = queue.poll(); while (storageNodeIndex != null) { if (failedConnections >= retries) { throw new RuntimeException("Unable to verify that cluster nodes have started after " + failedConnections + " failed attempts"); } try { boolean isNativeTransportRunning = isNativeTransportRunning(storageNodes[storageNodeIndex], jmxPorts[storageNodeIndex]); if (log.isDebugEnabled() && isNativeTransportRunning) { log.debug("Successfully connected to cassandra node [" + storageNodes[storageNodeIndex] + "]"); } if (isNativeTransportRunning) { ++connections; } else { queue.offer(storageNodeIndex); } if (connections == numHosts) { if (log.isDebugEnabled()) { log.debug("Successdully connected to all nodes. Sleeping for 10 seconds to allow for the " + "cassandra superuser set up to complete."); } sleep(10 * 1000); return; } } catch (Exception e) { ++failedConnections; queue.offer(storageNodeIndex); if (log.isDebugEnabled()) { log.debug("Unable to open JMX connection on port [" + jmxPorts[storageNodeIndex] + "] to cassandra node [" + storageNodes[storageNodeIndex] + "].", e); } else if (log.isInfoEnabled()) { log.debug("Unable to open connection to cassandra node."); } } sleep(delay); storageNodeIndex = queue.poll(); } } /** * Waits for the cluster to reach schema agreement. During cluster initialization * before and while schema changes propagate throughout the cluster, there could be * multiple schema versions found among nodes. Schema agreement is reached when there * is a single schema version and all nodes are on that version. * * @param hosts The cluster nodes */ public void waitForSchemaAgreement(String[] storageNodes, int[] jmxPorts) throws Exception { if (storageNodes == null || storageNodes.length == 0) { return; } long sleep = 100L; boolean schemaInAgreement = false; while (!schemaInAgreement) { Set<String> schemaVersions = new HashSet<String>(); for (int index = 0; index < storageNodes.length; index++) { String otherSchchemaVersion = getSchemaVersionForNode(storageNodes[index], jmxPorts[index]); if (otherSchchemaVersion != null) { schemaVersions.add(otherSchchemaVersion); } } if (schemaVersions.size() > 1) { if (log.isInfoEnabled()) { log.info("Schema agreement has not been reached. Found " + schemaVersions.size() + " schema versions"); } if (log.isDebugEnabled()) { log.debug("Found the following schema versions: " + schemaVersions); } sleep(sleep); } else { String schemaVersion = schemaVersions.iterator().next(); if (schemaVersion != null) { schemaInAgreement = true; } else { if (log.isInfoEnabled()) { log.info("Schema agreement has not been reached. Unable to get the schema version from cassandra nodes [" + storageNodes + "]"); } sleep(sleep); } } } } public boolean isNativeTransportRunning(String storageNode, int jmxPort) throws Exception { Boolean nativeTransportRunning = false; String url = getJMXConnectionURL(storageNode, jmxPort); JMXServiceURL serviceURL = new JMXServiceURL(url); Map<String, String> env = new HashMap<String, String>(); JMXConnector connector = null; try { connector = JMXConnectorFactory.connect(serviceURL, env); MBeanServerConnection serverConnection = connector.getMBeanServerConnection(); ObjectName storageService = new ObjectName("org.apache.cassandra.db:type=StorageService"); String attribute = "NativeTransportRunning"; try { nativeTransportRunning = (Boolean) serverConnection.getAttribute(storageService, attribute); } catch (Exception e) { // It is ok to just catch and log exceptions here particularly in an integration // test environment where we could potentially try to do the JMX query before // Cassandra is fully initialized. We can query StorageService before the native // transport server is initialized which will result in Cassandra throwing a NPE. // We do not want propagate that exception because it is just a matter of waiting // for Cassandra to finish initializing. if (log.isDebugEnabled()) { log.debug("Failed to read attribute [" + attribute + "] from " + storageService, e); } else { log.info("Faied to read attribute [" + attribute + "] from " + storageService + ": " + e.getMessage()); } } } finally { if (connector != null) { connector.close(); } } return nativeTransportRunning; } private String getSchemaVersionForNode(String storageNode, int jmxPort) throws Exception { String url = this.getJMXConnectionURL(storageNode, jmxPort); JMXServiceURL serviceURL = new JMXServiceURL(url); Map<String, String> env = new HashMap<String, String>(); JMXConnector connector = null; try { connector = JMXConnectorFactory.connect(serviceURL, env); MBeanServerConnection serverConnection = connector.getMBeanServerConnection(); ObjectName storageService = new ObjectName("org.apache.cassandra.db:type=StorageService"); String attribute = "SchemaVersion"; try { return (String) serverConnection.getAttribute(storageService, attribute); } catch (Exception e) { // It is ok to just catch and log exceptions here particularly in an integration // test environment where we could potentially try to do the JMX query before // Cassandra is fully initialized. We can query StorageService before the native // transport server is initialized which will result in Cassandra throwing a NPE. // We do not want propagate that exception because it is just a matter of waiting // for Cassandra to finish initializing. if (log.isDebugEnabled()) { log.debug("Failed to read attribute [" + attribute + "] from " + storageService, e); } else { log.info("Faied to read attribute [" + attribute + "] from " + storageService + ": " + e.getMessage()); } } } finally { if (connector != null) { connector.close(); } } return null; } /** * Constructs the JMX connection URL based on the node address and * JMX port * * @param address * @param jmxPort * @return */ private String getJMXConnectionURL(String address, int jmxPort) { String[] split = JMX_CONNECTION_STRING.split("%s"); return split[0] + address + split[1] + jmxPort + split[2]; } }