/* * Copyright (c) 2008-2014 EMC Corporation * All Rights Reserved */ package com.emc.storageos.geo.vdccontroller.impl; import java.io.IOException; import java.net.InetAddress; import java.net.URI; import java.net.UnknownHostException; import java.util.*; import java.util.concurrent.TimeUnit; import javax.management.JMX; import javax.management.MBeanServerConnection; import javax.management.MalformedObjectNameException; import javax.management.ObjectName; import javax.management.remote.JMXConnector; import javax.management.remote.JMXConnectorFactory; import javax.management.remote.JMXServiceURL; import com.emc.storageos.coordinator.client.model.Constants; import com.emc.storageos.coordinator.client.model.Site; import com.emc.storageos.coordinator.client.service.DrUtil; import com.emc.storageos.management.jmx.recovery.DbManagerOps; import com.emc.vipr.model.sys.recovery.DbRepairStatus; import com.netflix.astyanax.Keyspace; import com.netflix.astyanax.connectionpool.exceptions.ConnectionException; import com.netflix.astyanax.ddl.KeyspaceDefinition; import org.apache.cassandra.locator.EndpointSnitchInfoMBean; import org.apache.cassandra.service.StorageServiceMBean; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.emc.storageos.db.client.impl.DbClientContext; import com.emc.storageos.db.client.impl.DbClientImpl; import com.emc.storageos.db.client.model.DataObject; import com.emc.storageos.db.client.model.VirtualDataCenter; import com.emc.storageos.db.client.model.VirtualDataCenter.ConnectionStatus; import com.emc.storageos.db.client.model.VirtualDataCenter.GeoReplicationStatus; import com.emc.storageos.db.client.util.KeyspaceUtil; import com.emc.storageos.db.common.VdcUtil; import com.emc.storageos.db.server.geo.GeoInternodeAuthenticatorMBean; /** * Internal db client used for geosvc */ public class InternalDbClient extends DbClientImpl { private static final Logger log = LoggerFactory.getLogger(InternalDbClient.class); private static final int WAIT_INTERVAL_IN_SEC = 60; private static final int DB_RING_TIMEOUT = 10 * 60 * 1000; // 10 mins private static final int DB_STABLE_TIMEOUT = 30 * 60 * 1000; // 30 mins private static final int WAIT_QUERY_NODE_REPAIR_BEGIN = 5 * 60 * 1000; // query every 5min private static final int WAIT_QUERY_NODE_REPAIR_PROGRESS = 5 * 1000; // query every 5S private static String LOCALHOST = "127.0.0.1"; private DrUtil drUtil; @Override public synchronized void start() { super.start(); drUtil = new DrUtil(this.getCoordinatorClient()); } @Deprecated public String getMyVdcId() { return VdcUtil.getLocalShortVdcId(); } /** * Initialize local db context only. Geodb context will be initialized on demand */ protected void setupContext() { if (localContext != null) { setupContext(localContext, Constants.DBSVC_NAME); } } protected Keyspace getGeoKeyspace() { if (geoContext != null && !geoContext.isInitDone()) { setupContext(geoContext, Constants.GEODBSVC_NAME); } return geoContext.getKeyspace(); } @Override public <T extends DataObject> Keyspace getKeyspace(Class<T> clazz) { DbClientContext ctx = null; if (localContext == null || geoContext == null) { throw new IllegalStateException(); } ctx = KeyspaceUtil.isGlobal(clazz) ? geoContext : localContext; if (!ctx.isInitDone()) { String serviceName = ctx.equals(geoContext) ? Constants.GEODBSVC_NAME : Constants.DBSVC_NAME; log.info("Initialize db context {}", serviceName); setupContext(ctx, serviceName); } return ctx.getKeyspace(); } /** * Waits for the db instances joined in all sites */ public void waitAllSitesDbStable() { String prefix = "Waiting for DB cluster become stable on all sites ..."; log.info(prefix); DbJmxClient geoInstance = getJmxClient(LOCALHOST); // Loop all VDC List<URI> vdcIdIter = queryByType(VirtualDataCenter.class, true); for (URI vdcId : vdcIdIter) { log.info("loop db status check on {}", vdcId.toString()); VirtualDataCenter vdc = queryObject(VirtualDataCenter.class, vdcId); // filter out vdcs that are not connected in geo if (!shouldCheckDbStatus(vdc)) { log.error("ignore vdc for db status check {}", vdcId); continue; } if (vdc.getConnectionStatus() != ConnectionStatus.DISCONNECTED) { Site activeSite = drUtil.getActiveSite(vdc.getShortId()); waitDbNodesStable(geoInstance, vdc.getShortId(), activeSite.getNodeCount()); // short Id } } } /** * Check if we need wait for geodbsvc up on given vdc */ private boolean shouldCheckDbStatus(VirtualDataCenter vdc) { // local vdc is always connected with itself if (vdc.getLocal()) { return true; } // incomplete vdc record if (vdc.getShortId() == null) { log.error("invalid record in db status check {}", vdc.getId()); return false; } ConnectionStatus connStatus = vdc.getConnectionStatus(); GeoReplicationStatus repStatus = vdc.getRepStatus(); log.info("vdc connectionStatus {} repStatus {}", connStatus, repStatus); // geodb connected if (repStatus.equals(GeoReplicationStatus.REP_ALL)) { log.info("vdc {}, repStatus {}", vdc.getId(), repStatus); return true; } // connecting now, check db stable status as well if (connStatus.equals(ConnectionStatus.CONNECTING_SYNCED)) { return true; } return false; } /** * Stop gossiping on geodb of current cluster */ public void stopClusterGossiping() { DbJmxClient localClient = getJmxClient(LOCALHOST); List<String> liveNodes = localClient.getDcLiveNodes(VdcUtil.getLocalShortVdcId()); for (String ip : liveNodes) { log.info("Stop gossiping for {}", ip); ip = getEffectiveAddress(ip); try { DbJmxClient client = getJmxClient(ip); client.stopGossiping(); } catch (Exception ignored) { log.error("Ignored: stop gossiping failed on node {}", ip); } } } private DbJmxClient getJmxClient(String ip) { ip = getEffectiveAddress(ip); DbJmxClient geoInstance = null; try { geoInstance = new DbJmxClient(ip, DbJmxClient.DEFAULTGEOPORT); } catch (Exception e) { throw new IllegalStateException(String.format("Not able to connect via JMX %s", ip)); } return geoInstance; } private String getEffectiveAddress(String ip) { if ((!ip.startsWith("[")) && ip.contains(":")) { ip = "[" + ip + "]"; } return ip; } /** * Wait for num of db instances joined in a vdc. * * @param geoInstance jmx client * @param vdcShortId the short id of vdc * @param vdcHosts the total hosts of a vdc */ public void waitDbNodesStable(DbJmxClient geoInstance, String vdcShortId, int vdcHosts) { String prefix = "Waiting for DB cluster become stable for VDC with shortId ' " + vdcShortId + "'..."; log.info(prefix); long start = System.currentTimeMillis(); // quorum + 1 // it ensure at least quorum nodes data rebuild done int numHosts = (vdcHosts / 2 + 2) > vdcHosts ? vdcHosts : vdcHosts / 2 + 2; while (System.currentTimeMillis() - start < DB_STABLE_TIMEOUT) { try { List<String> liveNodes = geoInstance.getDcLiveNodes(vdcShortId); log.info("{} has live nodes of {}", vdcShortId, liveNodes); if (liveNodes.size() >= numHosts) { int i = 0; for (String host : liveNodes) { if (!geoInstance.getJoiningNodes().contains(host) && !geoInstance.getLeavingNodes().contains(host) && !geoInstance.getMovingNodes().contains(host)) { log.info("Node {} jumps to NORMAL", host); ++i; } } if (i >= numHosts) { log.info("Living nodes {} meet the requirement: {}", liveNodes.toString(), numHosts); log.info("{} Done", prefix); return; } } else { log.info("db {} not meet {} hosts yet", vdcShortId, numHosts); } TimeUnit.SECONDS.sleep(WAIT_INTERVAL_IN_SEC); } catch (InterruptedException ex) { // Ignore this exception } catch (Exception ex) { log.error("Exception checking DB cluster status", ex); } } log.info("{} Timed out", prefix); throw new IllegalStateException(String.format("%s : Timed out", prefix)); } /** * Wait for db ring rebuild finished in a vdc. * Quorum nodes owns full data, rebuild may need a long time * * @param vdcShortId the short id of vdc * @param vdcHosts total hosts of the vdc */ public void waitDbRingRebuildDone(String vdcShortId, int vdcHosts) { String prefix = new StringBuilder("Waiting for DB rebuild to finish for vdc with shortId '"). append(vdcShortId).append("' and "). append(vdcHosts).append(" hosts...").toString(); log.info(prefix); DbJmxClient geoInstance = getJmxClient(LOCALHOST); int quorum = vdcHosts / 2 + 1; long start = System.currentTimeMillis(); while (System.currentTimeMillis() - start < DB_RING_TIMEOUT) { try { List<String> fullOwners = geoInstance.getDcNodeFullOwnership(vdcShortId); if (fullOwners.size() >= quorum) { log.info("Full owner nodes: {}", fullOwners.toString()); return; } else { log.info("db {} rebuild not finish yet", vdcShortId); } TimeUnit.SECONDS.sleep(WAIT_INTERVAL_IN_SEC); } catch (InterruptedException ex) { // Ignore this exception } catch (Exception ex) { log.error("Exception checking DB cluster status", ex); } } log.info("{} Timed out", prefix); throw new IllegalStateException(String.format("%s : Timed out", prefix)); } /** * Wait for a vdc removed from current token ring. * * @param vdcShortId the short id of vdc */ public void waitVdcRemoveDone(String vdcShortId) { String prefix = String.format("Waiting for vdc removal from cassandra with shortId '%s' ...", vdcShortId); log.info(prefix); DbJmxClient geoInstance = getJmxClient(LOCALHOST); long start = System.currentTimeMillis(); while (System.currentTimeMillis() - start < DB_RING_TIMEOUT) { try { if (!geoInstance.isRingOwnedBy(vdcShortId)) { log.info("vdc remove done: {}", vdcShortId); return; } else { log.info("vdc removal {} not finish yet", vdcShortId); } TimeUnit.SECONDS.sleep(WAIT_INTERVAL_IN_SEC); } catch (InterruptedException ex) { // Ignore this exception } catch (Exception ex) { log.error("Exception checking DB cluster status", ex); } } log.info("{} Timed out", prefix); throw new IllegalStateException(String.format("%s : Timed out", prefix)); } public void removeVdcNodes(VirtualDataCenter vdc) { DbJmxClient geoInstance = getJmxClient(LOCALHOST); try { Collection<String> addrs = queryHostIPAddressesMap(vdc).values(); geoInstance.removeVdc(addrs); log.info("Hosts {} are removed", addrs); } catch (Exception e) { log.error("Failed to remove nodes in vdc {} e=", vdc.getShortId(), e); } } public Map<String, String> getGeoStrategyOptions() throws ConnectionException { Keyspace ks = getGeoKeyspace(); KeyspaceDefinition ksDef = ks.describeKeyspace(); return ksDef.getStrategyOptions(); } public void runNodeRepairBackEnd(String reconnVdcShortId) throws Exception { log.info("Node repair for reconnect operation is starting at vdc {}", reconnVdcShortId); DbJmxClient localJmxClient = getJmxClient(LOCALHOST); localJmxClient.dbMgrOps.resetRepairState(); localJmxClient.runNodeRepairBackEnd(); } public Map<String, List<String>> getGeoSchemaVersions() throws ConnectionException { Keyspace ks = getGeoKeyspace(); return ks.describeSchemaVersions(); } public void addVdcNodesToBlacklist(VirtualDataCenter vdc) { DbJmxClient localJmxClient = getJmxClient(LOCALHOST); List<String> liveNodes = localJmxClient.getDcLiveNodes(VdcUtil.getLocalShortVdcId()); for (String nodeIp : liveNodes) { DbJmxClient jmxClient = getJmxClient(nodeIp); Collection<String> addrs = queryHostIPAddressesMap(vdc).values(); jmxClient.addVdcNodesToBlacklist(addrs); log.info("Add node to blacklist {}", nodeIp); } } public void clearBlackList() { Map<String, List<String>> currBlackList = getBlacklist(); Set<Map.Entry<String, List<String>>> entrySet = currBlackList.entrySet(); for (Map.Entry<String, List<String>> entry : entrySet) { DbJmxClient jmxClient = getJmxClient(entry.getKey()); jmxClient.removeVdcNodesFromBlacklist(entry.getValue()); } } public void removeVdcNodesFromBlacklist(VirtualDataCenter vdc) { DbJmxClient localJmxClient = getJmxClient(LOCALHOST); List<String> liveNodes = localJmxClient.getDcLiveNodes(VdcUtil.getLocalShortVdcId()); for (String nodeIp : liveNodes) { DbJmxClient jmxClient = getJmxClient(nodeIp); Collection<String> addrs = queryHostIPAddressesMap(vdc).values(); jmxClient.removeVdcNodesFromBlacklist(addrs); log.info("Remove node from blacklist {}", nodeIp); } } public Map<String, List<String>> getBlacklist() { Map<String, List<String>> result = new HashMap<String, List<String>>(); DbJmxClient localJmxClient = getJmxClient(LOCALHOST); List<String> liveNodes = localJmxClient.getDcLiveNodes(VdcUtil.getLocalShortVdcId()); for (String nodeIp : liveNodes) { DbJmxClient jmxClient = getJmxClient(nodeIp); List<String> blacklist = jmxClient.getBlacklist(); if (!blacklist.isEmpty()) { result.put(nodeIp, blacklist); log.info("Get blacklist {} for node {}", blacklist, nodeIp); } } return result; } public boolean isGeoDbClientEncrypted() { return geoContext.isClientToNodeEncrypted(); } public Map<String, String> queryHostIPAddressesMap(VirtualDataCenter vdc) { Site activeSite = drUtil.getActiveSite(vdc.getShortId()); Map<String, String> hostIPv4AddressMap = activeSite.getHostIPv4AddressMap(); if (hostIPv4AddressMap != null && !hostIPv4AddressMap.isEmpty() && activeSite.isUsingIpv4()) { return hostIPv4AddressMap; } return activeSite.getHostIPv6AddressMap(); } /** * The JMX client of Cassandra */ public static class DbJmxClient { private static final String FMTURL = "service:jmx:rmi://%s:7300/jndi/rmi://%s:%d/jmxrmi"; private static final String SSOBJNAME = "org.apache.cassandra.db:type=StorageService"; private static final int DEFAULTPORT = 7199; private static final int DEFAULTGEOPORT = 7299; final String host; final int port; private String username; private String password; private JMXConnector jmxc; private MBeanServerConnection mbeanServerConn; private StorageServiceMBean ssProxy; private EndpointSnitchInfoMBean snitchProxy; private GeoInternodeAuthenticatorMBean internodeAuthProxy; private DbManagerOps dbMgrOps; /** * Create JMX client using the specified JMX host and port. * * @param host hostname or IP address of the JMX agent * @param port TCP port of the remote JMX agent * @throws IOException on connection failures */ public DbJmxClient(String host, int port) throws IOException, InterruptedException { this.host = host; this.port = port; connect(); } /** * Create JMX client using the specified JMX host and default port. * * @param host hostname or IP address of the JMX agent * @throws IOException on connection failures */ public DbJmxClient(String host) throws IOException, InterruptedException { this.host = host; this.port = DEFAULTPORT; connect(); } /** * Create a connection to the JMX agent and setup the M[X]Bean proxies. * * @throws IOException on connection failures */ private void connect() throws IOException { JMXServiceURL jmxUrl = new JMXServiceURL(String.format(FMTURL, host, host, port)); Map<String, Object> env = new HashMap<String, Object>(); if (username != null) { String[] creds = { username, password }; env.put(JMXConnector.CREDENTIALS, creds); } jmxc = JMXConnectorFactory.connect(jmxUrl, env); mbeanServerConn = jmxc.getMBeanServerConnection(); try { ObjectName name = new ObjectName(SSOBJNAME); ssProxy = JMX.newMBeanProxy(mbeanServerConn, name, StorageServiceMBean.class); snitchProxy = JMX.newMBeanProxy(mbeanServerConn, new ObjectName("org.apache.cassandra.db:type=EndpointSnitchInfo"), EndpointSnitchInfoMBean.class); internodeAuthProxy = JMX.newMBeanProxy(mbeanServerConn, new ObjectName(GeoInternodeAuthenticatorMBean.MBEAN_NAME), GeoInternodeAuthenticatorMBean.class); dbMgrOps = new DbManagerOps(mbeanServerConn); } catch (MalformedObjectNameException e) { throw new RuntimeException( "Invalid ObjectName? Please report this as a bug.", e); } } /** * get the live nodes of the dc * * @param dcId the vdc shortId */ public List<String> getDcLiveNodes(String dcId) { // An easy way is to iterate the host ips, but need figure it out in ipv4/6 support later List<String> dcLiveNodes = new ArrayList<String>(); Map<InetAddress, Float> ownerships; try { ownerships = effectiveOwnership(null); } catch (IllegalStateException ex) { ownerships = getOwnership(); } try { // go through the list, filter by dc for (Map.Entry<InetAddress, Float> ownership : ownerships.entrySet()) { String endpoint = ownership.getKey().getHostAddress(); String dc = snitchProxy.getDatacenter(endpoint); if (dc.equals(dcId)) { // check the status of this node if (getLiveNodes().contains(endpoint)) { dcLiveNodes.add(endpoint); } } } } catch (UnknownHostException e) { throw new RuntimeException(e); } return dcLiveNodes; } /** * get the nodes with full ownership for a given dc * * @param dcId the vdc shortId */ public List<String> getDcNodeFullOwnership(String dcId) { // An easy way is to iterate the host ips, but need figure it out in ipv4/6 support later List<String> fullOwners = new ArrayList<String>(); Map<InetAddress, Float> ownerships; try { ownerships = effectiveOwnership(null); } catch (IllegalStateException ex) { ownerships = getOwnership(); } try { // go through the list, filter by dc for (Map.Entry<InetAddress, Float> ownership : ownerships.entrySet()) { String endpoint = ownership.getKey().getHostAddress(); String dc = snitchProxy.getDatacenter(endpoint); Float owns = ownership.getValue(); if (dc.equals(dcId)) { // check if owns full data // due to changes in cassandra v2, ownership is not 100% any more log.info("owns by node {} {}", endpoint, owns); if (owns != null && (owns.compareTo(0.0f) > 0)) { fullOwners.add(endpoint); } } } } catch (UnknownHostException e) { throw new RuntimeException(e); } return fullOwners; } /** * check if the node in given vdc has ownership on token ring * * @param dcId the vdc shortId */ public boolean isRingOwnedBy(String dcId) { // An easy way is to iterate the host ips, but need figure it out in ipv4/6 support later Map<InetAddress, Float> ownerships; try { ownerships = effectiveOwnership(null); } catch (IllegalStateException ex) { ownerships = getOwnership(); } try { // go through the list, filter by dc for (Map.Entry<InetAddress, Float> ownership : ownerships.entrySet()) { String endpoint = ownership.getKey().getHostAddress(); String dc = snitchProxy.getDatacenter(endpoint); if (dc.equals(dcId)) { log.info("endpoint {} active on ring", endpoint); return true; } } } catch (UnknownHostException e) { throw new RuntimeException(e); } return false; } public Map<InetAddress, Float> getOwnership() { return ssProxy.getOwnership(); } public Map<InetAddress, Float> effectiveOwnership(String keyspace) throws IllegalStateException { return ssProxy.effectiveOwnership(keyspace); } public List<String> getLiveNodes() { return ssProxy.getLiveNodes(); } public List<String> getJoiningNodes() { return ssProxy.getJoiningNodes(); } public List<String> getLeavingNodes() { return ssProxy.getLeavingNodes(); } public List<String> getMovingNodes() { return ssProxy.getMovingNodes(); } public List<String> getUnreachableNodes() { return ssProxy.getUnreachableNodes(); } public void stopGossiping() { ssProxy.stopGossiping(); } public List<String> getHostIdMap(Collection<String> addrs) { List<String> ids = new ArrayList(); Map<String, String> idsMap = ssProxy.getHostIdMap(); for (String addr : addrs) { ids.add(idsMap.get(addr)); } return ids; } public void removeVdc(Collection<String> addrs) { List<String> ids = getHostIdMap(addrs); for (String id : ids) { log.info("Remove node {}", id); ssProxy.removeNode(id); } } public void runNodeRepairBackEnd() throws Exception { this.dbMgrOps.startNodeRepairAndWaitFinish(false, true); } public int getNodeRepairProgress() { DbRepairStatus status = dbMgrOps.getLastRepairStatus(true); if (status == null || status.getStatus() != DbRepairStatus.Status.IN_PROGRESS) { return -1; } return status.getProgress(); } public void addVdcNodesToBlacklist(Collection<String> addrs) { List<String> newBlacklist = new ArrayList<String>(); newBlacklist.addAll(addrs); internodeAuthProxy.addToBlacklist(newBlacklist); } public void removeVdcNodesFromBlacklist(Collection<String> addrs) { List<String> newBlacklist = new ArrayList<String>(); newBlacklist.addAll(addrs); internodeAuthProxy.removeFromBlacklist(newBlacklist); } public List<String> getBlacklist() { return internodeAuthProxy.getBlacklist(); } } }