/* * Copyright (c) 2015 EMC Corporation * All Rights Reserved */ package com.emc.storageos.systemservices.impl.ipreconfig; import com.emc.storageos.coordinator.client.model.Constants; import com.emc.storageos.coordinator.client.model.Site; import com.emc.storageos.coordinator.client.model.SiteInfo; import com.emc.storageos.coordinator.client.service.DrUtil; import com.emc.storageos.coordinator.client.service.NodeListener; import com.emc.storageos.coordinator.common.Configuration; import com.emc.storageos.coordinator.common.impl.ConfigurationImpl; import com.emc.storageos.model.property.PropertyConstants; import com.emc.storageos.services.util.FileUtils; import com.emc.storageos.services.util.NamedThreadPoolExecutor; import com.emc.storageos.services.util.PlatformUtils; import com.emc.storageos.svcs.errorhandling.resources.APIException; import com.emc.storageos.systemservices.impl.upgrade.CoordinatorClientExt; import com.emc.storageos.systemservices.impl.upgrade.LocalRepository; import com.emc.storageos.db.common.VdcUtil; import com.emc.vipr.model.sys.ClusterInfo; import com.emc.vipr.model.sys.ipreconfig.*; import org.apache.commons.codec.binary.Base64; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.recipes.locks.InterProcessLock; import org.apache.curator.framework.state.ConnectionState; import org.apache.curator.framework.state.ConnectionStateListener; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import java.nio.charset.Charset; import java.util.*; import java.util.concurrent.ThreadPoolExecutor; /** * Ip Reconfig Manager drives whole procedure of cluster ip reconfiguration. * It persists both cluster and node status both in ZK and local node. */ public class IpReconfigManager implements Runnable { private static final Logger log = LoggerFactory.getLogger(IpReconfigManager.class); private static Charset UTF_8 = Charset.forName("UTF-8"); private static final long IPRECONFIG_TIMEOUT = 24 * 60 * 60 * 1000; // 24 hours timeout for the procedure private static final long POLL_INTERVAL = 10 * 1000; // 10 second polling interval private static final String UPDATE_ZKIP_LOCK = "update_zkip"; // ipreconfig entry in ZK Configuration config = null; private ClusterIpInfo localIpinfo = null; // local/current ip info private ClusterIpInfo newIpinfo = null; // new ip info private Integer localNodeId; // local node id (1~5) private Integer nodeCount; private long expiration_time = 0L; // ipreconfig would fail if not finished at this time @Autowired private CoordinatorClientExt _coordinator; @Autowired private LocalRepository localRepository; private ThreadPoolExecutor _pollExecutor; private IpReconfigListener ipReconfigListener = null; private DrUtil drUtil; public void setDrUtil(DrUtil drUtil) { this.drUtil = drUtil; } private Properties ovfProperties; // local ovfenv properties public void setOvfProperties(Properties ovfProps) { ovfProperties = ovfProps; } // this shouldn't be named the same as the default getter, since the return type // is different with the argument type of the setter. public Map<String, String> getOvfProps() { return (Map) ovfProperties; } /** * Responds to connection drops / reconnects. */ private final ConnectionStateListener _connectionListener = new ConnectionStateListener() { @Override public void stateChanged(final CuratorFramework client, final ConnectionState newState) { log.info("Entering stateChanged method : {}", newState); if (newState == ConnectionState.CONNECTED || newState == ConnectionState.RECONNECTED) { addIpreconfigListener(); } } }; public IpReconfigManager() { } /** * Initialize ipreconfig manager * 1. Load local ovfenv properties * 2. Register node listener for ipreconfig config znode in ZK */ public void init() { loadLocalOvfProps(); addIpreconfigListener(); _coordinator.getZkConnection().curator().getConnectionStateListenable().addListener(_connectionListener); } /* * Load local ovfenv properties */ private void loadLocalOvfProps() { Map<String, String> ovfprops = getOvfProps(); localIpinfo = new ClusterIpInfo(); localIpinfo.loadFromPropertyMap(ovfprops); String node_id = ovfprops.get(PropertyConstants.NODE_ID_KEY); if (node_id == null || node_id.equals(Constants.STANDALONE_ID)) { localNodeId = 1; } else { localNodeId = Integer.valueOf(node_id.split("vipr")[1]); } nodeCount = Integer.valueOf(ovfprops.get(PropertyConstants.NODE_COUNT_KEY)); } /** * Register ipreconfig listener to monitor ipreconfig related changes (new IPs, status etc.) */ private void addIpreconfigListener() { try { if (ipReconfigListener != null) { _coordinator.getCoordinatorClient().removeNodeListener(ipReconfigListener); } ipReconfigListener = new IpReconfigListener(); _coordinator.getCoordinatorClient().addNodeListener(ipReconfigListener); } catch (Exception e) { log.error("Fail to add node listener for ip reconfig config znode", e); throw APIException.internalServerErrors.addListenerFailed(); } log.info("Succeed to add node listener for ip reconfig config znode"); } /** * Main retry loop of the IP reconfiguration procedure. */ @Override public void run() { init(); while (true) { try { handleIpReconfig(); // wait for any ipreconfig related changes (new IPs, procedure status or node status change etc.) await(); } catch (Exception e) { log.error(e.getMessage(), e); } } } /** * Handle the ip reconfiguration procedure for any ipreconfiguration info/status changes. * 1. It drives ip reconfiguration state machine to move forward the whole procedure * 2. It launches a polling executor to handle two special scenarios -- procedure timeout and missed ZK event * 3. It also will cleanup env if the procedure is failed etc. */ private synchronized void handleIpReconfig() throws Exception { config = _coordinator.getCoordinatorClient().queryConfiguration(IpReconfigConstants.CONFIG_KIND, IpReconfigConstants.CONFIG_ID); if (config == null) { log.info("no ipreconfig request coming in yet."); assureIPConsistent(); return; } if (isRollback()) { assureIPConsistent(); return; } if (!isStarted(config)) { // Barely do nothing if stauts is SUCCEED or FAILED log.info("ip reconfig procedure is not started."); if (isFailed(config)) { // cleanup local temp files when the procedure failed log.info("ip reconfig procedure failed. cleanup..."); IpReconfigUtil.cleanupLocalFiles(); // stop polling executor } stopPollExecutor(); return; } expiration_time = Long.valueOf(config.getConfig(IpReconfigConstants.CONFIG_EXPIRATION_KEY)); if (System.currentTimeMillis() >= expiration_time) { // set procedure failed when it is expired setFailed(IpReconfigConstants.ERRSTR_TIMEOUT); return; } // start polling executor startPollExecutor(); // drive ip reconfiguration status machine driveIpReconfigStateMachine(); } /** * Handle core status change for the ip reconfig procedure * For each node, the status could be * 1. None * System just receiving ip reconfiguration request. * 2. LOCALAWARE_LOCALPERSISTENT * Local node has got the new IPs persisted while it has no idea of other nodes' status. * 3. LOCALAWARE_CLUSTERPERSISTENT * Local node knows the new IPs has been persisted in cluster domain, but not sure if all other nodes know about the fact yet. * Local node would try to guess if the new IPs has been committed in cluster domain in some failure scenarios. * 4. CLUSTERACK_CLUSTERPERSISTENT * Every node knows the new IPs has been persisted in cluster domain and get the same acknowledgement from others. * During next reboot, local node would commit the new IPs directly at this status. * 5. LOCAL_SUCCEED (Set after reboot) * New IP has taken effect in local node. * The whole procedure would be set to SUCCEED when all the nodes' status are set to LOCAL_SUCCEED * Each node would go to next status only if all the cluster nodes are at least in the same status. * * @throws Exception */ private void driveIpReconfigStateMachine() throws Exception { log.info("driving ipreconfig state machine ..."); // Start to handle ip reconfig procedure if it is started and not expired. String localnode_status_key = String.format(IpReconfigConstants.CONFIG_NODESTATUS_KEY, localNodeId); IpReconfigConstants.NodeStatus localnode_status = IpReconfigConstants.NodeStatus.valueOf(config.getConfig(localnode_status_key)); IpReconfigConstants.NodeStatus target_nodestatus = null; String base64Encoded_newipinfo = config.getConfig(IpReconfigConstants.CONFIG_IPINFO_KEY); newIpinfo = ClusterIpInfo.deserialize(Base64.decodeBase64(base64Encoded_newipinfo.getBytes(UTF_8))); if (!newIpinfo.equals(localIpinfo)) { // NewIP has not been applied yet, in the process of syncing among all nodes. switch (localnode_status) { case None: // NewIP is just set in ZK. IpReconfigUtil.writeIpinfoFile(localIpinfo, IpReconfigConstants.OLDIP_PATH); IpReconfigUtil.writeIpinfoFile(newIpinfo, IpReconfigConstants.NEWIP_PATH); String strExpirationTime = config.getConfig(IpReconfigConstants.CONFIG_EXPIRATION_KEY); FileUtils.writeObjectToFile(strExpirationTime, IpReconfigConstants.NEWIP_EXPIRATION); target_nodestatus = IpReconfigConstants.NodeStatus.LOCALAWARE_LOCALPERSISTENT; setNodeStatus(target_nodestatus.toString()); break; case LOCALAWARE_LOCALPERSISTENT: // Local node persists the NewIP, while it has no idea of other nodes' status. target_nodestatus = IpReconfigConstants.NodeStatus.LOCALAWARE_CLUSTERPERSISTENT; if (isReadyForNextStatus(localnode_status, target_nodestatus)) { setNodeStatus(target_nodestatus.toString()); } break; case LOCALAWARE_CLUSTERPERSISTENT: // Local node is aware of NewIP is persisted in cluster domain, but has no idea if other nodes know the fact. target_nodestatus = IpReconfigConstants.NodeStatus.CLUSTERACK_CLUSTERPERSISTENT; if (isReadyForNextStatus(localnode_status, target_nodestatus)) { setNodeStatus(target_nodestatus.toString()); } break; case CLUSTERACK_CLUSTERPERSISTENT: // Every node knows the new IPs has been persisted in cluster domain and get the same acknowledgement from others. target_nodestatus = IpReconfigConstants.NodeStatus.LOCAL_SUCCEED; if (isReadyForNextStatus(localnode_status, target_nodestatus)) { // After all nodes are in ClusterACK_ClusterPersistent, we will // 1. powoff cluster // 2. commit new IP during next reboot // 3. set local node status to "Local_Succed" // 4. set total status to "Succeed" when all nodes are "Local_Succeed". haltNode(config.getConfig(IpReconfigConstants.CONFIG_POST_OPERATION_KEY)); } break; default: log.error("unexpected node status before reboot: {}", localnode_status); // if installer is used before the procedure finished, we will get unexpected node status setFailed(IpReconfigConstants.ERRSTR_MANUAL_CONFIGURED); break; } } else { // newIP has taken effect, we need to set procedure status SUCCEED after all nodes got new IP. switch (localnode_status) { case LOCALAWARE_CLUSTERPERSISTENT: // The current node confirms there are quorum nodes are using new IPs and then // adopt the newIPs during bootstrap. We need to jump it to ClusterACK_ClusterPersistent status log.info("jumping to ClusterACK_ClusterPersistent status..."); setNodeStatus(IpReconfigConstants.NodeStatus.CLUSTERACK_CLUSTERPERSISTENT.toString()); break; case CLUSTERACK_CLUSTERPERSISTENT: // set Local_Succeed after // 1. local node already adopted new IP // 2. Every node knows the new IPs has been persisted in cluster domain and get the same acknowledgement from others. setNodeStatus(IpReconfigConstants.NodeStatus.LOCAL_SUCCEED.toString()); break; case LOCAL_SUCCEED: // New IP has taken effect in local node, set total status to "Succeed" when all nodes are "Local_Succeed". target_nodestatus = IpReconfigConstants.NodeStatus.CLUSTER_SUCCEED; if (isReadyForNextStatus(localnode_status, target_nodestatus)) { assureIPConsistent(); setSucceed(); } break; default: log.error("unexpected node status after reboot: {}", localnode_status); // if installer is used before the procedure finished, we will get unexpected node status setFailed(IpReconfigConstants.ERRSTR_MANUAL_CONFIGURED); break; } } } /** * check the current node is read to go to next status * * @param currNodeStatus * @param targetNodeStatus * @return */ private boolean isReadyForNextStatus(IpReconfigConstants.NodeStatus currNodeStatus, IpReconfigConstants.NodeStatus targetNodeStatus) { boolean bReadyForNextStatus = true; for (int i = 1; i <= nodeCount; i++) { String node_status_key = String.format(IpReconfigConstants.CONFIG_NODESTATUS_KEY, i); IpReconfigConstants.NodeStatus node_status = IpReconfigConstants.NodeStatus.valueOf(config.getConfig(node_status_key)); if (node_status.ordinal() < currNodeStatus.ordinal()) { bReadyForNextStatus = false; log.info("local node is not ready to step into next status: {}", targetNodeStatus); break; } } if (bReadyForNextStatus) { log.info("local node is ready to step into next status: {}", targetNodeStatus); } return bReadyForNextStatus; } /** * set local node status * * @param nodestatus * @throws Exception */ private void setNodeStatus(String nodestatus) throws Exception { log.info("changing to node status:{}", nodestatus); IpReconfigUtil.writeNodeStatusFile(nodestatus); persistZKNodeStatus(nodestatus); } /** * persist node status into ZK * * @param nodestatus * @throws Exception */ private void persistZKNodeStatus(String nodestatus) throws Exception { String nodestatus_key = String.format(IpReconfigConstants.CONFIG_NODESTATUS_KEY, localNodeId); config.setConfig(nodestatus_key, nodestatus); _coordinator.getCoordinatorClient().persistServiceConfiguration(config); } /** * Set ipreconfig status as successful and set end time * * @throws Exception */ private void setSucceed() throws Exception { log.info("Succeed to reconfig cluster ip!"); setStatus(ClusterNetworkReconfigStatus.Status.SUCCEED); FileUtils.deleteFile(IpReconfigConstants.NODESTATUS_PATH); } /** * Set ipreconfig status as failed and error message * * @param error * @throws Exception */ private void setFailed(String error) throws Exception { log.error("ipreconfig failed. Error: {}", error); config.setConfig(IpReconfigConstants.CONFIG_STATUS_KEY, ClusterNetworkReconfigStatus.Status.FAILED.toString()); config.setConfig(IpReconfigConstants.CONFIG_ERROR_KEY, error); _coordinator.getCoordinatorClient().persistServiceConfiguration(config); } /** * set final status for ip reconfig procedure * * @param reconfigStatus * @throws Exception */ private void setStatus(ClusterNetworkReconfigStatus.Status reconfigStatus) throws Exception { config.setConfig(IpReconfigConstants.CONFIG_STATUS_KEY, reconfigStatus.toString()); _coordinator.getCoordinatorClient().persistServiceConfiguration(config); } /** * set final error info for ip reconfig procedure * * @param error * @throws Exception */ private void setError(String error) throws Exception { config.setConfig(IpReconfigConstants.CONFIG_ERROR_KEY, error); _coordinator.getCoordinatorClient().persistServiceConfiguration(config); } /** * check if ip reconfiguraiton is started * * @param config * @return true/false */ private boolean isStarted(Configuration config) { String status = config.getConfig(IpReconfigConstants.CONFIG_STATUS_KEY); return status.equals(ClusterNetworkReconfigStatus.Status.STARTED.toString()); } /** * check if ip reconfiguraiton is succeed * * @param config * @return true/false */ private boolean isSucceed(Configuration config) { String status = config.getConfig(IpReconfigConstants.CONFIG_STATUS_KEY); return status.equals(ClusterNetworkReconfigStatus.Status.SUCCEED.toString()); } /** * check if ip reconfiguraiton is failed * * @param config * @return true/false */ private boolean isFailed(Configuration config) { String status = config.getConfig(IpReconfigConstants.CONFIG_STATUS_KEY); return status.equals(ClusterNetworkReconfigStatus.Status.FAILED.toString()); } /** * Check if cluster is under ip reconfiguration procedure * * @return */ public synchronized boolean underIpReconfiguration() { config = _coordinator.getCoordinatorClient().queryConfiguration(IpReconfigConstants.CONFIG_KIND, IpReconfigConstants.CONFIG_ID); if (config != null && isStarted(config)) { return true; } return false; } /** * the listener class to listen the ipconfig node change. */ class IpReconfigListener implements NodeListener { public String getPath() { String path = String.format("/config/%s/%s", IpReconfigConstants.CONFIG_KIND, IpReconfigConstants.CONFIG_ID); return path; } /** * called when user modify IPs, procedure or node status from ipreconfig point of view */ @Override public void nodeChanged() { log.info("IpReconfig info/status changed. Waking up the ip reconfig procedure..."); wakeup(); } /** * called when connection state changed. */ @Override public void connectionStateChanged(State state) { log.info("ipreconfig connection state changed to {}", state); if (state.equals(State.CONNECTED)) { log.info("Curator (re)connected. Waking up the ip reconfig procedure..."); wakeup(); } } } private synchronized void await() throws InterruptedException { this.wait(); } private synchronized void wakeup() { this.notifyAll(); } /** * Poweroff/Reboot the node */ public void haltNode(String postOperation) throws Exception { Thread.sleep(6 * 1000); if (postOperation.equals("poweroff")) { localRepository.poweroff(); } else { localRepository.reboot(); } } /** * Launch polling executor which will handle * 1. Procedure expiration scenario * 2. Underlying missed notification event * * @throws Exception */ private void startPollExecutor() throws Exception { if (_pollExecutor != null && !_pollExecutor.isTerminated()) { return; } log.info("starting polling executor ..."); _pollExecutor = new NamedThreadPoolExecutor(IpReconfigManager.class.getSimpleName() + "_Polling", 1); _pollExecutor.execute(new Runnable() { @Override public void run() { try { while (true) { synchronized (this) { // Check if procedure expired periodically, set failed and exit polling thread. config = _coordinator.getCoordinatorClient().queryConfiguration(IpReconfigConstants.CONFIG_KIND, IpReconfigConstants.CONFIG_ID); if (config != null && isStarted(config)) { expiration_time = Long.valueOf(config.getConfig(IpReconfigConstants.CONFIG_EXPIRATION_KEY)); if (expiration_time < System.currentTimeMillis()) { setFailed(IpReconfigConstants.ERRSTR_TIMEOUT); return; } } } // NodeCacheListener recipe might miss event occasionally. // We could wakeup main loop to drive the procedure periodically. Thread.sleep(POLL_INTERVAL); wakeup(); } } catch (Exception e) { log.error(e.getMessage(), e); } } }); } /** * Stop polling executor * * @throws Exception */ private void stopPollExecutor() throws Exception { if (_pollExecutor != null && !_pollExecutor.isTerminated()) { log.info("stopping polling executor ..."); _pollExecutor.shutdownNow(); } _pollExecutor = null; } /** * User might rollback even before the last ip reconfigured is finished. * So we always need to set the last ip reconfiguration status to failure to avoid * that ip reconfiguration is triggered again. * * @return true if user is trying to rollback ip conf. */ private boolean isRollback() { try { if (FileUtils.exists(IpReconfigConstants.NODESTATUS_PATH)) { IpReconfigConstants.NodeStatus localnode_status = IpReconfigConstants.NodeStatus.valueOf(IpReconfigUtil.readNodeStatusFile()); if (localnode_status == IpReconfigConstants.NodeStatus.LOCAL_ROLLBACK) { log.info("User is trying to rollback last ip reconfiguration."); setFailed(IpReconfigConstants.ERRSTR_ROLLBACK); FileUtils.deleteFile(IpReconfigConstants.NODESTATUS_PATH); return true; } } } catch (Exception e) { log.error("Failed to check if user is trying to rollback.", e); } return false; } /** * trigger ip reconfiguration * * @param clusterIpInfo * @param postOperation * @throws Exception */ public void triggerIpReconfig(ClusterIpInfo clusterIpInfo, String postOperation) throws Exception { // 1. validate cluster ip reconfig parameter validateParameter(clusterIpInfo, postOperation); // 2. check env sanityCheckEnv(); // 3. check if there is already another ip reconfiguration procedure is in progress synchronized (this) { config = _coordinator.getCoordinatorClient().queryConfiguration(IpReconfigConstants.CONFIG_KIND, IpReconfigConstants.CONFIG_ID); if (config != null) { if (isStarted(config)) { String errmsg = "Cluster is already under ip reconfiguration."; log.error(errmsg); throw new IllegalStateException(errmsg); } } } // 4. Initial ip reconfig procedure initIpReconfig(clusterIpInfo, postOperation); } /** * Sanity check if cluster env is qualified */ private void sanityCheckEnv() throws Exception { // 1. check if platform is supported checkPlatform(); // 2. check if cluster is in stable status checkClusterStatus(); // 3. check if cluster is in GEO env if (!VdcUtil.isLocalVdcSingleSite()) { String errmsg = "Cluster is in GEO env."; log.error(errmsg); throw new IllegalStateException(errmsg); } } /** * Valide cluster ip reconfig param * * @param clusterIpInfo * @param postOperation * @return error msg * @throws Exception */ private void validateParameter(ClusterIpInfo clusterIpInfo, String postOperation) throws Exception { boolean bValid = true; String errmsg = ""; if (!postOperation.equals("poweroff") && !postOperation.equals("reboot")) { bValid = false; errmsg = "post operation is invalid."; } errmsg = clusterIpInfo.validate(nodeCount); if (!errmsg.isEmpty()) { bValid = false; } if (!bValid) { throw new IllegalStateException(errmsg); } } /** * Check if platform is supported */ private void checkPlatform() { if (PlatformUtils.isVMwareVapp()) { log.info("Platform(VApp) is unsupported for ip reconfiguraiton"); throw new UnsupportedOperationException("VApp is unsupported for ip reconfiguration"); } if (PlatformUtils.hasMultipleSites()) { log.info("Multiple sites env is unsupported for ip reconfiguraiton"); throw new UnsupportedOperationException("Multiple sites env is unsupported for ip reconfiguration"); } } /** * Check if cluster is in health status */ private void checkClusterStatus() throws Exception { ClusterInfo.ClusterState controlNodeState = _coordinator.getCoordinatorClient().getControlNodesState(); if (controlNodeState == null || !controlNodeState.equals(ClusterInfo.ClusterState.STABLE)) { String errmsg = "Cluster is not stable."; log.error(errmsg); throw new IllegalStateException(errmsg); } } /** * Initiate ip reconfig procedure by creating ipreconfig config znode in ZK * The config znode include: * ipinfo * procedure status * each node's status * expiration time for the procedure * * @param clusterIpInfo The new cluster ip info * @param postOperation * @throws Exception */ private void initIpReconfig(ClusterIpInfo clusterIpInfo, String postOperation) throws Exception { ClusterIpInfo ipinfo = new ClusterIpInfo(clusterIpInfo.getIpv4Setting(), clusterIpInfo.getIpv6Setting()); log.info("Initiating ip reconfiguraton procedure {}", ipinfo.toString()); ConfigurationImpl cfg = new ConfigurationImpl(); cfg.setKind(IpReconfigConstants.CONFIG_KIND); cfg.setId(IpReconfigConstants.CONFIG_ID); cfg.setConfig(IpReconfigConstants.CONFIG_IPINFO_KEY, new String(Base64.encodeBase64(ipinfo.serialize()), UTF_8)); cfg.setConfig(IpReconfigConstants.CONFIG_STATUS_KEY, ClusterNetworkReconfigStatus.Status.STARTED.toString()); for (int i = 1; i <= ipinfo.getIpv4Setting().getNetworkAddrs().size(); i++) { String nodestatus_key = String.format(IpReconfigConstants.CONFIG_NODESTATUS_KEY, i); cfg.setConfig(nodestatus_key, IpReconfigConstants.NodeStatus.None.toString()); } // Set ip reconfiguration timeout to 1 day // 1. For poweroff case, user might need to change subnet or even migrate VMs, // thus we should set timeout longer for the procedure to be finished. // Later we should extend API for user to set desired expiration time // 2. For directly reboot case, it would be better to set longer timeout as well to cover // underlying unexpected node bootstrap issue which needs manual recovery etc. expiration_time = System.currentTimeMillis() + IPRECONFIG_TIMEOUT; cfg.setConfig(IpReconfigConstants.CONFIG_EXPIRATION_KEY, String.valueOf(expiration_time)); cfg.setConfig(IpReconfigConstants.CONFIG_POST_OPERATION_KEY, postOperation); config = cfg; _coordinator.getCoordinatorClient().persistServiceConfiguration(config); } /** * query current status&error for ip reconfig procedure * * @return * @throws Exception */ public synchronized ClusterNetworkReconfigStatus queryClusterNetworkReconfigStatus() throws Exception { ClusterNetworkReconfigStatus ipReconfigStatus = new ClusterNetworkReconfigStatus(); config = _coordinator.getCoordinatorClient().queryConfiguration(IpReconfigConstants.CONFIG_KIND, IpReconfigConstants.CONFIG_ID); if (config != null) { ClusterNetworkReconfigStatus.Status status = ClusterNetworkReconfigStatus.Status.valueOf(config .getConfig(IpReconfigConstants.CONFIG_STATUS_KEY)); ipReconfigStatus.setStatus(status); if (isFailed(config)) { String errmsg = config.getConfig(IpReconfigConstants.CONFIG_ERROR_KEY); ipReconfigStatus.setMessage(errmsg); } ipReconfigStatus.setExpiration(config.getConfig(IpReconfigConstants.CONFIG_EXPIRATION_KEY)); } return ipReconfigStatus; } /** * query current cluster ip info * * @return * @throws Exception */ public ClusterIpInfo queryCurrentClusterIpinfo() throws Exception { return localIpinfo; } /** * Assure local site IP info is consistent with that in ZK. * (DR/GEO procedures store IP info in ZK even for single site since Yoda.) */ void assureIPConsistent() { InterProcessLock lock = null; try { log.info("Assuring local site IPs are consistent with ZK ..."); lock = _coordinator.getCoordinatorClient().getLock(UPDATE_ZKIP_LOCK); lock.acquire(); log.info("Got lock for updating local site IPs into ZK ..."); Site site = drUtil.getLocalSite(); if (localIpinfo.weakEqual(site.getVip(), site.getVip6(), site.getHostIPv4AddressMap(), site.getHostIPv6AddressMap())) { log.info("local site IPs are consistent with ZK, no need to update."); return; } else { log.info("local site IPs are not consistent with ZK, updating."); log.info(" local ipinfo:{}", localIpinfo.toString()); log.info(" zk ipinfo: vip={}", site.getVip()); log.info(" zk ipinfo: vip6={}", site.getVip6()); SortedSet<String> nodeIds = new TreeSet<String>(site.getHostIPv4AddressMap().keySet()); for (String nodeId : nodeIds) { log.info(" {}: ipv4={}", nodeId, site.getHostIPv4AddressMap().get(nodeId)); log.info(" {}: ipv6={}", nodeId, site.getHostIPv6AddressMap().get(nodeId)); } } site.setVip6(localIpinfo.getIpv6Setting().getNetworkVip6()); site.setVip(localIpinfo.getIpv4Setting().getNetworkVip()); Map<String, String> ipv4Addresses = new HashMap<>(); Map<String, String> ipv6Addresses = new HashMap<>(); int nodeIndex = 1; for (String nodeip : localIpinfo.getIpv4Setting().getNetworkAddrs()) { String nodeId; nodeId = IpReconfigConstants.VDC_NODE_PREFIX + nodeIndex++; ipv4Addresses.put(nodeId, nodeip); } nodeIndex = 1; for (String nodeip : localIpinfo.getIpv6Setting().getNetworkAddrs()) { String nodeId; nodeId = IpReconfigConstants.VDC_NODE_PREFIX + nodeIndex++; ipv6Addresses.put(nodeId, nodeip); } site.setHostIPv4AddressMap(ipv4Addresses); site.setHostIPv6AddressMap(ipv6Addresses); site.setNodeCount(localIpinfo.getNodeCount()); _coordinator.getCoordinatorClient().persistServiceConfiguration(site.toConfiguration()); // wake up syssvc to regenerate configurations drUtil.updateVdcTargetVersion(_coordinator.getCoordinatorClient().getSiteId(), SiteInfo.IP_OP_CHANGE, System.currentTimeMillis()); log.info("Finished update local site IPs into ZK"); } catch (Exception e) { log.warn("Unexpected exception during updating local site IPs into ZK", e); } finally { if (lock != null) { try { lock.release(); } catch (Exception e) { log.warn("Unexpected exception during unlocking update_zkip lock", e); } } } } }