/*
* Copyright (c) 2015 EMC Corporation
* All Rights Reserved
*/
package com.emc.storageos.systemservices.impl.recovery;
import java.net.URI;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.Date;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import com.emc.storageos.coordinator.client.model.*;
import com.emc.storageos.db.common.DbConfigConstants;
import com.emc.storageos.services.util.*;
import com.emc.vipr.model.sys.recovery.DbOfflineStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.apache.curator.framework.recipes.locks.InterProcessLock;
import org.apache.curator.framework.recipes.leader.LeaderSelector;
import com.emc.vipr.model.sys.ClusterInfo;
import com.emc.vipr.model.sys.recovery.RecoveryStatus;
import com.emc.vipr.model.sys.recovery.RecoveryConstants;
import com.emc.storageos.model.property.PropertyConstants;
import com.emc.storageos.coordinator.client.service.DrUtil;
import com.emc.storageos.coordinator.client.service.NodeListener;
import com.emc.storageos.coordinator.client.service.impl.LeaderSelectorListenerImpl;
import com.emc.storageos.coordinator.common.impl.ZkPath;
import com.emc.storageos.coordinator.common.Configuration;
import com.emc.storageos.coordinator.common.impl.ConfigurationImpl;
import com.emc.storageos.db.client.DbClient;
import com.emc.storageos.systemservices.impl.upgrade.CoordinatorClientExt;
import com.emc.storageos.systemservices.impl.upgrade.LocalRepository;
import com.emc.storageos.systemservices.impl.client.SysClientFactory;
import com.emc.storageos.systemservices.exceptions.SysClientException;
import com.emc.storageos.management.jmx.recovery.DbManagerOps;
import com.emc.storageos.svcs.errorhandling.resources.APIException;
/**
 * Recovery Manager drives the whole lifecycle of node recovery. It maintains a state machine in ZK.
 * See RecoveryStatus.Status for a detailed description of the status transitions.
*/
public class RecoveryManager implements Runnable {
private static final Logger log = LoggerFactory.getLogger(RecoveryManager.class);
private List<String> serviceNames = Arrays.asList(Constants.DBSVC_NAME, Constants.GEODBSVC_NAME);
private final AtomicBoolean isLeader = new AtomicBoolean(false);
private List<String> aliveNodes = new ArrayList<String>();
private List<String> corruptedNodes = new ArrayList<String>();
private int nodeCount;
private NamedThreadPoolExecutor recoveryExecutor;
private NamedThreadPoolExecutor multicastExecutor;
private boolean waitOnRecoveryTriggering = false;
private LeaderSelector leaderSelector;
private static final long REDEPLOY_MULTICAST_TIMEOUT = 120 * 60 * 1000; // 2 hours
@Autowired
private CoordinatorClientExt coordinator;
@Autowired
private DbClient dbClient;
@Autowired
private LocalRepository localRepository;
public RecoveryManager() {
}
/**
* Initialize recovery manager
*/
public void init() {
startRecoveryLeaderSelector();
addRecoveryStatusListener();
}
/**
 * Main loop of the Recovery Manager. Executes node recovery while this node is the elected leader.
*/
@Override
public void run() {
while (isLeader.get()) {
try {
checkRecoveryStatus();
checkClusterStatus();
runNodeRecovery();
} catch (Exception e) {
log.warn("Internal error of Recovery manager: {}", e.getMessage());
}
}
}
/**
* Check the recovery status saved in ZK.
 * a. No recovery required (DONE/FAILED/NULL): nothing to do, simply wait
 * b. In progress (PREPARING/REPAIRING/SYNCING): fail the new request if one is already in progress
 * c. Triggering (INIT): the current node should take charge of node recovery
*/
private void checkRecoveryStatus() throws Exception {
while (true) {
InterProcessLock lock = null;
try {
lock = getRecoveryLock();
RecoveryStatus status = queryNodeRecoveryStatus();
if (isRecovering(status)) {
log.warn("This is a stale recovery request due to recovery leader change");
return;
} else if (isTriggering(status)) {
log.info("The recovery status is triggering so run recovery directly");
return;
}
setWaitingRecoveryTriggeringFlag(true);
} catch (Exception e) {
markRecoveryFailed(RecoveryStatus.ErrorCode.INTERNAL_ERROR);
throw e;
} finally {
releaseLock(lock);
}
log.info("Wait to be triggered");
waitOnRecoveryTriggering();
}
}
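    /**
     * Check whether the recovery thread is parked waiting for a recovery request to be triggered
     */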
private boolean getWaitingRecoveryTriggeringFlag() {
return waitOnRecoveryTriggering;
}
private void setWaitingRecoveryTriggeringFlag(boolean waiting) {
waitOnRecoveryTriggering = waiting;
log.info("Setting waiting flag to {}", waiting);
}
/**
* Check if cluster is triggering recovery
*/
private boolean isTriggering(RecoveryStatus status) {
return status.getStatus() == RecoveryStatus.Status.INIT;
}
/**
* Check if cluster is recovering
*/
private boolean isRecovering(RecoveryStatus status) {
boolean recovering = (status.getStatus() == RecoveryStatus.Status.PREPARING
|| status.getStatus() == RecoveryStatus.Status.REPAIRING
|| status.getStatus() == RecoveryStatus.Status.SYNCING);
return recovering;
}
/**
* Check if cluster is in minority nodes corrupted scenario
*/
private void checkClusterStatus() throws Exception {
if (isVMwareVapp()) {
initNodeListByCheckOfflineTime();
purgeDataForVappRecovery(corruptedNodes);
        } else {
initNodeListByCheckDbStatus();
}
if (corruptedNodes.contains(coordinator.getMyNodeId())) {
            String errMsg = "Closing RecoveryManager leadership as this node is corrupted";
log.info(errMsg);
closeRecoveryLeaderSelector();
throw new Exception(errMsg);
} else {
log.info("Proceed RecoveryManager leadership as node is good");
}
validateNodesStatus();
}
/**
     * Initialize the alive and corrupted node lists by checking db and geodb status
*/
private void initNodeListByCheckDbStatus() throws Exception {
aliveNodes.clear();
corruptedNodes.clear();
for (String serviceName : serviceNames) {
try (DbManagerOps dbManagerOps = new DbManagerOps(serviceName)) {
Map<String, Boolean> statusMap = dbManagerOps.getNodeStates();
for (Map.Entry<String, Boolean> statusEntry : statusMap.entrySet()) {
log.info("status map entry: {}-{}", statusEntry.getKey(), statusEntry.getValue());
String nodeId = statusEntry.getKey();
if (statusEntry.getValue().equals(Boolean.TRUE)) {
if (!aliveNodes.contains(nodeId)) {
aliveNodes.add(nodeId);
}
} else {
if (!corruptedNodes.contains(nodeId)) {
corruptedNodes.add(nodeId);
}
if (aliveNodes.contains(nodeId)) {
aliveNodes.remove(nodeId);
}
}
}
}
}
log.info("Alive nodes:{}, corrupted nodes: {}", aliveNodes, corruptedNodes);
}
/**
     * Validate that the cluster is in the minority-nodes-corrupted scenario
*/
private void validateNodesStatus() {
nodeCount = coordinator.getNodeCount();
if (aliveNodes.size() == nodeCount) {
markRecoveryCancelled();
log.warn("All nodes are alive, no need to do recovery");
throw new IllegalStateException("No need to do recovery");
} else if (aliveNodes.size() < (nodeCount / 2 + 1)) {
markRecoveryCancelled();
log.warn("This procedure doesn't support majority nodes corrupted scenario");
throw new IllegalStateException("Majority nodes are corrupted");
}
}
/**
     * Start cluster recovery in the minority-nodes-corrupted scenario
     * a. PREPARING: start a multicast thread, then the user redeploys the corrupted nodes
     * b. REPAIRING: run db node repair among the alive nodes to ensure consistency
     * c. SYNCING: wake the redeployed nodes from hibernation and sync data to them
     * d. DONE: dbsvc and geodbsvc on all nodes have started
     * e. FAILED: an error occurred during node recovery
*/
private synchronized void runNodeRecovery() throws Exception {
InterProcessLock lock = null;
try {
log.info("Node recovery begins");
lock = getRecoveryLock();
setRecoveryStatus(RecoveryStatus.Status.PREPARING);
if (!isVMwareVapp()) {
startMulticastService();
}
setRecoveryStatus(RecoveryStatus.Status.REPAIRING);
runDbRepair();
if (isVMwareVapp()) {
restartServices();
}
setRecoveryStatus(RecoveryStatus.Status.SYNCING);
waitDbsvcStarted();
validateAutoBootFlag();
markRecoverySuccessful();
log.info("Node recovery is done successful");
} catch (Exception ex) {
markRecoveryFailed(RecoveryStatus.ErrorCode.INTERNAL_ERROR);
log.error("Node recovery failed:", ex);
throw ex;
} finally {
releaseLock(lock);
}
}
/**
* Start multicast service for node redeployment
* TODO - we are going to remove it after Hyper-V installer is discarded in jedi.
*/
private void startMulticastService() throws Exception {
multicastExecutor = new NamedThreadPoolExecutor("Redeploy multicast thread", 1);
Runnable multicast = new Runnable() {
@Override
public void run() {
try {
log.info("Start to multicast cluster configuration for node redeploy.");
String version = coordinator.getTargetInfo(RepositoryInfo.class).getCurrentVersion().toString();
com.emc.storageos.services.util.Configuration config = PlatformUtils.getLocalConfiguration();
config.setScenario(PropertyConstants.REDEPLOY_MODE);
config.setAliveNodes(aliveNodes);
MulticastUtil.doBroadcast(version, config, REDEPLOY_MULTICAST_TIMEOUT);
log.info("Finished multicast cluster configuration for node redeploy.");
} catch (Exception e) {
log.warn("Multicast failed", e);
}
}
};
multicastExecutor.execute(multicast);
}
/**
     * Remove the corrupted nodes and then run db node repair among the alive nodes
*/
private void runDbRepair() {
try {
for (String svcName : serviceNames) {
try (DbManagerOps dbManagerOps = new DbManagerOps(svcName)) {
dbManagerOps.removeNodes(corruptedNodes);
dbManagerOps.startNodeRepairAndWaitFinish(true, false);
}
}
} catch (Exception e) {
log.error("Node repair failed", e);
markRecoveryFailed(RecoveryStatus.ErrorCode.REPAIR_FAILED);
throw APIException.internalServerErrors.nodeRepairFailed();
}
}
/**
     * Wait until dbsvc and geodbsvc on all nodes have started
*/
private void waitDbsvcStarted() throws Exception {
log.info("Wait dbsvc and geodbsvc get started..");
waitHibernateNodeStarted();
validateClusterStatus();
if (isVMwareVapp()) {
removeOfflineInfo();
}
}
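    /**
     * Remove the recorded db offline time of the recovered nodes from the db downtime tracker in ZK (vApp only)
     */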
private void removeOfflineInfo() {
Configuration config = coordinator.getCoordinatorClient().queryConfiguration(coordinator.getCoordinatorClient().getSiteId(),
Constants.DB_DOWNTIME_TRACKER_CONFIG, Constants.DBSVC_NAME);
DbOfflineEventInfo dbOfflineEventInfo = new DbOfflineEventInfo(config);
for (int i = 1; i <= nodeCount; i++) {
String nodeId = "vipr" + i;
if (corruptedNodes.contains(nodeId)) {
if (dbOfflineEventInfo.getOfflineTimeInMS(nodeId) != null) {
dbOfflineEventInfo.setOfflineTimeInMS(nodeId, null);
log.info("Removed offline Time info of {}", nodeId);
}
}
}
config = dbOfflineEventInfo.toConfiguration(Constants.DBSVC_NAME);
coordinator.getCoordinatorClient().persistServiceConfiguration(coordinator.getCoordinatorClient().getSiteId(), config);
log.info("Clean offlineTime and Persist db tracker info to zk successfully");
}
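    /**
     * In a multi-site (DR) environment, bump the vdc config version under the DR operation lock so the
     * hibernating nodes are informed to reconfigure
     */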
private void informHibernateNodeToReconfigure() {
DrUtil drUtil = new DrUtil(coordinator.getCoordinatorClient());
if (drUtil.isMultisite()) {
InterProcessLock lock = null;
try {
lock = drUtil.getDROperationLock();
long vdcConfigVersion = DrUtil.newVdcConfigVersion();
log.info("Has multi sites, informing the hibernate nodes to reconfigure..");
drUtil.updateVdcTargetVersion(coordinator.getCoordinatorClient().getSiteId(),
SiteInfo.DR_OP_NODE_RECOVERY, vdcConfigVersion);
} catch (Exception e) {
log.error("Failed to inform the hibernate nodes to reconfigure", e);
} finally {
try {
if (lock != null) {
lock.release();
}
} catch (Exception ignore) {
log.error("Release lock failed when node recovery", ignore);
}
}
}
}
/**
     * Wait until dbsvc and geodbsvc on the redeployed nodes have started
*/
private void waitHibernateNodeStarted() throws Exception {
long expireTime = System.currentTimeMillis() + RecoveryConstants.RECOVERY_CHECK_TIMEOUT;
while (true) {
informHibernateNodeToReconfigure();
List<String> hibernateNodes = getHibernateNodes();
if (hibernateNodes.isEmpty()) {
log.info("Db node rebuild finished");
break;
}
Thread.sleep(RecoveryConstants.RECOVERY_CHECK_INTERVAL);
if (System.currentTimeMillis() >= expireTime) {
log.error("Hibernate nodes({}) can't get started within the stipulated time({})",
hibernateNodes, RecoveryConstants.RECOVERY_CHECK_TIMEOUT);
markRecoveryFailed(RecoveryStatus.ErrorCode.SYNC_FAILED);
throw APIException.internalServerErrors.nodeRebuildFailed();
}
}
}
/**
* Double check dbsvc status on all nodes
*/
private void validateClusterStatus() throws Exception {
        for (int i = 0; i < RecoveryConstants.RECOVERY_RETRY_COUNT; i++) {
            List<String> unavailableNodes = getUnavailableNodes();
            if (unavailableNodes.isEmpty()) {
                log.info("Dbsvc on all nodes is available");
                break;
            }
            if (i == RecoveryConstants.RECOVERY_RETRY_COUNT - 1) {
                log.error("Healthy nodes({}) became unavailable during node recovery", unavailableNodes);
                markRecoveryFailed(RecoveryStatus.ErrorCode.NEW_NODE_FAILURE);
                throw APIException.internalServerErrors.newNodeFailureInNodeRecovery(unavailableNodes.toString());
            }
            Thread.sleep(RecoveryConstants.RECOVERY_CHECK_INTERVAL);
        }
}
/**
     * Get hibernating nodes by checking whether they exist in the Cassandra node list
*/
private List<String> getHibernateNodes() {
List<String> hibernateNodes = new ArrayList<String>();
for (int i = 1; i <= nodeCount; i++) {
String nodeId = "vipr" + i;
if (aliveNodes.contains(nodeId)) {
log.debug("No need to check {} which is not a redeployed node", nodeId);
continue;
}
if (isNodeHibernating(nodeId)) {
hibernateNodes.add(nodeId);
continue;
}
if (!isNodeAvailable(nodeId)) {
hibernateNodes.add(nodeId);
}
}
log.debug("Get hibernate nodes: {}", hibernateNodes);
return hibernateNodes;
}
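    /**
     * Check if a node is still hibernating, i.e. absent from the dbsvc/geodbsvc node state maps
     */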
private boolean isNodeHibernating(String nodeId) {
for (String serviceName : serviceNames) {
try (DbManagerOps dbManagerOps = new DbManagerOps(serviceName)) {
Map<String, Boolean> statusMap = dbManagerOps.getNodeStates();
                if (!statusMap.containsKey(nodeId)) {
log.debug("Node({}) is still hibernating", nodeId);
return true;
}
} catch (Exception e) {
log.warn("Failed to get hibernate node by checking {}", serviceName);
}
}
log.debug("Node({}) is not hibernated any more", nodeId);
return false;
}
/**
     * Get unavailable nodes by checking the dbsvc and geodbsvc beacons
*/
private List<String> getUnavailableNodes() {
List<String> unavailableNodes = new ArrayList<String>();
for (int i = 1; i <= nodeCount; i++) {
String nodeId = "vipr" + i;
if (!isNodeAvailable(nodeId)) {
unavailableNodes.add(nodeId);
}
}
log.debug("Get unavailable nodes: {}", unavailableNodes);
return unavailableNodes;
}
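    /**
     * Check if the dbsvc and geodbsvc beacons of the given node are registered as available
     */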
private boolean isNodeAvailable(String nodeId) {
for (String serviceName : serviceNames) {
List<String> availableNodes = coordinator.getServiceAvailableNodes(serviceName);
if (!availableNodes.contains(nodeId)) {
log.debug("Service({}) on node({}) is unavailable");
return false;
}
}
return true;
}
/**
     * Trigger node recovery by updating the recovery status to 'INIT'
*/
public void triggerNodeRecovery() {
InterProcessLock lock = null;
try {
lock = getRecoveryLock();
validateNodeRecoveryStatus();
validateClusterState();
RecoveryStatus status = new RecoveryStatus();
status.setStatus(RecoveryStatus.Status.INIT);
status.setStartTime(new Date());
persistNodeRecoveryStatus(status);
} finally {
releaseLock(lock);
}
}
/**
* Check if platform is supported
*/
private void validatePlatform() {
if (isVMwareVapp()) {
log.warn("Platform(vApp) is unsupported for node recovery");
throw new UnsupportedOperationException("Platform(vApp) is unsupported for node recovery");
}
}
private boolean isVMwareVapp() {
return PlatformUtils.isVMwareVapp();
}
/**
     * Check if node recovery has already been triggered
*/
private void validateNodeRecoveryStatus() {
RecoveryStatus status = queryNodeRecoveryStatus();
if (isTriggering(status) || isRecovering(status)) {
log.warn("Have triggered node recovery already");
throw new IllegalStateException("Have triggered node recovery already");
}
}
/**
     * Check if the cluster needs node recovery
*/
private void validateClusterState() {
ClusterInfo.ClusterState state = null;
if (!isVMwareVapp()) {
state = coordinator.getCoordinatorClient().getControlNodesState();
log.info("Current control nodes' state: {}", state);
if (state == ClusterInfo.ClusterState.STABLE) {
log.warn("Cluster is stable and no need to do node recovery");
throw new IllegalStateException("Cluster is stable and no need to do node recovery");
}
} else {
initNodeListByCheckOfflineTime();
if (aliveNodes.size() == coordinator.getNodeCount()) {
log.warn("all nodes in vapp is available and no need to do node recovery");
throw new IllegalStateException("all nodes in vapp is available and no need to do node recovery");
}
}
        // Disable node recovery when a standby site state is unexpected, as db repair would fail in these scenarios.
DrUtil drUtil = new DrUtil(coordinator.getCoordinatorClient());
if (drUtil.isMultisite()) {
List<Site> allStandbySites = drUtil.listStandbySites();
for (Site site : allStandbySites) {
if (!site.getState().equals(SiteState.STANDBY_SYNCED)
&& !site.getState().equals(SiteState.STANDBY_PAUSED)
&& !site.getState().equals(SiteState.STANDBY_DEGRADED)) {
log.error("Node recovery is not allowed as standby site({}) status is unexpected({})",
site.getName(), site.getState());
throw new IllegalStateException("Node recovery is not allowed as standby site status is unexpected");
}
}
}
        // Disable node recovery when another connected vdc cluster state is DEGRADED, as geo db repair would fail.
if (drUtil.isMultivdc()) {
List<String> allOtherVdcs = drUtil.getOtherVdcIds();
for (String vdc : allOtherVdcs) {
state = coordinator.getCoordinatorClient().getControlNodesState(vdc);
if (state == ClusterInfo.ClusterState.DEGRADED) {
log.error("Node recovery is not allowed as a connected vdc({}) status is degraded", vdc);
throw new IllegalStateException("Node recovery is not allowed as a connected vdc status is degraded");
}
}
}
}
/**
     * Update node recovery status in ZK
*/
private void setRecoveryStatus(RecoveryStatus.Status status) {
if (!isLeader.get()) {
log.warn("This node is not the recovery leader");
throw new IllegalStateException("This node is not the recovery leader");
}
RecoveryStatus recoveryStatus = queryNodeRecoveryStatus();
recoveryStatus.setStatus(status);
persistNodeRecoveryStatus(recoveryStatus);
}
/**
     * Update node recovery status in ZK and mark the end time
*/
private void setRecoveryStatusWithEndTimeMarked(RecoveryStatus.Status status) {
if (!isLeader.get()) {
log.warn("This node is not the recovery leader");
throw new IllegalStateException("This node is not the recovery leader");
}
RecoveryStatus recoveryStatus = queryNodeRecoveryStatus();
recoveryStatus.setStatus(status);
recoveryStatus.setEndTime(new Date());
persistNodeRecoveryStatus(recoveryStatus);
}
/**
* Set node recovery status as 'CANCELLED'
*/
private void markRecoveryCancelled() {
InterProcessLock lock = null;
try {
lock = getRecoveryLock();
setRecoveryStatusWithEndTimeMarked(RecoveryStatus.Status.CANCELLED);
} finally {
releaseLock(lock);
}
}
/**
* Mark recovery status as successful and set end time
*/
private void markRecoverySuccessful() {
setRecoveryStatusWithEndTimeMarked(RecoveryStatus.Status.DONE);
}
/**
     * Mark recovery status as failed and set the error code and end time
*/
    private void markRecoveryFailed(RecoveryStatus.ErrorCode errorCode) {
if (!isLeader.get()) {
log.warn("This node is not the recovery leader");
throw new IllegalStateException("This node is not the recovery leader");
}
RecoveryStatus recoveryStatus = queryNodeRecoveryStatus();
if (recoveryStatus.getErrorCode() != null) {
log.debug("Have already marked.");
return;
}
        recoveryStatus.setErrorCode(errorCode);
recoveryStatus.setEndTime(new Date());
recoveryStatus.setStatus(RecoveryStatus.Status.FAILED);
persistNodeRecoveryStatus(recoveryStatus);
poweroff(getHibernateNodes());
}
/**
* Persist recovery status to ZK
*/
private void persistNodeRecoveryStatus(RecoveryStatus status) {
log.info("Set node recovery status: {}", status);
if (status == null) {
return;
}
ConfigurationImpl cfg = new ConfigurationImpl();
cfg.setKind(Constants.NODE_RECOVERY_STATUS);
cfg.setId(Constants.GLOBAL_ID);
cfg.setConfig(RecoveryConstants.RECOVERY_STATUS, status.getStatus().toString());
if (status.getStartTime() != null) {
cfg.setConfig(RecoveryConstants.RECOVERY_STARTTIME, String.valueOf(status.getStartTime().getTime()));
}
if (status.getEndTime() != null) {
cfg.setConfig(RecoveryConstants.RECOVERY_ENDTIME, String.valueOf(status.getEndTime().getTime()));
}
if (status.getErrorCode() != null) {
cfg.setConfig(RecoveryConstants.RECOVERY_ERRCODE, status.getErrorCode().toString());
}
coordinator.getCoordinatorClient().persistServiceConfiguration(cfg);
log.debug("Persist node recovery status({}) to zk successfully", status);
}
/**
* Query recovery status from ZK
*/
public RecoveryStatus queryNodeRecoveryStatus() {
RecoveryStatus status = new RecoveryStatus();
Configuration cfg = coordinator.getCoordinatorClient().queryConfiguration(Constants.NODE_RECOVERY_STATUS,
Constants.GLOBAL_ID);
if (cfg != null) {
String statusStr = cfg.getConfig(RecoveryConstants.RECOVERY_STATUS);
status.setStatus(RecoveryStatus.Status.valueOf(statusStr));
String startTimeStr = cfg.getConfig(RecoveryConstants.RECOVERY_STARTTIME);
if (startTimeStr != null && startTimeStr.length() > 0) {
status.setStartTime(new Date(Long.parseLong(startTimeStr)));
}
String endTimeStr = cfg.getConfig(RecoveryConstants.RECOVERY_ENDTIME);
if (endTimeStr != null && endTimeStr.length() > 0) {
status.setEndTime(new Date(Long.parseLong(endTimeStr)));
}
String errorCodeStr = cfg.getConfig(RecoveryConstants.RECOVERY_ERRCODE);
if (errorCodeStr != null && errorCodeStr.length() > 0) {
status.setErrorCode(RecoveryStatus.ErrorCode.valueOf(errorCodeStr));
}
}
log.info("Recovery status is: {}", status);
return status;
}
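    /**
     * If the autoboot flag is true on every node, set it to false on one node (presumably so that
     * one node performs a fresh boot after recovery rather than all nodes waiting to auto-boot)
     */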
private void validateAutoBootFlag() {
String siteId = coordinator.getCoordinatorClient().getSiteId();
List<Configuration> configs = coordinator.getCoordinatorClient().queryAllConfiguration(siteId, Constants.DB_CONFIG);
if (!isAllAutoBootTrue(configs)) {
log.info("Auto boot flag check passed");
return;
}
log.info("Auto boot flag was set true on all nodes, Change to false for one node");
for (int i = 0; i < configs.size(); i++) {
Configuration config = configs.get(i);
if (config.getId() == null || config.getId().equals(Constants.GLOBAL_ID)) {
continue;
}
config.setConfig(DbConfigConstants.AUTOBOOT, "false");
coordinator.getCoordinatorClient().persistServiceConfiguration(siteId, config);
log.info("Persist autoboot info as false on {} to zk successfully", config.getId());
break;
}
}
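    /**
     * Check if the autoboot flag is true in every db configuration, skipping the "global" item and
     * "version" folders
     */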
private boolean isAllAutoBootTrue(List<Configuration> configs) {
for (int i = 0; i < configs.size(); i++) {
Configuration config = configs.get(i);
            // Skip the "global" item and "version" folders; only check db configurations.
if (config.getId() == null || config.getId().equals(Constants.GLOBAL_ID)) {
continue;
}
if (!Boolean.parseBoolean(config.getConfig(DbConfigConstants.AUTOBOOT))) {
return false;
}
}
return true;
}
/**
* Get recovery lock to protect the setting of recovery status
*/
private InterProcessLock getRecoveryLock() {
InterProcessLock lock = null;
log.info("Try to acquire recovery lock");
try {
lock = coordinator.getCoordinatorClient().getLock(RecoveryConstants.RECOVERY_LOCK);
boolean acquired = lock.acquire(RecoveryConstants.RECOVERY_LOCK_TIMEOUT, TimeUnit.MILLISECONDS);
if (!acquired) {
throw new IllegalStateException("Unable to get recovery lock");
}
} catch (Exception e) {
log.error("Get recovery lock failed", e);
throw APIException.internalServerErrors.getLockFailed();
}
log.info("Got recovery lock");
return lock;
}
/**
* Release recovery lock
*/
private void releaseLock(InterProcessLock lock) {
if (lock == null) {
log.info("The recovery lock is null, no need to release");
return;
}
try {
lock.release();
log.info("Release recovery lock successful");
} catch (Exception ignore) {
log.warn("Release recovery lock failed", ignore);
}
}
/**
* Poweroff specific nodes
*
     * @param nodeIds a list of node ids (e.g. vipr1)
*/
public void poweroff(List<String> nodeIds) {
for (String nodeId : nodeIds) {
try {
log.info("Try to power off {}", nodeId);
String svcId = nodeId.replace("vipr", "syssvc-");
URI nodeEndpoint = coordinator.getNodeEndpointForSvcId(svcId);
if (nodeEndpoint == null) {
continue;
}
SysClientFactory.getSysClient(coordinator.getNodeEndpointForSvcId(svcId))
.post(SysClientFactory.URI_POWEROFF_NODE, null, null);
log.info("Power off {} successfully", nodeId);
} catch (SysClientException e) {
log.error("Power off node({}) failed", nodeId, e.getMessage());
}
}
}
/**
* Poweroff local node
*/
public void poweroff() {
localRepository.poweroff();
}
/**
     * Register a recovery status listener to monitor recovery status changes
*/
private void addRecoveryStatusListener() {
try {
coordinator.getCoordinatorClient().addNodeListener(new RecoveryStatusListener());
} catch (Exception e) {
log.error("Fail to add recovery status listener", e);
throw APIException.internalServerErrors.addListenerFailed();
}
}
/**
     * Listener for changes of the recovery status node in ZK.
*/
private class RecoveryStatusListener implements NodeListener {
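        /**
         * ZK path of the recovery status node being watched
         */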
@Override
public String getPath() {
String path = String.format("%1$s/%2$s/%3$s", ZkPath.CONFIG, Constants.NODE_RECOVERY_STATUS,
Constants.GLOBAL_ID);
return path;
}
/**
* Called when a change of recovery status has occurred
*/
@Override
public void nodeChanged() {
wakeupRecoveryThread();
}
/**
* Called when connection status changed
*/
@Override
public void connectionStateChanged(State state) {
}
}
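    /**
     * Block the recovery thread until wakeupRecoveryThread() is called by the status listener
     */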
private synchronized void waitOnRecoveryTriggering() throws InterruptedException {
if (getWaitingRecoveryTriggeringFlag()) {
this.wait();
}
}
private synchronized void wakeupRecoveryThread() {
if (getWaitingRecoveryTriggeringFlag()) {
log.info("Try to notify the semaphore");
this.notifyAll();
setWaitingRecoveryTriggeringFlag(false);
}
}
/**
     * Use a leader selector to make sure only one node (the leader) starts the recovery manager
*/
private void startRecoveryLeaderSelector() {
while (!coordinator.getCoordinatorClient().isConnected()) {
log.info("Waiting for connecting to zookeeper");
try {
Thread.sleep(RecoveryConstants.RECOVERY_CONNECT_INTERVAL);
} catch (InterruptedException e) {
log.warn("Exception while sleeping, ignore", e);
}
}
leaderSelector = coordinator.getCoordinatorClient().getLeaderSelector(
RecoveryConstants.RECOVERY_LEADER_PATH,
new RecoveryLeaderSelectorListener());
leaderSelector.autoRequeue();
leaderSelector.start();
}
    /**
     * Close the leader selector on this node
     */
private void closeRecoveryLeaderSelector() {
log.info ("close the leaderSelect on the node as dbsvc need to recover");
leaderSelector.close();
        // Make sure stopLeadership has executed before continuing
try {
Thread.sleep(RecoveryConstants.RECOVERY_CONNECT_INTERVAL);
} catch (InterruptedException e) {
log.warn("Exception while sleeping, ignore", e);
}
}
/**
     * Listener for recovery leader election changes.
*/
private class RecoveryLeaderSelectorListener extends LeaderSelectorListenerImpl {
@Override
protected void startLeadership() throws Exception {
log.info("Select as leader, wait to start recovery manager");
isLeader.set(true);
start();
}
@Override
protected void stopLeadership() {
log.info("Give up leader, try to stop recovery manager");
isLeader.set(false);
stop();
}
}
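    /**
     * Run the recovery manager loop in a dedicated single-threaded executor
     */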
private void start() {
recoveryExecutor = new NamedThreadPoolExecutor("Recovery manager", 1);
recoveryExecutor.execute(this);
}
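    /**
     * Stop the recovery manager loop and wait for its executor to terminate
     */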
private void stop() {
recoveryExecutor.shutdownNow();
try {
while (!recoveryExecutor.awaitTermination(RecoveryConstants.THREAD_CHECK_INTERVAL, TimeUnit.SECONDS)) {
log.warn("Waiting recovery thread pool to shutdown for another {} seconds",
RecoveryConstants.THREAD_CHECK_INTERVAL);
}
} catch (InterruptedException e) {
log.error("Interrupted while waiting to shutdown recovery thread pool", e);
}
}
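    /**
     * Initialize the alive and corrupted node lists by querying each node's db offline status (vApp only);
     * nodes whose outage time exceeded the allowed threshold are treated as corrupted
     */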
private void initNodeListByCheckOfflineTime() {
aliveNodes.clear();
corruptedNodes.clear();
ArrayList<String> nodeList = coordinator.getAllNodeIds();
for (String nodeId : nodeList) {
try {
DbOfflineStatus dbOfflineStatus = SysClientFactory.getSysClient(
coordinator.getNodeEndpoint(nodeId)).get(SysClientFactory.URI_GET_DB_OFFLINE_STATUS, DbOfflineStatus.class, null);
if (dbOfflineStatus.getOutageTimeExceeded()) {
corruptedNodes.add(nodeId);
} else {
aliveNodes.add(nodeId);
}
} catch (SysClientException e) {
log.warn("Internal error on clean up purge data: ", e.getMessage());
throw e;
}
}
log.info("Alive nodes:{}, corrupted nodes: {}", aliveNodes, corruptedNodes);
}
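    /**
     * Trigger a db reset on each corrupted vApp node to purge its stale data before rebuild
     */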
private void purgeDataForVappRecovery(List<String> nodeList) {
for (String nodeId : nodeList) {
try {
SysClientFactory.getSysClient(coordinator.getNodeEndpoint(nodeId)).post(URI.create(SysClientFactory.URI_NODE_DBRESET.getPath()), null, null);
} catch (SysClientException e) {
log.warn("Internal error on clean up purge data: ",e.getMessage());
throw e;
}
}
}
/**
     * Restart dbsvc/geodbsvc/syssvc after repair for vApp recovery
*/
private void restartServices() {
ArrayList<String> restartedServiceNames = new ArrayList<>(serviceNames);
restartedServiceNames.add(Constants.SYSSVC_NAME);
for (String nodeId : corruptedNodes) {
for (String serviceName : restartedServiceNames) {
SysClientFactory.getSysClient(coordinator.getNodeEndpoint(nodeId)).
post(URI.create(SysClientFactory.URI_RESTART_SERVICE.getPath() + "?name=" + serviceName), null, null);
}
}
}
}