package com.emc.storageos.systemservices.impl.util; import java.util.Arrays; import java.util.Calendar; import java.util.Date; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import com.emc.storageos.coordinator.client.model.Constants; import com.emc.storageos.db.server.impl.DbRepairRunnable; import com.emc.storageos.management.jmx.recovery.DbManagerOps; import com.emc.storageos.systemservices.impl.recovery.RecoveryManager; import com.emc.vipr.model.sys.recovery.DbRepairStatus; import com.emc.vipr.model.sys.recovery.RecoveryStatus; /** * Class for handle node DB repair status combination */ public class DbRepairStatusHandler { private static final Logger log = LoggerFactory.getLogger(DbRepairStatusHandler.class); private List<String> serviceNames = Arrays.asList(Constants.DBSVC_NAME, Constants.GEODBSVC_NAME); @Autowired private RecoveryManager recoveryManager; public DbRepairStatusHandler() { } private boolean isNodeRecoveryDbRepairInProgress() { RecoveryStatus recoveryStatus = recoveryManager.queryNodeRecoveryStatus(); if (recoveryStatus != null && recoveryStatus.getStatus() != null) { return recoveryStatus.getStatus() == RecoveryStatus.Status.REPAIRING; } return false; } /** * Get node repair status(have combine db repair status and geodb repair status) * it's tricky to combine local db and geo db repair together since they can be triggered * individually, lots for workaround needed to be done to ensure it works correctly. * we set IN_PROGRESS before perform actual db repair in DbRepairRunable(before get DB_REPAIR lock) * hence we can use the IN_PROGRESS here to determine if there is other pending db repair, * so we can determine whether we can merge them together or not. For db repair triggered by scheduler, * geo db repair doesn't know if there is local db finished its work or not since IN_PROGRESS will be * set to DONE (which means geo db repair is not aware of it is triggered by restart geo service alone * or node restart), we use INTERVAL_TIME_IN_MINUTES to make the decision. * Generally we follow the below rules: * 1. node recovery: always merge the result such as: local db repair progress 50% itself, 25% will * be returned, geo db repair progress 50% itself, 75% will be returned. please * be aware of local db repair always come first. * 2. node restart: always merge the result, be aware of geo db repair by using IN_PROGRESS flag in * local db repair; be aware of local db repair by checking lastCompletionTime of * geo db repair against 3 hours * 3. restart one db service alone: if you restart db serivce alone, we will return local db repair * progress directly without any merge. * <p/> * Note: we use local db repair as the first instance to grap DB_REPAIR lock, the geo db repair is * the second one to run for simply introduction even if it's by chance to get DB_REPAIR lock based * on which one bootup first, but it doesn't affect the result. */ public DbRepairStatus getDbRepairStatus() throws Exception { DbRepairStatus repairStatus = new DbRepairStatus(); DbRepairStatus localDbState = queryDbRepairStatus(serviceNames.get(0)); DbRepairStatus geoDbState = queryDbRepairStatus(serviceNames.get(1)); boolean nodeRecovery = isNodeRecoveryDbRepairInProgress(); log.info("Query repair status of dbsvc({}) and geodbsvc({}) successfully", (localDbState == null) ? localDbState : localDbState.toString(), (geoDbState == null) ? geoDbState : geoDbState.toString()); log.info("db repair running in node recovery? {}", nodeRecovery); if (localDbState == null && geoDbState == null) { repairStatus.setStatus(DbRepairStatus.Status.NOT_STARTED); return repairStatus; } if (localDbState != null && geoDbState != null) { if (localDbState.getStatus() == DbRepairStatus.Status.IN_PROGRESS && geoDbState.getStatus() == DbRepairStatus.Status.IN_PROGRESS) { log.info("local/geo db repair are in progress both"); repairStatus = getDualProgressStatus(localDbState, geoDbState); } else if (localDbState.getStatus() == DbRepairStatus.Status.IN_PROGRESS) { log.info("local db repair is in progress"); repairStatus = getSingleProgressStatus(localDbState, geoDbState, nodeRecovery, false); } else if (geoDbState.getStatus() == DbRepairStatus.Status.IN_PROGRESS) { log.info("geo db repair is in progress"); repairStatus = getSingleProgressStatus(geoDbState, localDbState, nodeRecovery, true); } else if (localDbState.getStatus() == DbRepairStatus.Status.FAILED || geoDbState.getStatus() == DbRepairStatus.Status.FAILED) { log.info("local or geo db repair failed"); repairStatus = getFailStatus(localDbState, geoDbState); } else if (localDbState.getStatus() == DbRepairStatus.Status.SUCCESS && geoDbState.getStatus() == DbRepairStatus.Status.SUCCESS) { log.info("local and geo db repair success"); repairStatus = getSuccessStatus(localDbState, geoDbState); } } if (localDbState == null) { repairStatus = geoDbState; } else if (geoDbState == null) { repairStatus = localDbState; } log.info("Repair status is: {}", repairStatus.toString()); return repairStatus; } private DbRepairStatus getFailStatus(DbRepairStatus localDbState, DbRepairStatus geoDbState) { Date startTime; if (localDbState.getStatus() == DbRepairStatus.Status.FAILED && geoDbState.getStatus() == DbRepairStatus.Status.FAILED) { startTime = getOldestTime(localDbState.getStartTime(), geoDbState.getStartTime()); } else if (localDbState.getStatus() == DbRepairStatus.Status.FAILED) { startTime = localDbState.getStartTime(); } else { startTime = geoDbState.getStartTime(); } return new DbRepairStatus(DbRepairStatus.Status.FAILED, startTime, 100); } private DbRepairStatus getSuccessStatus(DbRepairStatus localDbState, DbRepairStatus geoDbState) { Date completionTime = null; if (localDbState.getLastCompletionTime() == null) { completionTime = geoDbState.getLastCompletionTime(); } else if (geoDbState.getLastCompletionTime() == null) { completionTime = localDbState.getLastCompletionTime(); } else { completionTime = getLatestTime(localDbState.getLastCompletionTime(), geoDbState.getLastCompletionTime()); } Date startTime = getOldestTime(localDbState.getStartTime(), geoDbState.getStartTime()); return new DbRepairStatus(DbRepairStatus.Status.SUCCESS, startTime, completionTime, 100); } /* * it's tricky to check isNodeRecovery and isGeoDb, we need this to * merge progress in different way between node recovery and normal db repair */ private DbRepairStatus getSingleProgressStatus(DbRepairStatus status, DbRepairStatus otherStatus, boolean isNodeRecovery, boolean isGeoDb) { Date completionTime = null; if (status.getLastCompletionTime() != null && otherStatus.getLastCompletionTime() != null) { completionTime = getLatestTime(status.getLastCompletionTime(), otherStatus.getLastCompletionTime()); } int progress = status.getProgress(); Date startTime = status.getStartTime(); if (isNodeRecovery) { progress = isGeoDb ? (status.getProgress() + 100) / 2 : status.getProgress() / 2; startTime = isGeoDb ? otherStatus.getStartTime() : startTime; } else if (needMergeWith(otherStatus.getLastCompletionTime())) { progress = (status.getProgress() + 100) / 2; startTime = otherStatus.getStartTime(); } return new DbRepairStatus(DbRepairStatus.Status.IN_PROGRESS, startTime, completionTime, progress); } /* * we check if db repair need to merge with the other(the other means that geo db if it's a local db) * we use 3 hours as the minimum interval, so we view the other as the whole progress of db repair if * happened within 3 hours. */ private boolean needMergeWith(Date otherCompletionTime) { if (otherCompletionTime == null) { return false; } Calendar cal = Calendar.getInstance(); cal.add(Calendar.MINUTE, -DbRepairRunnable.INTERVAL_TIME_IN_MINUTES); return cal.getTime().before(otherCompletionTime); } private DbRepairStatus getDualProgressStatus(DbRepairStatus localStatus, DbRepairStatus geoStatus) { Date completionTime = null; if (localStatus.getLastCompletionTime() != null && geoStatus.getLastCompletionTime() != null) { completionTime = getLatestTime(localStatus.getLastCompletionTime(), geoStatus.getLastCompletionTime()); } Date startTime = getOldestTime(localStatus.getStartTime(), geoStatus.getStartTime()); int progress = (localStatus.getProgress() + geoStatus.getProgress()) / 2; return new DbRepairStatus(DbRepairStatus.Status.IN_PROGRESS, startTime, completionTime, progress); } private Date getOldestTime(Date one, Date another) { return one.before(another) ? one : another; } private Date getLatestTime(Date one, Date another) { return one.after(another) ? one : another; } /** * Query repair status of dbsvc or geodbsvc from DB */ private DbRepairStatus queryDbRepairStatus(String svcName) throws Exception { int progress = -1; DbRepairStatus.Status status = null; Date startTime = null; Date endTime = null; log.info("Try to get repair status of {}", svcName); try (DbManagerOps dbManagerOps = new DbManagerOps(svcName)) { DbRepairStatus repairState = dbManagerOps.getLastRepairStatus(false); if (repairState != null) { log.info("Current repair status of {} is: {}", svcName, repairState.toString()); progress = repairState.getProgress(); status = repairState.getStatus(); startTime = repairState.getStartTime(); endTime = repairState.getLastCompletionTime(); } if (endTime != null) { return repairState; } repairState = dbManagerOps.getLastSucceededRepairStatus(false); if (repairState != null) { log.info("Last successful repair status of {} is: {}", svcName, repairState.toString()); progress = (progress == -1) ? repairState.getProgress() : progress; status = (status == null) ? repairState.getStatus() : status; startTime = (startTime == null) ? repairState.getStartTime() : startTime; endTime = (endTime == null) ? repairState.getLastCompletionTime() : endTime; } } if (status != null) { return new DbRepairStatus(status, startTime, endTime, progress); } return null; } }