/*
* Copyright (c) 2014 EMC Corporation
* All Rights Reserved
*/
package com.emc.storageos.systemservices.impl.jobs.backupscheduler;
import com.emc.storageos.management.backup.BackupConstants;
import com.emc.storageos.management.backup.exceptions.BackupException;
import com.emc.storageos.security.audit.AuditLogManager;
import com.emc.storageos.services.OperationTypeEnum;
import com.emc.storageos.services.util.Strings;
import com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Set;
/**
 * Executes the scheduled backup jobs: creates new backups when the configured
 * schedule window arrives, and reclaims (deletes) expired or vanished backups.
 */
public class BackupExecutor {
    private static final Logger log = LoggerFactory.getLogger(BackupExecutor.class);

    // Shared scheduler configuration: enable flag, interval settings, retention policy,
    // and the persisted ordered set of retained backup tags (supports first()/last()).
    private final SchedulerConfig cfg;
    // Facade used to create/delete backups and to query backup tags across the cluster.
    private final BackupScheduler cli;

    public BackupExecutor(SchedulerConfig cfg, BackupScheduler cli) {
        this.cfg = cfg;
        this.cli = cli;
    }

    /**
     * Runs one scheduled-creation pass: prunes retained tags whose backups no longer
     * exist on the cluster (only while the cluster is stable), then creates a new backup
     * if the current schedule window calls for one. No-op when the scheduler is
     * disabled. All work is done under the scheduler lock; failures inside the pass are
     * logged rather than propagated.
     *
     * @throws Exception declared for lock plumbing; in practice errors are caught and logged
     */
    public void create() throws Exception {
        if (this.cfg.schedulerEnabled) {
            try (AutoCloseable lock = this.cfg.lock()) {
                this.cfg.reload();
                if (this.cfg.isClusterUpgradable()) {
                    log.info("Start to remove deleted backups");
                    removeDeletedBackups();
                } else {
                    log.info("Skip to remove deleted backups as cluster is not stable");
                }
                log.info("Start to do backup job");
                if (shouldDoBackup()) {
                    doBackup();
                }
            } catch (Exception e) {
                log.error("Fail to run schedule backup", e);
            }
        }
    }

    /**
     * Drops tags from the retained-backup list whose backup files can no longer be found
     * anywhere on the cluster, persisting the list if anything was removed. If listing
     * cluster backups fails, the error is logged and pruning is skipped for this pass.
     */
    private void removeDeletedBackups() {
        Set<String> clusterTags;
        try {
            // NOTE(review): the boolean presumably selects whether down nodes are ignored
            // (deleteExpiredBackups passes true for that) -- confirm against
            // BackupScheduler.getClusterBackupTags before changing this flag.
            clusterTags = this.cli.getClusterBackupTags(false);
        } catch (Exception e) {
            log.error("Failed to list backups from all nodes", e);
            return;
        }

        // Iterate over a snapshot so we can remove from the live set while looping.
        boolean modified = false;
        for (String tag : new ArrayList<>(this.cfg.retainedBackups)) {
            if (!clusterTags.contains(tag)) {
                log.info("Remove tag from retained backup {}", tag);
                this.cfg.retainedBackups.remove(tag);
                modified = true;
            }
        }
        if (modified) {
            this.cfg.persist();
        }
    }

    /**
     * Decides whether a backup should be created now. True only when the expected run
     * time of the current schedule window has passed and no backup created in this
     * window at or after that time already exists (this avoids duplicate creation while
     * still handling reconfiguration). Blocks, sleeping in intervals, while the cluster
     * temporarily disallows backups.
     *
     * @return true if a new backup should be created now
     * @throws ParseException if the newest retained backup tag cannot be parsed into a date
     * @throws InterruptedException if interrupted while waiting for backup to be allowed
     */
    private boolean shouldDoBackup() throws ParseException, InterruptedException {
        Calendar now = this.cfg.now();
        ScheduleTimeRange curTimeRange = new ScheduleTimeRange(this.cfg.interval, this.cfg.intervalMultiple, now);
        Date expected = curTimeRange.minuteOffset(this.cfg.startOffsetMinutes);
        Date nowDate = now.getTime();
        log.info("Now is {} and expected run time is {}",
                ScheduledBackupTag.toTimestamp(nowDate),
                ScheduledBackupTag.toTimestamp(expected));

        // if now is before target time
        if (nowDate.before(expected)) {
            return false;
        }

        Date lastBackupDateTime = this.cfg.retainedBackups.isEmpty() ? null :
                ScheduledBackupTag.parseBackupTag(this.cfg.retainedBackups.last());
        log.info("Last backup is {}, expected is {}", lastBackupDateTime == null ? "N/A" :
                ScheduledBackupTag.toTimestamp(lastBackupDateTime),
                ScheduledBackupTag.toTimestamp(expected));

        // If current time range already has one backup which was created at or after the expected time,
        // no need create again. This check could avoid repeated creation while also considered reconfigure scenario.
        if (lastBackupDateTime != null && curTimeRange.contains(lastBackupDateTime) && !lastBackupDateTime.before(expected)) {
            return false;
        }

        while (!this.cfg.isAllowBackup()) {
            log.warn("Wait {} ms for the cluster is not allowed to do backup now.",
                    BackupConstants.SCHEDULER_SLEEP_TIME_FOR_UPGRADING);
            Thread.sleep(BackupConstants.SCHEDULER_SLEEP_TIME_FOR_UPGRADING);
        }
        return true;
    }

    /**
     * Creates a backup tagged with the current time, retrying up to
     * {@code BackupConstants.BACKUP_RETRY_COUNT} additional times on
     * {@link InternalServerErrorException}. On success the tag is added to the retained
     * list and persisted. If every attempt fails (or the schedule window closes after a
     * failure), a failure notification carrying the last error is sent to root.
     *
     * @throws Exception on non-retryable errors from backup creation, or if interrupted
     */
    private void doBackup() throws Exception {
        String tag = null;
        Exception lastException = null;
        int retryCount = 0;

        // Re-check the schedule before every attempt: if the window closes while we were
        // retrying, there is no point creating a late backup.
        while (shouldDoBackup()) {
            try {
                Date backupTime = this.cfg.now().getTime();
                tag = ScheduledBackupTag.toBackupTag(backupTime,
                        this.cfg.getSoftwareVersion(), this.cfg.nodeCount);
                log.info("Starting backup using tag {} (retry #{})", tag, retryCount);
                this.cli.createBackup(tag);
                this.cfg.retainedBackups.add(tag);
                this.cfg.persist();
                return;
            } catch (InternalServerErrorException e) {
                // Retryable server-side failure; any other exception propagates to the caller.
                lastException = e;
                log.error(String.format("Exception when creating backup %s (retry #%d)",
                        tag, retryCount), e);
            }
            if (retryCount == BackupConstants.BACKUP_RETRY_COUNT) {
                break;
            }
            retryCount++;
            Thread.sleep(BackupConstants.SCHEDULER_SLEEP_TIME_FOR_UPGRADING);
        }

        if (lastException != null) {
            this.cfg.sendBackupFailureToRoot(tag, lastException.getMessage());
        }
    }

    /**
     * Runs one retention pass: trims the retained-backup list down to the configured
     * number of copies and deletes de-listed scheduled backups from the cluster. No-op
     * when the scheduler is disabled; failures inside the pass are logged rather than
     * propagated.
     *
     * @throws Exception declared for lock plumbing; in practice errors are caught and logged
     */
    public void reclaim() throws Exception {
        if (this.cfg.schedulerEnabled) {
            try (AutoCloseable lock = this.cfg.lock()) {
                this.cfg.reload();
                log.info("Start to delete expired backups");
                deleteExpiredBackups();
            } catch (Exception e) {
                // Was a copy-paste of create()'s message; this is the reclaim pass.
                log.error("Fail to delete expired backups", e);
            }
        }
    }

    /**
     * Enforces the retention policy: removes the oldest tags from the retained list
     * until at most {@code copiesToKeep} remain (persisting the change), then deletes
     * from the cluster any scheduled backup whose tag is no longer retained. Individual
     * delete failures are logged and do not stop the rest of the pass.
     *
     * @throws Exception if listing cluster backup tags fails
     */
    private void deleteExpiredBackups() throws Exception {
        // Remove out-of-date backup tags from master list
        if (this.cfg.retainedBackups.size() > this.cfg.copiesToKeep) {
            log.info("Found backups {} in retain list, keeping last {}",
                    StringUtils.join(this.cfg.retainedBackups, ','),
                    this.cfg.copiesToKeep);
            do {
                // first() is the oldest tag because the set is ordered by tag timestamp.
                this.cfg.retainedBackups.remove(this.cfg.retainedBackups.first());
            } while (this.cfg.retainedBackups.size() > this.cfg.copiesToKeep);
            this.cfg.persist();
        }

        // Actually delete backups from disk that not in master list
        // NOTE: Down nodes are ignored, because once quorum nodes agree a backup is deleted, it is deleted even it still exists
        // in minority nodes.
        for (String tag : ScheduledBackupTag.pickScheduledBackupTags(this.cli.getClusterBackupTags(true))) {
            if (!this.cfg.retainedBackups.contains(tag)) {
                try {
                    this.cli.deleteBackup(tag);
                } catch (InternalServerErrorException e) {
                    log.error("Failed to delete scheduled backup from cluster", e);
                }
            }
        }
    }
}