/* * Copyright (c) 2014 EMC Corporation * All Rights Reserved */ package com.emc.storageos.systemservices.impl.jobs.backupscheduler; import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import com.emc.storageos.db.client.DbClient; import com.emc.storageos.management.backup.BackupConstants; import com.emc.storageos.management.backup.BackupFileSet; import com.emc.storageos.security.audit.AuditLogManager; import com.emc.storageos.services.OperationTypeEnum; import com.emc.storageos.services.util.NamedScheduledThreadPoolExecutor; import com.emc.storageos.systemservices.impl.jobs.common.JobConstants; import com.emc.storageos.systemservices.impl.property.Notifier; import com.emc.storageos.systemservices.impl.resource.BackupService; import com.emc.storageos.systemservices.impl.upgrade.CoordinatorClientExt; import com.emc.storageos.systemservices.impl.util.SkipOutputStream; import com.emc.vipr.model.sys.backup.BackupOperationStatus; import org.apache.curator.framework.recipes.leader.LeaderSelector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import com.emc.storageos.coordinator.client.service.CoordinatorClient; import com.emc.storageos.coordinator.client.service.DrUtil; import com.emc.storageos.coordinator.client.service.impl.LeaderSelectorListenerImpl; import com.emc.storageos.coordinator.common.Service; import com.emc.storageos.coordinator.common.impl.ZkPath; import com.emc.storageos.db.client.model.EncryptionProvider; import com.emc.storageos.management.backup.BackupOps; public class BackupScheduler extends Notifier implements Runnable, Callable<Object>, JobConstants { private static final Logger log = LoggerFactory.getLogger(BackupScheduler.class); private static final long SCHEDULE_BACKUP_RETRY_OFFSITE = 5 * 60 * 1000L; private volatile boolean isLeader = false; private static volatile BackupScheduler singletonInstance; @Autowired private CoordinatorClientExt coordinator; @Autowired private DbClient dbClient; @Autowired @Qualifier("encryptionProvider") private EncryptionProvider encryptionProvider; @Autowired private BackupOps backupOps; @Autowired private BackupService backupService; @Autowired @Qualifier("backupFolder") private File backupFolder; @Autowired private AuditLogManager auditMgr; @Autowired private Service serviceinfo; @Autowired private DrUtil drUtil; private SchedulerConfig cfg; private BackupExecutor backupExec; private UploadExecutor uploadExec; private ScheduledExecutorService service; private ScheduledFuture<?> scheduledTask; public BackupScheduler() { } public SchedulerConfig getCfg() { if (cfg.uploadUrl == null) { try { cfg.reload(); }catch(Exception e) { log.error("Failed to reload cfg e=", e); throw new RuntimeException(e); } } return cfg; } public static BackupScheduler getSingletonInstance() { return singletonInstance; } public UploadExecutor getUploadExecutor() { if (this.uploadExec == null) { this.uploadExec = new UploadExecutor(this.cfg, this); } return this.uploadExec; } private void cancelScheduledTask() { this.scheduledTask.cancel(false); log.info("Previous scheduled task cancelled"); this.scheduledTask = null; } /** * All scheduling adjustment should be done by scheduling this to scheduler thread pool, * so this is synchronized with scheduled executions. */ @Override public Object call() throws Exception { log.info("Starting to configure scheduler"); if (drUtil.isStandby()) { log.info("Current site is standby, disable BackupScheduler"); return null; } if (this.scheduledTask != null) { cancelScheduledTask(); } try { this.cfg.reload(); } catch (ParseException e) { log.error("Failed to initialize", e); return null; } if (this.cfg.uploadUrl == null && !this.cfg.schedulerEnabled) { log.info("External upload server is not configured and scheduler is disabled, nothing to do, quiting..."); return null; } log.info("Enabling scheduler"); this.backupExec = new BackupExecutor(this.cfg, this); this.uploadExec = new UploadExecutor(this.cfg, this); // Run once immediately in case we're crashed previously run(); return null; } private void scheduleNextRun() { Date coming = getNextScheduledRunTime(); long millisToSleep = coming.getTime() - System.currentTimeMillis(); this.scheduledTask = this.service.schedule((Runnable) this, millisToSleep, TimeUnit.MILLISECONDS); } public Date getNextScheduledRunTime() { SchedulerConfig schedulerConfig = getCfg(); Calendar now = schedulerConfig.now(); ScheduleTimeRange cur = new ScheduleTimeRange(schedulerConfig.interval, schedulerConfig.intervalMultiple, now); Date coming = cur.minuteOffset(schedulerConfig.startOffsetMinutes); if (coming.before(now.getTime())) { coming = cur.next().minuteOffset(schedulerConfig.startOffsetMinutes); } log.info("schedule next backup run at {}", coming); return coming; } @Override public void run() { try { log.info("Backup scheduler thread goes live"); this.cfg.reload(); // If we made any new backup, notify uploader thread to perform upload this.backupExec.create(); this.uploadExec.upload(); this.backupExec.reclaim(); } catch (Exception e) { log.error("Exception occurred in scheduler", e); if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); return; } } // Will retry every 5 min if schedule next run fail while (isLeader && !service.isShutdown()) { try { scheduleNextRun(); break; } catch (Exception e) { log.error("Exception occurred when schedule next run", e); try { Thread.sleep(SCHEDULE_BACKUP_RETRY_OFFSITE); } catch (InterruptedException ex) { log.debug("Interrupt exception, ignoring..."); } } } } public void auditBackup(OperationTypeEnum auditType, String operationalStatus, String description, Object... descparams) { this.auditMgr.recordAuditLog(null, null, BackupConstants.EVENT_SERVICE_TYPE, auditType, System.currentTimeMillis(), operationalStatus, description, descparams); } public void createBackup(String tag) { this.backupService.createBackup(tag, true); } public void deleteBackup(String tag) { this.backupService.deleteBackup(tag); } public void updateBackupUploadStatus(String backupName, long operationTime, boolean success) { log.info(String.format("Updating backup upload status(name=%s, time=%s, success=%s) to ZK", backupName, operationTime, success)); BackupOperationStatus backupOperationStatus = backupOps.queryBackupOperationStatus(); backupOperationStatus.setLastUpload(backupName, operationTime, (success) ? BackupOperationStatus.OpMessage.OP_SUCCESS : BackupOperationStatus.OpMessage.OP_FAILED); backupOps.persistBackupOperationStatus(backupOperationStatus); } /** * This method will scan this node's local /data/backup folder to get a rough list of * available backups * * @return */ public Set<String> getNodeBackupTags() { File[] files = this.backupFolder.listFiles(); Set<String> tags = new HashSet<>(files.length); for (File file : files) { if (file.isDirectory()) { tags.add(file.getName()); } } return tags; } /** * Get a list of backup tags. A tag could represent an ongoing backup that's not fully completed. * * @param ignoreDownNodes * @return */ public Set<String> getClusterBackupTags(boolean ignoreDownNodes) { return this.backupOps.listRawBackup(ignoreDownNodes).uniqueTags(); } public BackupFileSet getDownloadFiles(String tag) { return this.backupService.getDownloadList(tag); } public void uploadTo(BackupFileSet files, long offset, OutputStream uploadStream) throws IOException { this.backupService.collectData(files, new SkipOutputStream(uploadStream, offset)); } public String generateZipFileName(String tag, BackupFileSet files) { Set<String> availableNodes = files.uniqueNodes(); Set<String> nodeIds = this.coordinator.getCoordinatorClient().getInetAddessLookupMap().getControllerNodeIPLookupMap().keySet(); String[] allNodes = nodeIds.toArray(new String[nodeIds.size()]); Arrays.sort(allNodes); int backupNodeCount = 0; for (int i = 0; i < allNodes.length; i++) { if (availableNodes.contains(allNodes[i])) { backupNodeCount++; } } String drSiteId = drUtil.getLocalSite().getUuid(); // Remove all non alphanumeric characters drSiteId = drSiteId.replaceAll("^[^a-zA-Z0-9]+|[^a-zA-Z0-9]+$", ""); return UploadExecutor.toZipFileName(tag, nodeIds.size(), backupNodeCount, drSiteId); } public List<String> getDescParams(final String tag) { final String nodeId = this.serviceinfo.getNodeId(); return new ArrayList<String>() { { add(tag); add(nodeId); add(drUtil.getLocalSite().getName()); } }; } /** * Called when related system properties are changed, and we need to reschedule */ @Override public void doNotify() throws Exception { log.info("Received notification that related system properties are changed"); ScheduledExecutorService svc = service; if (svc != null) { try { svc.schedule((Callable<Object>) this, 0L, TimeUnit.MICROSECONDS); } catch (RejectedExecutionException ex) { if (svc.isShutdown()) { log.info("Property change notification ignored because this node is no longer backup leader."); } else { throw ex; } } } else { log.info("Property change notification ignored because this node is no longer backup leader."); } } /** * Called when initializing Spring bean, make sure only one node(leader node) performs backup job * */ public void startLeaderSelector() throws InterruptedException { while (!coordinator.getCoordinatorClient().isConnected()) { log.info("waiting for connecting to zookeeper"); try { Thread.sleep(BackupConstants.BACKUP_WAINT_BEFORE_RETRY_ZK_CONN); } catch (InterruptedException e) { log.warn("Exception while sleeping,ignore", e); throw e; } } singletonInstance = this; if (drUtil.isStandby()) { log.info("Current site is standby, disable BackupScheduler"); return; } this.cfg = new SchedulerConfig(coordinator, this.encryptionProvider, this.dbClient); LeaderSelector leaderSelector = coordinator.getCoordinatorClient().getLeaderSelector(coordinator.getCoordinatorClient().getSiteId(), BackupConstants.BACKUP_LEADER_PATH, new BackupLeaderSelectorListener()); leaderSelector.autoRequeue(); leaderSelector.start(); } private class BackupLeaderSelectorListener extends LeaderSelectorListenerImpl { @Override protected void startLeadership() throws Exception { log.info("This node is selected as backup leader, starting Backup Scheduler"); isLeader = true; service = new NamedScheduledThreadPoolExecutor("BackupScheduler", 1); ((NamedScheduledThreadPoolExecutor) service).setExecuteExistingDelayedTasksAfterShutdownPolicy(false); service.schedule((Callable<Object>) BackupScheduler.this, 0L, TimeUnit.MICROSECONDS); } @Override protected void stopLeadership() { log.info("give up leader, stop backup scheduler"); isLeader = false; // Stop scheduler thread. service.shutdown(); // Never block here. It may block all other node listeners } } }