package com.sungardas.enhancedsnapshots.tasks.executors;
import com.amazonaws.services.ec2.model.Snapshot;
import com.amazonaws.services.ec2.model.Volume;
import com.amazonaws.services.ec2.model.VolumeState;
import com.amazonaws.services.ec2.model.VolumeType;
import com.sungardas.enhancedsnapshots.aws.dynamodb.model.BackupEntry;
import com.sungardas.enhancedsnapshots.aws.dynamodb.model.TaskEntry;
import com.sungardas.enhancedsnapshots.aws.dynamodb.repository.BackupRepository;
import com.sungardas.enhancedsnapshots.aws.dynamodb.repository.TaskRepository;
import com.sungardas.enhancedsnapshots.components.ConfigurationMediator;
import com.sungardas.enhancedsnapshots.dto.CopyingTaskProgressDto;
import com.sungardas.enhancedsnapshots.enumeration.TaskProgress;
import com.sungardas.enhancedsnapshots.exception.DataAccessException;
import com.sungardas.enhancedsnapshots.exception.EnhancedSnapshotsException;
import com.sungardas.enhancedsnapshots.exception.EnhancedSnapshotsInterruptedException;
import com.sungardas.enhancedsnapshots.exception.EnhancedSnapshotsTaskInterruptedException;
import com.sungardas.enhancedsnapshots.service.*;
import com.sungardas.enhancedsnapshots.util.SystemUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.concurrent.TimeUnit;
import static com.sungardas.enhancedsnapshots.aws.dynamodb.model.TaskEntry.TaskEntryStatus.RUNNING;
import static com.sungardas.enhancedsnapshots.enumeration.TaskProgress.FAIL_CLEANING;
import static com.sungardas.enhancedsnapshots.enumeration.TaskProgress.INTERRUPTED_CLEANING;
@Service("awsRestoreVolumeTaskExecutor")
public class AWSRestoreVolumeStrategyTaskExecutor extends AbstractAWSVolumeTaskExecutor {
/** Prefix of the resource name assigned to volumes produced by a restore. */
public static final String RESTORED_NAME_PREFIX = "Restore of ";
private static final Logger LOG = LogManager.getLogger(AWSRestoreVolumeStrategyTaskExecutor.class);
// Saves task entries as the restore advances and deletes canceled ones.
@Autowired
private TaskRepository taskRepository;
// Looks up the backup entry to restore, by file name or by volume id.
@Autowired
private BackupRepository backupRepository;
// Resolves the snapshot id recorded for a volume.
@Autowired
private SnapshotService snapshotService;
// Volume, snapshot and tagging operations against AWS.
@Autowired
private AWSCommunicationService awsCommunication;
// Detects the device name of the attached temp volume and copies backup data onto it.
@Autowired
private StorageService storageService;
// Supplies the cluster-mode flag and the SDFS mount point.
@Autowired
private ConfigurationMediator configurationMediator;
// Publishes progress messages and percentages for the running task.
@Autowired
private NotificationService notificationService;
// Used to mark the task as complete.
@Autowired
private TaskService taskService;
// Sends mails about success, errors and cancellations.
@Autowired
private MailService mailService;
/**
 * Runs a restore task.
 * <p>
 * A task whose stored progress is already one of the cleaning states only has its cleanup
 * re-run. Otherwise the task is marked as running and restored either directly from the
 * volume's snapshot (a snapshot id is known and no source file name is set) or from a
 * backup file. Interruptions and failures are routed to the matching cleaning step.
 *
 * @param taskEntry the restore task to execute
 */
@Override
public void execute(TaskEntry taskEntry) {
    // Resume an unfinished cleanup instead of starting the restore again.
    switch (taskEntry.progress()) {
        case FAIL_CLEANING:
            failCleaningStep(taskEntry, new EnhancedSnapshotsException("Restore failed"));
            return;
        case INTERRUPTED_CLEANING:
            interruptedCleaningStep(taskEntry);
            return;
        default:
            break;
    }

    try {
        LOG.info("Starting restore process for volume {}", taskEntry.getVolume());
        LOG.info("{} task state was changed to 'in progress'", taskEntry.getId());
        taskEntry.setStatus(RUNNING.getStatus());
        taskRepository.save(taskEntry);
        notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Starting restore", 0);

        final String requestedFile = taskEntry.getSourceFileName();
        final boolean snapshotKnown = snapshotService.getSnapshotIdByVolumeId(taskEntry.getVolume()) != null;
        final boolean restoreFromHistory = !snapshotKnown || !(requestedFile == null || requestedFile.isEmpty());

        if (restoreFromHistory) {
            LOG.info("Task was defined as restore from history.");
            notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Restore from S3", 0);
            restoreFromBackupFile(taskEntry);
        } else {
            LOG.info("Task was defined as restore from snapshot.");
            notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Restore from Snapshot", 5);
            restoreFromSnapshot(taskEntry);
        }
        completeTask(taskEntry);
    } catch (EnhancedSnapshotsTaskInterruptedException e) {
        // The task itself was canceled.
        interruptedCleaningStep(taskEntry);
    } catch (EnhancedSnapshotsInterruptedException e) {
        // In cluster mode the interruption is intentionally left without cleanup here
        // (presumably so the task can be picked up elsewhere - TODO confirm).
        if (configurationMediator.isClusterMode()) {
            return;
        }
        interruptedCleaningStep(taskEntry);
    } catch (Exception e) {
        failCleaningStep(taskEntry, e);
    }
}
/**
 * Restores the volume directly from the snapshot recorded for it.
 * <p>
 * Looks up the snapshot id for the task's volume, verifies the snapshot exists, creates a
 * new volume from it in the task's availability zone, names and tags that volume, and
 * finally sets the task progress to {@code DONE}.
 * <p>
 * Cancellation is deliberately NOT handled locally. Previously this method caught
 * {@link EnhancedSnapshotsTaskInterruptedException}, deleted the task and then returned
 * normally, which made {@code execute} continue to {@code completeTask} and report a
 * canceled restore as successful (and touched the progress of an already deleted task).
 * The exception now propagates to {@code execute}, whose handler performs the
 * cancellation cleanup exactly once.
 *
 * @param taskEntry the restore task
 * @throws DataAccessException if no existing snapshot is found for the volume
 */
private void restoreFromSnapshot(TaskEntry taskEntry) {
    notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Restore from snapshot", 20);
    String targetZone = taskEntry.getAvailabilityZone();
    String volumeId = taskEntry.getVolume();
    String snapshotId = snapshotService.getSnapshotIdByVolumeId(volumeId);
    // check that snapshot exists
    if (snapshotId == null || !awsCommunication.snapshotExists(snapshotId)) {
        LOG.error("Failed to find snapshot for volume {} ", volumeId);
        throw new DataAccessException("Backup for volume: " + volumeId + " was not found");
    }
    // Last cancellation point: nothing has been created in AWS yet, so a cancel here
    // leaves no volume behind.
    checkThreadInterruption(taskEntry);
    notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Creating volume from snapshot", 50);
    Volume volume = awsCommunication.createVolumeFromSnapshot(snapshotId, targetZone,
            VolumeType.fromValue(taskEntry.getRestoreVolumeType()), taskEntry.getRestoreVolumeIopsPerGb());
    awsCommunication.setResourceName(volume.getVolumeId(), RESTORED_NAME_PREFIX + taskEntry.getVolume());
    awsCommunication.addTag(volume.getVolumeId(), "Created by", "Enhanced Snapshots");
    setProgress(taskEntry, TaskProgress.DONE);
}
/**
 * Restores a volume from a backup file through a temporary volume.
 * <p>
 * The method is a resumable state machine driven by {@code taskEntry.progress()}:
 * <ol>
 *   <li>Select the backup entry: the one named by the task's source file, or the most
 *       recently created backup of the task's volume.</li>
 *   <li>If a previous run stopped while the temp volume was being prepared or filled,
 *       discard that volume (best effort) and restart from {@code STARTED}.</li>
 *   <li>Create, wait for, attach, fill and detach the temp volume. The {@code switch}
 *       cases fall through on purpose so that execution resumes at the stored step and
 *       then runs every later step.</li>
 *   <li>If the temp volume is not in the target availability zone, snapshot it, create
 *       the final volume from that snapshot in the target zone and delete the temp
 *       resources; otherwise the temp volume itself becomes the restored volume.</li>
 * </ol>
 *
 * @param taskEntry the restore task
 * @throws DataAccessException  if no backup entry can be found, or the temp volume cannot
 *                              be resolved when it is needed
 * @throws IOException          propagated from the copying step
 * @throws InterruptedException propagated from the copying step
 */
private void restoreFromBackupFile(TaskEntry taskEntry) throws IOException, InterruptedException {
    Volume tempVolume = taskEntry.getTempVolumeId() != null ? awsCommunication.getVolume(taskEntry.getTempVolumeId()) : null;

    BackupEntry backupEntry;
    String sourceFileName = taskEntry.getSourceFileName();
    if (sourceFileName != null && !sourceFileName.isEmpty()) {
        backupEntry = backupRepository.findOne(sourceFileName);
        // NOTE(review): assumes the repository returns null for an unknown file name.
        // Fail with a clear message instead of a later NullPointerException.
        if (backupEntry == null) {
            throw new DataAccessException("Backup " + sourceFileName + " was not found");
        }
    } else {
        // Latest backup of the volume; an empty history is reported explicitly rather
        // than as a bare NoSuchElementException.
        backupEntry = backupRepository.findByVolumeId(taskEntry.getVolume())
                .stream()
                .max((e1, e2) -> e1.getTimeCreated().compareTo(e2.getTimeCreated()))
                .orElseThrow(() -> new DataAccessException(
                        "Backup for volume: " + taskEntry.getVolume() + " was not found"));
    }

    // Recovery of a previously started run.
    switch (taskEntry.progress()) {
        case ATTACHING_VOLUME:
        case CREATING_TEMP_VOLUME:
        case WAITING_TEMP_VOLUME:
        case COPYING: {
            // The temp volume is in an unknown state: drop it and start over.
            // Both calls are best effort - the volume may not exist or not be attached.
            try {
                notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Detaching temp volume", 50);
                detachingTempVolumeStep(taskEntry);
            } catch (Exception e) {
                LOG.warn("Failed to detach stale temp volume {}, continuing", taskEntry.getTempVolumeId(), e);
            }
            try {
                notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Deleting temp volume", 20);
                deletingTempVolumeStep(taskEntry);
            } catch (Exception e) {
                LOG.warn("Failed to delete stale temp volume {}, continuing", taskEntry.getTempVolumeId(), e);
            }
            setProgress(taskEntry, TaskProgress.STARTED);
            break;
        }
        case CREATING_SNAPSHOT: {
            // A snapshot was already requested: wait for it instead of creating another.
            if (taskEntry.getTempSnapshotId() != null) {
                setProgress(taskEntry, TaskProgress.WAITING_SNAPSHOT);
            }
            break;
        }
        default:
            break;
    }

    // Temp volume life cycle; every case intentionally falls through to the next step.
    switch (taskEntry.progress()) {
        case NONE:
        case STARTED:
            setProgress(taskEntry, TaskProgress.STARTED);
            // fall through
        case CREATING_TEMP_VOLUME: {
            notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Creating temp volume", 10);
            creationTempVolumeStep(taskEntry, backupEntry);
        }
        // fall through
        case WAITING_TEMP_VOLUME: {
            notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Waiting temp volume", 15);
            tempVolume = waitingTempVolumeStep(taskEntry);
        }
        // fall through
        case ATTACHING_VOLUME: {
            notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Attaching temp volume", 20);
            tempVolume = attachingVolumeStep(taskEntry, tempVolume);
        }
        // fall through
        case COPYING: {
            copyingStep(taskEntry, tempVolume, backupEntry);
        }
        // fall through
        case DETACHING_TEMP_VOLUME: {
            notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Detaching temp volume", 65);
            detachingTempVolumeStep(taskEntry);
        }
        break;
        default:
            break;
    }

    // When resuming at a later step the temp volume comes from the initial lookup,
    // which may have yielded nothing.
    if (tempVolume == null) {
        throw new DataAccessException("Temporary volume " + taskEntry.getTempVolumeId()
                + " was not found for task " + taskEntry.getId());
    }

    if (!tempVolume.getAvailabilityZone().equals(taskEntry.getAvailabilityZone())) {
        //move to target availability zone; cases intentionally fall through
        switch (taskEntry.progress()) {
            case DETACHING_TEMP_VOLUME:
            case CREATING_SNAPSHOT: {
                notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Creating snapshot", 70);
                creatingTempSnapshotStep(taskEntry);
            }
            // fall through
            case WAITING_SNAPSHOT: {
                notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Waiting snapshot", 75);
                waitingTempSnapshotStep(taskEntry);
            }
            // fall through
            case MOVE_TO_TARGET_ZONE: {
                notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Moving to target Zone", 80);
                Volume volumeToRestore = moveToTargetZoneStep(taskEntry);
                awsCommunication.setResourceName(volumeToRestore.getVolumeId(), RESTORED_NAME_PREFIX + backupEntry.getFileName());
                awsCommunication.addTag(volumeToRestore.getVolumeId(), "Created by", "Enhanced Snapshots");
            }
            // fall through
            case DELETING_TEMP_VOLUME: {
                notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Deleting temp volume", 85);
                deletingTempVolumeStep(taskEntry);
            }
            // fall through
            case DELETING_TEMP_SNAPSHOT: {
                notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Deleting temp snapshot", 90);
                deletingTempSnapshotStep(taskEntry);
            }
            break;
            default:
                break;
        }
    } else {
        //in case availability zone is the same we do not need temp volume
        awsCommunication.deleteTemporaryTag(tempVolume.getVolumeId());
        awsCommunication.setResourceName(tempVolume.getVolumeId(), RESTORED_NAME_PREFIX + backupEntry.getFileName());
        awsCommunication.addTag(tempVolume.getVolumeId(), "Created by", "Enhanced Snapshots");
    }
}
/**
 * Creates the temporary volume the backup will be copied onto.
 * <p>
 * The volume is sized from the backup entry and created with the task's temp volume
 * type (IO1 volumes additionally use the task's IOPS-per-GiB value). The new volume id
 * is stored on the task and saved <em>immediately</em> after creation, before the next
 * interruption check and before tagging. Previously the id was saved last, so a
 * cancellation or a tagging failure left a volume that the cleaning steps could not
 * find and therefore never deleted.
 *
 * @param taskEntry   the restore task; receives the temp volume id
 * @param backupEntry the backup being restored; supplies size and file name
 * @return the newly created temporary volume
 */
private Volume creationTempVolumeStep(TaskEntry taskEntry, BackupEntry backupEntry) {
    checkThreadInterruption(taskEntry);
    setProgress(taskEntry, TaskProgress.CREATING_TEMP_VOLUME);
    LOG.info("Used backup record: {}", backupEntry.getFileName());
    int size = Integer.parseInt(backupEntry.getSizeGiB());
    checkThreadInterruption(taskEntry);

    // creating temporary volume
    Volume tempVolume;
    // Constant-first comparison: a missing temp volume type no longer causes a
    // NullPointerException here but is rejected by VolumeType.fromValue below.
    if (VolumeType.Io1.toString().equals(taskEntry.getTempVolumeType())) {
        tempVolume = awsCommunication.createIO1Volume(size, taskEntry.getTempVolumeIopsPerGb());
    } else {
        tempVolume = awsCommunication.createVolume(size, VolumeType.fromValue(taskEntry.getTempVolumeType()));
    }

    // Record the volume before anything else can fail so cleanup can always delete it.
    taskEntry.setTempVolumeId(tempVolume.getVolumeId());
    taskRepository.save(taskEntry);
    LOG.info("Created {} volume:{}", taskEntry.getTempVolumeType(), tempVolume.toString());

    checkThreadInterruption(taskEntry);
    awsCommunication.createTemporaryTag(tempVolume.getVolumeId(), backupEntry.getFileName());
    return tempVolume;
}
/**
 * Waits until the task's temporary volume reaches the {@code Available} state.
 *
 * @param taskEntry the restore task holding the temp volume id
 * @return the volume as returned once the wait has finished
 */
private Volume waitingTempVolumeStep(TaskEntry taskEntry) {
    checkThreadInterruption(taskEntry);
    setProgress(taskEntry, TaskProgress.WAITING_TEMP_VOLUME);

    final Volume pendingVolume = awsCommunication.getVolume(taskEntry.getTempVolumeId());
    final Volume availableVolume = awsCommunication.waitForVolumeState(pendingVolume, VolumeState.Available);

    LOG.info("Volume created: {}", availableVolume.toString());
    return availableVolume;
}
/**
 * Attaches the temporary volume to the instance this service runs on.
 * <p>
 * After the attach request the volume description is refreshed and the method pauses
 * for one minute before continuing (presumably to let the device appear in the OS -
 * TODO confirm). If that pause is interrupted, the thread's interrupt flag is restored
 * instead of being swallowed, so the interruption stays visible to the subsequent
 * {@code checkThreadInterruption} call and to later blocking operations.
 *
 * @param taskEntry  the restore task
 * @param tempVolume the temporary volume to attach
 * @return the refreshed volume description
 */
private Volume attachingVolumeStep(TaskEntry taskEntry, Volume tempVolume) {
    checkThreadInterruption(taskEntry);
    setProgress(taskEntry, TaskProgress.ATTACHING_VOLUME);
    awsCommunication.attachVolume(awsCommunication.getInstance(SystemUtils.getInstanceId()), tempVolume);
    tempVolume = awsCommunication.syncVolume(tempVolume);
    try {
        TimeUnit.MINUTES.sleep(1);
    } catch (InterruptedException e) {
        LOG.warn("Interrupted while waiting for temp volume {} to attach", taskEntry.getTempVolumeId());
        // Preserve the interruption for whoever checks it next.
        Thread.currentThread().interrupt();
    }
    checkThreadInterruption(taskEntry);
    return tempVolume;
}
/**
 * Copies the backup file onto the attached temporary volume.
 * <p>
 * The source is the backup's file name under the configured SDFS mount point; the
 * destination is the device name detected for the temp volume.
 *
 * @param taskEntry   the restore task
 * @param tempVolume  the attached temporary volume
 * @param backupEntry the backup being restored
 * @throws IOException          propagated from the storage service
 * @throws InterruptedException propagated from the storage service
 */
private void copyingStep(TaskEntry taskEntry, Volume tempVolume, BackupEntry backupEntry) throws IOException, InterruptedException {
    final String targetDevice = storageService.detectFsDevName(tempVolume);
    LOG.info("Volume was attached as device: " + targetDevice);

    // Progress DTO built with the values 25 and 60 and the backup size in GiB.
    final CopyingTaskProgressDto copyProgress =
            new CopyingTaskProgressDto(taskEntry.getId(), 25, 60, Long.parseLong(backupEntry.getSizeGiB()));

    final String backupPath = configurationMediator.getSdfsMountPoint() + backupEntry.getFileName();
    storageService.copyData(backupPath, targetDevice, copyProgress, taskEntry.getId());
}
/**
 * Finishes a successful restore: publishes the 100% notification, stores {@code DONE}
 * as the task progress, completes the task through the task service and sends the
 * success mail.
 *
 * @param taskEntry the finished restore task
 */
private void completeTask(TaskEntry taskEntry) {
    notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Restore complete", 100);

    final String doneProgress = TaskProgress.DONE.name();
    taskEntry.setProgress(doneProgress);
    taskService.complete(taskEntry);

    LOG.info("{} task {} was completed", taskEntry.getType(), taskEntry.getId());
    mailService.notifyAboutSuccess(taskEntry);
}
/**
 * Detaches the task's temporary volume.
 *
 * @param taskEntry the restore task holding the temp volume id
 */
private void detachingTempVolumeStep(TaskEntry taskEntry) {
    setProgress(taskEntry, TaskProgress.DETACHING_TEMP_VOLUME);
    checkThreadInterruption(taskEntry);

    LOG.info("Detaching volume: {}", taskEntry.getTempVolumeId());
    final Volume attachedVolume = awsCommunication.getVolume(taskEntry.getTempVolumeId());
    awsCommunication.detachVolume(attachedVolume);
}
/**
 * Starts a snapshot of the temporary volume and stores its id on the task.
 * <p>
 * The error message is built by plain concatenation. The previous
 * {@code MessageFormat.format("Can't get access to {} volume", ...)} call never
 * substituted the id: {@code MessageFormat} expects indexed placeholders such as
 * {@code {0}} and treats the apostrophe as the start of a quoted literal. It also
 * reported the source volume id although the lookup is for the temp volume.
 *
 * @param taskEntry the restore task; receives the temp snapshot id
 * @throws DataAccessException if the temporary volume cannot be retrieved
 */
private void creatingTempSnapshotStep(TaskEntry taskEntry) {
    setProgress(taskEntry, TaskProgress.CREATING_SNAPSHOT);
    Volume volumeSrc = awsCommunication.getVolume(taskEntry.getTempVolumeId());
    if (volumeSrc == null) {
        LOG.error("Can't get access to {} volume", taskEntry.getTempVolumeId());
        throw new DataAccessException("Can't get access to " + taskEntry.getTempVolumeId() + " volume");
    }
    taskEntry.setTempSnapshotId(awsCommunication.createSnapshot(volumeSrc).getSnapshotId());
    // Persist the snapshot id so a resumed run waits for it instead of creating another.
    taskRepository.save(taskEntry);
}
/**
 * Waits for the task's temporary snapshot to reach its complete state.
 *
 * @param taskEntry the restore task holding the temp snapshot id
 */
private void waitingTempSnapshotStep(TaskEntry taskEntry) {
    setProgress(taskEntry, TaskProgress.WAITING_SNAPSHOT);

    final Snapshot pendingSnapshot = awsCommunication.getSnapshot(taskEntry.getTempSnapshotId());
    final Snapshot completedSnapshot = awsCommunication.waitForCompleteState(pendingSnapshot);

    LOG.info("SnapshotEntry created: {}", completedSnapshot.toString());
}
/**
 * Creates the final volume from the temporary snapshot in the task's availability zone,
 * using the task's restore volume type and IOPS-per-GiB value.
 *
 * @param taskEntry the restore task
 * @return the volume created in the target zone
 */
private Volume moveToTargetZoneStep(TaskEntry taskEntry) {
    checkThreadInterruption(taskEntry);
    notificationService.notifyAboutRunningTaskProgress(taskEntry.getId(), "Moving into target zone...", 95);

    final String sourceSnapshotId = taskEntry.getTempSnapshotId();
    final String targetZone = taskEntry.getAvailabilityZone();
    final VolumeType restoreType = VolumeType.fromValue(taskEntry.getRestoreVolumeType());

    return awsCommunication.createVolumeFromSnapshot(
            sourceSnapshotId, targetZone, restoreType, taskEntry.getRestoreVolumeIopsPerGb());
}
/**
 * Deletes the task's temporary volume.
 *
 * @param taskEntry the restore task holding the temp volume id
 */
private void deletingTempVolumeStep(TaskEntry taskEntry) {
    setProgress(taskEntry, TaskProgress.DELETING_TEMP_VOLUME);

    LOG.info("Deleting temporary volume: {}", taskEntry.getTempVolumeId());
    final Volume obsoleteVolume = awsCommunication.getVolume(taskEntry.getTempVolumeId());
    awsCommunication.deleteVolume(obsoleteVolume);
}
/**
 * Deletes the task's temporary snapshot.
 *
 * @param taskEntry the restore task holding the temp snapshot id
 */
private void deletingTempSnapshotStep(TaskEntry taskEntry) {
    setProgress(taskEntry, TaskProgress.DELETING_TEMP_SNAPSHOT);

    final String obsoleteSnapshotId = taskEntry.getTempSnapshotId();
    LOG.info("Deleting temporary snapshot: {}", obsoleteSnapshotId);
    awsCommunication.deleteSnapshot(obsoleteSnapshotId);
}
/**
 * Best-effort removal of the temporary snapshot and the temporary volume.
 * <p>
 * Either resource may not exist (or may not be deletable) depending on how far the
 * restore got, so failures never abort the cleanup. They are logged rather than
 * silently dropped so that leftover AWS resources can be traced.
 *
 * @param taskEntry the restore task holding the temp resource ids
 */
private void cleaningStep(TaskEntry taskEntry) {
    try {
        deletingTempSnapshotStep(taskEntry);
    } catch (Exception e) {
        LOG.warn("Cleanup: temporary snapshot {} was not deleted", taskEntry.getTempSnapshotId(), e);
    }
    try {
        deletingTempVolumeStep(taskEntry);
    } catch (Exception e) {
        LOG.warn("Cleanup: temporary volume {} was not deleted", taskEntry.getTempVolumeId(), e);
    }
}
/**
 * Handles a canceled restore: marks the task as {@code INTERRUPTED_CLEANING}, removes
 * temporary resources (best effort), deletes the task and sends a status mail.
 * <p>
 * The mail text now has a space between "with id" and the volume id; previously the
 * two were glued together.
 *
 * @param taskEntry the canceled restore task
 */
private void interruptedCleaningStep(TaskEntry taskEntry) {
    // Stored first so an aborted cleanup is resumed by execute() on the next run.
    setProgress(taskEntry, INTERRUPTED_CLEANING);
    cleaningStep(taskEntry);
    LOG.info("Restore task was canceled");
    taskRepository.delete(taskEntry);
    mailService.notifyAboutSystemStatus("Restore task for volume with id " + taskEntry.getVolume() + " was canceled");
}
/**
 * Handles a failed restore: marks the task as {@code FAIL_CLEANING}, removes temporary
 * resources (best effort), stores the {@code ERROR} status and sends an error mail.
 * <p>
 * The failure is logged in a single statement with the exception passed as the
 * trailing argument, so the cause is recorded together with the task it belongs to.
 *
 * @param taskEntry the failed restore task
 * @param e         the cause of the failure
 */
private void failCleaningStep(TaskEntry taskEntry, Exception e) {
    // Stored first so an aborted cleanup is resumed by execute() on the next run.
    setProgress(taskEntry, FAIL_CLEANING);
    cleaningStep(taskEntry);
    LOG.error("Failed to execute {} task {}. Changing task status to '{}'",
            taskEntry.getType(), taskEntry.getId(), TaskEntry.TaskEntryStatus.ERROR, e);
    taskEntry.setStatus(TaskEntry.TaskEntryStatus.ERROR.getStatus());
    taskRepository.save(taskEntry);
    mailService.notifyAboutError(taskEntry, e);
}
}