/*
* Copyright (c) 2015 EMC Corporation
* All Rights Reserved
*/
package com.emc.storageos.volumecontroller.impl;
import java.net.URI;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import com.emc.storageos.coordinator.client.service.CoordinatorClient;
import com.emc.storageos.coordinator.client.service.DrPostFailoverHandler;
import com.emc.storageos.coordinator.common.impl.ZkPath;
import com.emc.storageos.db.client.DbClient;
import com.emc.storageos.db.client.constraint.ContainmentConstraint;
import com.emc.storageos.db.client.constraint.URIQueryResultList;
import com.emc.storageos.db.client.model.StorageSystem;
import com.emc.storageos.db.client.model.Task;
import com.emc.storageos.db.client.model.Workflow;
import com.emc.storageos.db.client.model.WorkflowStep;
import com.emc.storageos.exceptions.DeviceControllerException;
import com.emc.storageos.volumecontroller.AsyncTask;
import com.emc.storageos.volumecontroller.impl.ControllerServiceImpl.Lock;
import com.emc.storageos.workflow.Workflow.StepState;
import com.emc.storageos.workflow.WorkflowException;
import com.emc.storageos.workflow.WorkflowService;
/**
* Post cleanup after failing over to a standby site. For unplanned disaster on active site,
 * some db/zk data may not be replicated to the standby site and we may lose some data after failover. So we
 * need to do the following before we announce the standby is ready:
*
* 1) Release persistent lock
* 2) Remove all pending tasks, workflows
* 3) Set all in-progress workflow steps/workflow as error
* 4) Trigger device rediscovery
*
*/
public class ControllerWorkflowCleanupHandler extends DrPostFailoverHandler {
private static final Logger log = LoggerFactory.getLogger(ControllerWorkflowCleanupHandler.class);
@Autowired
private DbClient dbClient;
public ControllerWorkflowCleanupHandler() {
}
@Override
protected void execute() {
checkPersistentLocks();
cleanupWorkflow();
cleanupTasks();
rediscoverDevices();
}
private void checkPersistentLocks() {
CoordinatorClient coordinator = getCoordinator();
coordinator.deletePath(ZkPath.PERSISTENTLOCK.toString());
log.info("Released all persistent locks");
}
private void cleanupWorkflow() {
log.info("Start workflow cleanup");
List<URI> workflowIds = dbClient.queryByType(Workflow.class, true);
Iterator<Workflow> workflows = dbClient.queryIterativeObjects(Workflow.class, workflowIds);
int cnt = 0;
while(workflows.hasNext()) {
Workflow workflow = workflows.next();
if (!workflow.getCompleted()) {
completeWorkflow(workflow.getId());
cnt ++;
}
}
log.info("Total {} workflows processed", cnt);
}
private void completeWorkflow(URI workflowId) {
URIQueryResultList stepURIs = new URIQueryResultList();
dbClient.queryByConstraint(ContainmentConstraint.Factory.getWorkflowWorkflowStepConstraint(workflowId), stepURIs);
for (URI stepURI : stepURIs) {
WorkflowStep step = dbClient.queryObject(WorkflowStep.class, stepURI);
String state = step.getState();
List<String> activeStepStates = Arrays.asList(StepState.CREATED.toString(), StepState.BLOCKED.toString(), StepState.QUEUED.toString(), StepState.EXECUTING.toString());
if (activeStepStates.contains(state)) {
WorkflowException ex = WorkflowException.exceptions.workflowTerminatedForFailover(workflowId.toString());
log.info("Terminate workflow step {}", step.getId());
WorkflowService.completerStepErrorWithoutRollback(step.getStepId(), ex);
}
}
}
private void cleanupTasks() {
log.info("Start task cleanup");
List<URI> taskIds = dbClient.queryByType(Task.class, true);
Iterator<Task> tasks = dbClient.queryIterativeObjects(Task.class, taskIds);
int cnt = 0;
while(tasks.hasNext()) {
Task task = tasks.next();
if (task.isPending() || task.isQueued()) {
DeviceControllerException ex = DeviceControllerException.exceptions.terminatedForControllerFailover();
task.setServiceCode(ex.getServiceCode().getCode());
task.setStatus(String.valueOf(Task.Status.error));
task.setMessage(ex.getMessage());
task.setProgress(100);
task.setEndTime(Calendar.getInstance());
log.info("Terminate task {}", task.getId());
dbClient.updateObject(task);
cnt ++;
}
}
log.info("Total {} tasks processed", cnt);
}
private void rediscoverDevices(){
List<URI> storageSystemIds = dbClient.queryByType(StorageSystem.class, true);
Iterator<StorageSystem> storageSystems = dbClient.queryIterativeObjects(StorageSystem.class, storageSystemIds);
String taskId = UUID.randomUUID().toString();
while(storageSystems.hasNext()) {
StorageSystem storageSystem = storageSystems.next();
URI storageSystemId = storageSystem.getId();
try {
log.info("Start discovery {}", storageSystemId);
ControllerServiceImpl.scheduleDiscoverJobs(
new AsyncTask[] { new AsyncTask(StorageSystem.class, storageSystemId, taskId) },
Lock.DISCOVER_COLLECTION_LOCK, ControllerServiceImpl.DISCOVERY);
} catch (Exception ex) {
log.error("Failed to start discovery : " + storageSystem.getId(), ex);
}
}
}
public DbClient getDbClient() {
return dbClient;
}
public void setDbClient(DbClient dbClient) {
this.dbClient = dbClient;
}
}