/*
* Copyright (c) 2012 EMC Corporation
* All Rights Reserved
*/
package com.emc.storageos.volumecontroller.impl.smis.job;
import com.emc.storageos.db.client.DbClient;
import com.emc.storageos.db.client.model.Operation;
import com.emc.storageos.db.client.model.StorageSystem;
import com.emc.storageos.exceptions.DeviceControllerErrors;
import com.emc.storageos.svcs.errorhandling.model.ServiceError;
import com.emc.storageos.volumecontroller.Job;
import com.emc.storageos.volumecontroller.JobContext;
import com.emc.storageos.volumecontroller.TaskCompleter;
import com.emc.storageos.volumecontroller.impl.JobPollResult;
import com.emc.storageos.volumecontroller.impl.smis.CIMConnectionFactory;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.cim.CIMArgument;
import javax.cim.CIMInstance;
import javax.cim.CIMObjectPath;
import javax.cim.CIMProperty;
import javax.cim.UnsignedInteger16;
import javax.wbem.WBEMException;
import javax.wbem.client.WBEMClient;
import java.io.Serializable;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import static com.google.common.base.Strings.isNullOrEmpty;
/**
* An SMI-S job
*/
public class SmisJob extends Job implements Serializable
{
private static final Logger _logger = LoggerFactory.getLogger(SmisJob.class);
private static final String CIM_OBJECT_NAME = "cimobject";
private static final String STORAGE_SYSTEM_URI_NAME = "storagesystemuri";
private static final String TASK_COMPLETER_NAME = "taskcompleter";
private static final String JOB_NAME_NAME = "jobname";
private static final String ERROR_TRACKING_START_TIME = "errortrackingstarttime"; // in milli seconds
private static final String POST_PROCESSING_ERROR_TRACKING_START_TIME = "postprocessingerrortrackingstarttime"; // in milli seconds
private static final String JOB_PROPERTY_KEY_PERCENT_COMPLETE = "PercentComplete";
private static final String JOB_PROPERTY_KEY_OPERATIONAL_STS = "OperationalStatus";
private static final String JOB_PROPERTY_KEY_ERROR_DESC = "ErrorDescription";
private static final String JOB_PROPERTY_KEY_JOB_STATUS = "JobStatus";
private static final long ERROR_TRACKING_LIMIT = 2 * 60 * 60 * 1000; // tracking limit for transient errors. set for 2 hours
private static final long POST_PROCESSING_ERROR_TRACKING_LIMIT = 20 * 60 * 1000; // tracking limit for transient errors in post
// processing, 20 minutes
// Place holder for error messages and their conversions
private static final String ERROR_DESC_UNLINK_OP = "The unlink cannot be completed in this state unless the Symmetrix force flag is used";
private static final String ERROR_DESC_UNLINK_OP_NEW = "***The copy/define is in progress and unlink is not allowed at this time***";
protected JobPollResult _pollResult = new JobPollResult();
private String _id;
protected String _errorDescription = null;
protected JobStatus _status = JobStatus.IN_PROGRESS;
// status of jog.updateStatus() execution
protected JobStatus _postProcessingStatus = JobStatus.SUCCESS;
private int _percentComplete = 0;
protected Map<String, Object> _map = new HashMap<String, Object>();
private String _smisIPAddress = null;
public SmisJob(CIMObjectPath cimJob, URI storageSystem, TaskCompleter taskCompleter, String jobName) {
_map.put(CIM_OBJECT_NAME, cimJob);
_map.put(STORAGE_SYSTEM_URI_NAME, storageSystem);
_map.put(TASK_COMPLETER_NAME, taskCompleter);
_map.put(JOB_NAME_NAME, jobName);
_map.put(POST_PROCESSING_ERROR_TRACKING_START_TIME, 0L);
_map.put(ERROR_TRACKING_START_TIME, 0L);
}
public void setCimJob(CIMObjectPath cimJob) {
_map.put(CIM_OBJECT_NAME, cimJob);
}
public CIMObjectPath getCimJob() {
return (CIMObjectPath) _map.get(CIM_OBJECT_NAME);
}
public URI getStorageSystemURI() {
return (URI) _map.get(STORAGE_SYSTEM_URI_NAME);
}
public TaskCompleter getTaskCompleter() {
return (TaskCompleter) _map.get(TASK_COMPLETER_NAME);
}
public String getJobName() {
return (String) _map.get(JOB_NAME_NAME);
}
private long getErrorTrackingStartTime() {
return (Long) _map.get(ERROR_TRACKING_START_TIME);
}
public void setErrorTrackingStartTime(long trackingStartTime) {
_map.put(ERROR_TRACKING_START_TIME, trackingStartTime);
}
private long getPostProcessingErrorTrackingStartTime() {
return (Long) _map.get(POST_PROCESSING_ERROR_TRACKING_START_TIME);
}
public void setPostProcessingErrorTrackingStartTime(long trackingStartTime) {
_map.put(POST_PROCESSING_ERROR_TRACKING_START_TIME, trackingStartTime);
}
/**
* Sets the status for the job to the error status and updates the
* error description with the passed description.
*
* @param errorDescription A description of the error.
*/
public void setPostProcessingErrorStatus(String errorDescription) {
_postProcessingStatus = JobStatus.ERROR;
_errorDescription = errorDescription;
}
/**
* Sets the status for the job to the fatal_error status and updates the
* error description with the passed description.
*
* @param errorDescription A description of the error.
*/
public void setFatalErrorStatus(String errorDescription) {
_status = JobStatus.FATAL_ERROR;
_errorDescription = errorDescription;
}
/**
* Sets the status for the job to the failed status and updates the
* error description with the passed description.
*
* @param errorDescription A description of the error.
*/
public void setFailedStatus(String errorDescription) {
_status = JobStatus.FAILED;
_errorDescription = errorDescription;
}
/**
* Sets post processing status of the job to the failed status and updates the
* error description with the passed description.
*
* @param errorDescription A description of the error.
*/
public void setPostProcessingFailedStatus(String errorDescription) {
_postProcessingStatus = JobStatus.FAILED;
_errorDescription = errorDescription;
}
public JobPollResult poll(JobContext jobContext, long trackingPeriodInMillis) {
CIMProperty<Object> instanceID = null;
try {
CIMObjectPath cimJob = getCimJob();
instanceID = (CIMProperty<Object>) cimJob.getKey("InstanceID");
_pollResult.setJobName(getJobName());
_pollResult.setJobId(instanceID.getValue().toString());
_pollResult.setJobPercentComplete(_percentComplete);
if (_smisIPAddress == null) {
StorageSystem storageSystem = jobContext.getDbClient().queryObject(StorageSystem.class, getStorageSystemURI());
_smisIPAddress = storageSystem.getSmisProviderIP();
}
// poll only if job is not in terminal status
if (_status == JobStatus.IN_PROGRESS || _status == JobStatus.ERROR) {
String[] jobPathPropertyKeys =
{ JOB_PROPERTY_KEY_PERCENT_COMPLETE, JOB_PROPERTY_KEY_OPERATIONAL_STS, JOB_PROPERTY_KEY_ERROR_DESC };
CIMInstance jobPathInstance = null;
_logger.info("SmisJob: Looking up job: id {}, provider: {} ", instanceID.getValue(), _smisIPAddress);
WBEMClient wbemClient = getWBEMClient(jobContext.getDbClient(), jobContext.getCimConnectionFactory());
if (wbemClient == null) {
String errorMessage = "No CIMOM client found for provider ip: " + _smisIPAddress;
processTransientError(instanceID.getValue().toString(), trackingPeriodInMillis, errorMessage, null);
} else {
jobPathInstance = wbemClient.getInstance(getCimJob(), false, false, jobPathPropertyKeys);
CIMProperty<UnsignedInteger16> percentComplete =
(CIMProperty<UnsignedInteger16>) jobPathInstance.getProperty(JOB_PROPERTY_KEY_PERCENT_COMPLETE);
_pollResult.setJobPercentComplete(percentComplete.getValue().intValue());
_percentComplete = _pollResult.getJobPercentComplete();
// reset transient error tracking time
setErrorTrackingStartTime(0L);
if (_pollResult.getJobPercentComplete() == 100) {
CIMProperty<UnsignedInteger16[]> operationalStatus =
(CIMProperty<UnsignedInteger16[]>) jobPathInstance.getProperty(JOB_PROPERTY_KEY_OPERATIONAL_STS);
UnsignedInteger16[] statusValues = operationalStatus.getValue();
if (statusValues != null) {
for (int j = 0; j < statusValues.length; j++) {
_logger.info("Status value[{}]: {}", j, statusValues[j].intValue());
if (statusValues[j].intValue() == 2) {
_status = JobStatus.SUCCESS;
_logger.info("SmisJob: {} succeeded", instanceID.getValue());
}
if (statusValues[j].intValue() == 6) {
_status = JobStatus.FAILED;
_logger.info("SmisJob: {} returned exception", instanceID.getValue());
}
}
}
if ((_status != JobStatus.SUCCESS) && (_status != JobStatus.IN_PROGRESS)) {
// parse ErrorDescription
_errorDescription = getErrorDescription(jobPathInstance);
_status = JobStatus.FAILED;
_logger.error("SmisJob: {} failed; Details: {}", getJobName(), _errorDescription);
logErrorsFromJob(wbemClient);
}
} else {
// reset status from previous possible transient error status
_status = JobStatus.IN_PROGRESS;
}
}
}
} catch (WBEMException we) {
if ((we.getID() == WBEMException.CIM_ERR_NOT_FOUND) || (we.getID() == WBEMException.CIM_ERR_FAILED)) {
_status = JobStatus.FAILED;
_errorDescription = we.getMessage();
if (we.getID() == WBEMException.CIM_ERR_NOT_FOUND) {
_logger.error(String.format(
"SMI-S job not found. Marking as failed as we cannot determine status. " +
"User may retry the operation to be sure: Name: %s, ID: %s, Desc: %s",
getJobName(), instanceID.getValue().toString(), _errorDescription), we);
} else { // CIM_ERR_FAILED
_logger.error(String.format(
"Job failed but GetErrors() did not report the actual error. " +
"User may retry the operation to be sure: Name: %s, ID: %s, Desc: %s",
getJobName(), instanceID.getValue().toString(), _errorDescription), we);
}
} else {
processTransientError(instanceID.getValue().toString(), trackingPeriodInMillis, we.getMessage(), we);
}
} catch (Exception e) {
processTransientError(instanceID.getValue().toString(), trackingPeriodInMillis, e.getMessage(), e);
} finally {
try {
_logger.info("SmisJob: Post processing job: id {}, provider: {} ", instanceID.getValue(), _smisIPAddress);
// reset from previous possible transient error in post processing status.
_postProcessingStatus = JobStatus.SUCCESS;
updateStatus(jobContext);
if (_postProcessingStatus == JobStatus.ERROR) {
processPostProcessingError(instanceID.getValue().toString(), trackingPeriodInMillis, _errorDescription, null);
}
} catch (WBEMException we) {
if (we.getID() == WBEMException.CIM_ERR_NOT_FOUND) {
_postProcessingStatus = JobStatus.FAILED;
_errorDescription = we.getMessage();
_logger.error(String.format("SMI-S job not found. Marking as failed as we cannot determine status. " +
"User may retry the operation to be sure: Name: %s, ID: %s, Desc: %s",
getJobName(), instanceID.getValue().toString(), _errorDescription), we);
} else {
processPostProcessingError(instanceID.getValue().toString(), trackingPeriodInMillis, we.getMessage(), we);
}
} catch (Exception e) {
setFatalErrorStatus(e.getMessage());
setPostProcessingFailedStatus(e.getMessage());
_logger.error("Problem while trying to update status", e);
} finally {
if (isJobInTerminalFailedState()) {
// Have to process job completion since updateStatus may not did this.
ServiceError error = DeviceControllerErrors.smis.jobFailed(_errorDescription);
getTaskCompleter().error(jobContext.getDbClient(), error);
}
}
}
_pollResult.setJobStatus(_status);
_pollResult.setJobPostProcessingStatus(_postProcessingStatus);
_pollResult.setErrorDescription(_errorDescription);
return _pollResult;
}
public WBEMClient getWBEMClient(DbClient dbClient, CIMConnectionFactory cimConnectionFactory) {
StorageSystem system = null;
WBEMClient client = null;
try {
system = dbClient.queryObject(StorageSystem.class, getStorageSystemURI());
} catch (Exception e) {
_logger.error("Error while reading storage system:", e);
}
client = cimConnectionFactory.getConnection(system.getSmisProviderIP(), system.getSmisPortNumber().toString())
.getCimClient();
return client;
}
public void updateStatus(JobContext jobContext) throws Exception {
if (isJobInTerminalSuccessState()) {
getTaskCompleter().ready(jobContext.getDbClient());
} else if (isJobInTerminalFailedState()) {
ServiceError error = DeviceControllerErrors.smis.jobFailed(_errorDescription);
getTaskCompleter().error(jobContext.getDbClient(), error);
}
// else {
// do nothing
// }
}
public String getJobID() {
return _id;
}
public int getJobPercentComplete() {
if (_pollResult == null) {
return 0;
}
return _pollResult.getJobPercentComplete();
}
public boolean isSuccess() {
return (_status == JobStatus.SUCCESS);
}
public JobStatus getJobStatus() {
return _status;
}
public JobStatus getJobPostProcessingStatus() {
return _postProcessingStatus;
}
protected Operation.Status getOpStatus() {
switch (_status) {
case SUCCESS:
return Operation.Status.ready;
case FAILED:
return Operation.Status.error;
default:
return Operation.Status.pending;
}
}
protected String getMessage() {
switch (_status) {
case SUCCESS:
return "Job succeeded";
case FAILED:
return "Job failed with error:" + _errorDescription;
case ERROR:
return "Transient error checking job status - internal error:" + _errorDescription;
case IN_PROGRESS:
return "Job in progress: " + getJobPercentComplete() + "% complete...";
default:
return "Undefined status";
}
}
protected void processTransientError(String jobId, long trackingInterval, String errorMessage, Exception ex) {
_status = JobStatus.ERROR;
_errorDescription = errorMessage;
if (ex != null) {
_logger.error(String.format("Error while processing SmisJob - Name: %s, ID: %s, Desc: %s Status: %s",
getJobName(), jobId, _errorDescription, _status), ex);
} else {
_logger.error(String.format("Error while processing SmisJob - Name: %s, ID: %s, Desc: %s Status: %s",
getJobName(), jobId, _errorDescription, _status));
}
// Check if job tracking limit was reached. Set status to FATAL_ERROR in such a case.
if (getErrorTrackingStartTime() == 0) {
setErrorTrackingStartTime(System.currentTimeMillis());
}
long errorTrackingTime = System.currentTimeMillis() - getErrorTrackingStartTime();
_logger.info(String.format("Tracking time of SmisJob in transient error status - %s, Name: %s, ID: %s. Status %s .",
errorTrackingTime, getJobName(), jobId, _status));
if (errorTrackingTime > ERROR_TRACKING_LIMIT) {
_status = JobStatus.FATAL_ERROR;
_logger.error(String.format("Reached tracking time limit for SmisJob - Name: %s, ID: %s. Set status to %s .",
getJobName(), jobId, _status));
}
}
protected void processPostProcessingError(String jobId, long trackingInterval, String errorMessage, Exception ex) {
_postProcessingStatus = JobStatus.ERROR;
_errorDescription = errorMessage;
if (ex != null) {
_logger.error(String.format("Error while post processing SmisJob - Name: %s, ID: %s, Desc: %s Status: %s",
getJobName(), jobId, _errorDescription, _postProcessingStatus), ex);
} else {
_logger.error(String.format("Error while processing SmisJob - Name: %s, ID: %s, Desc: %s Status: %s",
getJobName(), jobId, _errorDescription, _postProcessingStatus));
}
// Check if job post processing tracking limit was reached. Set post processing status to FAILED in such a case.
if (getPostProcessingErrorTrackingStartTime() == 0) {
setPostProcessingErrorTrackingStartTime(System.currentTimeMillis());
}
long postProcessingErrorTrackingTime = System.currentTimeMillis() - getPostProcessingErrorTrackingStartTime();
_logger.info(String.format(
"Tracking time of SmisJob in post processing error - %s, Name: %s, ID: %s. Status: %s, PostProcessing status: %s .",
postProcessingErrorTrackingTime, getJobName(), jobId, _status, _postProcessingStatus));
if (postProcessingErrorTrackingTime > POST_PROCESSING_ERROR_TRACKING_LIMIT) {
_postProcessingStatus = JobStatus.FAILED;
_logger.error(String.format(
"Reached tracking time limit for SmisJob post processing - Name: %s, ID: %s. Set post processing status to %s .",
getJobName(), jobId, _postProcessingStatus));
}
}
public boolean isJobInTerminalState() {
return (getJobStatus() == Job.JobStatus.SUCCESS ||
getJobStatus() == Job.JobStatus.FAILED || getJobStatus() == Job.JobStatus.FATAL_ERROR) &&
(getJobPostProcessingStatus() == Job.JobStatus.SUCCESS ||
getJobPostProcessingStatus() == Job.JobStatus.FAILED || getJobPostProcessingStatus() == Job.JobStatus.FATAL_ERROR);
}
public boolean isJobInTerminalFailedState() {
return (isJobInTerminalState() && (getJobStatus() != Job.JobStatus.SUCCESS || getJobPostProcessingStatus() != Job.JobStatus.SUCCESS));
}
public boolean isJobInTerminalSuccessState() {
return (getJobStatus() == Job.JobStatus.SUCCESS && getJobPostProcessingStatus() == Job.JobStatus.SUCCESS);
}
private void logErrorsFromJob(WBEMClient wbemClient) throws WBEMException {
try {
CIMArgument[] pOutputArguments = new CIMArgument[1];
wbemClient.invokeMethod(getCimJob(), "GetErrors", null, pOutputArguments);
_logger.error("GetErrors() for job {} response :{}", getCimJob(), pOutputArguments);
} catch (Exception e) {
_logger.error("GetErrors() for job {} failed.", getCimJob(), e);
}
}
/**
* Attempt to get a non-empty error description from the CIM Job.
*
* @param jobPathInstance The SMI-S job.
* @return Non-empty string containing the error description, or a default.
*/
private String getErrorDescription(CIMInstance jobPathInstance) {
String[] errorKeys = new String[] {JOB_PROPERTY_KEY_ERROR_DESC, JOB_PROPERTY_KEY_JOB_STATUS};
for (String errorKey : errorKeys) {
CIMProperty<String> errorProperty = (CIMProperty<String>) jobPathInstance.getProperty(errorKey);
if (errorProperty != null && !isNullOrEmpty(errorProperty.getValue())) {
String msg = errorProperty.getValue();
_logger.info("Job {} has error message: {}", jobPathInstance.getObjectPath(), msg);
return getProperErrorDescription(msg);
}
}
return String.format("No error description available for job %s.", jobPathInstance.getObjectPath());
}
/**
* Attempt to get a better non-empty error description from the CIM Job where ever possible.
*
* @param errorMessage The current SMI-S error message .
* @return Non-empty string containing better error message if available or the default.
*/
private String getProperErrorDescription(String errorMessage) {
if (StringUtils.containsIgnoreCase(errorMessage, ERROR_DESC_UNLINK_OP)) {
return ERROR_DESC_UNLINK_OP_NEW;
}
_logger.info("No meaningful conversion available for the current error message:{}", errorMessage);
return errorMessage;
}
}