/*
* Copyright (c) 2012 EMC Corporation
* All Rights Reserved
*/
package com.emc.storageos.volumecontroller.impl.plugins.discovery.smis;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.StringUtils;
import org.apache.curator.framework.recipes.locks.InterProcessLock;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import com.emc.storageos.coordinator.client.service.CoordinatorClient;
import com.emc.storageos.coordinator.client.service.DistributedQueueItemProcessedCallback;
import com.emc.storageos.coordinator.client.service.impl.DistributedQueueConsumer;
import com.emc.storageos.db.client.DbClient;
import com.emc.storageos.db.client.model.DiscoveredDataObject;
import com.emc.storageos.db.client.model.StorageProvider;
import com.emc.storageos.db.client.model.StorageSystem;
import com.emc.storageos.db.client.model.StorageSystem.Discovery_Namespaces;
import com.emc.storageos.db.client.util.CustomQueryUtility;
import com.emc.storageos.db.exceptions.DatabaseException;
import com.emc.storageos.exceptions.DeviceControllerErrors;
import com.emc.storageos.exceptions.DeviceControllerException;
import com.emc.storageos.networkcontroller.impl.NetworkDeviceController;
import com.emc.storageos.plugins.AccessProfile;
import com.emc.storageos.plugins.BaseCollectionException;
import com.emc.storageos.plugins.StorageSystemViewObject;
import com.emc.storageos.plugins.common.Constants;
import com.emc.storageos.svcs.errorhandling.model.ServiceError;
import com.emc.storageos.svcs.errorhandling.resources.InternalException;
import com.emc.storageos.volumecontroller.ControllerLockingService;
import com.emc.storageos.volumecontroller.impl.ControllerServiceImpl;
import com.emc.storageos.volumecontroller.impl.plugins.discovery.smis.DataCollectionJob.JobOrigin;
import com.emc.storageos.volumecontroller.impl.plugins.discovery.smis.DataCollectionJobScheduler.JobIntervals;
import com.emc.storageos.volumecontroller.impl.smis.CIMConnectionFactory;
/**
 * Consumer for Discovery Jobs added to the Queue.
 * This acts as the core class, which has multiple responsibilities:
 * 1. Scheduled loading of Provider devices from the DB every X minutes (Scanning)
 * 1a. Scheduled loading of PhysicalStorageSystems every X minutes (Discovery)
 * 2. Consume Discovery Jobs
 * 3. Submit the Discovery Jobs to the ExecutorService
 *
 */
public class DataCollectionJobConsumer extends
DistributedQueueConsumer<DataCollectionJob> implements ApplicationContextAware {
// SLF4J logger for this consumer.
private static final Logger _logger = LoggerFactory
        .getLogger(DataCollectionJobConsumer.class);
// Controller configuration properties; a copy is placed into each job's AccessProfile.
private Map<String, String> _configInfo;
// Helper used to build AccessProfiles and perform post-scan bookkeeping.
private DataCollectionJobUtil _util;
// Database client used to persist job/task state.
private DbClient _dbClient;
// Passed to discovery job invokers (network/SAN related work) — see invokeJob.
private NetworkDeviceController _networkDeviceController;
// Coordinator client used for distributed locks and node-id lookups.
private CoordinatorClient _coordinator;
// Factory for CIM (SMI-S) connections, attached to each AccessProfile.
private CIMConnectionFactory _connectionFactory;
// Scheduler used to refresh provider connections and queue follow-up jobs.
private DataCollectionJobScheduler _jobScheduler;
// Controller-level locking service handed to job invokers.
private ControllerLockingService _locker;
/**
 * Consumes one data collection job from the distributed queue.
 * 1. Create AccessProfile for Discovery Job
 * 2. Invoke Discovery Runnable, which in turn instructs plugins to get data from DataSources.
 *
 * Failures are recorded on the job's task completer; the queue item is always
 * acknowledged in the finally block so it is not redelivered.
 *
 * @param job the queued scan/discovery/metering job
 * @param callback used to acknowledge the queue item once processing finishes
 * @throws Exception never in practice; all failures are caught and recorded
 */
@Override
public void consumeItem(
        final DataCollectionJob job, final DistributedQueueItemProcessedCallback callback)
        throws Exception {
    try {
        // By the time we get to Discovery/Metering someone could have removed the storage system from Vipr.
        // Check that the job is still "active".
        if (!job.isActiveJob(_dbClient)) {
            return;
        }
        if (job instanceof DataCollectionScanJob) {
            triggerScanning((DataCollectionScanJob) job);
        } else {
            invokeJob(job);
        }
    } catch (InternalException e) {
        // Use parameterized logging rather than string concatenation.
        _logger.error("{} job failed for {}---> ", job.getType(), job.systemString(), e);
        try {
            job.error(_dbClient, e);
        } catch (Exception ex) {
            _logger.error("Failed to record status error for system : {}. Caused by: ", job.systemString(), ex);
        }
    } catch (Exception e) {
        _logger.error("{} job failed for {}---> ", job.getType(), job.systemString(), e);
        try {
            ServiceError serviceError = DeviceControllerException.errors.jobFailed(e);
            job.error(_dbClient, serviceError);
        } catch (Exception ex) {
            _logger.error("Failed to record status error for system: {}. Caused by: ", job.systemString(), ex);
        }
    } finally {
        try {
            // Always remove the item from the queue, even when the job failed.
            callback.itemProcessed();
        } catch (Exception e) {
            // Include the cause so queue-removal failures are diagnosable
            // (previously the exception was dropped).
            _logger.warn("Queue Item removal failed : {}", job.systemString(), e);
        }
    }
}
/**
 * Runs a non-scan data collection job (discovery, metering, array affinity):
 * marks the task in progress, builds an AccessProfile for the target system,
 * and hands it to a DataCollectionJobInvoker for execution.
 *
 * @param job the queued job; must not be a DataCollectionScanJob
 * @throws Exception if the job is a scan job, or if the invoker fails;
 *             consumeItem records the error on the task completer
 */
public void invokeJob(final DataCollectionJob job) throws Exception {
    if (job instanceof DataCollectionScanJob) {
        // Scan jobs must go through triggerScanning(); reaching here is a programming error.
        throw new DeviceControllerException("Invoked wrong job type : " + job.getType());
    }
    DataCollectionTaskCompleter completer = job.getCompleter();
    // set the next run time based on the time this discovery job is started (not the time it's queued)
    completer.setNextRunTime(_dbClient,
            System.currentTimeMillis() + JobIntervals.get(job.getType()).getInterval() * 1000);
    completer.updateObjectState(_dbClient, DiscoveredDataObject.DataCollectionJobStatus.IN_PROGRESS);
    // get the node that this discovery is being run on so it is displayed in the UI
    String jobType = job.getType();
    String nodeId = _coordinator.getInetAddessLookupMap().getNodeId();
    job.updateTask(_dbClient, "Started " + jobType + " on node " + nodeId);
    /**
     * TODO ISILON or VNXFILE
     * AccessProfile needs to get created, for each device Type.
     * Hence for isilon or vnxFile discovery, add logic in getAccessProfile
     * to set the required parameters for Discovery.
     */
    AccessProfile profile = _util.getAccessProfile(completer.getType(),
            completer.getId(),
            jobType, job.getNamespace());
    // Each job gets its own copy of the controller config properties, since
    // array-affinity jobs below add job-specific entries to the map.
    profile.setProps(new HashMap<String, String>(_configInfo));
    if (job instanceof DataCollectionArrayAffinityJob) {
        // Array-affinity jobs carry host/system scoping; pass the IDs (and the
        // systems' serial numbers) to the plugin through the profile properties.
        List<URI> hostIds = ((DataCollectionArrayAffinityJob) job).getHostIds();
        if (hostIds != null && !hostIds.isEmpty()) {
            profile.getProps().put(Constants.HOST_IDS, StringUtils.join(hostIds, Constants.ID_DELIMITER));
        }
        List<URI> systemIds = ((DataCollectionArrayAffinityJob) job).getSystemIds();
        if (systemIds != null && !systemIds.isEmpty()) {
            profile.getProps().put(Constants.SYSTEM_IDS, StringUtils.join(systemIds, Constants.ID_DELIMITER));
            Iterator<StorageSystem> storageSystems = _dbClient.queryIterativeObjects(StorageSystem.class, systemIds);
            List<String> systemSerialIds = new ArrayList<String>();
            while (storageSystems.hasNext()) {
                StorageSystem systemObj = storageSystems.next();
                systemSerialIds.add(systemObj.getSerialNumber());
            }
            if (!systemSerialIds.isEmpty()) {
                profile.getProps().put(Constants.SYSTEM_SERIAL_IDS, StringUtils.join(systemSerialIds, Constants.ID_DELIMITER));
            }
        }
    }
    profile.setCimConnectionFactory(_connectionFactory);
    profile.setCurrentSampleTime(System.currentTimeMillis());
    DataCollectionJobInvoker invoker = new DataCollectionJobInvoker(
            profile, _configInfo, _dbClient, _coordinator, _networkDeviceController, _locker, job.getNamespace(), completer);
    invoker.process(applicationContext);
    // Mark the task successful; invoker failures propagate to consumeItem's catch blocks.
    job.ready(_dbClient);
}
/**
 * Queues discovery jobs for storage systems that scanning has just revealed.
 * A system whose lastDiscoveryRunTime is still zero has never been discovered,
 * so a full-namespace discovery job is created for it; every other system is
 * left to the regular discovery schedule.
 *
 * @param storageSystemsCache map of system native GUIDs to their scanned view objects
 * @param origin whether this run was started by the scheduler or a user API call
 * @throws Exception if scheduling the collected jobs fails
 */
public void triggerDiscoveryNew(
        Map<String, StorageSystemViewObject> storageSystemsCache, JobOrigin origin) throws Exception {
    ArrayList<DataCollectionJob> discoveryJobs = new ArrayList<DataCollectionJob>();
    for (String nativeGuid : storageSystemsCache.keySet()) {
        StorageSystem storageSystem = null;
        try {
            List<StorageSystem> matches = CustomQueryUtility.getActiveStorageSystemByNativeGuid(_dbClient, nativeGuid);
            if (matches.isEmpty()) {
                continue;
            }
            storageSystem = matches.get(0);
            if (storageSystem.getLastDiscoveryRunTime() != 0) {
                // Already discovered at least once; leave it to the scheduler.
                continue;
            }
            _logger.info("Triggering discovery of new storage system {}", nativeGuid);
            DiscoverTaskCompleter taskCompleter = new DiscoverTaskCompleter(storageSystem.getClass(),
                    storageSystem.getId(), UUID.randomUUID().toString(), ControllerServiceImpl.DISCOVERY);
            discoveryJobs.add(new DataCollectionDiscoverJob(taskCompleter, origin, Discovery_Namespaces.ALL.toString()));
        } catch (Exception e) {
            _logger.error("Triggering Manual Array Discovery Failed {}:", storageSystem, e);
        }
    }
    _jobScheduler.scheduleMultipleJobs(discoveryJobs, ControllerServiceImpl.Lock.DISCOVER_COLLECTION_LOCK);
}
/**
 * Runs provider scanning under a cluster-wide lock.
 * 1. refreshConnections - needs to get called on each Controller, before acquiring lock.
 * 2. Try to acquire lock, if found
 * 3. Acquiring lock is not made as a Blocking Call, hence Controllers will return immediately,
 * if lock not found
 * 3. If lock found, spawn a new thread to do triggerScanning.
 * 4. Release lock immediately.
 *
 * @param job the scan job; may be limited to providers of one interface type
 * @throws Exception if scanning or post-scan bookkeeping fails; provider task
 *             completers are updated before the rethrow
 */
private void triggerScanning(DataCollectionScanJob job) throws Exception {
    _logger.info("Started scanning Providers : triggerScanning()");
    List<URI> providerList = job.getProviders();
    // If the job names specific providers, scope the scan to providers of the
    // same interface type; a null providerType means scan providers of all types.
    String providerType = null;
    if (!providerList.isEmpty()) {
        providerType = _dbClient.queryObject(StorageProvider.class, providerList.iterator().next()).getInterfaceType();
    }
    _jobScheduler.refreshProviderConnections(providerType);
    List<URI> allProviderURI = _dbClient.queryByType(StorageProvider.class, true);
    List<StorageProvider> allProvidersAllTypes = _dbClient.queryObject(StorageProvider.class, allProviderURI);
    List<StorageProvider> allProviders = new ArrayList<StorageProvider>();
    // since dbQuery does not return a normal list required by bookkeeping, we need to rebuild it.
    allProviderURI = new ArrayList<URI>();
    for (StorageProvider provider : allProvidersAllTypes) {
        if (providerType == null || providerType.equals(provider.getInterfaceType())) {
            allProviderURI.add(provider.getId());
            allProviders.add(provider);
        }
    }
    Map<String, StorageSystemViewObject> storageSystemsCache = Collections
            .synchronizedMap(new HashMap<String, StorageSystemViewObject>());
    boolean exceptionIntercepted = false;
    /**
     *
     * Run "Scheduled" Scanner Jobs of all Providers in only one Controller.
     * means our Cache is populated with the latest
     * physicalStorageSystems ID got from this scheduled Scan Job.
     * Compare the list against the ones in DB, and decide the physicalStorageSystem's
     * state REACHABLE
     */
    String lockKey = ControllerServiceImpl.Lock.SCAN_COLLECTION_LOCK.toString();
    if (providerType != null) {
        lockKey += providerType;
    }
    InterProcessLock scanLock = _coordinator.getLock(lockKey);
    if (scanLock.acquire(ControllerServiceImpl.Lock.SCAN_COLLECTION_LOCK.getRecommendedTimeout(), TimeUnit.SECONDS)) {
        // Log the actual lock key; the previous concatenation printed a literal
        // "null" suffix when no provider type restriction applied and did not
        // match the key logged on release.
        _logger.info("Acquired a lock {} to run scanning Job", lockKey);
        List<URI> cacheProviders = new ArrayList<URI>();
        Map<URI, Exception> cacheErrorProviders = new HashMap<URI, Exception>();
        try {
            boolean scanIsNeeded = false;
            boolean hasProviders = false;
            // First find out if scan is needed. If it needed for a single system , it is needed for all
            for (StorageProvider provider : allProviders) {
                if (provider.connected() || provider.initializing()) {
                    hasProviders = true;
                    if (_jobScheduler.isProviderScanJobSchedulingNeeded(provider, ControllerServiceImpl.SCANNER, job.isSchedulerJob())) {
                        scanIsNeeded = true;
                        break;
                    }
                }
            }
            if (!scanIsNeeded) {
                // No scan required: complete each requested provider's task now,
                // failing those that are unreachable.
                for (StorageProvider provider : allProviders) {
                    ScanTaskCompleter scanCompleter = job.findProviderTaskCompleter(provider.getId());
                    if (scanCompleter == null) {
                        continue;
                    }
                    if (provider.connected() || provider.initializing()) {
                        scanCompleter.ready(_dbClient);
                    }
                    else {
                        String errMsg = "Failed to establish connection to the storage provider";
                        scanCompleter.error(_dbClient, DeviceControllerErrors.smis.unableToCallStorageProvider(errMsg));
                        provider.setLastScanStatusMessage(errMsg);
                        _dbClient.updateObject(provider);
                    }
                }
                if (!hasProviders) {
                    _util.performBookKeeping(storageSystemsCache, allProviderURI);
                }
                _logger.info("Scan is not needed");
            }
            else {
                // If scan is needed for a single system,
                // it must be performed for all available providers in the database at the same time.
                // update each provider that is reachable to scan in progress
                List<StorageProvider> connectedProviders = new ArrayList<StorageProvider>();
                for (StorageProvider provider : allProviders) {
                    if (provider.connected() || provider.initializing()) {
                        ScanTaskCompleter scanCompleter = job.findProviderTaskCompleter(provider.getId());
                        if (scanCompleter == null) {
                            String taskId = UUID.randomUUID().toString();
                            scanCompleter = new ScanTaskCompleter(StorageProvider.class, provider.getId(), taskId);
                            job.addCompleter(scanCompleter);
                        }
                        scanCompleter.createDefaultOperation(_dbClient);
                        scanCompleter.updateObjectState(_dbClient, DiscoveredDataObject.DataCollectionJobStatus.IN_PROGRESS);
                        scanCompleter.setNextRunTime(_dbClient, System.currentTimeMillis()
                                + DataCollectionJobScheduler.JobIntervals.get(ControllerServiceImpl.SCANNER).getInterval() * 1000);
                        provider.setLastScanStatusMessage("");
                        _dbClient.updateObject(provider);
                        connectedProviders.add(provider);
                    } else {
                        // Unreachable provider: forget its storage systems and,
                        // when it was explicitly requested, fail its task.
                        if (null != provider.getStorageSystems() &&
                                !provider.getStorageSystems().isEmpty()) {
                            provider.getStorageSystems().clear();
                        }
                        if (providerList.contains(provider.getId())) {
                            String errMsg = "Failed to establish connection to the storage provider";
                            provider.setLastScanStatusMessage(errMsg);
                            job.findProviderTaskCompleter(provider.getId()).
                                    error(_dbClient, DeviceControllerErrors.smis.unableToCallStorageProvider(errMsg));
                        }
                        _dbClient.updateObject(provider);
                    }
                }
                // now scan each connected provider
                for (StorageProvider provider : connectedProviders) {
                    try {
                        _logger.info("provider.getInterfaceType():{}", provider.getInterfaceType());
                        ScanTaskCompleter scanCompleter = job.findProviderTaskCompleter(provider.getId());
                        performScan(provider.getId(), scanCompleter, storageSystemsCache);
                        cacheProviders.add(provider.getId());
                    } catch (Exception ex) {
                        _logger.error("Scan failed for {}--->", provider.getId(), ex);
                        cacheErrorProviders.put(provider.getId(), ex);
                    }
                }
                // Perform BooKKeeping
                // TODO: we need to access the status of job completer.
                // for now we assume that this operation can not fail.
                _util.performBookKeeping(storageSystemsCache, allProviderURI);
            }
        } catch (final Exception ex) {
            // Pass the throwable so the full stack trace is logged; previously
            // only the message was recorded here.
            _logger.error("Scan failed for {} ", ex.getMessage(), ex);
            exceptionIntercepted = true;
            // Mark previously-successful providers in error: bookkeeping (or a later
            // step) failed, so their results cannot be trusted.
            for (URI provider : cacheProviders) {
                job.findProviderTaskCompleter(provider).error(_dbClient, DeviceControllerErrors.dataCollectionErrors.scanFailed(ex.getLocalizedMessage(), ex));
                _logger.error("Scan failed for {}--->", provider, ex);
            }
            throw ex;
        } finally {
            if (!exceptionIntercepted) {
                for (URI provider : cacheProviders) {
                    job.findProviderTaskCompleter(provider).ready(_dbClient);
                    _logger.info("Scan complete successfully for " + provider);
                }
            }
            // Providers whose individual scans failed are marked in error even
            // when the overall pass completed.
            for (Entry<URI, Exception> entry : cacheErrorProviders.entrySet()) {
                URI provider = entry.getKey();
                Exception ex = entry.getValue();
                job.findProviderTaskCompleter(provider).error(_dbClient, DeviceControllerErrors.dataCollectionErrors.scanFailed(ex.getLocalizedMessage(), ex));
            }
            scanLock.release();
            _logger.info("Released a lock {} to run scanning Job", lockKey);
            try {
                if (!exceptionIntercepted /* && job.isSchedulerJob() */) {
                    // Manually trigger discoveries, if any new Arrays detected
                    triggerDiscoveryNew(storageSystemsCache,
                            (job.isSchedulerJob() ? DataCollectionJob.JobOrigin.SCHEDULER : DataCollectionJob.JobOrigin.USER_API));
                }
            } catch (Exception ex) {
                _logger.error("Exception occurred while triggering discovery of new systems", ex);
            }
        }
    }
    else {
        job.setTaskError(_dbClient, DeviceControllerErrors.dataCollectionErrors.scanLockFailed());
        _logger.error("Not able to Acquire Scanning {} lock-->{}", lockKey, Thread
                .currentThread().getId());
    }
}
/**
 * Scans a single storage provider, populating the shared cache with the
 * storage systems the provider reports.
 *
 * @param provider URI of the StorageProvider to scan
 * @param scanCompleter completer that receives the task outcome
 * @param storageCache shared cache of discovered systems, keyed by native GUID
 * @throws DatabaseException if DB access fails
 * @throws BaseCollectionException if the collection plugin fails
 * @throws DeviceControllerException if the scan cannot be executed
 */
public void performScan(URI provider,
        ScanTaskCompleter scanCompleter,
        Map<String, StorageSystemViewObject> storageCache) throws DatabaseException, BaseCollectionException, DeviceControllerException {
    // Build a scanner access profile for this provider and attach the shared cache.
    AccessProfile scanProfile = _util.getAccessProfile(StorageProvider.class, provider, ControllerServiceImpl.SCANNER, null);
    scanProfile.setCache(storageCache);
    scanProfile.setCimConnectionFactory(_connectionFactory);
    scanProfile.setProps(_configInfo);
    new DataCollectionJobInvoker(scanProfile, _configInfo, _dbClient, _coordinator,
            null, _locker, null, scanCompleter).process(applicationContext);
}
// Lifecycle hook; no startup work is required for this consumer.
public void start() throws Exception {
}
// Lifecycle hook; no shutdown work is required for this consumer.
public void stop() throws Exception {
}
/**
 * Injects the database client used for all persistence operations.
 *
 * @param dbClient the DbClient instance
 */
public void setDbClient(DbClient dbClient) {
    this._dbClient = dbClient;
}
/**
 * Injects the coordinator client used for distributed locks and node lookups.
 *
 * @param coordinator the CoordinatorClient instance
 */
public void setCoordinator(CoordinatorClient coordinator) {
    this._coordinator = coordinator;
}
/**
 * Injects the helper used to build AccessProfiles and perform bookkeeping.
 *
 * @param util the DataCollectionJobUtil instance
 */
public void setUtil(DataCollectionJobUtil util) {
    this._util = util;
}
/**
 * Injects the CIM (SMI-S) connection factory attached to access profiles.
 *
 * @param cimConnectionFactory the CIMConnectionFactory instance
 */
public void setConnectionFactory(CIMConnectionFactory cimConnectionFactory) {
    this._connectionFactory = cimConnectionFactory;
}
/**
 * Injects the scheduler used for provider connection refresh and job queuing.
 *
 * @param jobScheduler the DataCollectionJobScheduler instance
 */
public void setJobScheduler(DataCollectionJobScheduler jobScheduler) {
    this._jobScheduler = jobScheduler;
}
/**
 * Injects the controller configuration properties copied into job profiles.
 *
 * @param configInfo map of configuration keys to values
 */
public void setConfigInfo(Map<String, String> configInfo) {
    this._configInfo = configInfo;
}
/**
 * Set the controller locking service.
 *
 * @param locker An instance of ControllerLockingService
 */
public void setLocker(ControllerLockingService locker) {
    this._locker = locker;
}
// Spring application context, injected via ApplicationContextAware; handed to
// DataCollectionJobInvoker.process() so plugins can be resolved as beans.
private ApplicationContext applicationContext = null;
@Override
public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
    this.applicationContext = applicationContext;
}
/**
 * Returns the injected network device controller.
 *
 * @return the NetworkDeviceController instance, or null if never set
 */
public NetworkDeviceController getNetworkDeviceController() {
    return this._networkDeviceController;
}
/**
 * Injects the network device controller passed to discovery job invokers.
 *
 * @param networkDeviceController the NetworkDeviceController instance
 */
public void setNetworkDeviceController(
        NetworkDeviceController networkDeviceController) {
    _networkDeviceController = networkDeviceController;
}
}