/*
* Copyright (c) 2012 EMC Corporation
* All Rights Reserved
*/
package com.emc.storageos.volumecontroller.impl.plugins.discovery.smis;
import java.net.URI;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.curator.framework.recipes.leader.LeaderSelector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.emc.storageos.coordinator.client.service.CoordinatorClient;
import com.emc.storageos.coordinator.client.service.LeaderSelectorListenerForPeriodicTask;
import com.emc.storageos.coordinator.client.service.impl.LeaderSelectorListenerImpl;
import com.emc.storageos.datadomain.restapi.DataDomainClientFactory;
import com.emc.storageos.db.client.DbClient;
import com.emc.storageos.db.client.URIUtil;
import com.emc.storageos.db.client.model.ComputeSystem;
import com.emc.storageos.db.client.model.DiscoveredDataObject;
import com.emc.storageos.db.client.model.DiscoveredDataObject.CompatibilityStatus;
import com.emc.storageos.db.client.model.DiscoveredDataObject.DataCollectionJobStatus;
import com.emc.storageos.db.client.model.DiscoveredDataObject.Type;
import com.emc.storageos.db.client.model.DiscoveredSystemObject;
import com.emc.storageos.db.client.model.Host;
import com.emc.storageos.db.client.model.NetworkSystem;
import com.emc.storageos.db.client.model.ProtectionSystem;
import com.emc.storageos.db.client.model.StorageProvider;
import com.emc.storageos.db.client.model.StorageSystem;
import com.emc.storageos.db.client.model.StorageSystem.Discovery_Namespaces;
import com.emc.storageos.db.client.model.Vcenter;
import com.emc.storageos.db.client.model.util.TaskUtils;
import com.emc.storageos.db.client.util.CustomQueryUtility;
import com.emc.storageos.db.client.util.NullColumnValueGetter;
import com.emc.storageos.exceptions.DeviceControllerErrors;
import com.emc.storageos.exceptions.DeviceControllerException;
import com.emc.storageos.hds.api.HDSApiFactory;
import com.emc.storageos.model.ResourceOperationTypeEnum;
import com.emc.storageos.model.property.PropertyConstants;
import com.emc.storageos.services.util.PlatformUtils;
import com.emc.storageos.volumecontroller.impl.ControllerServiceImpl;
import com.emc.storageos.volumecontroller.impl.ceph.CephUtils;
import com.emc.storageos.volumecontroller.impl.cinder.CinderUtils;
import com.emc.storageos.volumecontroller.impl.datadomain.DataDomainUtils;
import com.emc.storageos.volumecontroller.impl.externaldevice.ExternalDeviceUtils;
import com.emc.storageos.volumecontroller.impl.hds.prov.utils.HDSUtils;
import com.emc.storageos.volumecontroller.impl.plugins.metering.smis.processor.PortMetricsProcessor;
import com.emc.storageos.volumecontroller.impl.scaleio.ScaleIOStorageDevice;
import com.emc.storageos.volumecontroller.impl.smis.CIMConnectionFactory;
import com.emc.storageos.volumecontroller.impl.xtremio.prov.utils.XtremIOProvUtils;
import com.emc.storageos.vplexcontroller.VPlexDeviceController;
import com.emc.storageos.xtremio.restapi.XtremIOClientFactory;
/**
 * Consumer for discovery jobs added to the queue.
 * This is the core class, with multiple responsibilities:
 * 1. Scheduled loading of provider devices from the DB every X minutes (scanning)
 * 1a. Scheduled loading of physical storage systems every X minutes (discovery)
 * 2. Consuming discovery jobs
 * 3. Submitting the discovery jobs to the ExecutorService
 *
 */
public class DataCollectionJobScheduler {
private static final Logger _logger = LoggerFactory
.getLogger(DataCollectionJobScheduler.class);
// Single scheduled executor that drives all periodic data-collection tasks in this class.
private ScheduledExecutorService _dataCollectionExecutorService = null;
// _configInfo keys that enable/disable the individual background operations.
private static final String ENABLE_METERING = "enable-metering";
private static final String ENABLE_AUTODISCOVER = "enable-autodiscovery";
private static final String ENABLE_ARRAYAFFINITY_DISCOVER = "enable-arrayaffinity-discovery";
private static final String ENABLE_AUTOSCAN = "enable-autoscan";
// When true, allows auto scan/discovery/metering even on single-node (1+0) deployments.
private static final String ENABLE_AUTO_OPS_SINGLENODE = "enable-auto-discovery-metering-scan-single-node-deployments";
// Config key for time tolerance (not referenced in this portion of the class).
private static final String TOLERANCE = "time-tolerance";
// Prefix for system properties that override interval config (hyphens become underscores).
private static final String PROP_HEADER_CONTROLLER = "controller_";
// Tenant URI used when cleaning up orphaned pending tasks.
private static final String SYSTEM_TENANT_ID = "urn:storageos:TenantOrg:system:";
// Initial delays before the first run of each periodic task.
// initialConnectionRefreshDelay is in seconds (see scheduleAtFixedRate in start());
// the other delays' units depend on LeaderSelectorListenerForPeriodicTask.
private static final int initialScanDelay = 30;
private static final int initialDiscoveryDelay = 90;
private static final int initialArrayAffinityDiscoveryDelay = 90;
private static final int initialMeteringDelay = 60;
private static final int initialConnectionRefreshDelay = 10;
// Interval/feature configuration, injected; may be overridden by system properties in start().
private Map<String, String> _configInfo;
private DbClient _dbClient;
private CoordinatorClient _coordinator;
private CIMConnectionFactory _connectionFactory;
// ZK paths used to elect the single node that schedules jobs / computes port metrics.
private final String leaderSelectorPath = "discoveryleader";
private final String leaderSelectorComputePortMetricsPath = "computeportmetricsleader";
private LeaderSelector discoverySchedulingSelector;
private HDSApiFactory hdsApiFactory;
private DataDomainClientFactory ddClientFactory;
private XtremIOClientFactory xioClientFactory;
private PortMetricsProcessor _portMetricsProcessor;
private LeaderSelector computePortMetricsSelector;
// Prevents a node from starting a new provider-connection refresh while one is still running.
private final Lock _providerConnectionRefreshMutex = new ReentrantLock();
/**
 * Per-job-type interval configuration. Each constant carries the config keys for its
 * run interval and refresh interval, plus a hard-coded initial delay. Values are
 * populated from the config map via {@link #initialize(Map)} during start().
 */
static enum JobIntervals {
SCAN_INTERVALS("scan-interval", "scan-refresh-interval", initialScanDelay),
DISCOVER_INTERVALS("discovery-interval", "discovery-refresh-interval", initialDiscoveryDelay),
ARRAYAFFINITY_DISCOVER_INTERVALS("arrayaffinity-discovery-interval", "arrayaffinity-discovery-refresh-interval", initialArrayAffinityDiscoveryDelay),
CS_DISCOVER_INTERVALS("cs-discovery-interval", "cs-discovery-refresh-interval", initialDiscoveryDelay),
NS_DISCOVER_INTERVALS("ns-discovery-interval", "ns-discovery-refresh-interval", initialDiscoveryDelay),
COMPUTE_DISCOVER_INTERVALS("compute-discovery-interval", "compute-discovery-refresh-interval", initialDiscoveryDelay),
METERING_INTERVALS("metering-interval", "metering-refresh-interval", initialMeteringDelay);
// Config key for how often this job type runs.
private final String _interval;
// Parsed interval value; volatile so updates from initialize() are visible to other threads.
private volatile long _intervalValue;
// Config key for the minimum time between successive runs for the same system.
private final String _refreshInterval;
private volatile long _refreshIntervalValue;
// Delay before this job type's first scheduled run.
private final long _initialDelay;
// Shared across all constants; re-parsed (with the same key) on every initialize() call.
static private long _maximumIdleInterval;
JobIntervals(String interval, String refresh, long delay) {
_interval = interval;
_refreshInterval = refresh;
_initialDelay = delay;
}
// Parses this constant's interval values from the config map; called once per constant in start().
void initialize(Map<String, String> configInfo) {
_intervalValue = Long.parseLong(configInfo.get(_interval));
_refreshIntervalValue = Long.parseLong(configInfo.get(_refreshInterval));
_maximumIdleInterval = Long.parseLong(configInfo.get("maximum-idle-timeout"));
}
public long getInterval() {
return _intervalValue;
}
public long getRefreshInterval() {
return _refreshIntervalValue;
}
public long getInitialDelay() {
return _initialDelay;
}
// Maps a ControllerServiceImpl job-type string to its intervals; returns null if unrecognized.
public static JobIntervals get(String jobType) {
if (ControllerServiceImpl.SCANNER.equalsIgnoreCase(jobType)) {
return SCAN_INTERVALS;
}
if (ControllerServiceImpl.DISCOVERY.equalsIgnoreCase(jobType)) {
return DISCOVER_INTERVALS;
}
if (ControllerServiceImpl.ARRAYAFFINITY_DISCOVERY.equalsIgnoreCase(jobType)) {
return ARRAYAFFINITY_DISCOVER_INTERVALS;
}
if (ControllerServiceImpl.NS_DISCOVERY.equalsIgnoreCase(jobType)) {
return NS_DISCOVER_INTERVALS;
}
if (ControllerServiceImpl.CS_DISCOVERY.equalsIgnoreCase(jobType)) {
return CS_DISCOVER_INTERVALS;
}
if (ControllerServiceImpl.METERING.equalsIgnoreCase(jobType)) {
return METERING_INTERVALS;
}
if (ControllerServiceImpl.COMPUTE_DISCOVERY.equalsIgnoreCase(jobType)) {
return COMPUTE_DISCOVER_INTERVALS;
} else {
return null;
}
}
public static long getMaxIdleInterval() {
return _maximumIdleInterval;
}
}
/**
 * Starts the scheduler:
 * 1. Initializes all JobIntervals from config, applying "controller_*" system-property overrides.
 * 2. Registers periodic scan/discovery/array-affinity/metering tasks behind a leader selector
 * so only the elected node schedules data collection jobs.
 * 3. Schedules a periodic provider-connection refresh (runs on every node).
 * 4. Elects a leader to recompute storage port metrics once shortly after startup.
 *
 * Auto scan/discovery/metering are disabled on single-node non-OSS deployments unless
 * explicitly re-enabled via config.
 *
 * @throws Exception on failure to parse intervals or start the leader selectors
 */
public void start() throws Exception {
_dataCollectionExecutorService = Executors.newScheduledThreadPool(1);
for (JobIntervals intervals : JobIntervals.values()) {
// Override intervals and refresh intervals with system properties, if set.
// Requires these system props start with "controller_" and uses underscores instead of hyphens.
String prop = _coordinator.getPropertyInfo().getProperty(PROP_HEADER_CONTROLLER + intervals._interval.replace('-', '_'));
if (prop != null) {
_configInfo.put(intervals._interval, prop);
}
prop = _coordinator.getPropertyInfo().getProperty(PROP_HEADER_CONTROLLER + intervals._refreshInterval.replace('-', '_'));
if (prop != null) {
_configInfo.put(intervals._refreshInterval, prop);
}
intervals.initialize(_configInfo);
}
boolean enableAutoScan = Boolean.parseBoolean(_configInfo.get(ENABLE_AUTOSCAN));
boolean enableAutoDiscovery = Boolean.parseBoolean(_configInfo.get(ENABLE_AUTODISCOVER));
boolean enableArrayAffinityDiscovery = Boolean.parseBoolean(_configInfo.get(ENABLE_ARRAYAFFINITY_DISCOVER));
boolean enableAutoMetering = Boolean.parseBoolean(_configInfo.get(ENABLE_METERING));
// Override auto discovery, scan, and metering if this is one node deployment, such as devkit,
// standalone, or 1+0. CoprHD are single-node deployments typically, so ignore this variable in CoprHD.
if (!PlatformUtils.isOssBuild() && (enableAutoScan || enableAutoDiscovery || enableAutoMetering)) {
String numOfNodesString = _coordinator.getPropertyInfo().getProperty(PropertyConstants.NODE_COUNT_KEY);
if (numOfNodesString != null && numOfNodesString.equals("1")) {
boolean enableAutoOpsSingleNodeString = false;
String enableAutoOpsSingleNode = _configInfo.get(ENABLE_AUTO_OPS_SINGLENODE);
if (enableAutoOpsSingleNode != null) {
enableAutoOpsSingleNodeString = Boolean.parseBoolean(enableAutoOpsSingleNode);
}
if (!enableAutoOpsSingleNodeString) {
enableAutoScan = enableAutoDiscovery = enableAutoMetering = false;
}
}
}
// All periodic scheduling tasks run only on the node holding discovery leadership.
LeaderSelectorListenerForPeriodicTask schedulingProcessor = new LeaderSelectorListenerForPeriodicTask(
_dataCollectionExecutorService);
if (enableAutoScan) {
JobIntervals intervals = JobIntervals.get(ControllerServiceImpl.SCANNER);
schedulingProcessor.addScheduledTask(new DiscoveryScheduler(ControllerServiceImpl.SCANNER),
intervals.getInitialDelay(),
intervals.getInterval());
} else {
_logger.info("Auto scan is disabled.");
}
if (enableAutoDiscovery) {
// Storage, network, compute, and host/vcenter discovery are all driven by this flag.
JobIntervals intervals = JobIntervals.get(ControllerServiceImpl.DISCOVERY);
schedulingProcessor.addScheduledTask(new DiscoveryScheduler(ControllerServiceImpl.DISCOVERY),
intervals.getInitialDelay(),
intervals.getInterval());
intervals = JobIntervals.get(ControllerServiceImpl.NS_DISCOVERY);
schedulingProcessor.addScheduledTask(new DiscoveryScheduler(ControllerServiceImpl.NS_DISCOVERY),
intervals.getInitialDelay(),
intervals.getInterval());
intervals = JobIntervals.get(ControllerServiceImpl.COMPUTE_DISCOVERY);
schedulingProcessor.addScheduledTask(new DiscoveryScheduler(ControllerServiceImpl.COMPUTE_DISCOVERY),
intervals.getInitialDelay(),
intervals.getInterval());
intervals = JobIntervals.get(ControllerServiceImpl.CS_DISCOVERY);
schedulingProcessor.addScheduledTask(new DiscoveryScheduler(ControllerServiceImpl.CS_DISCOVERY),
intervals.getInitialDelay(),
intervals.getInterval());
} else {
_logger.info("Auto discovery is disabled.");
}
if (enableArrayAffinityDiscovery) {
JobIntervals intervals = JobIntervals.get(ControllerServiceImpl.ARRAYAFFINITY_DISCOVERY);
schedulingProcessor.addScheduledTask(new DiscoveryScheduler(ControllerServiceImpl.ARRAYAFFINITY_DISCOVERY),
intervals.getInitialDelay(),
intervals.getInterval());
_logger.info("Array Affinity discovery is enabled with interval {}", intervals.getInterval());
} else {
_logger.info("Array Affinity discovery is disabled");
}
if (enableAutoMetering) {
JobIntervals intervals = JobIntervals.get(ControllerServiceImpl.METERING);
schedulingProcessor.addScheduledTask(new DiscoveryScheduler(ControllerServiceImpl.METERING),
intervals.getInitialDelay(),
intervals.getInterval());
}
else {
_logger.info("Metering is disabled.");
}
discoverySchedulingSelector = _coordinator.getLeaderSelector(leaderSelectorPath,
schedulingProcessor);
discoverySchedulingSelector.autoRequeue();
discoverySchedulingSelector.start();
// run provider refresh in its own thread so we don't hold up the scheduling
// thread if it takes longer than expected
// NOTE(review): this spawns a detached thread each period; overlap is prevented by
// the tryLock inside RefreshProviderConnectionsThread, not by the executor.
_dataCollectionExecutorService.scheduleAtFixedRate(
new Runnable() {
@Override
public void run() {
try {
(new Thread(new RefreshProviderConnectionsThread())).start();
} catch (Exception e) {
_logger.error("Failed to start refresh connections thread: {}", e.getMessage());
_logger.error(e.getMessage(), e);
}
}
}, initialConnectionRefreshDelay, JobIntervals.SCAN_INTERVALS.getInterval(), TimeUnit.SECONDS);
// recompute storage ports's metrics for all storage system
// Since traverse through all storage ports in all storage systems may take a while, it best to perform the
// task in a thread. We definitely do not want all nodes in cluster to do the same task, select a leader to
// do it there.
computePortMetricsSelector = _coordinator.getLeaderSelector(leaderSelectorComputePortMetricsPath, new LeaderSelectorListenerImpl() {
@Override
protected void stopLeadership() {
}
@Override
protected void startLeadership() throws Exception {
_dataCollectionExecutorService.schedule(new Runnable() {
@Override
public void run() {
_portMetricsProcessor.computeStoragePortUsage();
}
}, 1, TimeUnit.MILLISECONDS);
}
});
computePortMetricsSelector.autoRequeue();
computePortMetricsSelector.start();
}
private class RefreshProviderConnectionsThread implements Runnable {
@Override
public void run() {
try {
// a simple mutex lock is all we need in this case since provider connection refresh happens on all nodes
// this lock prevents a single node from starting a new provider connection refresh operation
// if a previous operation is still in progress
boolean acquired = _providerConnectionRefreshMutex.tryLock();
if (acquired) {
try {
_logger.info("Acquired mutex lock (_providerConnectionRefreshMutex) to refresh provider connections");
refreshProviderConnections();
} finally {
_providerConnectionRefreshMutex.unlock();
}
} else {
_logger.error("Could not aquire mutex lock (_providerConnectionRefreshMutex) to refresh provider connections");
}
} catch (Exception e) {
_logger.error("Failed to refresh connections: {}", e.getMessage());
_logger.error(e.getMessage(), e);
}
}
}
/**
 * Periodic task that triggers one category of data collection: scanner runs schedule
 * scan jobs for all providers, every other job type loads its systems from the DB
 * and enqueues the corresponding jobs.
 */
private class DiscoveryScheduler implements Runnable {
    String jobType;

    public DiscoveryScheduler(String jobType) {
        this.jobType = jobType;
    }

    @Override
    public void run() {
        try {
            if (!ControllerServiceImpl.SCANNER.equalsIgnoreCase(jobType)) {
                loadSystemfromDB(jobType);
                return;
            }
            scheduleScannerJobs();
        } catch (Exception e) {
            // Never let an exception escape: it would cancel the periodic schedule.
            _logger.error(String.format("Exception caught when trying to run discovery job %s", jobType), e);
        }
    }
}
/**
 * Core method, responsible for loading StorageProviders from the DB and scheduling
 * scanning. Providers are grouped by interface type; one scan job (carrying one
 * completer per provider) is scheduled per interface type.
 *
 * @throws Exception if scheduling any of the grouped scan jobs fails
 */
private void scheduleScannerJobs() throws Exception {
    _logger.info("Started Loading Storage Providers from DB");
    List<StorageProvider> providers = _dbClient.queryObject(StorageProvider.class, _dbClient.queryByType(StorageProvider.class, true));
    Map<String, DataCollectionScanJob> scanJobByInterfaceType = new HashMap<>();
    for (StorageProvider provider : providers) {
        // One scan job per interface type; computeIfAbsent avoids the get-then-put double lookup.
        DataCollectionScanJob scanJob = scanJobByInterfaceType.computeIfAbsent(
                provider.getInterfaceType(),
                key -> new DataCollectionScanJob(DataCollectionJob.JobOrigin.SCHEDULER));
        String taskId = UUID.randomUUID().toString();
        scanJob.addCompleter(new ScanTaskCompleter(StorageProvider.class, provider.getId(), taskId));
    }
    for (DataCollectionScanJob scanJob : scanJobByInterfaceType.values()) {
        scheduleScannerJobs(scanJob);
    }
}
/**
 * Scans the providers carried by one scan job (all of the same interface type).
 * Acquires the scanner lock, cleans up stale pending scan tasks, and enqueues the
 * scan job only if no scan is already in progress/queued and the scanner refresh
 * interval has elapsed since the last scan.
 *
 * @param scanJob scan job holding the provider URIs and their task completers
 * @throws Exception (DeviceControllerException) if the scanner lock cannot be acquired
 */
public void scheduleScannerJobs(DataCollectionScanJob scanJob) throws Exception {
List<StorageProvider> providers = _dbClient.queryObject(StorageProvider.class, scanJob.getProviders());
if (providers == null || providers.isEmpty()) {
_logger.info("No scanning needed: provider list is empty");
return;
}
_logger.info("Starting scan of providers of type {}", providers.iterator().next().getInterfaceType());
long lastScanTime = 0;
List<URI> provUris = scanJob.getProviders();
if (provUris != null && !provUris.isEmpty()) {
ControllerServiceImpl.Lock lock = ControllerServiceImpl.Lock.getLock(ControllerServiceImpl.SCANNER);
if (lock.acquire(lock.getRecommendedTimeout())) {
try {
_logger.info("Acquired a lock {} to schedule {} scanner Jobs", providers.iterator().next().getInterfaceType(), lock.toString());
boolean inProgress = ControllerServiceImpl.isDataCollectionJobInProgress(scanJob) || ControllerServiceImpl.isDataCollectionJobQueued(scanJob);
// Find the last scan time from the provider whose scan status is not in progress or scheduled
if (!inProgress) {
// All providers in the job share an interface type; the first one's
// last scan time is used for the refresh-interval check below.
lastScanTime = providers.iterator().next().getLastScanTime();
// if there are any pending tasks clear them; look for pending tasks more than an hour old. That will exclude the
// tasks created for the jobs currently being scheduled
for (StorageProvider provider: providers) {
Calendar oneHourAgo = Calendar.getInstance();
oneHourAgo.setTime(Date.from(LocalDateTime.now().minusHours(1).atZone(ZoneId.systemDefault()).toInstant()));
TaskUtils.cleanupPendingTasks(_dbClient, provider.getId(), ResourceOperationTypeEnum.SCAN_STORAGEPROVIDER.getName(), URI.create(SYSTEM_TENANT_ID),
oneHourAgo);
}
}
if (isDataCollectionScanJobSchedulingNeeded(lastScanTime, inProgress)) {
// Mark every provider SCHEDULED before enqueueing so the in-progress
// check above catches concurrent scheduling attempts.
for (StorageProvider provider : providers) {
provider.setScanStatus(DataCollectionJobStatus.SCHEDULED.toString());
_dbClient.updateObject(provider);
}
_logger.info("Added Scan job to the Distributed Queue");
ControllerServiceImpl.enqueueDataCollectionJob(scanJob);
} else {
// clear the task that was created for this job but don't set the provider to not in progress
scanJob.setTaskReady(_dbClient, "Scan job was not run because it is either in progress or was run recently");
}
} catch (Exception e) {
_logger.error(e.getMessage(), e);
} finally {
try {
lock.release();
_logger.info("Released a lock {} to schedule Jobs", lock.toString());
} catch (Exception e) {
_logger.error("Failed to release Lock {} -->{}", lock.toString(), e.getMessage());
}
}
} else {
_logger.debug("Not able to Acquire lock {}-->{}", lock.toString(), Thread.currentThread().getId());
throw new DeviceControllerException("Failed to acquire lock : " + lock.toString());
}
}
}
/**
 * Drains the given iterator, appending every remaining element to the target list.
 *
 * @param newList list that receives the elements
 * @param iter iterator whose remaining elements are consumed
 */
private void addToList(List<URI> newList, Iterator<URI> iter) {
    // Idiomatic Java 8 replacement for the manual hasNext()/next() loop.
    iter.forEachRemaining(newList::add);
}
/**
 * Loads the systems relevant to the given job type from the DB and adds the
 * corresponding discovery/metering jobs to the distributed job queue.
 * Array-affinity jobs are grouped per storage provider (Unity systems form their
 * own single-system groups); all other job types produce one job per system.
 *
 * @param jobType one of the ControllerServiceImpl job-type constants
 * @throws Exception if the job lock cannot be acquired while scheduling
 */
private void loadSystemfromDB(String jobType) throws Exception {
_logger.info("Started Loading Systems from DB for " + jobType + " jobs");
ArrayList<DataCollectionJob> jobs = new ArrayList<DataCollectionJob>();
List<URI> allSystemsURIs = new ArrayList<URI>();
// Only populated for array-affinity jobs: provider URI -> storage systems behind it.
Map<URI, List<URI>> providerToSystemsMap = new HashMap<URI, List<URI>>();
if (jobType.equalsIgnoreCase(ControllerServiceImpl.NS_DISCOVERY)) {
addToList(allSystemsURIs, _dbClient.queryByType(NetworkSystem.class, true).iterator());
} else if (jobType.equalsIgnoreCase(ControllerServiceImpl.CS_DISCOVERY)) {
addToList(allSystemsURIs, _dbClient.queryByType(Host.class, true).iterator());
addToList(allSystemsURIs, _dbClient.queryByType(Vcenter.class, true).iterator());
} else if (jobType.equalsIgnoreCase(ControllerServiceImpl.COMPUTE_DISCOVERY)) {
addToList(allSystemsURIs, _dbClient.queryByType(ComputeSystem.class, true).iterator());
} else if (jobType.equalsIgnoreCase(ControllerServiceImpl.ARRAYAFFINITY_DISCOVERY)) {
// Array affinity only applies to these block array types.
List<URI> systemURIs = _dbClient.queryByType(StorageSystem.class, true);
List<StorageSystem> systems = new ArrayList<StorageSystem>();
Iterator<StorageSystem> storageSystems = _dbClient.queryIterativeObjects(StorageSystem.class, systemURIs, true);
while (storageSystems.hasNext()) {
StorageSystem system = storageSystems.next();
if (system.deviceIsType(Type.vmax) || system.deviceIsType(Type.vnxblock) || system.deviceIsType(Type.xtremio) ||
system.deviceIsType(Type.unity)) {
systems.add(system);
}
}
// Sort systems by last array affinity time, so that system with the earliest last array affinity time will be used
// when checking if job should be scheduled
Collections.sort(systems, new Comparator<StorageSystem>() {
public int compare(StorageSystem system1, StorageSystem system2) {
return Long.compare(system1.getLastArrayAffinityRunTime(), system2.getLastArrayAffinityRunTime());
}
});
for (StorageSystem system : systems) {
if (system.deviceIsType(Type.unity)) {
// Unity systems have no provider; each one is its own group keyed by the system id.
List<URI> systemIds = new ArrayList<URI>();
systemIds.add(system.getId());
providerToSystemsMap.put(system.getId(), systemIds);
} else {
StorageProvider provider = _dbClient.queryObject(StorageProvider.class,
system.getActiveProviderURI());
if (provider != null && !provider.getInactive()) {
List<URI> systemIds = providerToSystemsMap.get(provider.getId());
if (systemIds == null) {
systemIds = new ArrayList<URI>();
providerToSystemsMap.put(provider.getId(), systemIds);
}
systemIds.add(system.getId());
}
}
}
} else {
// Default (storage discovery / metering): all storage and protection systems.
addToList(allSystemsURIs, _dbClient.queryByType(StorageSystem.class, true).iterator());
addToList(allSystemsURIs, _dbClient.queryByType(ProtectionSystem.class, true).iterator());
}
if (!providerToSystemsMap.isEmpty()) {
// Array-affinity path: one job per provider group.
for (Map.Entry<URI, List<URI>> entry : providerToSystemsMap.entrySet()) {
String taskId = UUID.randomUUID().toString();
List<URI> systemIds = entry.getValue();
ArrayAffinityDataCollectionTaskCompleter completer = new ArrayAffinityDataCollectionTaskCompleter(StorageSystem.class, systemIds, taskId, jobType);
DataCollectionArrayAffinityJob job = new DataCollectionArrayAffinityJob(null, systemIds, completer, DataCollectionJob.JobOrigin.SCHEDULER, Discovery_Namespaces.ARRAY_AFFINITY.name());
jobs.add(job);
}
scheduleMultipleJobs(jobs, ControllerServiceImpl.Lock.getLock(jobType));
} else if (!allSystemsURIs.isEmpty()) {
// Per-system path: build one job per system, skipping systems that are not ready.
Iterator<URI> systemURIsItr = allSystemsURIs.iterator();
while (systemURIsItr.hasNext()) {
URI systemURI = systemURIsItr.next();
String taskId = UUID.randomUUID().toString();
DataCollectionJob job = null;
StorageProvider provider = null;
if (URIUtil.isType(systemURI, StorageSystem.class)) {
StorageSystem systemObj = _dbClient.queryObject(StorageSystem.class, systemURI);
if (systemObj == null || systemObj.getInactive()) {
_logger.warn(String.format("StorageSystem %s is no longer in the DB or is inactive. It could have been deleted or decommissioned",
systemURI));
continue;
}
// check devices managed by SMIS/hicommand/vplex device mgr has ActiveProviderURI or not.
if (systemObj.isStorageSystemManagedByProvider()) {
if (systemObj.getActiveProviderURI() == null
|| NullColumnValueGetter.getNullURI().equals(systemObj.getActiveProviderURI())) {
_logger.info("Skipping {} Job : StorageSystem {} does not have an active provider",
jobType, systemURI);
continue;
}
provider = _dbClient.queryObject(StorageProvider.class,
systemObj.getActiveProviderURI());
if (provider == null || provider.getInactive()) {
_logger.info("Skipping {} Job : StorageSystem {} does not have a valid active provider",
jobType, systemURI);
continue;
}
}
// For Metering, check SerialNumber has populated or not.
if (ControllerServiceImpl.METERING.equalsIgnoreCase(jobType)) {
if (null == systemObj.getSerialNumber()) {
_logger.info("Skipping {} Job : StorageSystem {} discovery failed or hasn't run.",
jobType, systemURI);
continue;
} else if (CompatibilityStatus.INCOMPATIBLE.name().equalsIgnoreCase(systemObj.getCompatibilityStatus())) {
_logger.info("Skipping {} Job : StorageSystem {} has incompatible version",
jobType, systemURI);
continue;
}
}
job = getDataCollectionJobByType(StorageSystem.class, jobType, taskId, systemURI);
} else if (URIUtil.isType(systemURI, NetworkSystem.class)) {
job = getDataCollectionJobByType(NetworkSystem.class, jobType, taskId, systemURI);
} else if (URIUtil.isType(systemURI, ComputeSystem.class)) {
job = getDataCollectionJobByType(ComputeSystem.class, jobType, taskId, systemURI);
} else if (URIUtil.isType(systemURI, Host.class)) {
Host host = _dbClient.queryObject(Host.class, systemURI);
// Add host
if ((host.getDiscoverable() == null || host.getDiscoverable())) {
job = getDataCollectionJobByType(Host.class, jobType, taskId, systemURI);
}
} else if (URIUtil.isType(systemURI, Vcenter.class)) {
job = getDataCollectionJobByType(Vcenter.class, jobType, taskId, systemURI);
} else if (URIUtil.isType(systemURI, ProtectionSystem.class)) {
// Do not queue any metering jobs for ProtectionSystems.
// Protection System metrics are not used for "metering" per vpool/project/tenant
if (!jobType.equals(ControllerServiceImpl.METERING)) {
job = getDataCollectionJobByType(ProtectionSystem.class, jobType, taskId, systemURI);
}
}
if (null != job) {
jobs.add(job);
}
}
scheduleMultipleJobs(jobs, ControllerServiceImpl.Lock.getLock(jobType));
} else {
_logger.info("No systems found in db to schedule jobs.");
}
}
/**
 * Builds the data collection job for the given job type.
 *
 * @param systemClass system class used to create the TaskCompleter
 * @param jobType job type deciding which kind of job (metering vs. discovery) to create
 * @param taskId task id to set on the TaskCompleter
 * @param systemURI system URI to set on the TaskCompleter
 * @return the job, or null if the job type is neither metering nor a supported discovery type
 */
private DataCollectionJob getDataCollectionJobByType(Class<? extends DiscoveredSystemObject> systemClass,
        String jobType, String taskId, URI systemURI) {
    if (ControllerServiceImpl.METERING.equalsIgnoreCase(jobType)) {
        return new DataCollectionMeteringJob(
                new MeteringTaskCompleter(systemClass, systemURI, taskId),
                DataCollectionJob.JobOrigin.SCHEDULER);
    }
    if (ControllerServiceImpl.isDiscoveryJobTypeSupported(jobType)) {
        return new DataCollectionDiscoverJob(
                new DiscoverTaskCompleter(systemClass, systemURI, taskId, jobType),
                DataCollectionJob.JobOrigin.SCHEDULER, Discovery_Namespaces.ALL.toString());
    }
    return null;
}
/**
 * Enqueues the given jobs while holding the supplied job-type lock.
 *
 * @param jobs jobs to enqueue
 * @param lock job-type lock guarding the enqueue
 * @throws Exception (DeviceControllerException) if the lock cannot be acquired in time
 */
public void scheduleMultipleJobs(List<DataCollectionJob> jobs, ControllerServiceImpl.Lock lock) throws Exception {
    // Guard clause: bail out early when the lock is unavailable.
    if (!lock.acquire(lock.getRecommendedTimeout())) {
        _logger.debug("Not able to Acquire lock {}-->{}", lock.toString(), Thread.currentThread().getId());
        throw new DeviceControllerException("Failed to acquire lock : " + lock.toString());
    }
    try {
        _logger.info("Acquired a lock {} to schedule Jobs", lock.toString());
        enqueueJobs(jobs);
    } finally {
        try {
            lock.release();
        } catch (Exception e) {
            _logger.error("Failed to release Lock {} -->{}", lock.toString(), e.getMessage());
        }
    }
}
/**
 * Enqueues each job onto the distributed queue if scheduling is needed for its target
 * system; otherwise the job's task is marked ready (for user-initiated jobs) with an
 * explanatory message. Failures to enqueue set the task to error for user jobs.
 *
 * @param jobs jobs to enqueue
 */
private void enqueueJobs(List<DataCollectionJob> jobs) {
for (DataCollectionJob job : jobs) {
try {
DataCollectionTaskCompleter completer = job.getCompleter();
DiscoveredSystemObject system = (DiscoveredSystemObject)
_dbClient.queryObject(completer.getType(), completer.getId());
if (isDataCollectionJobSchedulingNeeded(system, job)) {
job.schedule(_dbClient);
// Clear the previous status message before the new run starts.
if (job instanceof DataCollectionArrayAffinityJob) {
((ArrayAffinityDataCollectionTaskCompleter) completer).setLastStatusMessage(_dbClient, "");
} else {
system.setLastDiscoveryStatusMessage("");
_dbClient.updateObject(system);
}
ControllerServiceImpl.enqueueDataCollectionJob(job);
}
else {
_logger.info("Skipping {} Job for {}", job.getType(), completer.getId());
// Only user-initiated jobs carry a task that must be resolved when skipped.
if (!job.isSchedulerJob()) {
job.setTaskReady(_dbClient,
"The discovery for this system is currently running or was run quite recently. Resubmit this request at a later time, if needed.");
}
}
} catch (Exception e) {
// One job failing must not prevent the remaining jobs from being enqueued.
_logger.error("Failed to enqueue {} Job {}", job.getType(), e.getMessage());
if (!job.isSchedulerJob()) {
try {
job.setTaskError(_dbClient,
DeviceControllerErrors.dataCollectionErrors.failedToEnqueue(job.getType(), e));
} catch (Exception ex) {
_logger.warn("Exception occurred while updating task status", ex);
}
}
}
}
}
/**
 * Returns true if the system's status for the given job type is IN_PROGRESS or SCHEDULED.
 */
private <T extends DiscoveredSystemObject> boolean isInProgress(
        T storageSystem, String type) {
    String status = getStatus(storageSystem, type);
    boolean running = DiscoveredDataObject.DataCollectionJobStatus.IN_PROGRESS.toString()
            .equalsIgnoreCase(status);
    boolean scheduled = DiscoveredDataObject.DataCollectionJobStatus.SCHEDULED.toString()
            .equalsIgnoreCase(status);
    return running || scheduled;
}
/**
 * Returns true if the provider's scan status is IN_PROGRESS.
 */
private boolean isInProgress(StorageProvider provider) {
    String inProgress = DiscoveredDataObject.DataCollectionJobStatus.IN_PROGRESS.toString();
    return inProgress.equalsIgnoreCase(provider.getScanStatus());
}
/**
 * Returns true if the system's status for the given job type is ERROR.
 */
private <T extends DiscoveredSystemObject> boolean isError(
        T storageSystem, String type) {
    String errorStatus = DiscoveredDataObject.DataCollectionJobStatus.ERROR.toString();
    return errorStatus.equalsIgnoreCase(getStatus(storageSystem, type));
}
/**
 * Returns true if the provider's scan status is ERROR.
 */
private boolean isError(StorageProvider provider) {
    String errorStatus = DiscoveredDataObject.DataCollectionJobStatus.ERROR.toString();
    return errorStatus.equalsIgnoreCase(provider.getScanStatus());
}
/**
 * Decides whether a scan job should be scheduled.
 * If the job is already in progress (or queued), it is not scheduled. Otherwise it is
 * scheduled only when the scanner refresh interval has elapsed since the last scan
 * (a lastScanTime of 0 means never scanned, which always allows scheduling).
 *
 * @param lastScanTime last scan completion time in milliseconds, or 0 if never scanned
 * @param inProgress true if a scan job is already in progress or queued
 * @return true if a new scan job should be scheduled
 */
private boolean isDataCollectionScanJobSchedulingNeeded(long lastScanTime, boolean inProgress) {
    // Removed the unused generic type parameter <T> from the original signature.
    if (inProgress) {
        return false;
    }
    long systemTime = System.currentTimeMillis();
    long refreshInterval = getRefreshInterval(ControllerServiceImpl.SCANNER);
    // Refresh interval is in seconds; timestamps are in milliseconds.
    if (lastScanTime > 0 && (systemTime - lastScanTime < refreshInterval * 1000)) {
        _logger.info("Skipping scanner job; attempt to schedule faster than refresh interval allows");
        return false;
    }
    return true;
}
/**
 * Determines whether the given data collection job should be scheduled for the system.
 *
 * @param <T> the discovered system type
 * @param system the system the job targets
 * @param job the data collection job; job.isSchedulerJob() indicates whether the job was
 * initiated automatically by the scheduler or requested by a user.
 * @return true if the job should be scheduled
 */
private <T extends DiscoveredSystemObject> boolean isDataCollectionJobSchedulingNeeded(T system, DataCollectionJob job) {
String type = job.getType();
boolean scheduler = job.isSchedulerJob();
String namespace = job.getNamespace();
// CTRL-8227 if an unmanaged volume discovery is requested by the user,
// just run it regardless of last discovery time
// COP-20052 if an unmanaged CG discovery is requested, just run it
if (!scheduler &&
(Discovery_Namespaces.UNMANAGED_VOLUMES.name().equalsIgnoreCase(namespace) ||
Discovery_Namespaces.BLOCK_SNAPSHOTS.name().equalsIgnoreCase(namespace) ||
Discovery_Namespaces.UNMANAGED_FILESYSTEMS.name().equalsIgnoreCase(namespace) ||
Discovery_Namespaces.UNMANAGED_CGS.name().equalsIgnoreCase(namespace))) {
_logger.info(namespace + " discovery has been requested by the user, scheduling now...");
return true;
}
if (ControllerServiceImpl.METERING.equalsIgnoreCase(type) &&
!DiscoveredDataObject.RegistrationStatus.REGISTERED.toString()
.equalsIgnoreCase(system.getRegistrationStatus())) {
return false;
}
// Scan triggered the discovery of this new System found, and discovery was in progress
// in the mean time, UI triggered the discovery again, the last Run time will be 0
// as we depend on the last run time to calculate next run time, the value will be
// always 3600 seconds in this case, which is lower than the maximum idle interval which is 4200 sec.
// hence a new Job will again get rescheduled.
// This fix, calculates next time from last Run time , only if its not 0.
long lastTime = getLastRunTime(system, type);
long nextTime = getNextRunTime(system, type);
if (lastTime > 0) {
nextTime = lastTime + JobIntervals.get(type).getInterval() * 1000;
}
if (ControllerServiceImpl.DISCOVERY.equalsIgnoreCase(type) && system instanceof NetworkSystem) {
type = ControllerServiceImpl.NS_DISCOVERY;
}
if (ControllerServiceImpl.DISCOVERY.equalsIgnoreCase(type) && system instanceof ComputeSystem) {
type = ControllerServiceImpl.COMPUTE_DISCOVERY;
}
if (ControllerServiceImpl.DISCOVERY.equalsIgnoreCase(type) &&
(system instanceof Host || system instanceof Vcenter)) {
type = ControllerServiceImpl.CS_DISCOVERY;
}
// check directly on the queue to determine if the job is in progress
boolean inProgress = ControllerServiceImpl.isDataCollectionJobInProgress(job);
boolean queued = ControllerServiceImpl.isDataCollectionJobQueued(job);
if (!queued && !inProgress) {
// the job does not appear on the queue in either active or queued state
// check the storage system database status; if it shows that it's scheduled or in progress, something
// went wrong with a previous discovery. Set it to error and allow it to be rescheduled.
boolean dbInProgressStatus = isInProgress(system, type);
if (dbInProgressStatus) {
_logger.warn(type + " job for " + system.getLabel() + " is not queued or in progress; correcting the ViPR DB status");
updateDataCollectionStatus(system, type, DiscoveredDataObject.DataCollectionJobStatus.ERROR);
}
// check for any pending tasks; if there are any, they're orphaned and should be cleaned up
// look for tasks older than one hour; this will exclude the discovery job currently being scheduled
Calendar oneHourAgo = Calendar.getInstance();
oneHourAgo.setTime(Date.from(LocalDateTime.now().minusDays(1).atZone(ZoneId.systemDefault()).toInstant()));
if (ControllerServiceImpl.DISCOVERY.equalsIgnoreCase(type)) {
TaskUtils.cleanupPendingTasks(_dbClient, system.getId(), ResourceOperationTypeEnum.DISCOVER_STORAGE_SYSTEM.getName(), URI.create(SYSTEM_TENANT_ID),
oneHourAgo);
} else if (ControllerServiceImpl.METERING.equalsIgnoreCase(type)) {
TaskUtils.cleanupPendingTasks(_dbClient, system.getId(), ResourceOperationTypeEnum.METERING_STORAGE_SYSTEM.getName(), URI.create(SYSTEM_TENANT_ID),
oneHourAgo);
}
} else {
// log a message if the discovery job has been runnig for longer than expected
long currentTime = System.currentTimeMillis();
long maxIdleTime = JobIntervals.getMaxIdleInterval() * 1000;
long jobInterval = JobIntervals.get(job.getType()).getInterval();
// next time is the time the job was picked up from the queue plus the job interval
// so the start time of the currently running job is next time minus job interval
// the running time of the currently running job is current time - next time - job interval
boolean longRunningDiscovery = inProgress && (currentTime - nextTime - jobInterval >= maxIdleTime);
if (longRunningDiscovery) {
_logger.warn(type + " job for " + system.getLabel() +
" has been running for longer than expected; this could indicate a problem with the storage system");
}
}
return isJobSchedulingNeeded(system.getId(), type, (queued || inProgress), isError(system, type), scheduler, lastTime, nextTime);
}
/**
 * Persists the given collection-job status onto the system object, choosing the
 * status field that corresponds to the job type (metering, array affinity, or discovery).
 *
 * @param system the discovered system to update
 * @param type the job type, one of the ControllerServiceImpl job-type constants
 * @param status the status value to persist
 */
private <T extends DiscoveredSystemObject> void updateDataCollectionStatus(T system, String type, DiscoveredDataObject.DataCollectionJobStatus status) {
    String statusValue = status.toString();
    if (ControllerServiceImpl.ARRAYAFFINITY_DISCOVERY.equalsIgnoreCase(type)) {
        // array affinity status only exists on StorageSystem
        ((StorageSystem) system).setArrayAffinityStatus(statusValue);
    } else if (ControllerServiceImpl.METERING.equalsIgnoreCase(type)) {
        system.setMeteringStatus(statusValue);
    } else {
        system.setDiscoveryStatus(statusValue);
    }
    _dbClient.updateObject(system);
}
/**
 * Decides whether a scan job for the given storage provider should be scheduled.
 *
 * @param provider the storage provider to scan
 * @param type the job type
 * @param scheduler indicates if the job is initiated automatically by scheduler or if it is
 *            requested by a user.
 * @return true if the scan job should be scheduled
 */
boolean isProviderScanJobSchedulingNeeded(StorageProvider provider, String type, boolean scheduler) {
    final long lastScan = provider.getLastScanTime();
    final long nextScan = provider.getNextScanTime();
    return isJobSchedulingNeeded(provider.getId(), type, isInProgress(provider), isError(provider), scheduler, lastScan, nextScan);
}
/**
 * Core scheduling predicate: if current time - lastRunTime is > refreshInterval, then schedule.
 *
 * @param id id of the system the job targets
 * @param type the job type
 * @param inProgress indicates if the job is in progress or not.
 * @param isError indicates whether the previous run of this job ended in error.
 * @param scheduler indicates if the job is initiated automatically by scheduler or if it is
 *            requested by a user.
 * @param lastTime last run time in milliseconds
 * @param nextTime expected next run time in milliseconds
 * @return true if the job should be scheduled now
 */
private boolean isJobSchedulingNeeded(URI id, String type, boolean inProgress, boolean isError, boolean scheduler, long lastTime, long nextTime) {
    long systemTime = System.currentTimeMillis();
    long toleranceMillis = Long.parseLong(_configInfo.get(TOLERANCE)) * 1000;
    _logger.info("Next Run Time {} , Last Run Time {}", nextTime, lastTime);
    long refreshInterval = getRefreshInterval(type);
    // an already-running job is never rescheduled
    if (inProgress) {
        _logger.info("{} Job for {} is in Progress", type, id);
        return false;
    }
    // a scheduler-initiated job is compared against the "next time" it is expected
    // to be started by the scheduler thread (minus the configured tolerance)
    if (scheduler && systemTime < nextTime - toleranceMillis) {
        _logger.info("Skipping Job {} ; attempt to schedule it before the next run time :{}",
                id + "of type " + type, new Date(nextTime));
        _logger.info("Current system time {}; tolerance time allowed {}.", new Date(systemTime), toleranceMillis);
        return false;
    }
    // CTRL-10655 - if manual discovery is requested and the discovery status is error,
    // then schedule the job
    if (!scheduler && isError && lastTime > 0) {
        _logger.info("User triggered {} Job for {} whose discovery status is error. Reschedule the job", type, id);
        return true;
    }
    // for all jobs, check that the refresh interval is satisfied
    if (systemTime - lastTime < refreshInterval * 1000) {
        _logger.info("Skipping Job {} of type {}; attempt to schedule faster than refresh interval allows",
                id, type);
        return false;
    }
    return true;
}
/**
 * We would like the refresh interval to be configurable on-the-fly, so we'll check the
 * system properties to see if it's set there.
 *
 * @param type type of Job Interval
 * @return the value of the system property for that type, otherwise the default configinfo property.
 */
private long getRefreshInterval(String type) {
    JobIntervals intervals = JobIntervals.get(type);
    String override = _coordinator.getPropertyInfo().getProperty(
            PROP_HEADER_CONTROLLER + intervals._refreshInterval.replace('-', '_'));
    if (override == null) {
        // no on-the-fly override; fall back to the built-in interval
        return intervals.getRefreshInterval();
    }
    return Long.parseLong(override);
}
/**
 * Returns the last run time (milliseconds) recorded on the system for the given job type.
 *
 * @param storageSystem the discovered system
 * @param type the job type
 * @return the last run time of the matching job flavor
 */
private <T extends DiscoveredSystemObject> long getLastRunTime(
        T storageSystem, String type) {
    if (ControllerServiceImpl.ARRAYAFFINITY_DISCOVERY.equalsIgnoreCase(type)) {
        // array affinity run times only exist on StorageSystem
        return ((StorageSystem) storageSystem).getLastArrayAffinityRunTime();
    }
    if (ControllerServiceImpl.METERING.equalsIgnoreCase(type)) {
        return storageSystem.getLastMeteringRunTime();
    }
    return storageSystem.getLastDiscoveryRunTime();
}
/**
 * Returns the next expected run time (milliseconds) recorded on the system for the given job type.
 *
 * @param storageSystem the discovered system
 * @param type the job type
 * @return the next run time of the matching job flavor
 */
private <T extends DiscoveredSystemObject> long getNextRunTime(
        T storageSystem, String type) {
    if (ControllerServiceImpl.ARRAYAFFINITY_DISCOVERY.equalsIgnoreCase(type)) {
        // array affinity run times only exist on StorageSystem
        return ((StorageSystem) storageSystem).getNextArrayAffinityRunTime();
    }
    if (ControllerServiceImpl.METERING.equalsIgnoreCase(type)) {
        return storageSystem.getNextMeteringRunTime();
    }
    return storageSystem.getNextDiscoveryRunTime();
}
/**
 * Returns the persisted collection status on the system for the given job type.
 *
 * @param <T> a discovered system type
 * @param system the discovered system
 * @param type the job type
 * @return the status string of the matching job flavor
 */
private <T extends DiscoveredSystemObject> String getStatus(T system, String type) {
    if (ControllerServiceImpl.ARRAYAFFINITY_DISCOVERY.equalsIgnoreCase(type)) {
        // array affinity status only exists on StorageSystem
        return ((StorageSystem) system).getArrayAffinityStatus();
    }
    if (ControllerServiceImpl.METERING.equalsIgnoreCase(type)) {
        return system.getMeteringStatus();
    }
    return system.getDiscoveryStatus();
}
/**
 * Stops the discovery consumer: closes the scheduling leader selector and shuts
 * down the data collection executor, waiting up to 120 seconds for in-flight
 * tasks to finish. Failures are logged but never propagated; stop is best-effort.
 */
public void stop() {
    try {
        discoverySchedulingSelector.close();
        _dataCollectionExecutorService.shutdown();
        // awaitTermination returns false on timeout; previously the result was ignored
        // and any exception was logged with a misleading "TimeOut" message and no cause
        if (!_dataCollectionExecutorService.awaitTermination(120, TimeUnit.SECONDS)) {
            _logger.error("TimeOut occured after waiting Client Threads to finish");
        }
    } catch (InterruptedException e) {
        // restore the interrupt flag so callers can still observe the interruption
        Thread.currentThread().interrupt();
        _logger.error("Interrupted while waiting for Client Threads to finish", e);
    } catch (Exception e) {
        // no need to throw; include the cause instead of swallowing it
        _logger.error("Exception occured while stopping the data collection executor", e);
    }
}
/**
 * Set DbClient via Spring injection.
 *
 * @param dbClient the database client
 */
public void setDbClient(DbClient dbClient) {
_dbClient = dbClient;
}
/**
 * Set CoordinatorClient via Spring injection.
 *
 * @param coordinator the coordinator client
 */
public void setCoordinator(CoordinatorClient coordinator) {
_coordinator = coordinator;
}
/**
 * Set the scheduler configuration map (intervals, tolerance, etc.) via Spring injection.
 *
 * @param configInfo configuration key/value pairs
 */
public void setConfigInfo(Map<String, String> configInfo) {
_configInfo = configInfo;
}
/**
 * @return the scheduler configuration map
 */
public Map<String, String> getConfigInfo() {
return _configInfo;
}
/**
 * Set the CIM connection factory (SMI-S / IBM XIV provider connections) via Spring injection.
 *
 * @param cimConnectionFactory the CIM connection factory
 */
public void setConnectionFactory(CIMConnectionFactory cimConnectionFactory) {
_connectionFactory = cimConnectionFactory;
}
/**
 * refresh all provider connections for an interface type
 *
 * @param interfaceType the storage provider interface type to refresh
 * @return the list of reachable providers for an interface type
 */
public List<URI> refreshProviderConnections(String interfaceType) {
    List<URI> reachable = new ArrayList<URI>();
    if (StorageProvider.InterfaceType.smis.name().equalsIgnoreCase(interfaceType)) {
        // SMI-S providers refreshed through the CIM connection factory
        reachable.addAll(_connectionFactory.refreshConnections(
                CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                        _dbClient, StorageProvider.InterfaceType.smis.name())));
    } else if (StorageProvider.InterfaceType.ibmxiv.name().equalsIgnoreCase(interfaceType)) {
        // IBM XIV providers share the CIM connection factory
        reachable.addAll(_connectionFactory.refreshConnections(
                CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                        _dbClient, StorageProvider.InterfaceType.ibmxiv.name())));
    } else if (StorageProvider.InterfaceType.vplex.name().equalsIgnoreCase(interfaceType)) {
        // VPLEX management servers
        reachable.addAll(VPlexDeviceController.getInstance()
                .refreshConnectionStatusForAllVPlexManagementServers());
    } else if (StorageProvider.InterfaceType.hicommand.name().equalsIgnoreCase(interfaceType)) {
        // Hitachi HiCommand device managers
        reachable.addAll(HDSUtils.refreshHDSConnections(
                CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                        _dbClient, StorageProvider.InterfaceType.hicommand.name()),
                _dbClient, hdsApiFactory));
    } else if (StorageProvider.InterfaceType.cinder.name().equalsIgnoreCase(interfaceType)) {
        // OpenStack Cinder providers
        reachable.addAll(CinderUtils.refreshCinderConnections(
                CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                        _dbClient, StorageProvider.InterfaceType.cinder.name()),
                _dbClient));
    } else if (StorageProvider.InterfaceType.ddmc.name().equalsIgnoreCase(interfaceType)) {
        // Data Domain management consoles
        reachable.addAll(DataDomainUtils.refreshDDConnections(
                CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                        _dbClient, StorageProvider.InterfaceType.ddmc.name()),
                _dbClient, ddClientFactory));
    } else if (StorageProvider.InterfaceType.xtremio.name().equalsIgnoreCase(interfaceType)) {
        // XtremIO providers
        reachable.addAll(XtremIOProvUtils.refreshXtremeIOConnections(
                CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                        _dbClient, StorageProvider.InterfaceType.xtremio.name()),
                _dbClient, xioClientFactory));
    } else if (StorageProvider.InterfaceType.ceph.name().equalsIgnoreCase(interfaceType)) {
        // Ceph providers
        reachable.addAll(CephUtils.refreshCephConnections(
                CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                        _dbClient, StorageProvider.InterfaceType.ceph.name()),
                _dbClient));
    } else {
        // any other interface type: providers managed by SB SDK drivers
        reachable.addAll(ExternalDeviceUtils.refreshProviderConnections(_dbClient));
    }
    return reachable;
}
/**
 * Refreshes connections for every supported provider interface type
 * and collects the URIs of the providers that are reachable.
 *
 * @return the list of reachable provider URIs across all interface types
 */
public List<URI> refreshProviderConnections() {
    List<URI> allReachable = new ArrayList<URI>();
    // SMI-S and IBM XIV providers share the CIM connection factory
    allReachable.addAll(_connectionFactory.refreshConnections(
            CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                    _dbClient, StorageProvider.InterfaceType.smis.name())));
    allReachable.addAll(_connectionFactory.refreshConnections(
            CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                    _dbClient, StorageProvider.InterfaceType.ibmxiv.name())));
    // VPLEX management servers
    allReachable.addAll(VPlexDeviceController.getInstance()
            .refreshConnectionStatusForAllVPlexManagementServers());
    // Hitachi HiCommand device managers
    allReachable.addAll(HDSUtils.refreshHDSConnections(
            CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                    _dbClient, StorageProvider.InterfaceType.hicommand.name()),
            _dbClient, hdsApiFactory));
    // OpenStack Cinder providers
    allReachable.addAll(CinderUtils.refreshCinderConnections(
            CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                    _dbClient, StorageProvider.InterfaceType.cinder.name()),
            _dbClient));
    // Data Domain management consoles
    allReachable.addAll(DataDomainUtils.refreshDDConnections(
            CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                    _dbClient, StorageProvider.InterfaceType.ddmc.name()),
            _dbClient, ddClientFactory));
    // XtremIO providers
    allReachable.addAll(XtremIOProvUtils.refreshXtremeIOConnections(
            CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                    _dbClient, StorageProvider.InterfaceType.xtremio.name()),
            _dbClient, xioClientFactory));
    // ScaleIO providers
    allReachable.addAll(ScaleIOStorageDevice.getInstance().refreshConnectionStatusForAllSIOProviders());
    // Ceph providers
    allReachable.addAll(CephUtils.refreshCephConnections(
            CustomQueryUtility.getActiveStorageProvidersByInterfaceType(
                    _dbClient, StorageProvider.InterfaceType.ceph.name()),
            _dbClient));
    // process providers managed by SB SDK drivers
    allReachable.addAll(ExternalDeviceUtils.refreshProviderConnections(_dbClient));
    return allReachable;
}
/**
 * Sets the Hitachi HDS API factory via Spring injection.
 *
 * @param hdsApiFactory the HDS API factory
 */
public void setHdsApiFactory(HDSApiFactory hdsApiFactory) {
this.hdsApiFactory = hdsApiFactory;
}
/**
 * Sets the Data Domain client factory via Spring injection.
 *
 * @param ddClientFactory the Data Domain client factory
 */
public void setDataDomainFactory(DataDomainClientFactory ddClientFactory) {
this.ddClientFactory = ddClientFactory;
}
/**
 * Sets the XtremIO client factory via Spring injection.
 *
 * @param xioClientFactory the XtremIO client factory
 */
public void setXtremIOFactory(XtremIOClientFactory xioClientFactory) {
this.xioClientFactory = xioClientFactory;
}
/**
 * Sets portMetricsProcess via Spring injection
 *
 * @param portMetricsProcessor the port metrics processor
 */
public void setPortMetricsProcessor(PortMetricsProcessor portMetricsProcessor) {
_portMetricsProcessor = portMetricsProcessor;
}
}