/*
* Copyright (c) 2013 EMC Corporation
* All Rights Reserved
*/
package com.emc.storageos.systemservices.impl.resource;
import javax.ws.rs.*;
import javax.ws.rs.core.MediaType;
import com.emc.storageos.coordinator.client.service.CoordinatorClient.LicenseType;
import com.emc.storageos.coordinator.client.service.impl.DualInetAddress;
import com.emc.storageos.model.vpool.ManagedResourcesCapacity;
import com.emc.storageos.model.vpool.ManagedResourcesCapacity.ManagedResourceCapacity;
import com.emc.storageos.security.authorization.CheckPermission;
import com.emc.storageos.security.authorization.Role;
import com.emc.storageos.services.ServicesMetadata;
import com.emc.storageos.svcs.errorhandling.resources.APIException;
import com.emc.storageos.systemservices.impl.healthmonitor.*;
import com.emc.storageos.systemservices.impl.healthmonitor.models.*;
import com.emc.storageos.systemservices.impl.licensing.LicenseManager;
import com.emc.storageos.systemservices.impl.resource.util.ClusterNodesUtil;
import com.emc.storageos.systemservices.impl.resource.util.NodeDataCollector;
import com.emc.storageos.systemservices.impl.resource.util.NodeInfo;
import com.emc.storageos.systemservices.impl.upgrade.CoordinatorClientExt;
import com.emc.storageos.systemservices.impl.resource.util.NodeDataCollector.Action;
import com.emc.vipr.model.sys.healthmonitor.DataDiskStats;
import com.emc.vipr.model.sys.healthmonitor.DiagRequestParams;
import com.emc.vipr.model.sys.healthmonitor.DiagnosticsRestRep;
import com.emc.vipr.model.sys.healthmonitor.DiagTest;
import com.emc.vipr.model.sys.healthmonitor.HealthRestRep;
import com.emc.vipr.model.sys.healthmonitor.NodeDiagnostics;
import com.emc.vipr.model.sys.healthmonitor.NodeHardwareInfo.NodeHardwareInfoType;
import com.emc.vipr.model.sys.healthmonitor.NodeHardwareInfoRestRep;
import com.emc.vipr.model.sys.healthmonitor.NodeHealth;
import com.emc.vipr.model.sys.healthmonitor.NodeStats;
import com.emc.vipr.model.sys.healthmonitor.RequestParams;
import com.emc.vipr.model.sys.healthmonitor.ServiceHealth;
import com.emc.vipr.model.sys.healthmonitor.StatsRestRep;
import com.emc.vipr.model.sys.healthmonitor.StorageStats;
import com.emc.vipr.model.sys.healthmonitor.TestParam;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import java.net.URI;
import java.util.*;
/**
* Class that provides a REST API for node (and node service) health and statistics.
*/
@Path("/monitor")
public class HealthMonitorService extends BaseLogSvcResource {
// Logger for this resource.
private static final Logger _log = LoggerFactory.getLogger(HealthMonitorService
.class);
// Coordinator client extension: used in this class for the local node id/name/roles,
// the cluster node count, property lookups and the node IP lookup map.
@Autowired
private CoordinatorClientExt _coordinatorClientExt;
// License manager: consulted by getStorageStats() for the controller license and capacities.
@Autowired
private LicenseManager _licenseManager;
// Provides the resource allocation check result that getDiagnostics() appends to each node's tests.
@Autowired
private NodeResourceAllocationChecker _checker;
// Paths of the per-node internal endpoints. The cluster-wide endpoints pass these to
// NodeDataCollector.getDataFromNodes(...) to gather data from each node.
private static final String INTERNAL_NODE_STATS_URI =
"/monitor/internal/node-stats";
// NOTE(review): not referenced anywhere in this class as shown; confirm whether it is still needed.
private static final String INTERNAL_NODE_HARDWARE_URI =
"/monitor/internal/node-hardware-info";
private static final String INTERNAL_NODE_HEALTH_URI =
"/monitor/internal/node-health";
private static final String INTERNAL_NODE_DIAGNOSTICS_URI =
"/monitor/internal/node-diagnostics";
/**
 * Internal endpoint that reports statistics for the node serving the request.
 *
 * @param requestParams request body; its interval is forwarded to the stats collection
 * @return Node stats response
 */
@POST
@Path("/internal/node-stats")
@Produces({ MediaType.APPLICATION_JSON })
public NodeStats getNodeStats(RequestParams requestParams) {
    _log.info("Retrieving node stats");
    final String localNodeId = _coordinatorClientExt.getMyNodeId();
    final String localNodeName = _coordinatorClientExt.getMyNodeName();
    final String localNodeIP = getNodeIP(localNodeId);
    final int interval = requestParams.getInterval();
    // Only the services that belong to this node's roles are reported.
    final List<String> roleServices =
            ServicesMetadata.getRoleServiceNames(_coordinatorClientExt.getNodeRoles());
    return getNodeStats(localNodeId, localNodeName, localNodeIP, interval, roleServices);
}
/**
 * Internal endpoint that reports hardware information for the node serving the request.
 *
 * @return node hardware info response
 */
@GET
@Path("/internal/node-hardware-info")
@Produces({ MediaType.APPLICATION_JSON })
public NodeHardwareInfoRestRep getNodeHardwareInfo() {
    _log.info("Retrieving node hardware info");
    return getNodeHardWareInfo(_coordinatorClientExt.getMyNodeId());
}
/**
 * Get statistics of virtual machine and its active services
 * Virtual machine stats include memory usage, I/O for each device,
 * load average numbers
 * Service stats include service memory usage, command that invoked it,
 * file descriptors count and other stats (uptime, start time, thread count).
 * <p/>
 * If interval value is passed it will return differential disk stats: difference between first report (contains stats for the time
 * since system startup) and second report (stats collected during the interval since the first report).
 * <p/>
 * A negative interval is rejected with a bad-request error before any node is contacted.
 *
 * @brief Show disk, memory, service statistics of all virtual machines
 * @param nodeIds node ids for which stats are collected.
 * @param nodeNames node names for which stats are collected.
 * @param interval Specifies amount of time in seconds for differential stats. Must not be negative.
 * @prereq none
 * @return Stats response
 */
@GET
@Path("/stats")
@CheckPermission(roles = { Role.SYSTEM_ADMIN, Role.SYSTEM_MONITOR, Role.SECURITY_ADMIN })
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
public StatsRestRep getStats(@QueryParam("node_id") List<String> nodeIds,
        @QueryParam("interval") int interval,
        @QueryParam("node_name") List<String> nodeNames) {
    // Validate 'interval' first so an invalid request fails before any cluster lookup is done.
    if (interval < 0) {
        throw APIException.badRequests.parameterIsNotValid("interval");
    }

    nodeIds = _coordinatorClientExt.combineNodeNamesWithNodeIds(nodeNames, nodeIds);
    _log.info("Retrieving stats for nodes. Requested node ids: {}", nodeIds);

    // Ask each selected node's internal stats endpoint for its data.
    List<NodeInfo> nodeInfoList = ClusterNodesUtil.getClusterNodeInfo(nodeIds);
    RequestParams requestParams = new RequestParams(interval);
    Map<String, NodeStats> nodesData = NodeDataCollector.getDataFromNodes(
            nodeInfoList, INTERNAL_NODE_STATS_URI,
            Action.POST, requestParams, NodeStats.class, null);

    StatsRestRep statsRestRep = new StatsRestRep();
    statsRestRep.getNodeStatsList().addAll(nodesData.values());
    return statsRestRep;
}
/**
 * Internal endpoint that reports the health of the node serving the request
 * and of the services belonging to its roles.
 *
 * @return Node health response
 */
@GET
@Path("/internal/node-health")
@Produces({ MediaType.APPLICATION_JSON })
public NodeHealth getNodeHealth() {
    _log.info("Retrieving node health");
    final String localNodeId = _coordinatorClientExt.getMyNodeId();
    final String localNodeName = _coordinatorClientExt.getMyNodeName();
    final String localNodeIP = getNodeIP(localNodeId);
    final List<String> roleServices =
            ServicesMetadata.getRoleServiceNames(_coordinatorClientExt.getNodeRoles());
    return getNodeHealth(localNodeId, localNodeName, localNodeIP, roleServices);
}
/**
 * Gets health of node and its services.
 * <p/>
 * Node health status:
 * Good - when node is reachable and all its services are GOOD;
 * Unavailable - when node is not reachable;
 * Degraded - when node is reachable and any of its service is Unavailable/Degraded;
 * Node/syssvc Unavailable - when node is down or syssvc is not available on the node.
 * <p/>
 * Service health status:
 * Good - when a service is up and running;
 * Unavailable - when a service is not running but is registered in coordinator;
 * Restarted - when service is restarting.
 * <p/>
 * Requested nodes (or, when no node is requested, nodes vipr1..viprN) that did not return
 * health data are added to the response with the Node/syssvc Unavailable status. If the IP
 * address of such a node cannot be resolved, the entry is still added, with a null IP.
 *
 * @brief Show service health of all virtual machines
 * @param nodeIds node ids for which health stats are collected.
 * @param nodeNames node names for which health stats are collected.
 * @prereq none
 * @return Health response.
 */
@GET
@Path("/health")
@CheckPermission(roles = { Role.SYSTEM_ADMIN, Role.SYSTEM_MONITOR, Role.SECURITY_ADMIN })
@Produces({MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON})
public HealthRestRep getHealth(@QueryParam("node_id") List<String> nodeIds, @QueryParam("node_name") List<String> nodeNames) {
    HealthRestRep healthRestRep = new HealthRestRep();
    List<NodeHealth> nodehealthList = healthRestRep.getNodeHealthList();
    nodeIds = _coordinatorClientExt.combineNodeNamesWithNodeIds(nodeNames, nodeIds);

    // Collecting data from all nodes
    List<NodeInfo> nodeInfoList = ClusterNodesUtil.getClusterNodeInfo(nodeIds);
    Map<String, NodeHealth> nodesData = NodeDataCollector.getDataFromNodes(
            nodeInfoList, INTERNAL_NODE_HEALTH_URI,
            Action.GET, null, NodeHealth.class, null);
    nodehealthList.addAll(nodesData.values());

    // Constant-first comparison keeps this safe even if the node id is null.
    String thisNodeId = _coordinatorClientExt.getMyNodeId();
    if ("standalone".equals(thisNodeId)) {
        return healthRestRep;
    }

    Map<String, DualInetAddress> ipLookupTable = _coordinatorClientExt.getCoordinatorClient().getInetAddessLookupMap()
            .getControllerNodeIPLookupMap();

    // get all nodes if the input param is empty
    if (nodeIds == null || nodeIds.isEmpty()) {
        int clusterNodeCount = _coordinatorClientExt.getNodeCount();
        nodeIds = new ArrayList<>();
        for (int i = 1; i <= clusterNodeCount; i++) {
            nodeIds.add("vipr" + i);
        }
    }

    // Adding health for nodes that are not returned
    for (String nodeId : nodeIds) {
        if (nodesData.containsKey(nodeId)) {
            continue;
        }
        // The lookup table may have no entry for a node that is down or unknown;
        // report the node as unavailable instead of failing the whole request.
        DualInetAddress ip = ipLookupTable.get(nodeId);
        String nodeIP = (ip != null) ? ip.toString() : null;
        String nodeName = _coordinatorClientExt.getPropertyInfo().getProperty("node_" + nodeId.replace("vipr", "") + "_name");
        nodehealthList.add(new NodeHealth(nodeId, nodeName, nodeIP, Status.NODE_OR_SYSSVC_UNAVAILABLE.toString()));
    }
    return healthRestRep;
}
/**
 * Get results of diagtool shell script for all virtual machines in a ViPR
 * controller appliance. Also gives test details when verbose option
 * is set. The resource allocation check result is appended to the test
 * list of every node that returned diagnostics.
 *
 * @brief Get diagtool script results
 * @param nodeIds node ids for which diagnostic results are collected.
 * @param nodeNames node names for which diagnostic results are collected.
 * @param verbose when set to "1" will run command with -v option.
 * @prereq none
 * @return Returns diagnostic test results.
 */
@GET
@Path("/diagnostics")
@CheckPermission(roles = { Role.SYSTEM_ADMIN, Role.SYSTEM_MONITOR })
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
public DiagnosticsRestRep getDiagnostics(@QueryParam("node_id") List<String> nodeIds,
        @QueryParam("verbose") String verbose,
        @QueryParam("node_name") List<String> nodeNames) {
    _log.info("Initiating diagnostics test for nodes");
    nodeIds = _coordinatorClientExt.combineNodeNamesWithNodeIds(nodeNames, nodeIds);

    // Only the exact value "1" turns verbose output on.
    final boolean verboseRequested = "1".equals(verbose);
    final DiagRequestParams diagParams = new DiagRequestParams(verboseRequested);
    final DiagnosticsRestRep response = new DiagnosticsRestRep();

    final List<NodeInfo> targetNodes = ClusterNodesUtil.getClusterNodeInfo(nodeIds);
    final Map<String, NodeDiagnostics> perNodeResults = NodeDataCollector.getDataFromNodes(
            targetNodes, INTERNAL_NODE_DIAGNOSTICS_URI,
            Action.POST, diagParams, NodeDiagnostics.class, null);

    // One allocation test instance is shared by every node's result list.
    final String allocationResult = _checker.getNodeResourceAllocationCheckResult();
    final DiagTest allocationTest =
            new DiagTest("Resource allocation", allocationResult, new ArrayList<TestParam>());
    for (NodeDiagnostics nodeDiagnostics : perNodeResults.values()) {
        List<DiagTest> tests = nodeDiagnostics.getDiagTests();
        tests.add(allocationTest);
        nodeDiagnostics.setDiagTests(tests);
    }

    response.getNodeDiagnosticsList().addAll(perNodeResults.values());
    return response;
}
/**
 * Internal endpoint that runs diagtool on the node serving the request.
 *
 * @param requestParams Contains verbose option for diagtool
 * @return Returns node diagnostics
 */
@POST
@Path("/internal/node-diagnostics")
@Produces({ MediaType.APPLICATION_JSON })
public NodeDiagnostics getNodeDiagnostics(DiagRequestParams requestParams) {
    final String localNodeId = _coordinatorClientExt.getMyNodeId();
    final String localNodeName = _coordinatorClientExt.getMyNodeName();
    _log.info("Retrieving node diagnostics for node: {}", localNodeId);
    final String localNodeIP = getNodeIP(localNodeId);
    // The verbose flag maps to the diagtool verbose option; otherwise no option is passed.
    return new NodeDiagnostics(localNodeId, localNodeName, localNodeIP,
            DiagnosticsExec.getDiagToolResults(
                    requestParams.isVerbose() ? DiagConstants.VERBOSE : ""));
}
/**
 * Get the current capacity for object, file and block storage.
 * <p/>
 * When the controller product is not licensed, the response is built
 * with a null controller stats section.
 *
 * @brief Show storage capacity
 * @prereq none
 * @return Storage stats for controller (file & block) and object.
 */
@GET
@Path("/storage")
@CheckPermission(roles = { Role.SYSTEM_ADMIN, Role.SYSTEM_MONITOR })
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
public StorageStats getStorageStats() {
    _log.info("Getting storage stats");

    if (!_licenseManager.isProductLicensed(LicenseType.CONTROLLER)) {
        // Typed local keeps the constructor call identical to passing a null stats reference.
        final StorageStats.ControllerStorageStats noControllerStats = null;
        return new StorageStats(noControllerStats);
    }

    final ManagedResourcesCapacity managedCapacity = _licenseManager.getControllerCapacity();
    final StorageStats.ControllerStorageStats stats = new StorageStats.ControllerStorageStats();
    for (ManagedResourceCapacity entry : managedCapacity.getResourceCapacityList()) {
        // Each capacity is divided by the conversion constant before being stored in a *KB field.
        switch (entry.getType()) {
            case VOLUME:
                stats.setBlockCapacityKB(entry.getResourceCapacity() / StatConstants.CAPACITY_CONVERSION_VALUE);
                break;
            case FILESHARE:
                stats.setFileCapacityKB(entry.getResourceCapacity() / StatConstants.CAPACITY_CONVERSION_VALUE);
                break;
            case POOL:
                stats.setFreeManagedCapacityKB(entry.getResourceCapacity() / StatConstants.CAPACITY_CONVERSION_VALUE);
                break;
            case BUCKET:
                stats.setObjectCapacityKB(entry.getResourceCapacity() / StatConstants.CAPACITY_CONVERSION_VALUE);
                break;
            default:
                // Other resource types are not reported.
                break;
        }
    }
    return new StorageStats(stats);
}
/**
 * Looks up the given node in the coordinator's controller node IP lookup map
 * and returns the string form of its address entry.
 *
 * @param nodeId node id
 * @return IP address
 */
private String getNodeIP(String nodeId) {
    return _coordinatorClientExt.getCoordinatorClient()
            .getInetAddessLookupMap()
            .getControllerNodeIPLookupMap()
            .get(nodeId)
            .toString();
}
/**
 * Builds the statistics of a node: load average, memory and data disk stats,
 * stats of the given services and disk stats for the given interval.
 * <p/>
 * Any failure is logged with its stack trace and rethrown as an internal server error.
 *
 * @param nodeId id of the node the stats are reported for
 * @param nodeName name of the node
 * @param nodeIP IP address of the node
 * @param interval interval passed to the disk stats extraction
 * @param availableServices names of the services whose stats are collected
 * @return NodeStats
 */
protected NodeStats getNodeStats(String nodeId, String nodeName, String nodeIP, int interval,
        List<String> availableServices) {
    try {
        _log.info("List of available services: {}", availableServices);
        return new NodeStats(nodeId, nodeName, nodeIP, ProcStats.getLoadAvgStats(),
                ProcStats.getMemoryStats(), ProcStats.getDataDiskStats(),
                NodeStatsExtractor.getServiceStats(availableServices),
                NodeStatsExtractor.getDiskStats(interval));
    } catch (Exception e) {
        // Passing the exception as the throwable argument logs the stack trace;
        // a "{}" placeholder would be printed literally here, so none is used.
        _log.error("Internal error occurred while getting node stats.", e);
        throw APIException.internalServerErrors.getObjectError("node stats", e);
    }
}
/**
 * Method that returns the health of a node and its services.
 * <p/>
 * The node starts as GOOD and is reported as DEGRADED as soon as one of its
 * services is UNAVAILABLE or DEGRADED. Any failure is logged with its stack
 * trace and rethrown as an internal server error.
 *
 * @param nodeId id of the node the health is reported for
 * @param nodeName name of the node
 * @param nodeIP IP address of the node
 * @param availableServices names of the services whose health is checked
 * @return NodeHealth
 */
protected NodeHealth getNodeHealth(String nodeId, String nodeName, String nodeIP,
        List<String> availableServices) {
    try {
        _log.info("List of available services: {}", availableServices);
        String nodeStatus = Status.GOOD.toString();
        List<ServiceHealth> serviceHealthList = NodeHealthExtractor.getServiceHealth(
                NodeStatsExtractor.getServiceStats(availableServices),
                _coordinatorClientExt.getCoordinatorClient(), nodeId);
        for (ServiceHealth serviceHealth : serviceHealthList) {
            if (Status.UNAVAILABLE.toString().equals(serviceHealth.getStatus())
                    || Status.DEGRADED.toString().equals(serviceHealth.getStatus())) {
                // One unhealthy service is enough to degrade the node.
                nodeStatus = Status.DEGRADED.toString();
                break;
            }
        }
        return new NodeHealth(nodeId, nodeName, nodeIP, nodeStatus, serviceHealthList);
    } catch (Exception e) {
        // Passing the exception as the throwable argument logs the stack trace;
        // a "{}" placeholder would be printed literally here, so none is used.
        _log.error("Internal error occurred while getting node health.", e);
        throw APIException.internalServerErrors.getObjectError("health for node " +
                nodeId, e);
    }
}
/**
 * Collects hardware information of the local machine: CPU count, CPU frequency,
 * total memory and the disk amount returned by getNodeDiskAmount().
 * <p/>
 * Any failure, including one raised by the IP lookup, is logged with its stack
 * trace and rethrown as an internal server error.
 *
 * @param nodeId id under which the hardware info is reported; also used to look up the node IP
 * @return hardware info response for the node
 */
private NodeHardwareInfoRestRep getNodeHardWareInfo(String nodeId) {
    try {
        Map<NodeHardwareInfoType, Float> hardwareInfos = new HashMap<>();
        hardwareInfos.put(NodeHardwareInfoType.CPUCOUNT, (float) ProcStats.getCPUCount());
        hardwareInfos.put(NodeHardwareInfoType.CPUFREQ, ProcStats.getCPUFrequence());
        hardwareInfos.put(NodeHardwareInfoType.MEMORY, (float) ProcStats.getMemoryStats().getMemTotal());
        hardwareInfos.put(NodeHardwareInfoType.DISK, (float) getNodeDiskAmount());
        return new NodeHardwareInfoRestRep(nodeId, getNodeIP(nodeId), hardwareInfos);
    } catch (Exception e) {
        // Passing the exception as the throwable argument logs the stack trace;
        // a "{}" placeholder would be printed literally here, so none is used.
        _log.error("Internal error occurred while getting node hardware info.", e);
        throw APIException.internalServerErrors.getObjectError("node hardware info", e);
    }
}
/**
 * Sums the available and used KB of the root disk and of the data disk,
 * as reported by ProcStats.getDataDiskStats().
 *
 * @return root disk amount plus data disk amount
 */
private long getNodeDiskAmount() {
    final DataDiskStats diskStats = ProcStats.getDataDiskStats();
    // Root disk first, then the data disk is added on top.
    long totalKB = diskStats.getRootAvailKB() + diskStats.getRootUsedKB();
    totalKB += diskStats.getDataAvailKB() + diskStats.getDataUsedKB();
    return totalKB;
}
}