/*******************************************************************************
* ===========================================================
* Ankush : Big Data Cluster Management Solution
* ===========================================================
*
* (C) Copyright 2014, by Impetus Technologies
*
* This is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (LGPL v3) as
* published by the Free Software Foundation;
*
* This software is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this software; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
******************************************************************************/
/**
*
*/
package com.impetus.ankush2.db;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.impetus.ankush.AppStore;
import com.impetus.ankush.AppStoreWrapper;
import com.impetus.ankush2.constant.Constant;
import com.impetus.ankush.common.domain.Cluster;
import com.impetus.ankush.common.domain.Event;
import com.impetus.ankush.common.domain.Event.Severity;
import com.impetus.ankush.common.domain.Event.Type;
import com.impetus.ankush.common.domain.EventHistory;
import com.impetus.ankush.common.domain.HAService;
import com.impetus.ankush.common.domain.Node;
import com.impetus.ankush.common.domain.NodeMonitoring;
import com.impetus.ankush.common.domain.Role;
import com.impetus.ankush.common.domain.User;
import com.impetus.ankush.common.mail.MailManager;
import com.impetus.ankush.common.mail.MailMsg;
import com.impetus.ankush.common.service.GenericManager;
import com.impetus.ankush.common.service.MonitoringManager;
import com.impetus.ankush.common.service.UserManager;
import com.impetus.ankush2.framework.config.AlertsConf;
import com.impetus.ankush2.framework.config.ThresholdConf;
import com.impetus.ankush2.ha.HAServiceExecutor;
import com.impetus.ankush2.logger.AnkushLogger;
/**
 * Generates, stores and queries the events and alerts raised by node
 * monitoring (usage thresholds, service status and agent availability) and
 * mails a notification whenever a stored event changes severity.
 *
 * @author hokam
 *
 */
public class DBEventManager {

    /** Line separator used when building the mail body. */
    private static final String SLASH_N = "\n";

    /** Label/value separator used when building the mail body. */
    private static final String COLON = " : ";

    /** Number pattern used to render a usage value without decimal digits. */
    private static final String USAGE_PATTERN = "##";

    /** Manager used to read and write {@link Event} objects. */
    private GenericManager<Event, Long> eventManager = AppStoreWrapper
            .getManager(Constant.Manager.EVENT, Event.class);

    /** Manager used to write {@link EventHistory} objects. */
    private GenericManager<EventHistory, Long> eventHistoryManager = AppStoreWrapper
            .getManager(Constant.Manager.EVENTHISTORY, EventHistory.class);

    /** Manager used to look up {@link Role} objects. */
    private GenericManager<Role, Long> roleManager = AppStoreWrapper
            .getManager(Constant.Manager.ROLE, Role.class);

    /** Service used to list the users holding a role. */
    private UserManager userManager = AppStoreWrapper.getService("userManager",
            UserManager.class);

    /** The logger. */
    private static final AnkushLogger LOG = new AnkushLogger(
            DBEventManager.class);

    /**
     * Saves the event and appends a matching event history record.
     *
     * @param event
     *            the event to save
     * @return the event instance returned by the event manager
     */
    private Event saveEvent(Event event) {
        // save event
        event = eventManager.save(event);
        // save event history.
        EventHistory eventHistory = new EventHistory();
        eventHistory.setEvent(event);
        eventHistory.setEventId(event.getId());
        eventHistory.setClusterId(event.getClusterId());
        eventHistoryManager.save(eventHistory);
        return event;
    }

    /**
     * Builds the property map used to filter events. Only the non-null
     * arguments are added to the map.
     *
     * @param clusterId
     *            the cluster id, or null to leave it out of the filter
     * @param node
     *            the host, or null to leave it out of the filter
     * @param type
     *            the event type, or null to leave it out of the filter
     * @param category
     *            the event category, or null to leave it out of the filter
     * @param name
     *            the event name, or null to leave it out of the filter
     * @param severity
     *            the event severity, or null to leave it out of the filter
     * @return the (possibly empty) property map
     */
    public Map<String, Object> getPropertyMap(Long clusterId, String node,
            Type type, String category, String name, Severity severity) {
        Map<String, Object> propMap = new HashMap<String, Object>();
        if (clusterId != null) {
            propMap.put(Constant.Keys.CLUSTERID, clusterId);
        }
        if (node != null) {
            propMap.put(Constant.Keys.HOST, node);
        }
        if (type != null) {
            propMap.put(Constant.Keys.TYPE, type);
        }
        if (category != null) {
            propMap.put(Constant.Keys.CATEGORY, category);
        }
        if (name != null) {
            propMap.put(Constant.Keys.NAME, name);
        }
        if (severity != null) {
            propMap.put(Constant.Keys.SEVERITY, severity);
        }
        return propMap;
    }

    /**
     * Fetches the events matching the given property map.
     *
     * @param propMap
     *            the filter properties
     * @return the matching events, or an empty list if the lookup fails
     */
    private List<Event> getEvents(Map<String, Object> propMap) {
        try {
            return eventManager.getAllByPropertyValue(propMap,
                    Constant.Keys.CATEGORY, Constant.Keys.NAME,
                    Constant.Keys.SEVERITY);
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
        }
        return new ArrayList<Event>();
    }

    /**
     * Fetches a single event matching the given filter values. Null
     * arguments are left out of the filter.
     *
     * @param clusterId
     *            the cluster id
     * @param node
     *            the host
     * @param type
     *            the event type
     * @param category
     *            the event category
     * @param name
     *            the event name
     * @param severity
     *            the event severity
     * @return the value returned by the event manager, or null if the lookup
     *         throws
     */
    public Event getEvent(Long clusterId, String node, Type type,
            String category, String name, Severity severity) {
        try {
            return eventManager.getByPropertyValueGuarded(getPropertyMap(
                    clusterId, node, type, category, name, severity));
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
        }
        return null;
    }

    /**
     * Method to get one page of the events of a cluster, ordered by
     * "-date".
     *
     * @param clusterId
     *            the cluster id
     * @param start
     *            the start offset passed to the event manager
     * @param maxResults
     *            the page size passed to the event manager
     * @return the events of the requested page
     */
    public List<Event> getEventsSummary(Long clusterId, int start,
            int maxResults) {
        return eventManager.getAllByPropertyValue(Constant.Keys.CLUSTERID,
                clusterId, start, maxResults, "-date");
    }

    /**
     * Fetches the events matching the given filter values. Null arguments
     * are left out of the filter.
     *
     * @param clusterId
     *            the cluster id
     * @param node
     *            the host
     * @param type
     *            the event type
     * @param category
     *            the event category
     * @param name
     *            the event name
     * @param severity
     *            the event severity
     * @return the matching events, or an empty list if the lookup fails
     */
    public List<Event> getEvents(Long clusterId, String node, Type type,
            String category, String name, Severity severity) {
        return getEvents(getPropertyMap(clusterId, node, type, category, name,
                severity));
    }

    /**
     * Fetches the CRITICAL and WARNING events matching the given filter
     * values. Null arguments are left out of the filter.
     *
     * @param clusterId
     *            the cluster id
     * @param node
     *            the host
     * @param type
     *            the event type
     * @param category
     *            the event category
     * @param name
     *            the event name
     * @return the matching alerts, or an empty list if the lookup fails
     */
    public List<Event> getAlerts(Long clusterId, String node, Type type,
            String category, String name) {
        try {
            // One filter map per severity; the two are OR-ed by the query.
            List<Map<String, Object>> disMap = new ArrayList<Map<String, Object>>();
            disMap.add(getPropertyMap(clusterId, node, type, category, name,
                    Severity.CRITICAL));
            disMap.add(getPropertyMap(clusterId, node, type, category, name,
                    Severity.WARNING));
            return eventManager.getAllByDisjunctionveNormalQuery(disMap,
                    Constant.Keys.CATEGORY, Constant.Keys.NAME,
                    Constant.Keys.SEVERITY);
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
        }
        return new ArrayList<Event>();
    }

    /**
     * Counts the CRITICAL and WARNING events of a cluster.
     *
     * @param clusterId
     *            the cluster id
     * @return a map from severity to event count; a severity is missing from
     *         the map if its count could not be fetched
     */
    public Map<Event.Severity, Integer> getEventsCountBySeverity(Long clusterId) {
        Map<Event.Severity, Integer> severityCount = new HashMap<Event.Severity, Integer>();
        Map<String, Object> propMap = new HashMap<String, Object>();
        try {
            propMap.put(Constant.Keys.CLUSTERID, clusterId);
            // Get CRITICAL count
            propMap.put(Constant.Keys.SEVERITY, Event.Severity.CRITICAL);
            severityCount.put(Event.Severity.CRITICAL,
                    eventManager.getAllByPropertyValueCount(propMap));
            // Get WARNING count (same map, severity entry overwritten)
            propMap.put(Constant.Keys.SEVERITY, Event.Severity.WARNING);
            severityCount.put(Event.Severity.WARNING,
                    eventManager.getAllByPropertyValueCount(propMap));
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
        }
        return severityCount;
    }

    /**
     * Method to generate CPU and memory usage events and alerts for a node.
     * The work is submitted to the application executor; failures inside the
     * submitted task are logged and never propagated to the caller.
     *
     * @param host
     *            the host the monitoring data belongs to
     * @param clusterId
     *            the id of the cluster the host belongs to
     * @param nodeMonitoring
     *            the node monitoring data; when null nothing is processed
     */
    public void checkAlertsForUsage(final String host, final Long clusterId,
            final NodeMonitoring nodeMonitoring) {
        try {
            AppStoreWrapper.getExecutor().execute(new Runnable() {
                @Override
                public void run() {
                    // The enclosing try/catch only covers the submission, so
                    // failures of the task itself must be handled here.
                    try {
                        Cluster cluster = new DBClusterManager()
                                .getCluster(clusterId);
                        // Check for stable cluster.
                        if (!isDeployedOrAdded(cluster)) {
                            return;
                        }
                        // Nothing to evaluate without monitoring data.
                        if (nodeMonitoring == null
                                || nodeMonitoring.getMonitoringInfo() == null) {
                            return;
                        }
                        // Process CPU usage
                        if (nodeMonitoring.getMonitoringInfo()
                                .getUptimeInfos() != null
                                && !nodeMonitoring.getMonitoringInfo()
                                        .getUptimeInfos().isEmpty()) {
                            Double cpuUsage = nodeMonitoring
                                    .getMonitoringInfo().getUptimeInfos()
                                    .get(0).getCpuUsage();
                            processUsageAlert(host, cluster, cpuUsage,
                                    Constant.Alerts.Metric.CPU);
                        }
                        // Process Memory usage
                        if (nodeMonitoring.getMonitoringInfo()
                                .getMemoryInfos() != null
                                && !nodeMonitoring.getMonitoringInfo()
                                        .getMemoryInfos().isEmpty()) {
                            Double memoryUsage = nodeMonitoring
                                    .getMonitoringInfo().getMemoryInfos()
                                    .get(0).getUsedPercentage();
                            processUsageAlert(host, cluster, memoryUsage,
                                    Constant.Alerts.Metric.MEMORY);
                        }
                    } catch (Exception e) {
                        LOG.error(e.getMessage(), e);
                    }
                }
            });
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
        }
    }

    /**
     * Process HAServices
     *
     * For every service reported as up, its HA service entry is removed from
     * the {@link HAServiceExecutor}; for every service reported as down, the
     * configured HA service (if any) is added to the executor.
     *
     * @param clusterId
     *            the cluster id
     * @param host
     *            the host the statuses were reported for
     * @param component
     *            the component the services belong to
     * @param serviceStatus
     *            map from service name to its up (true) / down (false) status
     */
    private void processHAServices(Long clusterId, String host,
            String component, Map<String, Boolean> serviceStatus) {
        for (Map.Entry<String, Boolean> entry : serviceStatus.entrySet()) {
            String service = entry.getKey();
            if (entry.getValue()) {
                HAServiceExecutor.removeHAService(clusterId, host, component,
                        service);
            } else {
                HAService haService = new DBHAServiceManager().getHAService(
                        clusterId, host, component, service);
                if (haService != null) {
                    HAServiceExecutor.addHAService(haService, host);
                }
            }
        }
    }

    /**
     * Method to process HA services and generate service events and alerts
     * for the statuses reported for a host. The work is submitted to the
     * application executor; failures inside the submitted task are logged
     * and never propagated to the caller.
     *
     * @param host
     *            the host the statuses were reported for
     * @param clusterId
     *            the id of the cluster the host belongs to
     * @param agentServiceStatus
     *            map from technology to its service-name/status map
     */
    public void checkAlertsForService(final String host, final Long clusterId,
            final HashMap<String, Map<String, Boolean>> agentServiceStatus) {
        try {
            AppStoreWrapper.getExecutor().execute(new Runnable() {
                @Override
                public void run() {
                    // The enclosing try/catch only covers the submission, so
                    // failures of the task itself must be handled here.
                    try {
                        Cluster cluster = new DBClusterManager()
                                .getCluster(clusterId);
                        if (!isDeployedOrAdded(cluster)) {
                            return;
                        }
                        // iterating over the map of services.
                        for (Map.Entry<String, Map<String, Boolean>> entry : agentServiceStatus
                                .entrySet()) {
                            // Process HAServices
                            processHAServices(clusterId, host, entry.getKey(),
                                    entry.getValue());
                            // Process alerts
                            processServiceAlert(host, cluster, entry.getKey(),
                                    entry.getValue());
                        }
                    } catch (Exception e) {
                        LOG.error(e.getMessage(), e);
                    }
                }
            });
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
        }
    }

    /**
     * Method to check whether the cluster is in a state for which alerts
     * should be processed, i.e. it is not in the ERROR, DEPLOYING or
     * REMOVING state. A null cluster or a cluster without a state is
     * treated as not processable.
     *
     * @param cluster
     *            the cluster, may be null
     * @return true, if alerts may be processed for the cluster
     */
    private boolean isDeployedOrAdded(Cluster cluster) {
        if (cluster == null || cluster.getState() == null) {
            return false;
        }
        // NOTE(review): node states are compared through toString()
        // elsewhere in this class; confirm that the cluster state and these
        // constants have the same type, otherwise equals() never matches.
        return !(cluster.getState().equals(Constant.Cluster.State.ERROR)
                || cluster.getState().equals(
                        Constant.Cluster.State.DEPLOYING)
                || cluster.getState().equals(
                        Constant.Cluster.State.REMOVING));
    }

    /**
     * To generate events and alerts for the services of one category. A
     * service that is up maps to NORMAL severity, a service that is down to
     * CRITICAL severity.
     *
     * @param host
     *            the host the statuses were reported for
     * @param cluster
     *            the cluster the host belongs to
     * @param category
     *            the event category the services are reported under
     * @param serviceStatus
     *            map from service name to its up (true) / down (false) status
     */
    private void processServiceAlert(String host, Cluster cluster,
            String category, Map<String, Boolean> serviceStatus) {
        for (Map.Entry<String, Boolean> entry : serviceStatus.entrySet()) {
            boolean up = entry.getValue();
            // calculating current severity.
            Severity severity = up ? Event.Severity.NORMAL
                    : Event.Severity.CRITICAL;
            // current value of the service.
            String value = up ? Constant.Keys.UP : Constant.Keys.DOWN;
            processAlert(cluster, host, Event.Type.SERVICE, category,
                    entry.getKey(), severity, value, null);
        }
    }

    /**
     * Saves an event and sends a mail when the observed severity is new: that
     * is, when no event is stored yet for the host/category/name and the
     * severity is not NORMAL, or when the stored event has a different
     * severity. Otherwise nothing happens.
     *
     * @param dbCluster
     *            the cluster the host belongs to
     * @param host
     *            the host the event is about
     * @param type
     *            the event type
     * @param category
     *            the event category
     * @param name
     *            the event name
     * @param severity
     *            the currently observed severity
     * @param value
     *            the currently observed value
     * @param thresholdValue
     *            the threshold that was crossed, may be null
     */
    private void processAlert(Cluster dbCluster, String host, Event.Type type,
            String category, String name, Event.Severity severity,
            String value, String thresholdValue) {
        // last event object.
        Event lastEvent = getEvent(null, host, null, category, name, null);
        boolean firstNonNormal = lastEvent == null
                && !severity.equals(Event.Severity.NORMAL);
        boolean severityChanged = lastEvent != null
                && !lastEvent.getSeverity().equals(severity);
        if (firstNonNormal || severityChanged) {
            // create event object.
            Event event = populateEvent(dbCluster.getId(), host, type,
                    category, name, severity, value, thresholdValue, lastEvent);
            event = saveEvent(event);
            // drop a mail.
            sendMail(dbCluster, event);
        }
    }

    /**
     * Method to build the event object to save. When a previous event is
     * given, its id is copied to the new object.
     *
     * @param clusterId
     *            the cluster id
     * @param host
     *            the host
     * @param type
     *            the event type
     * @param category
     *            the event category
     * @param name
     *            the event name
     * @param severity
     *            the event severity
     * @param value
     *            the observed value
     * @param thresholdValue
     *            the threshold that was crossed, may be null
     * @param lastEvent
     *            the previously stored event, may be null
     * @return the populated event, dated with the current time
     */
    private Event populateEvent(Long clusterId, String host, Event.Type type,
            String category, String name, Event.Severity severity,
            String value, String thresholdValue, Event lastEvent) {
        Event event = new Event();
        if (lastEvent != null) {
            event.setId(lastEvent.getId());
        }
        event.setClusterId(clusterId);
        event.setHost(host);
        event.setType(type);
        event.setCategory(category);
        event.setName(name);
        event.setSeverity(severity);
        event.setValue(value);
        event.setThresholdValue(thresholdValue);
        event.setDate(new Date());
        return event;
    }

    /**
     * Process usage alert for one metric of a host.
     *
     * @param host
     *            the host
     * @param cluster
     *            the cluster
     * @param value
     *            the usage value; when null the metric is skipped
     * @param name
     *            the metric name
     */
    private void processUsageAlert(String host, Cluster cluster, Double value,
            String name) {
        if (value == null) {
            // No measurement available, so there is nothing to compare.
            return;
        }
        // Getting threshold values for the metric.
        ThresholdConf threshold = getThresholdConf(cluster, name);
        // Getting current severity.
        Event.Severity severity = getCurrentSeverity(value, threshold);
        String thresholdValue = getThresholdValue(threshold, severity);
        // DecimalFormat is not thread-safe and this method runs on executor
        // threads, so a fresh instance is used for every call.
        String strValue = new DecimalFormat(USAGE_PATTERN).format(value);
        processAlert(cluster, host, Event.Type.USAGE,
                Constant.Component.Name.AGENT, name, severity, strValue,
                thresholdValue);
    }

    /**
     * Counts the events grouped by type, category, name and severity,
     * optionally filtered by the given values. Null arguments are left out
     * of the filter.
     *
     * @param clusterId
     *            the cluster id
     * @param host
     *            the host
     * @param type
     *            the event type
     * @param component
     *            the event category
     * @param name
     *            the event name
     * @param severity
     *            the event severity
     * @return the result of the custom query, or null if the query could not
     *         be built or executed
     */
    public List getGroupbyCount(Long clusterId, String host, Event.Type type,
            String component, String name, Event.Severity severity) {
        try {
            StringBuilder sb = new StringBuilder();
            // Count select query
            sb.append("select e.type, e.category, e.name, e.severity, count(e) from Event e ");
            Map<String, Object> propMap = getPropertyMap(clusterId, host, type,
                    component, name, severity);
            // Create where clause
            if (!propMap.isEmpty()) {
                StringBuilder where = new StringBuilder();
                for (Map.Entry<String, Object> entry : propMap.entrySet()) {
                    String key = entry.getKey();
                    Object val = entry.getValue();
                    if (where.length() > 0) {
                        where.append("and ");
                    }
                    where.append(key).append("=");
                    if (key.equals(Constant.Keys.CLUSTERID)) {
                        where.append(val).append(" ");
                    } else if (key.equals(Constant.Keys.TYPE)) {
                        where.append(((Event.Type) val).ordinal()).append(" ");
                    } else if (key.equals(Constant.Keys.SEVERITY)) {
                        where.append(((Event.Severity) val).ordinal()).append(
                                " ");
                    } else {
                        // NOTE(review): the value is embedded in the query
                        // text; bound parameters would be the proper fix once
                        // the manager exposes them. Until then values that
                        // could terminate or escape the quoted literal are
                        // rejected instead of being sent to the database.
                        String text = String.valueOf(val);
                        if (text.indexOf('"') >= 0 || text.indexOf('\\') >= 0) {
                            throw new IllegalArgumentException(
                                    "Unsupported character in filter value for "
                                            + key);
                        }
                        where.append("\"").append(text).append("\" ");
                    }
                }
                sb.append("where ").append(where);
            }
            // Add group by
            sb.append("group by e.type, e.category, e.name, e.severity");
            return eventManager.getCustomQuery(sb.toString());
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
        }
        return null;
    }

    /**
     * Returns the threshold level matching the severity as a string.
     *
     * @param threshold
     *            the threshold configuration, may be null
     * @param severity
     *            the current severity
     * @return the alert level for CRITICAL, the warning level for any other
     *         non-NORMAL severity, or null when there is no threshold or the
     *         severity is NORMAL
     */
    private String getThresholdValue(ThresholdConf threshold, Severity severity) {
        if (threshold == null || severity.equals(Event.Severity.NORMAL)) {
            return null;
        }
        if (severity.equals(Event.Severity.CRITICAL)) {
            return threshold.getAlertLevel().toString();
        }
        return threshold.getWarningLevel().toString();
    }

    /**
     * To send an e-mail about the event to the mailing list of the cluster.
     * Nothing is sent when no mail manager is available.
     *
     * @param dbCluster
     *            the cluster the event belongs to
     * @param event
     *            the event
     */
    private void sendMail(Cluster dbCluster, Event event) {
        LOG.debug("Sending a mail");
        // creating mail message object.
        MailMsg message = new MailMsg();
        message.setTo(getEmailList(dbCluster));
        message.setSubject(event.getSubject());
        message.setMessage(getDescription(event, dbCluster.getName()));
        message.setContentType("text/plain");
        // Getting mail manager.
        MailManager mm = AppStoreWrapper.getMailManager();
        if (mm != null) {
            // Sending mail.
            mm.sendSystemMail(message);
        }
    }

    /**
     * Method to get the plain text description of the event used as mail
     * body: cluster name, event name and value, host and creation date, one
     * per line.
     *
     * @param event
     *            the event
     * @param clustername
     *            the clustername
     * @return the description
     */
    private String getDescription(Event event, String clustername) {
        StringBuilder string = new StringBuilder();
        string.append("Cluster Name").append(COLON).append(clustername)
                .append(SLASH_N);
        string.append(event.getName()).append(COLON).append(event.getValue())
                .append(SLASH_N);
        string.append("Host").append(COLON).append(event.getHost())
                .append(SLASH_N);
        string.append("Created At").append(COLON)
                .append(event.getDate().toString()).append(" ").append(SLASH_N);
        return string.toString();
    }

    /**
     * To get the current severity for a usage value.
     *
     * @param value
     *            the current value, must not be null
     * @param threshold
     *            the threshold configuration, may be null
     * @return CRITICAL if the value reaches the alert level, WARNING if it
     *         reaches the warning level, NORMAL otherwise (including when
     *         there is no threshold or the level is not set)
     */
    private Event.Severity getCurrentSeverity(Double value,
            ThresholdConf threshold) {
        if (threshold != null && threshold.getAlertLevel() != null
                && value >= threshold.getAlertLevel()) {
            return Event.Severity.CRITICAL;
        }
        if (threshold != null && threshold.getWarningLevel() != null
                && value >= threshold.getWarningLevel()) {
            return Event.Severity.WARNING;
        }
        return Event.Severity.NORMAL;
    }

    /**
     * Method to get the threshold configuration of the cluster for the given
     * metric. Metric names are compared ignoring case.
     *
     * @param cluster
     *            the cluster
     * @param metricName
     *            the metric name
     * @return the threshold conf, or null if the cluster has no alerts
     *         configuration or no threshold for the metric
     */
    private ThresholdConf getThresholdConf(Cluster cluster, String metricName) {
        // Getting alerts conf.
        AlertsConf alertsConf = cluster.getAlertsConf();
        if (alertsConf == null || alertsConf.getThresholds() == null) {
            return null;
        }
        // iterating over the all threshold values.
        for (ThresholdConf thresholdConf : alertsConf.getThresholds()) {
            String current = thresholdConf.getMetricName();
            // If metric name is same then return the threshold conf.
            if (current != null && current.equalsIgnoreCase(metricName)) {
                return thresholdConf;
            }
        }
        return null;
    }

    /**
     * Method to get email list of the cluster: the enabled super users
     * (each followed by ";") when the alerts configuration asks to inform
     * all admins, followed by the configured mailing list.
     *
     * @param cluster
     *            the cluster
     * @return the email list, or null if the cluster has no alerts
     *         configuration
     */
    private String getEmailList(Cluster cluster) {
        // Getting alerts conf.
        AlertsConf alertsConf = cluster.getAlertsConf();
        if (alertsConf == null) {
            return null;
        }
        StringBuilder emailList = new StringBuilder();
        // getting administrator mailing list
        if (alertsConf.isInformAllAdmins()) {
            Role role = roleManager.getByPropertyValue(Constant.Keys.NAME,
                    Constant.User.Role.ROLE_SUPER_USER);
            List<User> users = userManager.getUsersByRole(role);
            if (users != null) {
                for (User user : users) {
                    if (user.isEnabled()) {
                        emailList.append(user.getEmail()).append(";");
                    }
                }
            }
        }
        // getting configured mailing list
        if (alertsConf.getMailingList() != null) {
            emailList.append(alertsConf.getMailingList());
        }
        return emailList.toString();
    }

    /**
     * To process agent down alerts. For every deployed node of every
     * processable cluster: when no monitoring record exists, an empty one is
     * created; otherwise, when the agent is not already marked down and the
     * last monitoring update is older than the configured agent down
     * interval, the agent is marked down in the monitoring data and the
     * service table and a CRITICAL service alert is processed.
     */
    public void processAgentDownAlerts() {
        List<Cluster> clusters = new DBClusterManager().getClusters();
        MonitoringManager monitoringManager = new MonitoringManager();
        for (Cluster cluster : clusters) {
            // Check for stable clusters only.
            if (!isDeployedOrAdded(cluster)) {
                continue;
            }
            for (Node node : cluster.getNodes()) {
                // Check agent down for deployed nodes only.
                if (!node.getState().equalsIgnoreCase(
                        Constant.Node.State.DEPLOYED.toString())) {
                    continue;
                }
                NodeMonitoring nodeMonitoring = monitoringManager
                        .getMonitoringData(node.getId());
                if (nodeMonitoring == null) {
                    // Create new object with default values
                    nodeMonitoring = new NodeMonitoring();
                    nodeMonitoring.setNodeId(node.getId());
                    nodeMonitoring.setUpdateTime(new Date());
                    nodeMonitoring
                            .setTechnologyServiceStatus(new HashMap<String, Map<String, Boolean>>());
                    monitoringManager.save(nodeMonitoring);
                    continue;
                }
                long dateDiff = new Date().getTime()
                        - nodeMonitoring.getUpdateTime().getTime();
                if (!DBServiceManager.getManager().isAgentDown(
                        node.getPublicIp())
                        && (dateDiff > (Integer) AppStore
                                .getObject(Constant.Keys.AGENT_DOWN_INTERVAL))) {
                    // Set Agent is down and update database.
                    Map<String, Boolean> status = new HashMap<String, Boolean>();
                    HashMap<String, Map<String, Boolean>> serviceMap = new HashMap<String, Map<String, Boolean>>();
                    status.put(Constant.Role.AGENT, false);
                    serviceMap.put(Constant.Component.Name.AGENT, status);
                    nodeMonitoring.setTechnologyServiceStatus(serviceMap);
                    nodeMonitoring.setUpdateTime(new Date());
                    monitoringManager.save(nodeMonitoring);
                    // Save into service table
                    DBServiceManager.getManager().setStatus(cluster.getId(),
                            node.getPublicIp(),
                            Constant.Component.Name.AGENT,
                            Constant.Role.AGENT, null, null, false);
                    // Process alert
                    processAlert(cluster, node.getPublicIp(),
                            Event.Type.SERVICE,
                            Constant.Component.Name.AGENT,
                            Constant.Role.AGENT, Severity.CRITICAL,
                            Constant.Keys.DOWN, null);
                }
            }
        }
    }

    /**
     * Re-evaluates the usage alerts of every node of the cluster using the
     * stored monitoring data of the node.
     *
     * @param cluster
     *            the cluster
     */
    public void updateEventSubTypes(Cluster cluster) {
        MonitoringManager monitoringManager = new MonitoringManager();
        // iterate over the nodes.
        for (Node node : cluster.getNodes()) {
            // node monitoring object.
            NodeMonitoring nodeMonitoring = monitoringManager
                    .getMonitoringData(node.getId());
            // check for node monitoring.
            checkAlertsForUsage(node.getPublicIp(), cluster.getId(),
                    nodeMonitoring);
        }
    }

    /**
     * Method to check whether a CRITICAL agent service event is stored for
     * the cluster.
     *
     * @param cluster
     *            the cluster
     * @return true, if such an event was found
     */
    public boolean isAnyAgentDown(Cluster cluster) {
        Event event = getEvent(cluster.getId(), null, Type.SERVICE,
                Constant.Component.Name.AGENT, Constant.Role.AGENT,
                Severity.CRITICAL);
        return event != null;
    }
}