/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ambari.server.alerts; import java.text.MessageFormat; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.ambari.server.AmbariException; import org.apache.ambari.server.actionmanager.ActionManager; import org.apache.ambari.server.actionmanager.RequestStatus; import org.apache.ambari.server.api.query.Query; import org.apache.ambari.server.api.query.QueryImpl; import org.apache.ambari.server.api.query.render.MinimalRenderer; import org.apache.ambari.server.api.resources.ClusterResourceDefinition; import org.apache.ambari.server.api.services.BaseRequest; import org.apache.ambari.server.controller.spi.ClusterController; import org.apache.ambari.server.controller.spi.Resource; import org.apache.ambari.server.controller.utilities.ClusterControllerHelper; import org.apache.ambari.server.orm.dao.HostRoleCommandDAO; import org.apache.ambari.server.orm.dao.RequestDAO; import org.apache.ambari.server.orm.entities.AlertDefinitionEntity; import org.apache.ambari.server.security.authorization.internal.InternalAuthenticationToken; import org.apache.ambari.server.state.Alert; import org.apache.ambari.server.state.AlertState; import org.apache.ambari.server.state.Cluster; import org.apache.ambari.server.state.alert.AlertDefinition; import org.apache.ambari.server.state.alert.AlertDefinitionFactory; import org.apache.ambari.server.state.alert.ParameterizedSource.AlertParameter; import org.apache.ambari.server.state.alert.ServerSource; import org.apache.ambari.server.state.services.AmbariServerAlertService; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.security.core.context.SecurityContextHolder; import com.google.inject.Inject; /** * The {@link AmbariPerformanceRunnable} is used by the * {@link AmbariServerAlertService} to ensure that certain areas of Ambari are * responsive. It performs the following checks: * <ul> * <li>A GET request against the cluster endpoint.</li> * <li>A query against {@link HostRoleCommandDAO} to get a summary of request * statuses</li> * <ul> */ public class AmbariPerformanceRunnable extends AlertRunnable { /** * Logger. */ private final static Logger LOG = LoggerFactory.getLogger(AmbariPerformanceRunnable.class); /** * <pre> * Performance Overview: * Database Access (Request By Status): 330ms (OK) * REST API (Cluster Request): 5,456ms (WARNING) * </pre> */ private static final String PERFORMANCE_OVERVIEW_TEMPLATE = "Performance Overview:" + System.lineSeparator() + "{0}"; /** * Example: {@code Database Access (Request By Status): 330ms (OK)} */ private static final String PERFORMANCE_AREA_TEMPLATE = " {0}: {1}ms ({2})"; /** * Example: * {@code Unable to execute performance alert area REQUEST_BY_STATUS (UNKNOWN)} */ private static final String PERFORMANCE_AREA_FAILURE_TEMPLATE = " Unable to execute performance alert area {0}: ({1})"; /** * Used for converting {@link AlertDefinitionEntity} into * {@link AlertDefinition} instances. */ @Inject private AlertDefinitionFactory m_definitionFactory; /** * The {@link PerformanceArea} enumeration represents logical areas of * functionality to test for performance. */ enum PerformanceArea { /** * Query for requests by {@link RequestStatus#IN_PROGRESS}. */ REQUEST_BY_STATUS("Database Access (Request By Status)", "request.by.status.warning.threshold", 3000, "request.by.status.critical.threshold", 5000) { /** * {@inheritDoc} */ @Override void execute(AmbariPerformanceRunnable runnable, Cluster cluster) throws Exception { runnable.m_actionManager.getRequestsByStatus(RequestStatus.IN_PROGRESS, BaseRequest.DEFAULT_PAGE_SIZE, false); } }, /** * Query for requests by {@link RequestStatus#IN_PROGRESS}. */ HRC_SUMMARY_STATUS("Database Access (Task Status Aggregation)", "task.status.aggregation.warning.threshold", 3000, "task.status.aggregation.critical.threshold", 5000) { /** * {@inheritDoc} */ @Override void execute(AmbariPerformanceRunnable runnable, Cluster cluster) throws Exception { List<Long> requestIds = runnable.m_requestDAO.findAllRequestIds( BaseRequest.DEFAULT_PAGE_SIZE, false); for (long requestId : requestIds) { runnable.m_hostRoleCommandDAO.findAggregateCounts(requestId); } } }, /** * Query through the REST API framework for a cluster. */ REST_API_GET_CLUSTER("REST API (Cluster)", "rest.api.cluster.warning.threshold", 5000, "rest.api.cluster.critical.threshold", 7000) { /** * {@inheritDoc} */ @Override void execute(AmbariPerformanceRunnable runnable, Cluster cluster) throws Exception { // Set authenticated user so that authorization checks will pass InternalAuthenticationToken authenticationToken = new InternalAuthenticationToken("admin"); authenticationToken.setAuthenticated(true); SecurityContextHolder.getContext().setAuthentication(authenticationToken); // create the request Map<Resource.Type, String> mapIds = new HashMap<>(); mapIds.put(Resource.Type.Cluster, cluster.getClusterName()); ClusterController clusterController = ClusterControllerHelper.getClusterController(); Query query = new QueryImpl(mapIds, new ClusterResourceDefinition(), clusterController); query.setRenderer(new MinimalRenderer()); query.addProperty("Clusters/desired_configs", null); query.addProperty("Clusters/credential_store_properties", null); query.addProperty("Clusters/desired_service_config_versions", null); query.addProperty("Clusters/health_report", null); query.addProperty("Clusters/total_hosts", null); query.addProperty("alerts_summary", null); query.addProperty("alerts_summary_hosts", null); query.execute(); } }; /** * The label for the performance area. */ private final String m_label; /** * The name of the parameter on the alert definition which represents the * {@link AlertState#WARNING} threshold value. */ private final String m_warningParameter; /** * A default {@link AlertState#WARNING} threshold value of the definition * doesn't have {@link #m_warningParameter} defined. */ private final int m_defaultWarningThreshold; /** * The name of the parameter on the alert definition which represents the * {@link AlertState#CRITICAL} threshold value. */ private final String m_criticalParameter; /** * A default {@link AlertState#WARNING} threshold value of the definition * doesn't have {@link #m_criticalParameter} defined. */ private final int m_defaultCriticalThreshold; /** * Constructor. * * @param label * the display label for this performance area (not {@code null}). * @param warningParameter * the definition parameter name for the warning threshold (not * {@code null}) * @param defaultWarningThreshold * the default value to use if the definition does not have a * warning threshold paramter. * @param criticalParameter * the definition parameter name for the critical threshold (not * {@code null}) * @param defaultCriticalThreshold * the default value to use if the definition does not have a * critical threshold paramter. */ PerformanceArea(String label, String warningParameter, int defaultWarningThreshold, String criticalParameter, int defaultCriticalThreshold) { m_label = label; m_warningParameter = warningParameter; m_defaultWarningThreshold = defaultWarningThreshold; m_criticalParameter = criticalParameter; m_defaultCriticalThreshold = defaultCriticalThreshold; } /** * Runs the {@link PerformanceArea}. * * @param runnable * a reference to the parent {@link AlertRunnable} which has * injected members for use. * @return a result of running the performance area (never {@code null}). */ abstract void execute(AmbariPerformanceRunnable runnable, Cluster cluster) throws Exception; } /** * Used for getting the most recent requests. */ @Inject private RequestDAO m_requestDAO; /** * Used for executing queries which are known to potentially take a long time. */ @Inject private HostRoleCommandDAO m_hostRoleCommandDAO; /** * Used for querying for requests by status. */ @Inject private ActionManager m_actionManager; /** * Constructor. * * @param definitionName */ public AmbariPerformanceRunnable(String definitionName) { super(definitionName); } /** * {@inheritDoc} */ @Override List<Alert> execute(Cluster cluster, AlertDefinitionEntity entity) throws AmbariException { // coerce the entity into a business object so that the list of parameters // can be extracted and used for threshold calculation AlertDefinition definition = m_definitionFactory.coerce(entity); ServerSource serverSource = (ServerSource) definition.getSource(); List<AlertParameter> parameters = serverSource.getParameters(); List<String> results = new ArrayList<>(); // start out assuming OK AlertState alertState = AlertState.OK; // run every performance area for (PerformanceArea performanceArea : PerformanceArea.values()) { // execute the performance area, creating an UNKNOWN state on exceptions PerformanceResult performanceResult; try { long startTime = System.currentTimeMillis(); performanceArea.execute(this, cluster); long totalTime = System.currentTimeMillis() - startTime; performanceResult = calculatePerformanceResult(performanceArea, totalTime, parameters); } catch (Exception exception) { String result = MessageFormat.format(PERFORMANCE_AREA_FAILURE_TEMPLATE, performanceArea, AlertState.UNKNOWN); LOG.error(result, exception); performanceResult = new PerformanceResult(result, AlertState.UNKNOWN); } String result = performanceResult.getResult(); AlertState resultAlertState = performanceResult.getAlertState(); // keep track of the string result for formatting later results.add(result); // keep track of the overall state of "this" alert switch (resultAlertState) { case CRITICAL: alertState = AlertState.CRITICAL; break; case OK: break; case SKIPPED: break; case UNKNOWN: if (alertState == AlertState.OK) { alertState = AlertState.UNKNOWN; } break; case WARNING: if (alertState != AlertState.CRITICAL) { alertState = AlertState.WARNING; } break; default: break; } } // create a text overview of all of the runs String allResults = StringUtils.join(results, System.lineSeparator()); String overview = MessageFormat.format(PERFORMANCE_OVERVIEW_TEMPLATE, allResults); // build the alert to return Alert alert = new Alert(entity.getDefinitionName(), null, entity.getServiceName(), entity.getComponentName(), null, alertState); alert.setLabel(entity.getLabel()); alert.setText(overview); alert.setTimestamp(System.currentTimeMillis()); alert.setCluster(cluster.getClusterName()); return Collections.singletonList(alert); } /** * Calculates the state based on the threshold values for a * {@link PerformanceArea} and an actual run time. * * @param area * the area to calculate the result for (not {@code null}). * @param time * the time taken, in milliseconds, to run the test. * @param parameters * a list of parameters from the alert definition which contain the * threshold values. * @return a result of running the performance area (never {@code null}). */ PerformanceResult calculatePerformanceResult(PerformanceArea area, long time, List<AlertParameter> parameters) { AlertState alertState = AlertState.OK; int warningThreshold = area.m_defaultWarningThreshold; int criticalThreshold = area.m_defaultCriticalThreshold; for (AlertParameter parameter : parameters) { Object value = parameter.getValue(); if (StringUtils.equals(parameter.getName(), area.m_warningParameter)) { warningThreshold = getThresholdValue(value, warningThreshold); } if (StringUtils.equals(parameter.getName(), area.m_criticalParameter)) { criticalThreshold = getThresholdValue(value, criticalThreshold); } } if (time >= warningThreshold && time < criticalThreshold) { alertState = AlertState.WARNING; } if (time >= criticalThreshold) { alertState = AlertState.CRITICAL; } String resultLabel = MessageFormat.format(PERFORMANCE_AREA_TEMPLATE, area.m_label, time, alertState); return new PerformanceResult(resultLabel, alertState); } /** * The {@link PerformanceResult} class is used to wrap the result of a * {@link PerformanceArea}. */ private static final class PerformanceResult { private final String m_result; private final AlertState m_alertState; /** * Constructor. * * @param result * the text of the result (not {@code null}). * @param alertState * the result state (not {@code null}). */ private PerformanceResult(String result, AlertState alertState) { m_result = result; m_alertState = alertState; } /** * Gets the fully-rendered result text, such as: * {@code Database Access (Request By Status): 330ms (OK)} * * @return the result */ public String getResult() { return m_result; } /** * The state of the result as calculated by the threshold parameters. * * @return the state */ public AlertState getAlertState() { return m_alertState; } } }