/************************************************************************* * Copyright 2009-2014 Eucalyptus Systems, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see http://www.gnu.org/licenses/. * * Please contact Eucalyptus Systems, Inc., 6755 Hollister Ave., Goleta * CA 93117, USA or visit http://www.eucalyptus.com/licenses/ if you need * additional information or have any questions. * * This file may incorporate work covered under the following copyright * and permission notice: * * Copyright 2010-2014 Amazon.com, Inc. or its affiliates. All Rights * Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/apache2.0 * * or in the "license" file accompanying this file. This file is * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF * ANY KIND, either express or implied. See the License for the specific * language governing permissions and limitations under the License. ************************************************************************/ package com.eucalyptus.cloudwatch.workflow.alarms; import java.util.ArrayList; import java.util.Collection; import java.util.Date; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.TreeMap; import com.eucalyptus.cloudwatch.common.config.CloudWatchConfigProperties; import com.eucalyptus.cloudwatch.common.internal.domain.alarms.AlarmEntity; import com.eucalyptus.cloudwatch.common.internal.domain.alarms.AlarmHistory; import com.eucalyptus.cloudwatch.common.internal.domain.alarms.AlarmManager; import com.eucalyptus.cloudwatch.common.internal.domain.alarms.AlarmState; import com.eucalyptus.cloudwatch.common.internal.domain.alarms.AlarmUtils; import com.eucalyptus.cloudwatch.common.internal.domain.metricdata.MetricUtils; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.log4j.Logger; import com.eucalyptus.bootstrap.Bootstrap; import com.eucalyptus.cloudwatch.common.CloudWatchBackend; import com.eucalyptus.cloudwatch.common.internal.domain.alarms.AlarmEntity.StateValue; import com.eucalyptus.cloudwatch.common.internal.domain.metricdata.MetricManager; import com.eucalyptus.cloudwatch.common.internal.domain.metricdata.MetricStatistics; import com.eucalyptus.component.Topology; public class AlarmStateEvaluationWorker implements Runnable { private Collection<AlarmEntity> alarmEntities; private static final Logger LOG = Logger.getLogger(AlarmStateEvaluationWorker.class); public AlarmStateEvaluationWorker(Collection<AlarmEntity> alarmEntities) { super(); this.alarmEntities = alarmEntities; } @Override public void run() { try { if (!CloudWatchConfigProperties.isDisabledCloudWatchService() && Bootstrap.isOperational() && Topology.isEnabledLocally(CloudWatchBackend.class)) { Date evaluationDate = new Date(); Map<AlarmEntity, AlarmState> currentStates = evaluateStates(alarmEntities); Map<String, AlarmState> statesToUpdate = Maps.newHashMap(); List<AlarmHistory> historyList = Lists.newArrayList(); for (AlarmEntity alarmEntity : currentStates.keySet()) { AlarmState currentState = currentStates.get(alarmEntity); if (currentState.getStateValue() != alarmEntity.getStateValue()) { statesToUpdate.put(alarmEntity.getNaturalId(), currentState); historyList.add(AlarmManager.createChangeAlarmStateHistoryItem(alarmEntity, currentState, evaluationDate)); historyList.addAll(AlarmManager.executeActionsAndRecord(alarmEntity, currentState, true, evaluationDate, historyList)); } else if (moreThanOnePeriodHasPassed(alarmEntity, evaluationDate)) { historyList.addAll(AlarmManager.executeActionsAndRecord(alarmEntity, currentState, false, evaluationDate, historyList)); } } AlarmManager.changeAlarmStateBatch(statesToUpdate, evaluationDate); AlarmManager.addAlarmHistoryEvents(historyList); } } catch(Exception e) { LOG.error(e); } } private boolean moreThanOnePeriodHasPassed(AlarmEntity alarmEntity, Date now) { now = MetricUtils.stripSeconds(now); Date then = MetricUtils.stripSeconds(alarmEntity.getLastActionsUpdatedTimestamp()); return now.getTime() - then.getTime() >= 1000L * alarmEntity.getPeriod(); } private Map<AlarmEntity, AlarmState> evaluateStates(Collection<AlarmEntity> alarmEntities) { Map<AlarmEntity, AlarmState> returnValue = Maps.newLinkedHashMap(); Date queryDate = new Date(); Date endDate = MetricUtils.stripSeconds(queryDate); List<MetricManager.GetMetricStatisticsParams> getMetricStatisticsParamses = Lists.newArrayList(); for (AlarmEntity alarmEntity: alarmEntities) { Date startDate = new Date(endDate.getTime() - 1000L * alarmEntity.getPeriod() * alarmEntity.getEvaluationPeriods()); // We put in a slight buffer in addition to the regular window time (two additional periods or 5 minutes, whichever is greater) to delay // insufficient data from going down... Date bufferStartDate = new Date(startDate.getTime() - 1000L * alarmEntity.getPeriod() * numBufferPeriods(alarmEntity.getPeriod())); getMetricStatisticsParamses.add(new MetricManager.GetMetricStatisticsParams(alarmEntity.getAccountId(), alarmEntity.getMetricName(), alarmEntity.getNamespace(), alarmEntity.getDimensionMap(), alarmEntity.getMetricType(), alarmEntity.getUnit(), bufferStartDate, endDate, alarmEntity.getPeriod())); } List<Collection<MetricStatistics>> manyMetricsStatisticsList = MetricManager.getManyMetricStatistics(getMetricStatisticsParamses); int count = 0; for (AlarmEntity alarmEntity: alarmEntities) { Date startDate = new Date(endDate.getTime() - 1000L * alarmEntity.getPeriod() * alarmEntity.getEvaluationPeriods()); // We put in a slight buffer in addition to the regular window time (two additional periods or 5 minutes, whichever is greater) to delay // insufficient data from going down... Date bufferStartDate = new Date(startDate.getTime() - 1000L * alarmEntity.getPeriod() * numBufferPeriods(alarmEntity.getPeriod())); Collection<MetricStatistics> metricStatisticsList = manyMetricsStatisticsList.get(count++); TreeMap<Long, StateAndMetricValue> dataPointMap = new TreeMap<Long, StateAndMetricValue>(); for (long L = bufferStartDate.getTime(); L < endDate.getTime(); L += alarmEntity.getPeriod() * 1000L) { dataPointMap.put(L, new StateAndMetricValue(StateValue.INSUFFICIENT_DATA, null)); } // now populate based on items from the returned values for (MetricStatistics metricStatistics : metricStatisticsList) { Long dateAsLong = metricStatistics.getTimestamp().getTime(); if (!dataPointMap.containsKey(dateAsLong)) { LOG.warn("Data point does not fall in interval, skipping"); } else { dataPointMap.put(dateAsLong, calculateLocalStateAndMetricValue(alarmEntity, metricStatistics)); } } // Rules // 1) If at least one "OK" interval found in previous "evaluationPeriods" intervals, state is OK // 2) If every interval found in previous "evaluationPeriods" intervals is ALARM, state is ALARM // 3) If the oldest interval within the previous "evaluationPeriods" intervals is ALARM, and all // intervals since then are either ALARM or INSUFFICIENT data, an alarm state was entered at // the proper time, and we have not yet seen an OK to clear it, so state is ALARM. // 4) If the oldest interval within the previous "evaluationPeriods" interval is INSUFFICIENT_DATA, // look backwards into the buffer period. Set the state to the last known value. If there are // no known values, set the state to INSUFFICIENT_DATA. In particular, this means if there is // some ALARM data in the later states, do not set it to ALARM yet, as the time threshold has // not yet passed. List<Double> okPoints = new ArrayList<Double>(); List<Double> alarmPoints = new ArrayList<Double>(); List<Double> insufficientDataPoints = new ArrayList<Double>(); LinkedList<Double> relevantDataPoints = new LinkedList<Double>(); // we will add at the beginning sometimes StateValue oldestStateValue = null; for (long L = startDate.getTime(); L < endDate.getTime(); L += alarmEntity.getPeriod() * 1000L) { StateAndMetricValue stateAndMetricValue = dataPointMap.get(L); relevantDataPoints.addLast(stateAndMetricValue.getMetricValue()); // newer ones go at the end? if (oldestStateValue == null) { oldestStateValue = stateAndMetricValue.getStateValue(); } if (stateAndMetricValue.getStateValue() == StateValue.OK) { okPoints.add(stateAndMetricValue.getMetricValue()); } else if (stateAndMetricValue.getStateValue() == StateValue.ALARM) { alarmPoints.add(stateAndMetricValue.getMetricValue()); } else if (stateAndMetricValue.getStateValue() == StateValue.INSUFFICIENT_DATA) { insufficientDataPoints.add(stateAndMetricValue.getMetricValue()); } } // TODO: we really need to get better reasons, but these are like Amazon's reasons for now. AlarmState alarmState; if (okPoints.size() > 0) { alarmState = AlarmManager.createAlarmState(StateValue.OK, okPoints, relevantDataPoints, alarmEntity.getComparisonOperator(), alarmEntity.getThreshold(), alarmEntity.getPeriod(), queryDate, alarmEntity.getStatistic()); // it's ok } else if (oldestStateValue == StateValue.ALARM) { alarmState = AlarmManager.createAlarmState(StateValue.ALARM, alarmPoints, relevantDataPoints, alarmEntity.getComparisonOperator(), alarmEntity.getThreshold(), alarmEntity.getPeriod(), queryDate, alarmEntity.getStatistic()); } else { // go back earlier StateValue lastNonInsufficientDataStateValue = null; for (long L = startDate.getTime() - alarmEntity.getPeriod() * 1000L; L >= bufferStartDate.getTime(); L -= alarmEntity.getPeriod() * 1000L) { StateAndMetricValue stateAndMetricValue = dataPointMap.get(L); relevantDataPoints.addFirst(stateAndMetricValue.getMetricValue()); // older ones go at the beginning? if (stateAndMetricValue.getStateValue() == StateValue.OK) { okPoints.add(stateAndMetricValue.getMetricValue()); lastNonInsufficientDataStateValue = StateValue.OK; break; } else if (stateAndMetricValue.getStateValue() == StateValue.ALARM) { alarmPoints.add(stateAndMetricValue.getMetricValue()); lastNonInsufficientDataStateValue = StateValue.ALARM; break; } } if (lastNonInsufficientDataStateValue == StateValue.OK) { alarmState = AlarmManager.createAlarmState(StateValue.OK, okPoints, relevantDataPoints, alarmEntity.getComparisonOperator(), alarmEntity.getThreshold(), alarmEntity.getPeriod(), queryDate, alarmEntity.getStatistic()); } else if (lastNonInsufficientDataStateValue == StateValue.ALARM) { alarmState = AlarmManager.createAlarmState(StateValue.ALARM, alarmPoints, relevantDataPoints, alarmEntity.getComparisonOperator(), alarmEntity.getThreshold(), alarmEntity.getPeriod(), queryDate, alarmEntity.getStatistic()); } else { alarmState = AlarmManager.createAlarmState(StateValue.INSUFFICIENT_DATA, insufficientDataPoints, relevantDataPoints, alarmEntity.getComparisonOperator(), alarmEntity.getThreshold(), alarmEntity.getPeriod(), queryDate, alarmEntity.getStatistic()); // (TODO: distinguish the case of complete insufficient data // from that of some insufficient data and some alarm, where alarm has not been seen "long enough" } } returnValue.put(alarmEntity, alarmState); } return returnValue; } private StateAndMetricValue calculateLocalStateAndMetricValue( AlarmEntity alarmEntity, MetricStatistics metricStatistics) { Double metricValue = AlarmUtils.calculateMetricValue(alarmEntity.getStatistic(), metricStatistics); StateValue stateValue = AlarmUtils.calculateStateValue(alarmEntity.getThreshold(), alarmEntity.getComparisonOperator(), metricValue); return new StateAndMetricValue(stateValue, metricValue); } private static class StateAndMetricValue { private StateValue stateValue; private Double metricValue; public StateAndMetricValue(StateValue stateValue, Double metricValue) { super(); this.stateValue = stateValue; this.metricValue = metricValue; } public StateValue getStateValue() { return stateValue; } public Double getMetricValue() { return metricValue; } } private Integer numBufferPeriods(Integer period) { // it is the greater of 5 minutes or two periods, but it should be a whole number of periods. Integer periodMinutes = period / 60; if (periodMinutes == 1) return 5; if (periodMinutes == 2) return 3; return 2; } }