/* * Copyright (c) 2013 EMC Corporation * All Rights Reserved */ package com.emc.storageos.security.audit; import java.text.MessageFormat; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.scheduling.concurrent.CustomizableThreadFactory; import com.emc.storageos.services.util.AlertsLogger; /** * Utility class to track various stats regarding requests. This class * is thread safe. */ public class RequestStatTracker { private static final Logger _log = LoggerFactory.getLogger(RequestStatTracker.class); private AlertsLogger _alertLog = AlertsLogger.getAlertsLogger(); private static final int SUSPISCIOUS_RSP_TIME_MS = 5 * 1000; private static final int HIGH_NUMBER_REQUESTS = 10; private static final String STAT_THREAD_NAME = "RequestStats"; private ScheduledExecutorService executor; /** * terminates scheduled threads executor */ public void destroy() { if (executor != null) { executor.shutdown(); _log.debug("Shutting down request stat tracker threads"); } } // COUNTERS AND AVG/SAMPLES ------------------------------------------------------------- // Conventions to help read the code and variable names: // "lst" == last == element as it is being computed so far since the last reset // "prv" == previous == element as it has been computed at the end of the cycle that completed // For example: - #request last minute means == # requests since the last time we reset the // minute counter. This value changes every time a request comes in. // - #request previous minutes means == we reset the minute counter. In the previous // minute, there is now a finite #requests that occurred there. This value is // unchanged until the next reset. (i.e for the first minute of life, this value // will be zero. private AtomicInteger _activeRequests = new AtomicInteger(0); // active requests at any point in time private AtomicInteger _reqsPrevMin = new AtomicInteger(0); private AtomicInteger _reqsLstMin = new AtomicInteger(0); private AtomicInteger _500ErrorsLstHr = new AtomicInteger(0); // # 500 and 503 errors since service startup private ThreadLocal<Long> _currentRequestStartTime = new ThreadLocal<Long>() { @Override protected Long initialValue() { return new Long(0); } }; /** * helper class to hold a running total of all the occurences of the item to average, * the number of samples, and the avg of all the items, computed at the end of the cycle(minute, hour or day) */ private class SampledValues { public float _total = 0; public int _samples = 0; public float _avg = 0; } // request response time avg/min private volatile SampledValues _reqRspPerMin = new SampledValues(); // request response time avg of the (avg/min) in previous hr private volatile SampledValues _reqRspPerMinHr = new SampledValues(); // # requests per minute avg in previous hr private volatile SampledValues _reqPerMinHr = new SampledValues(); // # requests per minute, avg per hour, avg for the previous day private volatile SampledValues _reqPerMinHrDay = new SampledValues(); // min/max response time since last service startup private volatile int _minReqRspMs = 0; private volatile int _maxReqRspMs = 0; private boolean _statsOn = false; private int _displayRateInMins = 15; // every 15 minutes by default /** * Determines how frequently the stats are displayed in the logs. * This is independant from how the stats are captured/computed. * * @param seconds */ public void setDisplayRateInMins(int mins) { _displayRateInMins = mins; } // STATS INIT AND THREADS --------------------------------------------------------------- /** * Turns the stat engine on */ public synchronized void init() { if (_statsOn) { return; } _statsOn = true; // scheduled threads executor = Executors.newSingleThreadScheduledExecutor(new CustomizableThreadFactory(STAT_THREAD_NAME)); executor.scheduleAtFixedRate(new StatLogger(), _displayRateInMins, _displayRateInMins, TimeUnit.MINUTES); // add one second initial delay to all processing jobs to avoid overlaps with // display job. executor.scheduleAtFixedRate(new PerMinuteProcessing(), 61, 60, TimeUnit.SECONDS); executor.scheduleAtFixedRate(new PerHourProcessing(), 3601, 3600, TimeUnit.SECONDS); executor.scheduleAtFixedRate(new PerDayProcessing(), 86401, 86400, TimeUnit.SECONDS); } /** * Main stat logger thread. Will log stats at intervals defined by _displayRateInMls */ private class StatLogger implements Runnable { @Override public void run() { _log.info("BEGIN REQUEST STATS: "); _log.info("# of active requests: {}", _activeRequests.get()); _log.info("# of requests previous minute: {}", _reqsPrevMin.get()); _log.info("# of requests last minute: {}", _reqsLstMin.get()); _log.info("Request avg response time previous minute: {} ms", _reqRspPerMin._avg); _log.info("Request avg response time previous hour: {} ms", _reqRspPerMinHr._avg); _log.info("Min/max response time since startup: {}ms/{}ms", _minReqRspMs, _maxReqRspMs); _log.info("Avg request per minute in the previous hour: {}", _reqPerMinHr._avg); _log.info("Avg request per minute avg per hour in the previous day: {}", _reqPerMinHrDay._avg); _log.info("# of 500 and 503 errors in the last hour: {}", _500ErrorsLstHr.get()); _log.info("END REQUEST STATS: "); } } /** * Thread that performs the following each minute: * - Updates request per minute in the last hour with the latest * count of request per minute * - Resets the requests per minute counter * - Resets the request response time average in the last minute */ private class PerMinuteProcessing implements Runnable { @Override public void run() { _reqsPrevMin.set(_reqsLstMin.get()); updateOrResetReqPerMinLstHr(_reqsPrevMin.get(), false); _reqsLstMin.set(0); updateOrResetReqRspAvgLstMin(0, true); } } /** * Thread that performs the following every hour: * - Resets request response time average in the last hour * - Resets request per minute in the last hour, and updates the request per minute per hour per day counter * - Resets the 500/503 error counter */ private class PerHourProcessing implements Runnable { @Override public void run() { updateOrResetReqRspAvgLstHr(0, true); updateOrResetReqPerMinPerHrLstDay(updateOrResetReqPerMinLstHr(0, true), false); _500ErrorsLstHr.set(0); } } /** * Thread that performs the following every day: * - resets request per minute per hour in the last day */ private class PerDayProcessing implements Runnable { @Override public void run() { updateOrResetReqPerMinPerHrLstDay(0, true); } } // PUBLIC CALLS ------------------------------------------------------------------- /** * Increments the number of active requests. * If the number of active requests gets above HIGH_NUMBER_REQUESTS, logs a warning. * This usually indicates overload or an exception situation that wasn't handled * properly. */ public void incrementActiveRequests() { if (!_statsOn) { return; } if (_activeRequests.incrementAndGet() >= HIGH_NUMBER_REQUESTS) { _alertLog.warn(MessageFormat.format("Number of concurrent requests is high: {0}. Avg response time recently is: {1}ms", _activeRequests, _reqRspPerMin._avg)); } _reqsLstMin.incrementAndGet(); } /** * Decrements the active requests counter. */ public void decrementActiveRequests() { if (!_statsOn) { return; } _activeRequests.decrementAndGet(); } /** * Increment the 500/503 error counter. */ public void flag500Error() { if (!_statsOn) { return; } _500ErrorsLstHr.incrementAndGet(); } /** * returns the number of concurrently active requests. * * @return */ public int getActiveRequests() { return _activeRequests.get(); } /** * returns the number of 500/503 errors in the last hour. * * @return */ public int get500Errors() { return _500ErrorsLstHr.get(); } /** * Saves the start time of the request for the local thread */ public void recordStartTime() { _currentRequestStartTime.set(System.currentTimeMillis()); } /** * Computes the response time of the request that just finished for the local thread * and updates the request response average in the last minute, and propagates to the * request response per minute per hour counter. * Updates min/max response time. * If the response is above SUSPICIOUS_RESP_TIME_MS, log a warning. */ public void recordEndTime() { int msDiff = (int) (System.currentTimeMillis() - _currentRequestStartTime.get()); _log.info("Request response time: {} ms", msDiff); updateOrResetReqRspAvgLstHr(updateOrResetReqRspAvgLstMin(msDiff, false), false); updateMinMaxReqRspMs(msDiff); if (msDiff >= SUSPISCIOUS_RSP_TIME_MS) { _alertLog.warn(MessageFormat.format("Request took an unusually long time to complete: {0}ms", msDiff)); } _currentRequestStartTime.remove(); } // INTERNAL COUNTER COMPUTE METHODS -------------------------------------------------- /** * Updates the min and max response time indicators * * @param val */ private synchronized void updateMinMaxReqRspMs(int val) { if (_minReqRspMs == 0 || _maxReqRspMs == 0) { _minReqRspMs = _minReqRspMs == 0 ? val : _minReqRspMs; _maxReqRspMs = _maxReqRspMs == 0 ? val : _maxReqRspMs; } else if (val < _minReqRspMs) { _minReqRspMs = val; } else if (val > _maxReqRspMs) { _maxReqRspMs = val; } } /** * Utility function that resets a SampledValues internal members to start a new cycle, or * Increments its holding total and samples counter. * * @param val the value to increment * @param reset true to reset, false otherwise (val is ignored then) * @param smpl (the SampledValues object onwhich to perform the update or reset) * @return */ private float updateOrResetSampledValues(float val, boolean reset, SampledValues smpl) { if (!reset) { smpl._total += val; smpl._samples++; } else { // compute the average then reset average if (smpl._samples > 0) { smpl._avg = smpl._total / smpl._samples; smpl._total = 0f; smpl._samples = 0; } else { smpl._avg = 0f; } } return smpl._avg; } /** * Updates or resets request response time per minute * * @param diff response time * @param reset whether we are resetting the counter or not * @return in case of a reset, we are returning the new average */ private synchronized float updateOrResetReqRspAvgLstMin(int diff, boolean reset) { return updateOrResetSampledValues(diff, reset, _reqRspPerMin); } /** * Updates or resets the request response time average per minute in last hour * * @param avg request resp avg per minute * @param reset whether we are resetting the counter or not */ private synchronized void updateOrResetReqRspAvgLstHr(float avg, boolean reset) { updateOrResetSampledValues(avg, reset, _reqRspPerMinHr); } /** * Updates or resets the request per minute in the last hour counter * * @param incr number of requests per minute in the minute that just finished * @param reset whether we are resetting the counter or not * @return the average of request per minute in the previous hour */ private synchronized float updateOrResetReqPerMinLstHr(float incr, boolean reset) { return updateOrResetSampledValues(incr, reset, _reqPerMinHr); } /** * Updates or resets the request per minute per hour in the last day counter * * @param incr number of request per minute avg per hour in the last day * @param reset */ private synchronized void updateOrResetReqPerMinPerHrLstDay(float incr, boolean reset) { updateOrResetSampledValues(incr, reset, _reqPerMinHrDay); } }