/**
* Helios, OpenSource Monitoring
* Brought to you by the Helios Development Group
*
* Copyright 2007, Helios Development Group and individual contributors
* as indicated by the @author tags. See the copyright.txt file in the
* distribution for a full listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*
*/
package org.helios.apmrouter.catalog;
import java.util.HashSet;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.helios.apmrouter.collections.ConcurrentLongSlidingWindow;
import org.helios.apmrouter.collections.ConcurrentLongSortedSet;
import org.helios.apmrouter.collections.LongSlidingWindow;
import org.helios.apmrouter.server.ServerComponentBean;
import org.helios.apmrouter.util.SystemClock;
import org.springframework.jmx.export.annotation.ManagedMetric;
import org.springframework.jmx.support.MetricType;
/**
 * <p>Title: MetricLastTimeSeenService</p>
 * <p>Description: Service that groups the ids of recently seen metrics into fixed-size time windows,
 * held in a skip-list map keyed by the window start time (in seconds). A scheduled scan removes the
 * windows that have aged past the stale threshold and reports every metric id they contain to the
 * configured {@link StaleMetricEventProcessor}.
 * Intended to support event broadcasts when metrics go stale.</p>
 * <p>Company: Helios Development Group LLC</p>
 * @author Whitehead (nwhitehead AT heliosdev DOT org)
 * <p><code>org.helios.apmrouter.catalog.MetricLastTimeSeenService</code></p>
 */
public class MetricLastTimeSeenService extends ServerComponentBean implements Runnable {
    /** A map of the sets of metric-ids seen in a time window, keyed by the window start time in seconds */
    protected final ConcurrentSkipListMap<Integer, ConcurrentLongSortedSet> metricTimestampMap = new ConcurrentSkipListMap<Integer, ConcurrentLongSortedSet>();
    /** The stale metric event handler. If null, stale windows are still removed and counted but no events are dispatched */
    protected StaleMetricEventProcessor staleEventProcessor = null;
    /** The stale threshold level in seconds. Default is {@link #DEFAULT_STALE_THRESHOLD} */
    protected int staleThreshold = DEFAULT_STALE_THRESHOLD;
    /** The stale window size in seconds. Last seen entries are added into a sliding window of buckets of this size.
     * Larger window sizes offer better performance at the cost of stale detection precision. Default is {@link #DEFAULT_WINDOW_SIZE} */
    protected int windowSize = DEFAULT_WINDOW_SIZE;
    /** The scheduler to perform scans. Uses a single daemon thread. */
    protected final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1, new ThreadFactory(){
        final AtomicInteger serial = new AtomicInteger(0);
        @Override
        public Thread newThread(Runnable r) {
            Thread t = new Thread(r, "LastTimeSeenScanThread#" + serial.incrementAndGet());
            t.setDaemon(true);
            return t;
        }
    });
    /** The handle of the scheduled task */
    protected ScheduledFuture<?> taskHandle = null;
    /** The default stale threshold level in seconds */
    public static int DEFAULT_STALE_THRESHOLD = 90;
    /** The default window size in seconds */
    public static int DEFAULT_WINDOW_SIZE = 15;

    /** Sliding windows of scan times in ns. */
    protected final LongSlidingWindow scanTimesNs = new ConcurrentLongSlidingWindow(15);
    /** Sliding windows of last seen processing times in ns. */
    protected final LongSlidingWindow lastSeenTimesNs = new ConcurrentLongSlidingWindow(15);
    /** Sliding windows of clearing older entries (COE) processing times in ns. */
    protected final LongSlidingWindow coeTimesNs = new ConcurrentLongSlidingWindow(15);

    /**
     * Schedules the periodic stale window scan, one scan every {@link #windowSize} seconds,
     * and then delegates to the super class.
     * {@inheritDoc}
     * @see org.helios.apmrouter.server.ServerComponentBean#doStart()
     */
    @Override
    protected void doStart() throws Exception {
        taskHandle = scheduler.scheduleWithFixedDelay(this, windowSize, windowSize, TimeUnit.SECONDS);
        super.doStart();
    }

    /**
     * Cancels the periodic stale window scan if one was scheduled.
     * {@inheritDoc}
     * @see org.helios.apmrouter.server.ServerComponentBean#doStop()
     */
    @Override
    protected void doStop() {
        if(taskHandle!=null) {
            taskHandle.cancel(true);
        }
    }

    /**
     * The scheduler callback. Runs one {@link #scan()}.
     * Any exception is caught here because an exception that escapes a task scheduled with
     * <code>scheduleWithFixedDelay</code> suppresses all subsequent executions of that task.
     */
    @Override
    public void run() {
        try {
            scan();
        } catch (Exception ex) {
            // Only the debug logger is known to be available from here; the important part is
            // that the periodic task survives a failing scan or a failing stale event processor.
            debug("Stale window scan failed: " + ex);
        }
    }

    /**
     * Updates the metric timestamp map when a metric is seen.
     * The timestamp is converted to seconds and rounded down to the start of its window,
     * and the metric ids are added to the id set of that window, which is created on first use.
     * The elapsed time of each call is recorded in {@link #lastSeenTimesNs}.
     * @param timestamp The time the metric was seen as a UTC long timestamp in ms.
     * @param metricIds The ids of the metrics to add
     */
    public void onMetricSeen(long timestamp, long...metricIds) {
        try {
            SystemClock.startTimer();
            int timeInSecs = (int)TimeUnit.SECONDS.convert(timestamp, TimeUnit.MILLISECONDS);
            // Round down to the window boundary so that every second within the
            // same window maps to the same key.
            int timeWindow = timeInSecs - (timeInSecs % windowSize);
            ConcurrentLongSortedSet metrics = metricTimestampMap.get(timeWindow);
            if(metrics==null) {
                // Atomic create-if-missing: if another thread won the race, use its set.
                ConcurrentLongSortedSet created = new ConcurrentLongSortedSet();
                metrics = metricTimestampMap.putIfAbsent(timeWindow, created);
                if(metrics==null) {
                    metrics = created;
                }
            }
            metrics.add(metricIds);
        } finally {
            lastSeenTimesNs.insert(SystemClock.endTimer().elapsedNs);
        }
    }

    /**
     * Clears metric Ids from older time windows when a metric is seen now.
     * The elapsed time of each call is recorded in {@link #coeTimesNs}.
     * <p>NOTE(review): nothing in this class invokes this method, so a metric id that is seen again
     * in a newer window also remains in its older window and will still be reported stale when that
     * older window expires. Confirm whether a caller or subclass is expected to invoke it.</p>
     * @param timeWindow The time window in which a metric was most recently seen
     * @param metricIds The metric Ids that were seen in the passed time window
     */
    protected void clearOlderEntries(int timeWindow, long...metricIds) {
        long startTime = System.nanoTime();
        // headMap is exclusive of timeWindow, so only strictly older windows are touched
        for(Entry<Integer, ConcurrentLongSortedSet> entry: metricTimestampMap.headMap(timeWindow).entrySet()) {
            entry.getValue().remove(metricIds);
        }
        long elapsedTime = System.nanoTime()-startTime;
        coeTimesNs.insert(elapsedTime);
    }

    /**
     * Scans for stale windows in {@link #metricTimestampMap}, oldest first.
     * A window is stale once the whole window (its start time plus {@link #windowSize}) is at least
     * {@link #staleThreshold} seconds in the past. Each stale window is removed, stale events are
     * dispatched for all the associated metric Ids and the <b>StaleMetricEvents</b> counter is incremented.
     * If the oldest window is not stale, returns without action.
     * The elapsed time of each scan is recorded in {@link #scanTimesNs}.
     */
    public void scan() {
        debug("Scanning for stale windows");
        try {
            SystemClock.startTimer();
            // The latest window start time for which every second in the window is past the stale threshold.
            // Assumes unixTime() is in seconds, like the map keys - TODO confirm.
            int newestStaleWindow = (int)(SystemClock.currentClock().unixTime()-staleThreshold-windowSize);
            // Read the handler once so that all events of this scan go to the same processor
            final StaleMetricEventProcessor processor = staleEventProcessor;
            boolean foundStale = false;
            Entry<Integer, ConcurrentLongSortedSet> entry = metricTimestampMap.firstEntry();
            while(entry!=null && entry.getKey() <= newestStaleWindow) {
                foundStale = true;
                int lastSeen = entry.getKey();
                metricTimestampMap.remove(lastSeen);
                ConcurrentLongSortedSet staleIds = entry.getValue();
                if(processor!=null) {
                    for(int i = 0; i < staleIds.size(); i++) {
                        long metricId = staleIds.get(i);
                        processor.onStaleMetric(metricId, lastSeen);
                    }
                }
                incr("StaleMetricEvents", staleIds.size());
                // Keep draining so that a backlog of stale windows is cleared in one pass
                entry = metricTimestampMap.firstEntry();
            }
            if(!foundStale) {
                debug("No stale window found");
            }
        } finally {
            scanTimesNs.insert(SystemClock.endTimer().elapsedNs);
        }
    }

    /**
     * Returns the metric names supported by the super class plus <b>StaleMetricEvents</b>.
     * {@inheritDoc}
     * @see org.helios.apmrouter.server.ServerComponent#getSupportedMetricNames()
     */
    @Override
    public Set<String> getSupportedMetricNames() {
        Set<String> metrics = new HashSet<String>(super.getSupportedMetricNames());
        metrics.add("StaleMetricEvents");
        return metrics;
    }

    /**
     * Returns the last scan time in ns.
     * @return the last scan time in ns, or -1 if no scan has been recorded
     */
    @ManagedMetric(category="LastScanTimeNs", metricType=MetricType.GAUGE, description="The last scan time in ns.")
    public long getLastScanTimeNs() {
        return scanTimesNs.isEmpty() ? -1L : scanTimesNs.get(0);
    }

    /**
     * Returns the rolling average scan time in ns.
     * @return the rolling average scan time in ns, or -1 if no scan has been recorded
     */
    @ManagedMetric(category="AverageScanTimeNs", metricType=MetricType.GAUGE, description="The rolling average scan time in ns.")
    public long getAverageScanTimeNs() {
        return scanTimesNs.isEmpty() ? -1L : scanTimesNs.avg();
    }

    /**
     * Returns the last last-seen processing time in ns.
     * @return the last last-seen processing time in ns, or -1 if none has been recorded
     */
    @ManagedMetric(category="LastLastSeenTimeNs", metricType=MetricType.GAUGE, description="The last last-seen processing time in ns.")
    public long getLastLastSeenTimeNs() {
        return lastSeenTimesNs.isEmpty() ? -1L : lastSeenTimesNs.get(0);
    }

    /**
     * Returns the rolling average last-seen processing time in ns.
     * @return the rolling average last-seen processing time in ns, or -1 if none has been recorded
     */
    @ManagedMetric(category="AverageLastSeenTimeNs", metricType=MetricType.GAUGE, description="The rolling average last-seen processing time in ns.")
    public long getAverageLastSeenTimeNs() {
        return lastSeenTimesNs.isEmpty() ? -1L : lastSeenTimesNs.avg();
    }

    /**
     * Returns the last coe processing time in ns.
     * @return the last coe processing time in ns, or -1 if none has been recorded
     */
    @ManagedMetric(category="LastCOETimeNs", metricType=MetricType.GAUGE, description="The last coe processing time in ns.")
    public long getLastCOETimeNs() {
        return coeTimesNs.isEmpty() ? -1L : coeTimesNs.get(0);
    }

    /**
     * Returns the rolling average coe processing time in ns.
     * @return the rolling average coe processing time in ns, or -1 if none has been recorded
     */
    @ManagedMetric(category="AverageCOETimeNs", metricType=MetricType.GAUGE, description="The rolling average coe processing time in ns.")
    public long getAverageCOETimeNs() {
        return coeTimesNs.isEmpty() ? -1L : coeTimesNs.avg();
    }
}