/**
* Helios, OpenSource Monitoring
* Brought to you by the Helios Development Group
*
* Copyright 2007, Helios Development Group and individual contributors
* as indicated by the @author tags. See the copyright.txt file in the
* distribution for a full listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*
*/
package org.helios.apmrouter.router;
import java.lang.Thread.UncaughtExceptionHandler;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.helios.apmrouter.metric.IMetric;
import org.helios.apmrouter.server.ServerComponentBean;
import org.springframework.jmx.export.annotation.ManagedAttribute;
import org.springframework.jmx.export.annotation.ManagedMetric;
import org.springframework.jmx.support.MetricType;
/**
* <p>Title: MetricConflationService</p>
* <p>Description: Service to conflate incoming metrics</p>
* <p>Company: Helios Development Group LLC</p>
* @author Whitehead (nwhitehead AT heliosdev DOT org)
* <p><code>org.helios.apmrouter.router.MetricConflationService</code></p>
*/
public class MetricConflationService extends ServerComponentBean implements Runnable, UncaughtExceptionHandler {
/** The maximum number of metrics that can be pending conflation at a time */
protected int maxQueueSize = 100000;
/** The number of worker threads to spin up to service the queue */
protected int workerThreads = 1;
/** The minimum period of time each metric remains in the conflation queue, unless is it conflated into another metric */
protected long conflationPeriod = 1000;
/** The pattern router */
protected PatternRouter router = null;
/** The conflation queue */
protected final ConcurrentSkipListMap<Long, Map<String, IMetric>> conflationQueue = new ConcurrentSkipListMap<Long, Map<String, IMetric>>();
/** Indicates if the threads should be running */
protected boolean keepRunning = false;
/** The thread group containing the worker threads */
protected ThreadGroup threadGroup = new ThreadGroup("MetricConflationService");
/** Id factory for the threads */
protected final AtomicInteger serial = new AtomicInteger(0);
@Override
protected void doStart() throws Exception {
super.doStart();
keepRunning = true;
for(int i = 0; i < workerThreads; i++) {
Thread t = new Thread(threadGroup, this, "MetricConflationThread#" + serial.incrementAndGet());
t.setDaemon(true);
t.setUncaughtExceptionHandler(this);
t.start();
}
}
protected void doStop() {
keepRunning = false;
super.doStop();
}
/**
* Enqueues an array of metrics to the conflation queue
* @param imetrics array of metrics to queue
*/
public void queue(IMetric...imetrics) {
if(imetrics==null || imetrics.length==0) return;
long now = System.currentTimeMillis() + conflationPeriod;
Map<String, IMetric> map = conflationQueue.get(now);
if(map==null) {
synchronized(conflationQueue) {
map = conflationQueue.get(now);
if(map==null) {
map = new ConcurrentHashMap<String, IMetric>();
conflationQueue.put(now, map);
}
}
}
for(IMetric metric: imetrics) {
if(!metric.getType().isLong()) {
router.queue(metric);
incr("MetricsForwarded");
}
IMetric alreadyQueued = map.get(metric.getFQN());
if(alreadyQueued==null) {
synchronized(map) {
alreadyQueued = map.get(metric.getFQN());
if(alreadyQueued!=null) {
alreadyQueued.conflate(metric);
incr("MetricsConflated");
} else {
if(conflationQueue.size() >= maxQueueSize) {
incr("MetricsDropped");
} else {
map.put(metric.getFQN(), metric);
incr("MetricsQueued");
}
}
}
}
}
}
/**
* Returns the number of periods in the queue
* @return the number of periods in the queue
*/
@ManagedAttribute
public int getQueueSize() {
return conflationQueue.size();
}
/**
* {@inheritDoc}
* @see java.lang.Runnable#run()
*/
public void run() {
while(keepRunning) {
try {
long now = System.currentTimeMillis();
Map<Long, Map<String, IMetric>> clearMetrics = conflationQueue.tailMap(now, true);
Map<String, IMetric> forwards = new HashMap<String, IMetric>();
for(Map.Entry<Long, Map<String, IMetric>> entry: clearMetrics.entrySet()) {
Map<String, IMetric> map = conflationQueue.remove(entry.getKey());
if(map!=null) {
for(Map.Entry<String, IMetric> fentry: map.entrySet()) {
IMetric existing = forwards.get(fentry.getKey());
if(existing!=null) {
try {
existing.conflate(fentry.getValue());
} catch (Exception ex) {
incr("ConflationErrors");
//error("Failed to conflate.\n\tThis metric:", existing, "\n\tThat metric:", fentry.getValue());
}
incr("MetricsConflated");
continue;
}
forwards.put(fentry.getKey(), fentry.getValue());
}
}
}
if(!forwards.isEmpty()) {
router.queue(forwards.values());
incr("MetricsForwarded", forwards.size());
forwards.clear();
}
Thread.currentThread().join(50);
} catch (Exception e) {
e.printStackTrace(System.err);
}
}
}
/**
* Returns the maximum number of metrics that can be pending conflation at a time
* @return the maximum number of metrics that can be pending conflation at a time
*/
public int getMaxQueueSize() {
return maxQueueSize;
}
/**
* Sets the maximum number of metrics that can be pending conflation at a time
* @param maxQueueSize the maximum number of metrics that can be pending conflation at a time
*/
public void setMaxQueueSize(int maxQueueSize) {
this.maxQueueSize = maxQueueSize;
}
/**
* Returns the number of worker threads to spin up to service the queue
* @return the number of worker threads to spin up to service the queue
*/
public int getWorkerThreads() {
return workerThreads;
}
/**
* Sets the number of worker threads to spin up to service the queue
* @param workerThreads the number of worker threads to spin up to service the queue
*/
public void setWorkerThreads(int workerThreads) {
this.workerThreads = workerThreads;
}
/**
* Returns the minimum period of time in ms each metric remains in the conflation queue, unless is it conflated into another metric
* @return the minimum period of time in ms each metric remains in the conflation queue, unless is it conflated into another metric
*/
public long getConflationPeriod() {
return conflationPeriod;
}
/**
* Sets the minimum period of time in ms each metric remains in the conflation queue, unless is it conflated into another metric
* @param conflationPeriod the minimum period of time in ms each metric remains in the conflation queue, unless is it conflated into another metric
*/
public void setConflationPeriod(long conflationPeriod) {
this.conflationPeriod = conflationPeriod;
}
/**
* Sets the pattern router where metrics are forwarded to
* @param router the pattern router
*/
public void setRouter(PatternRouter router) {
this.router = router;
}
/**
* Returns the number of metrics forwarded from ConflationService
* @return the number of metrics forwarded from ConflationService
*/
@ManagedMetric(category="ConflationService", metricType=MetricType.COUNTER, description="the number of metrics forwarded from ConflationService")
public long getMetricsForwarded() {
return getMetricValue("MetricsForwarded");
}
/**
* Returns the number of metrics that were dropped because ConflationService was backlogged
* @return the number of metrics that were dropped because ConflationService was backlogged
*/
@ManagedMetric(category="ConflationService", metricType=MetricType.COUNTER, description="the number of metrics that were dropped because ConflationService was backlogged")
public long getMetricsDropped() {
return getMetricValue("MetricsDropped");
}
/**
* Returns the number of metrics that were conflated
* @return the number of metrics that were conflated
*/
@ManagedMetric(category="ConflationService", metricType=MetricType.COUNTER, description="the number of metrics that were conflated")
public long getMetricsConflated() {
return getMetricValue("MetricsConflated");
}
/**
* Returns the number of metrics that were queued
* @return the number of metrics that were queued
*/
@ManagedMetric(category="ConflationService", metricType=MetricType.COUNTER, description="the number of metrics that were queued")
public long getMetricsQueued() {
return getMetricValue("MetricsQueued");
}
/**
* Returns the number of metric conflation errors
* @return the number of metric conflation errors
*/
@ManagedMetric(category="ConflationService", metricType=MetricType.COUNTER, description="the number of metric conflation errors")
public long getConflationErrors() {
return getMetricValue("ConflationErrors");
}
/**
* {@inheritDoc}
* @see org.helios.apmrouter.server.ServerComponent#getSupportedMetricNames()
*/
@Override
public Set<String> getSupportedMetricNames() {
Set<String> _metrics = new HashSet<String>(super.getSupportedMetricNames());
_metrics.add("MetricsQueued");
_metrics.add("MetricsForwarded");
_metrics.add("MetricsDropped");
_metrics.add("MetricsConflated");
_metrics.add("ConflationErrors");
return _metrics;
}
@Override
public void uncaughtException(Thread t, Throwable e) {
error("Failure in worker thread [" + t + "]", e);
}
}