package com.linkedin.databus2.core.container.monitoring.mbean;
/*
*
* Copyright 2013 LinkedIn Corp. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
import java.io.IOException;
import java.io.OutputStream;
import java.util.Hashtable;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.locks.Lock;
import javax.management.MalformedObjectNameException;
import javax.management.ObjectName;
import org.apache.avro.io.JsonEncoder;
import org.apache.avro.specific.SpecificDatumWriter;
import com.linkedin.databus.core.monitoring.mbean.AbstractMonitoringMBean;
import com.linkedin.databus2.core.container.monitoring.events.ContainerStatsEvent;
/*
* Some of the seemingly odd method-naming in this class (and related classes)
* is due to heuristics elsewhere that sniff for "count" or "num" and use that
* to distinguish between RRD's "counter" and "gauge" types (i.e., aggregated
* vs. instantaneous values). In particular, where "rate" might normally imply
* some sort of time-based value, here it's used as a label for instantaneous
* (non-aggregated) count values of threads.
*
* TODO: Change aggregation variable/method names and heuristics to use
* "counter" instead? Maybe change instantaneous values to use "gauge", too?
*/
/**
 * Monitoring MBean that exposes error counters and thread-pool statistics for a
 * single container, backed by a {@link ContainerStatsEvent} record.
 *
 * <p>Several getters are not pure reads: the "rate", "task count" and "queue size"
 * getters sample the corresponding {@link ThreadPoolExecutor} on every call, store
 * the sample in the backing event and (where applicable) update a running maximum.
 * Those getters therefore take the write lock; plain readers take the read lock.
 *
 * <p>Either thread pool may be {@code null}. In that case the "rate" getters report
 * -1 and the other sampled metrics simply return the value currently stored in the
 * event.
 */
public class ContainerStats extends AbstractMonitoringMBean<ContainerStatsEvent>
                            implements ContainerStatsMBean
{
  /** Pool sampled for the {@code io*} metrics; may be null. */
  private final ThreadPoolExecutor _ioThreadPool;
  /** Pool sampled for the {@code worker*} metrics; may be null. */
  private final ThreadPoolExecutor _workerThreadPool;

  /**
   * Creates the bean, records the container ID in the backing event and resets
   * all statistics.
   *
   * @param containerId      ID stored in the event and used in the JMX object name
   * @param enabled          forwarded to the superclass constructor
   * @param threadSafe       forwarded to the superclass constructor
   * @param initData         forwarded to the superclass constructor
   * @param ioThreadPool     pool to sample for I/O metrics; may be null
   * @param workerThreadPool pool to sample for worker metrics; may be null
   */
  public ContainerStats(int containerId, boolean enabled, boolean threadSafe,
                        ContainerStatsEvent initData, ThreadPoolExecutor ioThreadPool,
                        ThreadPoolExecutor workerThreadPool)
  {
    super(enabled, threadSafe, initData);
    _event.containerId = containerId;
    _ioThreadPool = ioThreadPool;
    _workerThreadPool = workerThreadPool;
    reset();
  }

  /** @return total number of errors registered via {@link #registerError(Throwable)} */
  @Override
  public long getErrorCount()
  {
    Lock readLock = acquireReadLock();
    try
    {
      return _event.errorCount;
    }
    finally
    {
      releaseLock(readLock);
    }
  }

  /** @return number of registered errors that were not classified as request-processing errors */
  @Override
  public long getErrorUncaughtCount()
  {
    Lock readLock = acquireReadLock();
    try
    {
      return _event.errorUncaughtCount;
    }
    finally
    {
      releaseLock(readLock);
    }
  }

  /** @return number of registered errors whose class simple name is "RequestProcessingException" */
  @Override
  public long getErrorRequestProcessingCount()
  {
    Lock readLock = acquireReadLock();
    try
    {
      return _event.errorRequestProcessingCount;
    }
    finally
    {
      releaseLock(readLock);
    }
  }

  /** @return wall-clock timestamp (ms) recorded by the most recent {@link #resetData()} */
  @Override
  public long getTimestampLastResetMs()
  {
    Lock readLock = acquireReadLock();
    try
    {
      return _event.timestampLastResetMs;
    }
    finally
    {
      releaseLock(readLock);
    }
  }

  /** @return milliseconds elapsed between the last reset timestamp and the current wall-clock time */
  @Override
  public long getTimeSinceLastResetMs()
  {
    Lock readLock = acquireReadLock();
    try
    {
      return System.currentTimeMillis() - _event.timestampLastResetMs;
    }
    finally
    {
      releaseLock(readLock);
    }
  }

  /** @return largest I/O active-thread sample seen since the last reset */
  @Override
  public int getIoThreadMax()
  {
    Lock readLock = acquireReadLock();
    try
    {
      return _event.ioThreadMax;
    }
    finally
    {
      releaseLock(readLock);
    }
  }

  /**
   * Samples the number of active I/O threads, stores it and updates the running maximum.
   *
   * @return the sampled active-thread count, or -1 if there is no I/O pool
   */
  @Override
  public int getIoThreadRate()
  {
    Lock writeLock = acquireWriteLock();
    try
    {
      _event.ioThreadRate = null != _ioThreadPool ? _ioThreadPool.getActiveCount() : -1;
      _event.ioThreadMax = Math.max(_event.ioThreadRate, _event.ioThreadMax);
      return _event.ioThreadRate;
    }
    finally
    {
      releaseLock(writeLock);
    }
  }

  /**
   * Samples the I/O pool's cumulative task count, stores it and updates the maximum
   * per-call task delta.
   *
   * @return the stored I/O task count (unchanged if there is no I/O pool)
   */
  @Override
  public long getIoTaskCount()
  {
    Lock writeLock = acquireWriteLock();
    try
    {
      if (null != _ioThreadPool)
      {
        long taskCount = _ioThreadPool.getTaskCount();
        // Don't update max on the very first call; it will be artificially high
        // if there was a long delay between startup and the first call.
        if (_event.ioTaskCount > 0)
        {
          _event.ioTaskMax = updatedTaskMax(taskCount - _event.ioTaskCount, _event.ioTaskMax);
        }
        _event.ioTaskCount = taskCount;
      }
      return _event.ioTaskCount;
    }
    finally
    {
      releaseLock(writeLock);
    }
  }

  /** @return largest per-call I/O task delta recorded by {@link #getIoTaskCount()} since the last reset */
  @Override
  public int getIoTaskMax()
  {
    Lock readLock = acquireReadLock();
    try
    {
      return _event.ioTaskMax;
    }
    finally
    {
      releaseLock(readLock);
    }
  }

  // We expect this metric to be zero virtually all the time since the time tasks
  // wait in the queue is vanishingly small in comparison to the update (call)
  // rate of the metric.  (Or so we believe, anyway.  To add timing instrumentation,
  // one would need to subclass BlockingQueue and track the entry and exit of each
  // item in the queue.)
  /**
   * Samples the current size of the I/O pool's work queue and stores it.
   *
   * @return the stored queue size (unchanged if there is no I/O pool)
   */
  @Override
  public int getIoTaskQueueSize()
  {
    Lock writeLock = acquireWriteLock();
    try
    {
      if (null != _ioThreadPool)
      {
        _event.ioTaskQueueSize = _ioThreadPool.getQueue().size();
      }
      return _event.ioTaskQueueSize;  // FIXME:  prefer to return -1 if no _ioThreadPool?
    }
    finally
    {
      releaseLock(writeLock);
    }
  }

  /** @return largest worker active-thread sample seen since the last reset */
  @Override
  public int getWorkerThreadMax()
  {
    Lock readLock = acquireReadLock();
    try
    {
      return _event.workerThreadMax;
    }
    finally
    {
      releaseLock(readLock);
    }
  }

  /**
   * Samples the number of active worker threads, stores it and updates the running maximum.
   *
   * @return the sampled active-thread count, or -1 if there is no worker pool
   */
  @Override
  public int getWorkerThreadRate()
  {
    Lock writeLock = acquireWriteLock();
    try
    {
      // Guarded like the I/O pool so that a missing worker pool does not throw.
      _event.workerThreadRate = null != _workerThreadPool ? _workerThreadPool.getActiveCount() : -1;
      _event.workerThreadMax = Math.max(_event.workerThreadRate, _event.workerThreadMax);
      return _event.workerThreadRate;
    }
    finally
    {
      releaseLock(writeLock);
    }
  }

  /**
   * Samples the worker pool's cumulative task count, stores it and updates the maximum
   * per-call task delta.
   *
   * @return the stored worker task count (unchanged if there is no worker pool)
   */
  @Override
  public long getWorkerTaskCount()
  {
    Lock writeLock = acquireWriteLock();
    try
    {
      if (null != _workerThreadPool)
      {
        long taskCount = _workerThreadPool.getTaskCount();
        // Don't update max on the very first call; it will be artificially high
        // if there was a long delay between startup and the first call.
        if (_event.workerTaskCount > 0)
        {
          _event.workerTaskMax = updatedTaskMax(taskCount - _event.workerTaskCount,
                                                _event.workerTaskMax);
        }
        _event.workerTaskCount = taskCount;
      }
      return _event.workerTaskCount;
    }
    finally
    {
      releaseLock(writeLock);
    }
  }

  /** @return largest per-call worker task delta recorded by {@link #getWorkerTaskCount()} since the last reset */
  @Override
  public int getWorkerTaskMax()
  {
    Lock readLock = acquireReadLock();
    try
    {
      return _event.workerTaskMax;
    }
    finally
    {
      releaseLock(readLock);
    }
  }

  // We expect this metric to be zero virtually all the time since the time tasks
  // wait in the queue is vanishingly small in comparison to the update (call)
  // rate of the metric.  (Or so we believe, anyway.  To add timing instrumentation,
  // one would need to subclass BlockingQueue and track the entry and exit of each
  // item in the queue.)
  /**
   * Samples the current size of the worker pool's work queue and stores it.
   *
   * @return the stored queue size (unchanged if there is no worker pool)
   */
  @Override
  public int getWorkerTaskQueueSize()
  {
    Lock writeLock = acquireWriteLock();
    try
    {
      if (null != _workerThreadPool)
      {
        _event.workerTaskQueueSize = _workerThreadPool.getQueue().size();
      }
      return _event.workerTaskQueueSize;
    }
    finally
    {
      releaseLock(writeLock);
    }
  }

  /**
   * Counts an error. Every call increments the total; the error is additionally counted
   * as a request-processing error if its class simple name is "RequestProcessingException",
   * and as an uncaught error otherwise (including when {@code error} is null).
   *
   * @param error the error to register; may be null
   */
  public void registerError(Throwable error)
  {
    Lock writeLock = acquireWriteLock();
    try
    {
      ++_event.errorCount;
      //TODO Move the RequestProcessingException class to databus-container/core and use instanceof (DDSDBUS-104)
      // The null check keeps the counters consistent instead of throwing after
      // errorCount has already been incremented.
      if (null != error
          && error.getClass().getSimpleName().equals("RequestProcessingException"))
      {
        ++_event.errorRequestProcessingCount;
      }
      else
      {
        ++_event.errorUncaughtCount;
      }
    }
    finally
    {
      releaseLock(writeLock);
    }
  }

  /**
   * @param out stream the encoder writes to
   * @return a JSON encoder bound to the backing event's schema
   */
  @Override
  public JsonEncoder createJsonEncoder(OutputStream out) throws IOException
  {
    return new JsonEncoder(_event.getSchema(), out);
  }

  /**
   * Builds the JMX object name from the base MBean properties plus a
   * {@code containerId} property.
   */
  @Override
  public ObjectName generateObjectName() throws MalformedObjectNameException
  {
    Hashtable<String, String> mbeanProps = generateBaseMBeanProps();
    mbeanProps.put("containerId", Integer.toString(_event.containerId));

    return new ObjectName(AbstractMonitoringMBean.JMX_DOMAIN, mbeanProps);
  }

  /**
   * Stamps the reset time and zeroes every statistic in the backing event.
   * The container ID is left untouched.
   */
  @Override
  protected void resetData()
  {
    _event.timestampLastResetMs = System.currentTimeMillis();
    _event.ioThreadRate = 0;
    _event.ioThreadMax = 0;
    _event.ioTaskCount = 0;
    _event.ioTaskMax = 0;
    _event.ioTaskQueueSize = 0;
    _event.workerThreadRate = 0;
    _event.workerThreadMax = 0;
    _event.workerTaskCount = 0;
    _event.workerTaskMax = 0;
    _event.workerTaskQueueSize = 0;
    _event.errorCount = 0;
    _event.errorRequestProcessingCount = 0;
    _event.errorUncaughtCount = 0;
  }

  /**
   * Copies every field of the backing event into {@code event}.
   *
   * @param event destination record
   */
  @Override
  protected void cloneData(ContainerStatsEvent event)
  {
    event.containerId = _event.containerId;
    event.timestampLastResetMs = _event.timestampLastResetMs;
    event.ioThreadRate = _event.ioThreadRate;
    event.ioThreadMax = _event.ioThreadMax;
    event.ioTaskCount = _event.ioTaskCount;
    event.ioTaskMax = _event.ioTaskMax;
    event.ioTaskQueueSize = _event.ioTaskQueueSize;
    event.workerThreadRate = _event.workerThreadRate;
    event.workerThreadMax = _event.workerThreadMax;
    event.workerTaskCount = _event.workerTaskCount;
    event.workerTaskMax = _event.workerTaskMax;
    event.workerTaskQueueSize = _event.workerTaskQueueSize;
    event.errorCount = _event.errorCount;
    event.errorRequestProcessingCount = _event.errorRequestProcessingCount;
    event.errorUncaughtCount = _event.errorUncaughtCount;
  }

  /** @return a fresh, empty event record */
  @Override
  protected ContainerStatsEvent newDataEvent()
  {
    return new ContainerStatsEvent();
  }

  /** @return an Avro datum writer for {@link ContainerStatsEvent} */
  @Override
  protected SpecificDatumWriter<ContainerStatsEvent> getAvroWriter()
  {
    return new SpecificDatumWriter<ContainerStatsEvent>(ContainerStatsEvent.class);
  }

  /**
   * Merges another container's statistics into this bean: instantaneous values
   * (thread rates, queue sizes) are overwritten, maxima are combined with
   * {@code max}, and counters are summed. Anything that is not a
   * {@link ContainerStatsEvent} (including null), or that belongs to a different
   * container, is logged and ignored.
   *
   * @param eventData the event to merge; expected to be a {@link ContainerStatsEvent}
   */
  @Override
  protected void doMergeStats(Object eventData)
  {
    if (! (eventData instanceof ContainerStatsEvent))
    {
      // instanceof is false for null, so guard the getClass() call used for logging.
      LOG.warn("Attempt to merge unknown event class: "
               + (null == eventData ? "null" : eventData.getClass().getName()));
      return;
    }
    ContainerStatsEvent e = (ContainerStatsEvent)eventData;

    // A non-positive container ID on this bean disables the ID check, which allows
    // aggregation across multiple containers.
    if (_event.containerId > 0 && e.containerId != _event.containerId)
    {
      LOG.warn("Attempt to merge data for a different relay " + e.containerId);
      return;
    }

    _event.ioThreadRate = e.ioThreadRate;
    _event.ioThreadMax = Math.max(e.ioThreadMax, _event.ioThreadMax);
    _event.ioTaskCount += e.ioTaskCount;
    _event.ioTaskMax = Math.max(e.ioTaskMax, _event.ioTaskMax);
    _event.ioTaskQueueSize = e.ioTaskQueueSize;
    _event.workerThreadRate = e.workerThreadRate;
    _event.workerThreadMax = Math.max(e.workerThreadMax, _event.workerThreadMax);
    _event.workerTaskCount += e.workerTaskCount;
    _event.workerTaskMax = Math.max(e.workerTaskMax, _event.workerTaskMax);
    _event.workerTaskQueueSize = e.workerTaskQueueSize;
    _event.errorCount += e.errorCount;
    _event.errorRequestProcessingCount += e.errorRequestProcessingCount;
    _event.errorUncaughtCount += e.errorUncaughtCount;
  }

  /**
   * Returns the larger of {@code taskDelta} and {@code currentMax}, clamped to
   * {@link Integer#MAX_VALUE} so that narrowing to int cannot wrap around.
   */
  private static int updatedTaskMax(long taskDelta, int currentMax)
  {
    long newMax = Math.max(taskDelta, (long) currentMax);
    return (int) Math.min(newMax, (long) Integer.MAX_VALUE);
  }
}