/*
* RHQ Management Platform
* Copyright (C) 2005-2008 Red Hat, Inc.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package org.rhq.enterprise.communications.command.server;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.rhq.enterprise.communications.command.CommandResponse;
/**
* Contains the statistics collected by the {@link CommandProcessor}.
*
* The processor object is the only object that can update metric data
* held in this object and it will do so in a thread-safe manner.
* Objects that read data from this object automatically do so in a thread-safe manner,
* but the callers may get inconsistent data if the processor updates data
* in between calling multiple getters - but this isn't dangerous
* so we'll leave it as is (the individual getters make it easy to expose
* this information as individual metrics so we can graph them).
* But the locking done here helps minimize those instances where metric data
* looks inconsistent to callers.
*
* @author John Mazzitelli
*/
public class CommandProcessorMetrics implements Serializable {
    private static final long serialVersionUID = 1L;

    /**
     * The total number of incoming commands this command processor has received and successfully processed.
     */
    long numberSuccessfulCommands = 0L;

    /**
     * The total number of commands that were received but were not successfully processed due to an error.
     */
    long numberFailedCommands = 0L;

    /**
     * The total number of commands that were not permitted to execute due to high concurrency.
     */
    long numberDroppedCommands = 0L;

    /**
     * The total number of commands that were not permitted to execute due to processing suspension.
     */
    long numberNotProcessedCommands = 0L;

    /**
     * The average time (in milliseconds) that successful commands take to complete.
     */
    long averageExecutionTime = 0L;

    /**
     * Call time data for individual command types (or subtypes if remote pojo executions).
     */
    private final Map<String, Calltime> calltimes = new HashMap<String, Calltime>();

    /**
     * The lock that will ensure thread-safety. Not final because {@link #writeLock()}
     * replaces it if acquiring the lock fails in an unexpected way.
     */
    private ReadWriteLock lock = new ReentrantReadWriteLock();

    /**
     * Resets all the metric data to 0 and clears the calltime data.
     *
     * This is best-effort thread-safe: the write lock is requested first so readers do not see
     * a half-cleared object, but if the lock cannot be obtained (timeout or interrupt) the data
     * is cleared anyway, in which case concurrent readers may briefly see some counters at 0
     * and others still at their old values.
     */
    public void clear() {
        boolean locked = writeLock();
        try {
            numberSuccessfulCommands = 0L;
            numberFailedCommands = 0L;
            numberDroppedCommands = 0L;
            numberNotProcessedCommands = 0L;
            averageExecutionTime = 0L;
            calltimes.clear();
        } finally {
            // only release what we actually hold; unlocking a lock we do not own would throw
            if (locked) {
                writeUnlock();
            }
        }
    }

    /**
     * Return the calltime data that includes the different command types/pojo invocations.
     * Note that the calltime min/max/avg times are only for the calls that were successful.
     *
     * @return a new map holding a snapshot of the calltime entries; the {@link Calltime}
     *         values themselves are shared with this object, not copied
     */
    public Map<String, Calltime> getCallTimeData() {
        // Just do a shallow copy - this allows us to avoid exceptions when we want to concurrently
        // traverse the calltimes and add to them, but it avoids unnecessary duplication of the
        // Calltime objects (thus we are more efficient both in performance and space). Note that a
        // Calltime is allowed to be concurrently accessed - even though it is possible you could try
        // to get its data while new min/max/avg/count values are being written. Even though the data
        // won't be entirely accurate and up-to-date in that case, it is good enough for our purposes.
        boolean locked = readLock();
        try {
            return new HashMap<String, Calltime>(calltimes);
        } finally {
            if (locked) {
                readUnlock();
            }
        }
    }

    /**
     * Returns the total number of commands that were received but failed to be processed successfully. This count is
     * incremented when a command was executed by its command service but the command response was
     * {@link CommandResponse#isSuccessful() not successful}. This does not count
     * {@link #getNumberDroppedCommands() dropped} or
     * {@link #getNumberNotProcessedCommands() unprocessed} commands.
     *
     * @return count of failed commands
     */
    public long getNumberFailedCommands() {
        boolean locked = readLock();
        try {
            return numberFailedCommands;
        } finally {
            if (locked) {
                readUnlock();
            }
        }
    }

    /**
     * Returns the total number of commands that were received but were not permitted to be executed and were dropped.
     * This normally occurs when the limit of concurrent command invocations has been reached.
     *
     * @return count of commands not permitted to complete
     */
    public long getNumberDroppedCommands() {
        boolean locked = readLock();
        try {
            return numberDroppedCommands;
        } finally {
            if (locked) {
                readUnlock();
            }
        }
    }

    /**
     * Returns the total number of commands that were received but were not processed.
     * This normally occurs when global processing of commands has been suspended.
     *
     * @return count of commands not processed.
     */
    public long getNumberNotProcessedCommands() {
        boolean locked = readLock();
        try {
            return numberNotProcessedCommands;
        } finally {
            if (locked) {
                readUnlock();
            }
        }
    }

    /**
     * Returns the total number of commands that were received and processed successfully. This count is incremented
     * when a command was executed by its command service and the command response was
     * {@link CommandResponse#isSuccessful() successful}.
     *
     * @return count of commands successfully processed
     */
    public long getNumberSuccessfulCommands() {
        boolean locked = readLock();
        try {
            return numberSuccessfulCommands;
        } finally {
            if (locked) {
                readUnlock();
            }
        }
    }

    /**
     * Returns the average execution time (in milliseconds) it took to execute all
     * {@link #getNumberSuccessfulCommands() successful commands}.
     *
     * @return average execute time for all successful commands.
     */
    public long getAverageExecutionTime() {
        boolean locked = readLock();
        try {
            return averageExecutionTime;
        } finally {
            if (locked) {
                readUnlock();
            }
        }
    }

    /**
     * Add a newly collected metric value for a particular type of invocation to
     * the stored calltime data. The invocation count is always incremented. The min/max/avg
     * data is updated only if this represents a successful call (i.e.
     * <code>unsuccessfulReason</code> is <code>null</code>); otherwise the matching
     * failed/dropped/not-processed counter is incremented instead.
     * We do not want to skew the min/max/avg results from failed commands because
     * they almost always fail-fast and will have very fast execution times.
     *
     * This is package-scoped because only the CommandProcessor should be
     * adding calltime data to this object. This method does not acquire the lock itself;
     * the caller is expected to ensure thread-safety (see {@link #writeLock()}) just like
     * it does when updating the other metric data.
     *
     * @param type the type of invocation whose min/max/avg is to be stored
     * @param executionTime the time, in milliseconds, that the type invocation took
     * @param unsuccessfulReason will be non-null if the invocation that was executed
     *                           actually resulted in a failure - the reason for the unsuccessful invocation
     *                           is the value of the parameter. This will be <code>null</code> if
     *                           the invocation succeeded
     */
    void addCallTimeData(String type, long executionTime, UnsuccessfulReason unsuccessfulReason) {
        Calltime calltime = calltimes.get(type);
        if (calltime == null) {
            calltime = new Calltime();
            calltimes.put(type, calltime);
        }

        calltime.count++;

        if (unsuccessfulReason != null) {
            if (unsuccessfulReason == UnsuccessfulReason.DROPPED) {
                calltime.dropped++;
            } else if (unsuccessfulReason == UnsuccessfulReason.NOT_PROCESSED) {
                calltime.unprocessed++;
            } else {
                calltime.failures++;
            }
        } else {
            if (executionTime > calltime.max) {
                calltime.max = executionTime;
            }
            if (executionTime < calltime.min) {
                calltime.min = executionTime;
            }

            // running average over successful calls only; count was already incremented above,
            // so "successes" includes this call and is at least 1 here
            long successes = calltime.getSuccesses();
            calltime.avg = (((successes - 1) * calltime.avg) + executionTime) / successes;
        }
    }

    /**
     * The CommandProcessor must call this prior to updating the metric data in this object.
     *
     * This is package-scoped so only the CommandProcessor can call it.
     *
     * @return if <code>true</code> the write lock was acquired and must be unlocked;
     *         if <code>false</code>, the write lock failed to be acquired
     */
    boolean writeLock() {
        // we try to be good stewards of this object by trying to be thread safe
        // but do not block the processor here for too long - we don't want to
        // prevent the processor from processing messages for too long just to
        // synchronize metric data. If we timeout or are interrupted, return immediately.
        // Note also that we don't want to throw any exceptions to the caller, try our
        // best to be fault tolerant in here.
        try {
            return lock.writeLock().tryLock(10, TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            // restore the interrupt status for the caller; don't wait any longer
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            lock = new ReentrantReadWriteLock(); // something really bad happened, let's create a new one to be safe
        }
        return false;
    }

    /**
     * CommandProcessor needs to call this to unlock the write lock.
     * It never throws, even if the calling thread does not hold the write lock.
     *
     * This is package-scoped so only the CommandProcessor can call it.
     */
    void writeUnlock() {
        try {
            lock.writeLock().unlock();
        } catch (Exception ignored) {
            // Note that we don't want to throw any exceptions to the caller, try our
            // best to be fault tolerant in here. This exception occurred probably because
            // the caller didn't have the lock due to a timeout or interrupt in writeLock.
        }
    }

    /**
     * Tries to acquire the read lock, waiting up to 30 seconds.
     *
     * @return <code>true</code> if the read lock was acquired and must be released with
     *         {@link #readUnlock()}; <code>false</code> if it could not be acquired
     */
    private boolean readLock() {
        // we try to be good stewards of this object by trying to be thread safe
        // but if we can't get the lock, just return and let's read the data
        // unlocked. Nothing dangerous will happen, at worst we might read
        // inconsistent metric data for this thread, nothing too serious to worry about.
        try {
            return lock.readLock().tryLock(30, TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            // tryLock cleared the interrupt status when it threw; put it back so that
            // merely reading a metric never hides an interrupt from the calling thread
            Thread.currentThread().interrupt();
        } catch (Exception ignored) {
            // best effort only - fall through and let the caller read unlocked
        }
        return false;
    }

    /**
     * Releases the read lock. It never throws, even if the calling thread does not hold the read lock.
     */
    private void readUnlock() {
        try {
            lock.readLock().unlock();
        } catch (Exception ignored) {
            // Note that we don't want to throw any exceptions to the caller, try our
            // best to be fault tolerant in here. This exception occurred probably because
            // the caller didn't have the lock due to a timeout or interrupt in readLock.
        }
    }

    /**
     * The reasons why an invocation can be counted as something other than a success.
     */
    public enum UnsuccessfulReason {
        /** the command encountered an error that caused it to fail */
        FAILED,

        /** the server is currently under high load and dropped the command */
        DROPPED,

        /** the server is in maintenance mode and not currently accepting any commands */
        NOT_PROCESSED
    }

    /**
     * Used to store the minimum, maximum and average times (in milliseconds)
     * for invocations to a particular command. The count of the number
     * of times an invocation was executed is also kept. Note that the min/max/avg
     * times will only be for successful commands. Until the first successful call is
     * recorded, the minimum is {@link Long#MAX_VALUE}, the maximum is {@link Long#MIN_VALUE}
     * and the average is 0.
     *
     * NOTE(review): this is a non-static inner class, so every instance keeps a reference to
     * its enclosing metrics object. It is left that way here to keep the existing API and
     * serialized form unchanged - confirm before converting it to a static nested class.
     */
    public class Calltime implements Serializable {
        private static final long serialVersionUID = 1L;

        private long count = 0;
        private long failures = 0;
        private long dropped = 0;
        private long unprocessed = 0;
        private long min = Long.MAX_VALUE;
        private long max = Long.MIN_VALUE;
        private long avg = 0;

        /** @return total number of invocations recorded, successful or not */
        public long getCount() {
            return count;
        }

        /** @return number of invocations recorded as failed */
        public long getFailures() {
            return failures;
        }

        /** @return number of invocations recorded as dropped */
        public long getDropped() {
            return dropped;
        }

        /** @return number of invocations recorded as not processed */
        public long getNotProcessed() {
            return unprocessed;
        }

        /** @return number of invocations that were neither failed, dropped nor unprocessed */
        public long getSuccesses() {
            return count - (failures + dropped + unprocessed); // ok if not thread-safe, good enough for what we need
        }

        /** @return shortest execution time, in milliseconds, of the successful invocations */
        public long getMinimum() {
            return min;
        }

        /** @return longest execution time, in milliseconds, of the successful invocations */
        public long getMaximum() {
            return max;
        }

        /** @return average execution time, in milliseconds, of the successful invocations */
        public long getAverage() {
            return avg;
        }

        /**
         * @return the values formatted as <code>count:failures:dropped:unprocessed:min:max:avg</code>
         */
        @Override
        public String toString() {
            return "" + count + ':' + failures + ':' + dropped + ':' + unprocessed + ':' + min + ':' + max + ':' + avg;
        }
    }
}