/*
* Copyright 2012 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.clientmetrics;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
import org.joda.time.DateTime;
/**
* Simple system for collecting metrics in client/test applications.
*
* <p>The intention is that metrics are immediately interpretable while the test is running. For this purpose,
* each 30 second interval, a summary table is printed with the metric values of the last 30 seconds, as well
* as the running total averages.
*
* <p>This class in itself is usable in any context, independent of Lily. Besides the metrics that you report
* yourself, it is also possible to collect some system/HBase metrics at the end of each interval. These
* are retrieved via HBaseAdmin & JMX. See {@link HBaseMetricsPlugin}.
*
* <p>If afterwards you'd like some charts of the metrics, check out
* {@link org.lilyproject.clientmetrics.postproc.MetricsReportTool}.
*
* <p>The metrics files produced by this class are human readable but also machine readable, they can
* be parsed using {@link org.lilyproject.clientmetrics.postproc.MetricsParser}.
*/
public class Metrics {
private Map<String, Metric> metrics = new TreeMap<String, Metric>();
private int intervalDuration = 1000 * 30;
private DateTime intervalStartedAt;
private DateTime startedAt;
private PrintStream reportStream;
private MetricsPlugin plugin;
private boolean inReport;
private int threadCount = 1;
private Table table;
private boolean inHeader;
private boolean inFooter;
private static final Pattern NAME_PATTERN = Pattern.compile("\\w+");
public Metrics() {
this(System.out, null);
}
public Metrics(File file, MetricsPlugin plugin) throws FileNotFoundException {
this(new PrintStream(new FileOutputStream(file)), plugin);
}
public Metrics(PrintStream reportStream, MetricsPlugin plugin) {
this.reportStream = reportStream;
this.plugin = plugin == null ? new NullPlugin() : plugin;
table = new Table(reportStream);
table.addColumn(40, "Name", "s");
table.addColumn(-1, "Op count", "d");
table.addColumn(-1, "Average", "f");
table.addColumn(-1, "Median", "f");
table.addColumn(-1, "Minimum", "f");
table.addColumn(-1, "Maximum", "f");
table.addColumn(-1, "Alltime ops", "d");
table.addColumn(-1, "Alltime avg", "f");
table.finishDefinition();
}
public void setThreadCount(int threadCount) {
this.threadCount = threadCount;
}
public void setIntervalDuration(int duration) {
this.intervalDuration = duration;
}
public void finish() {
// print one last report
printReport();
printGlobalOpsPerSec();
}
/**
* Does an in-between reset of the metrics. This is useful if your test consists of multiple phases
* which each have different metrics.
*/
public synchronized void restart() {
printReport();
printGlobalOpsPerSec();
metrics.clear();
intervalStartedAt = null;
startedAt = null;
reportStream.println("");
reportStream.println("Metrics have been reset");
reportStream.println("");
}
public void startTest(String testName, String testDescription) {
// The testName should be something simple, as it is used by MetricsReportTool for filenames.
if (!NAME_PATTERN.matcher(testName).matches()) {
throw new IllegalArgumentException("Invalid test name, should be alphanumeric only: " + testName);
}
reportStream.println();
reportStream.println("===================================================================================================");
reportStream.println(testName + ": " + testDescription);
reportStream.println("===================================================================================================");
reportStream.println();
}
public void startHeader() {
reportStream.println("~~begin header");
inHeader = true;
}
public void endHeader() {
reportStream.println("~~end header");
inHeader = false;
}
public void startFooter() {
reportStream.println("~~begin footer");
inFooter = true;
}
public void endFooter() {
reportStream.println("~~end footer");
}
public int getIntervalDuration() {
return intervalDuration;
}
public synchronized void increment(String name, double value) {
increment(name, null, 1, value);
}
public synchronized void increment(String name, String type, double value) {
increment(name, type, 1, value);
}
/**
*
* @param name a name for this metric. Can contain any character but colon. The symbol '@' has a special
* meaning towards post-processing tools such as
* {@link org.lilyproject.clientmetrics.postproc.MetricsReportTool}: it will group metrics with
* the same text before the @ symbol.
* @param type optional, can be null. If specified, number of operations per second will be summarized
* for all metrics of the same type. The value must represent a timing in milliseconds. Only
* alphanumeric characters allowed.
* @param operations the number of operations performed, usually 1, but sometimes operations are performed
* in groups
* @param value the value for the metric, such as a duration (typically in ms), an operation count, or whatever
* quantity you want to keep track of such as free memory, cpu load, ...
*/
public synchronized void increment(String name, String type, int operations, double value) {
if (type != null && !NAME_PATTERN.matcher(type).matches()) {
// Being strict here, can be helpful for further reporting
throw new IllegalArgumentException("Invalid type name, should be alphanumeric only: " + type);
}
if (name.indexOf(':') > 0) {
// Colons are used to separate the type from the name in the output, and type is optional, so if
// would allow colon in the name we would not be able to know what it stands for
throw new IllegalArgumentException("Usage of the colon character is reserved in the metric name: " + name);
}
if (intervalStartedAt == null) {
// it's our very first value
intervalStartedAt = new DateTime();
startedAt = new DateTime();
}
if (!inReport && System.currentTimeMillis() - intervalStartedAt.getMillis() >= intervalDuration) {
printReport();
for (Metric metric : metrics.values()) {
metric.rollInterval();
}
intervalStartedAt = new DateTime();
}
Metric metric = metrics.get(name);
if (metric == null) {
metric = new Metric();
metric.type = type;
metrics.put(name, metric);
}
metric.add(operations, value);
plugin.afterIncrement(this);
}
public void printReport() {
if (intervalStartedAt == null) {
return;
}
inReport = true;
plugin.beforeReport(this);
long now = System.currentTimeMillis();
long actualIntervalDuration = now - intervalStartedAt.getMillis();
table.fullSepLine();
table.crossColumn("Interval started at: " + intervalStartedAt + " (duration: " + (actualIntervalDuration / 1000) + "s).");
table.crossColumn("Measurements started at: " + startedAt + " (duration: " + formatDuration(now - startedAt.getMillis()) + ")");
List<String> extra = plugin.getExtraInfoLines();
for (String line : extra) {
table.crossColumn(line);
}
table.columnSepLine();
table.titles();
table.columnSepLine();
Map<String, CountAndValue> statByType = new TreeMap<String, CountAndValue>();
for (Map.Entry<String, Metric> entry : metrics.entrySet()) {
String name = entry.getKey();
Metric metric = entry.getValue();
if (metric.type != null) {
name = metric.type + ":" + name;
CountAndValue stat = statByType.get(metric.type);
if (stat == null) {
stat = new CountAndValue();
statByType.put(metric.type, stat);
}
stat.count += metric.getIntervalCount();
stat.value += metric.getIntervalValue();
}
table.columns(name, metric.getIntervalCount(), metric.getIntervalAverage(), metric.getIntervalMedian(),
metric.getIntervalMin(), metric.getIntervalMax(), metric.getAllTimeCount(),
metric.getAllTimeAverage());
}
table.columnSepLine();
if (statByType.size() > 0) {
int i = 0;
for (Map.Entry<String, CountAndValue> entry : statByType.entrySet()) {
if (entry.getValue().count == 0) {
continue;
}
i++;
// The real time spent in the operations is the time counted together from multiple threads,
// hence more than the actual elapsed time. E.g. in an interval of 30s, each thread runs 30s,
// hence with 3 threads there are 90 seconds spent. So usually the interval ops/sec will give
// a better picture.
double opsPerSec = (((double)entry.getValue().count) / (entry.getValue().value)) * 1000d;
double opsPerSecInt = (((double)entry.getValue().count) / ((double)actualIntervalDuration)) * 1000d;
table.crossColumn(String.format("%1$s ops/sec: %5$.2f real (=%3$.2fx%4$d), %2$.2f interval",
entry.getKey(), opsPerSecInt, opsPerSec, threadCount, opsPerSec * ((double)threadCount)));
}
if (i > 0) {
table.fullSepLine();
}
}
reportStream.flush();
inReport = false;
}
private void printGlobalOpsPerSec() {
if (startedAt == null) {
return;
}
double testDuration = System.currentTimeMillis() - startedAt.getMillis();
Map<String, CountAndValue> statByType = new TreeMap<String, CountAndValue>();
for (Map.Entry<String, Metric> entry : metrics.entrySet()) {
Metric metric = entry.getValue();
if (metric.type != null) {
CountAndValue stat = statByType.get(metric.type);
if (stat == null) {
stat = new CountAndValue();
statByType.put(metric.type, stat);
}
stat.count += metric.getAllTimeCount();
stat.value += metric.getAllTimeValue();
}
}
if (statByType.size() > 0) {
reportStream.println();
reportStream.println("Global ops/sec of the test:");
for (Map.Entry<String, CountAndValue> entry : statByType.entrySet()) {
if (entry.getValue().count == 0) {
continue;
}
double opsPerSec = (((double)entry.getValue().count) / (entry.getValue().value)) * 1000d;
double opsPerSecInt = (((double)entry.getValue().count) / (testDuration)) * 1000d;
reportStream.printf("%1$s ops/sec: %5$.2f real (=%3$.2fx%4$d), %2$.2f interval\n", entry.getKey(),
opsPerSecInt, opsPerSec, threadCount, opsPerSec * ((double)threadCount));
}
reportStream.println();
}
}
private String formatDuration(long millis) {
long seconds = millis / 1000;
long secondsOverflow = seconds % 60;
long minutes = (seconds - secondsOverflow) / 60;
long minutesOverflow = minutes % 60;
long hours = (minutes - minutesOverflow) / 60;
return String.format("%1$02d:%2$02d:%3$02d", hours, minutesOverflow, secondsOverflow);
}
private static class Metric {
private List<Double> values = new ArrayList<Double>(1000);
String type;
int intervalCount;
double intervalValue;
double intervalMin;
double intervalMax;
int allTimeCount;
double allTimeValue;
Metric() {
rollInterval();
}
/**
*
* @param value will most often be a duration in ms, but could be other kinds of values as well.
*/
public void add(int operations, double value) {
if (operations == 0) {
return;
}
intervalCount += operations;
intervalValue += value;
allTimeCount += operations;
allTimeValue += value;
double valuePerOp = value / (double)operations;
if (valuePerOp < intervalMin) {
intervalMin = valuePerOp;
}
if (valuePerOp > intervalMax) {
intervalMax = valuePerOp;
}
values.add(valuePerOp);
}
public void rollInterval() {
intervalCount = 0;
intervalValue = 0;
intervalMin = Integer.MAX_VALUE;
intervalMax = 0;
values.clear();
}
public int getIntervalCount() {
return intervalCount;
}
public double getIntervalValue() {
return intervalValue;
}
public int getAllTimeCount() {
return allTimeCount;
}
public double getIntervalAverage() {
return intervalCount == 0 ? 0 : intervalValue / (double)intervalCount;
}
public double getAllTimeAverage() {
return allTimeCount == 0 ? 0 : allTimeValue / (double)allTimeCount;
}
public double getAllTimeValue() {
return allTimeValue;
}
public double getIntervalMin() {
return intervalCount == 0 ? 0 : intervalMin;
}
public double getIntervalMax() {
return intervalMax;
}
public double getIntervalMedian() {
if (values.size() == 0) {
return 0;
}
Collections.sort(values);
int middle = values.size() / 2;
if (values.size() % 2 == 1) {
return values.get(middle);
} else {
return (values.get(middle - 1) + values.get(middle)) / 2d;
}
}
}
private static class CountAndValue {
int count;
double value;
}
}