/* This file is part of VoltDB. * Copyright (C) 2008-2017 VoltDB Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with VoltDB. If not, see <http://www.gnu.org/licenses/>. */ package org.voltdb.client; import java.io.ByteArrayOutputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; import java.util.Date; import java.util.Iterator; import java.util.concurrent.TimeUnit; import org.HdrHistogram_voltpatches.Histogram; import com.google_voltpatches.common.base.Charsets; import com.google_voltpatches.common.base.Throwables; /** * <p>Essentially a set of counters for a specific context with helper * methods. The context has a time window and can apply to all connections * and procedures, or a single of connections and/or procedure.</p> * * <p>The helper methods such as {@link #getTxnThroughput()} or * {@link #kPercentileLatency(double)} perform common operations * on the counters.</p> * * <p>This object is immutable outside of the package scope and does not * directly reference any internal data structures.</p> * * <p>See also {@link ClientStatsContext}.</p> */ public class ClientStats { String m_procName; long m_startTS; // java.util.Date compatible microseconds since epoch long m_endTS; long m_connectionId; String m_hostname; int m_port; long m_invocationsCompleted; long m_invocationAborts; long m_invocationErrors; long m_invocationTimeouts; // cumulative latency measured by client, used to calculate avg. lat. long m_roundTripTimeNanos; // cumulative latency measured by the cluster, used to calculate avg lat. long m_clusterRoundTripTime; // milliseconds /** The number of buckets tracking latency with 1ms granularity. */ final public static int ONE_MS_BUCKET_COUNT = 50; /** The number of buckets tracking latency with 10ms granularity. */ final public static int TEN_MS_BUCKET_COUNT = 20; /** The number of buckets tracking latency with 100ms granularity. */ final public static int HUNDRED_MS_BUCKET_COUNT = 10; Histogram m_latencyHistogram; long m_bytesSent; long m_bytesReceived; private static final long LOWEST_TRACKABLE = 50; private static final long HIGHEST_TRACKABLE = 10L * (1000L * 1000L); private static final int SIGNIFICANT_VALUE_DIGITS = 2; /* * Get a that tracks from 1 microsecond to 10 seconds with * 2 significant value digits */ public static Histogram constructHistogram() { return new Histogram( LOWEST_TRACKABLE, HIGHEST_TRACKABLE, SIGNIFICANT_VALUE_DIGITS); } ClientStats() { m_procName = ""; m_connectionId = -1; m_hostname = ""; m_port = -1; m_startTS = Long.MAX_VALUE; m_endTS = Long.MIN_VALUE; m_invocationsCompleted = m_invocationAborts = m_invocationErrors = 0; m_roundTripTimeNanos = m_clusterRoundTripTime = 0; m_bytesSent = m_bytesReceived = 0; m_latencyHistogram = constructHistogram(); } ClientStats(ClientStats other) { m_procName = other.m_procName; m_connectionId = other.m_connectionId; m_hostname = other.m_hostname; m_port = other.m_port; m_startTS = other.m_startTS; m_endTS = other.m_endTS; m_invocationsCompleted = other.m_invocationsCompleted; m_invocationAborts = other.m_invocationAborts; m_invocationErrors = other.m_invocationErrors; m_invocationTimeouts = other.m_invocationTimeouts; m_roundTripTimeNanos = other.m_roundTripTimeNanos; m_clusterRoundTripTime = other.m_clusterRoundTripTime; m_latencyHistogram = other.m_latencyHistogram.copy(); m_latencyHistogram.reestablishTotalCount(); m_bytesSent = other.m_bytesSent; m_bytesReceived = other.m_bytesReceived; } static ClientStats diff(ClientStats newer, ClientStats older) { if ((newer.m_procName != older.m_procName) || (newer.m_connectionId != older.m_connectionId)) { throw new IllegalArgumentException("Can't diff these ClientStats instances."); } ClientStats retval = new ClientStats(); retval.m_procName = older.m_procName; retval.m_connectionId = older.m_connectionId; retval.m_hostname = older.m_hostname; retval.m_port = older.m_port; // the next two values are essentially useless after a diff, but will // be overwritten by ClientStatsContext retval.m_startTS = older.m_startTS; retval.m_endTS = newer.m_endTS; retval.m_invocationsCompleted = newer.m_invocationsCompleted - older.m_invocationsCompleted; retval.m_invocationAborts = newer.m_invocationAborts - older.m_invocationAborts; retval.m_invocationErrors = newer.m_invocationErrors - older.m_invocationErrors; retval.m_invocationTimeouts = newer.m_invocationTimeouts - older.m_invocationTimeouts; retval.m_roundTripTimeNanos = newer.m_roundTripTimeNanos - older.m_roundTripTimeNanos; retval.m_clusterRoundTripTime = newer.m_clusterRoundTripTime - older.m_clusterRoundTripTime; retval.m_latencyHistogram = Histogram.diff(newer.m_latencyHistogram, older.m_latencyHistogram); retval.m_bytesSent = newer.m_bytesSent - older.m_bytesSent; retval.m_bytesReceived = newer.m_bytesReceived - older.m_bytesReceived; return retval; } static ClientStats merge(Iterable<ClientStats> statsIterable) { return merge(statsIterable.iterator()); } static ClientStats merge(Iterator<ClientStats> statsIter) { // empty set if (!statsIter.hasNext()) { return new ClientStats(); } // seed the grouping by the first element ClientStats seed = statsIter.next(); assert(seed != null); // non-destructive seed = (ClientStats) seed.clone(); // add in all the other elements while (statsIter.hasNext()) { seed.add(statsIter.next()); } return seed; } void add(ClientStats other) { if (m_procName.equals(other.m_procName) == false) m_procName = ""; if (m_connectionId != other.m_connectionId) m_connectionId = -1; if (m_hostname.equals(other.m_hostname) == false) m_hostname = ""; if (m_port != other.m_port) m_port = -1; m_startTS = Math.min(other.m_startTS, m_startTS); m_endTS = Math.max(other.m_endTS, m_endTS); m_invocationsCompleted += other.m_invocationsCompleted; m_invocationAborts += other.m_invocationAborts; m_invocationErrors += other.m_invocationErrors; m_invocationTimeouts += other.m_invocationTimeouts; m_roundTripTimeNanos += other.m_roundTripTimeNanos; m_clusterRoundTripTime += other.m_clusterRoundTripTime; m_latencyHistogram.add(other.m_latencyHistogram); m_latencyHistogram.reestablishTotalCount(); m_bytesSent += other.m_bytesSent; m_bytesReceived += other.m_bytesReceived; } void update(long roundTripTimeNanos, int clusterRoundTripTime, boolean abort, boolean error, boolean timeout) { m_invocationsCompleted++; if (abort) m_invocationAborts++; if (error) m_invocationErrors++; if (timeout) m_invocationTimeouts++; m_roundTripTimeNanos += roundTripTimeNanos; m_clusterRoundTripTime += clusterRoundTripTime; //Round up to 50 microseconds. Average is still accurate and it doesn't change the percentile distribution //above 50 micros final long roundTripMicros = Math.max(LOWEST_TRACKABLE, TimeUnit.NANOSECONDS.toMicros(roundTripTimeNanos)); if (roundTripMicros > HIGHEST_TRACKABLE) { m_latencyHistogram.recordValue(roundTripMicros % HIGHEST_TRACKABLE); int count = (int)(roundTripMicros / HIGHEST_TRACKABLE); for (int ii = 0; ii < count; ii++) { m_latencyHistogram.recordValue(HIGHEST_TRACKABLE); } } else { m_latencyHistogram.recordValue(roundTripMicros); } } /** * Get the name of the procedure this statistics instance applies to. * * @return The name of the procedure or the empty string if this stats * instance covers more than one procedure. */ public String getProcedureName() { return m_procName; } /** * Get the {@link Date}-compatible timestamp that describes the start of * the range of time this stats instance covers. * * @return A timestamp in milliseconds since the epoch. */ public long getStartTimestamp() { return m_startTS; } /** * Get the {@link Date}-compatible timestamp that describes the end of * the range of time this stats instance covers. * * @return A timestamp in milliseconds since the epoch. */ public long getEndTimestamp() { return m_endTS; } /** * Get the number of milliseconds this stats instance covers. * * @return The number of milliseconds this stats instance covers. */ public long getDuration() { // this value should never be MIN_VALUE by the time a user can call this assert(m_endTS != Long.MIN_VALUE); return m_endTS - m_startTS; } /** * Get the id of the individual socket connection this statistics instance * applies to. Note that hostname and port combos might not be unique, * but connection ids will be. * * @return The id of the connection or -1 if this stats instance covers more * than one connection. */ public long getConnectionId() { return m_connectionId; } /** * The hostname or IP (as string) of the connection this stats instance * covers. * * @return The hostname or ip as string, or the empty string if this stats * instance covers more than one connection. */ public String getHostname() { return m_hostname; } /** * The port number of the connection this stats instance covers. * * @return The port number, or -1 if this stats instance covers more than * one connection. */ public int getPort() { return m_port; } /** * Get the number of transactions acknowledged by the VoltDB server(s) * during the time period covered by this stats instance. * * @return The number of transactions completed. */ public long getInvocationsCompleted() { return m_invocationsCompleted; } /** * Get the number of transactions aborted by the VoltDB server(s) * during the time period covered by this stats instance. * * @return The number of transactions aborted. */ public long getInvocationAborts() { return m_invocationAborts; } /** * Get the number of transactions failed by the VoltDB server(s) * during the time period covered by this stats instance. * * @return The number of transactions that failed. */ public long getInvocationErrors() { return m_invocationErrors; } /** * Get the number of transactions timed out before being sent to or responded by VoltDB server(s) * during the time period covered by this stats instance. * * @return The number of transactions that failed. */ public long getInvocationTimeouts() { return m_invocationTimeouts; } /** * Get the average latency in milliseconds for the time period * covered by this stats instance. This is computed by summing the client-measured * round trip times of all transactions and dividing by the competed * invocation count. * * @return Average latency in milliseconds. */ public double getAverageLatency() { if (m_invocationsCompleted == 0) return 0; return (m_roundTripTimeNanos / (double)m_invocationsCompleted) / 1000000.0D; } /** * <p>Get the server-side average latency in milliseconds for the time period * covered by this stats instance. This is computed by summing the server-reported * latency times of all transactions and dividing by the competed invocation count.</p> * * <p>The server reported latency number measures the time from when a transaction * is accepted from the socket to when the response is written back. It will be higher * for multi-node clusters, for clusters with too much load, or for clusters with longer * running transactions.</p> * * @return Average latency in milliseconds. */ public double getAverageInternalLatency() { if (m_invocationsCompleted == 0) return 0; return (double)m_clusterRoundTripTime / (double)m_invocationsCompleted; } /** * <p>Get the raw buckets used for latency tracking in 1ms increments. For example, if * a transaction returns in 3.2ms, then the array at index 3 will be incremented by * one. It can be thought of as a histogram of latencies. It has * {@link #ONE_MS_BUCKET_COUNT} buckets, for a range of * <code>ONE_MS_BUCKET_COUNT x 1ms</code></p> * * <p>This raw data, along with other bucket sets of different granularity, is used to * support the {@link #kPercentileLatency(double)} method. This returns a copy of the * internal array so it is threadsafe and mutable if you wish. Note that the buckets * * @return An array containing counts for different latency values. */ public long[] getLatencyBucketsBy1ms() { final long buckets[] = new long[ONE_MS_BUCKET_COUNT]; for (int ii = 0; ii < ONE_MS_BUCKET_COUNT; ii++) { buckets[ii] = m_latencyHistogram.getCountBetweenValues(ii * 1000L, (ii + 1) * 1000L); } return buckets; } /** * <p>Get the raw buckets used for latency tracking in 10ms increments. For example, if * a transaction returns in 42ms, then the array at index 4 will be incremented by * one. It can be thought of as a histogram of latencies. It has * {@link #TEN_MS_BUCKET_COUNT} buckets, for a range of * <code>TEN_MS_BUCKET_COUNT x 10ms</code>.</p> * * <p>This raw data, along with other bucket sets of different granularity, is used to * support the {@link #kPercentileLatency(double)} method. This returns a copy of the * internal array so it is threadsafe and mutable if you wish. Note that the buckets * * @return An array containing counts for different latency values. */ public long[] getLatencyBucketsBy10ms() { final long buckets[] = new long[TEN_MS_BUCKET_COUNT]; for (int ii = 0; ii < TEN_MS_BUCKET_COUNT; ii++) { buckets[ii] = m_latencyHistogram.getCountBetweenValues(ii * 10000L, (ii + 1) * 10000L); } return buckets; } /** * <p>Get the raw buckets used for latency tracking in 1ms increments. For example, if * a transaction returns in 3.2ms, then the array at index 3 will be incremented by * one. It can be thought of as a histogram of latencies. It has * {@link #HUNDRED_MS_BUCKET_COUNT} buckets, for a range of * <code>HUNDRED_MS_BUCKET_COUNT x 100ms</code>.</p> * * <p>This raw data, along with other bucket sets of different granularity, is used to * support the {@link #kPercentileLatency(double)} method. This returns a copy of the * internal array so it is threadsafe and mutable if you wish. Note that the buckets * * @return An array containing counts for different latency values. */ public long[] getLatencyBucketsBy100ms() { final long buckets[] = new long[HUNDRED_MS_BUCKET_COUNT]; for (int ii = 0; ii < HUNDRED_MS_BUCKET_COUNT; ii++) { buckets[ii] = m_latencyHistogram.getCountBetweenValues(ii * 100000L, (ii + 1) * 100000L); } return buckets; } /** * Return the number of bytes written over the network during the time period * covered by this stats instance. This can be specific to a connection or global, * but is not recorded for per-procedure statistics. * * @return The number of bytes written or 0 for per-procedure statistics. */ public long getBytesWritten() { return m_bytesSent; } /** * Return the number of bytes read from the network during the time period * covered by this stats instance. This can be specific to a connection or global, * but is not recorded for per-procedure statistics. * * @return The number of bytes read or 0 for per-procedure statistics. */ public long getBytesRead() { return m_bytesReceived; } /** * <p>Using the latency bucketing statistics gathered by the client, estimate * the k-percentile latency value for the time period covered by this stats * instance.</p> * * <p>For example, k=.5 returns an estimate of the median. k=0 returns the * minimum. k=1.0 returns the maximum.</p> * * <p>Latencies longer than the highest trackable value (10 seconds) will be * reported as multiple entries at the highest trackable value</p> * * @param percentile A floating point number between 0.0 and 1.0. * @return An estimate of k-percentile latency in whole milliseconds. */ public int kPercentileLatency(double percentile) { if (m_latencyHistogram.getTotalCount() == 0) return 0; percentile = Math.max(0.0D, percentile); //Convert from micros to millis for return value, round to nearest integer return (int) (Math.round(m_latencyHistogram.getValueAtPercentile(percentile * 100.0D)) / 1000.0); } /** * <p>Using the latency bucketing statistics gathered by the client, estimate * the k-percentile latency value for the time period covered by this stats * instance.</p> * * <p>For example, k=.5 returns an estimate of the median. k=0 returns the * minimum. k=1.0 returns the maximum.</p> * * <p>Latencies longer than the highest trackable value (10 seconds) will be * reported as multiple entries at the highest trackable value</p> * * @param percentile A floating point number between 0.0 and 1.0. * @return An estimate of k-percentile latency in whole milliseconds. */ public double kPercentileLatencyAsDouble(double percentile) { if (m_latencyHistogram.getTotalCount() == 0) return 0.0; percentile = Math.max(0.0D, percentile); //Convert from micros to millis for return value, enjoy having precision return m_latencyHistogram.getValueAtPercentile(percentile * 100.0D) / 1000.0; } /** * Generate a human-readable report of latencies in the form of a histogram. Latency is * in milliseconds * * @return String containing human-readable report. */ public String latencyHistoReport() { ByteArrayOutputStream baos= new ByteArrayOutputStream(); PrintStream pw = null; try { pw = new PrintStream(baos, false, Charsets.UTF_8.name()); } catch (UnsupportedEncodingException e) { Throwables.propagate(e); } //Get a latency report in milliseconds m_latencyHistogram.outputPercentileDistributionVolt(pw, 1, 1000.0); return new String(baos.toByteArray(), Charsets.UTF_8); } /** * <p>Return an average throughput of transactions acknowledged per * second for the duration covered by this stats instance.</p> * * <p>Essentially <code>{@link #getInvocationsCompleted()} divided by * ({@link #getStartTimestamp()} - {@link #getEndTimestamp()} / 1000.0)</code>, * but with additional safety checks.</p> * * @return Throughput in transactions acknowledged per second. */ public long getTxnThroughput() { assert(m_startTS != Long.MAX_VALUE); assert(m_endTS != Long.MIN_VALUE); if (m_invocationsCompleted == 0) return 0; if (m_endTS < m_startTS) { m_endTS = m_startTS + 1; // 1 ms duration is sorta cheatin' } long durationMs = m_endTS - m_startTS; return (long) (m_invocationsCompleted / (durationMs / 1000.0)); } /** * <p>Return an average throughput of bytes sent per second over the * network for the duration covered by this stats instance.</p> * * <p>Essentially <code>{@link #getBytesWritten()} divided by * ({@link #getStartTimestamp()} - {@link #getEndTimestamp()} / 1000.0)</code>, * but with additional safety checks.</p> * * @return Throughput in bytes sent per second. */ public long getIOWriteThroughput() { assert(m_startTS != Long.MAX_VALUE); assert(m_endTS != Long.MIN_VALUE); if (m_bytesSent == 0) return 0; if (m_endTS < m_startTS) { m_endTS = m_startTS + 1; // 1 ms duration is sorta cheatin' } long durationMs = m_endTS - m_startTS; return (long) (m_bytesSent / (durationMs / 1000.0)); } /** * <p>Return an average throughput of bytes read per second from the * network for the duration covered by this stats instance.</p> * * <p>Essentially <code>{@link #getBytesRead()} divided by * ({@link #getStartTimestamp()} - {@link #getEndTimestamp()} / 1000.0)</code>, * but with additional safety checks.</p> * * @return Throughput in bytes read per second. */ public long getIOReadThroughput() { assert(m_startTS != Long.MAX_VALUE); assert(m_endTS != Long.MIN_VALUE); if (m_bytesReceived == 0) return 0; if (m_endTS < m_startTS) { m_endTS = m_startTS + 1; // 1 ms duration is sorta cheatin' } long durationMs = m_endTS - m_startTS; return (long) (m_bytesReceived / (durationMs / 1000.0)); } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append(String.format("Start %s - End %s - Procedure: %s - ConnectionId: %d {\n", new Date(m_startTS).toString(), new Date(m_endTS).toString(), m_procName, m_connectionId)); sb.append(String.format(" hostname: %s:%d\n", m_hostname, m_port)); sb.append(String.format(" invocations completed/aborted/errors/timeouts: %d/%d/%d/%d\n", m_invocationsCompleted, m_invocationAborts, m_invocationErrors, m_invocationTimeouts)); if (m_invocationsCompleted > 0) { sb.append(String.format(" avg latency client/internal: %.2f/%d\n", (m_roundTripTimeNanos / (double)m_invocationsCompleted) / 1000000.0, m_clusterRoundTripTime / m_invocationsCompleted)); sb.append(latencyHistoReport()).append("\n"); } return sb.toString(); } /* (non-Javadoc) * @see java.lang.Object#clone() */ @Override protected Object clone() { return new ClientStats(this); } }