/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.logging.log4j.core.async.perftest;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import org.HdrHistogram.Histogram;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.core.async.DefaultAsyncQueueFullPolicy;
import org.apache.logging.log4j.core.async.EventRoute;
import org.apache.logging.log4j.core.util.Constants;
import org.apache.logging.log4j.core.util.Loader;
/**
* Latency test showing both service time and response time.
* <p>Service time = time to perform the desired operation, response time = service time + queueing time.</p>
*
* <h4>Gil's DOs and DONTs for latency testing</h4>
* <p>
* <a href="https://groups.google.com/d/msg/mechanical-sympathy/0gaBXxFm4hE/O9QomwHIJAAJ">https://groups.google.com/d/msg/mechanical-sympathy/0gaBXxFm4hE/O9QomwHIJAAJ</a>
* </p>
* If you are looking at the set of "stacks" below (all of which are queues/transports),
* I would strongly encourage you to avoid repeating the mistakes of testing methodologies
* that focus entirely on max achievable throughput and then report some (usually bogus)
* latency stats at those max throughout modes.
* The tech empower numbers are a classic example of this in play, and while they do provide some basis
* for comparing a small aspect of behavior (what I call the "how fast can this thing drive off a cliff"
* comparison, or "peddle to the metal" testing), those results are not very useful for comparing load
* carrying capacities for anything that actually needs to maintain some form of responsiveness SLA or
* latency spectrum requirements.
* <p>
* Rules of thumb I'd start with (some simple DOs and DON'Ts):
* </p>
* <ol>
* <li>DO measure max achievable throughput, but DON'T get focused on it as the main or single axis of measurement /
* comparison.
* </li>
* <li>DO measure response time / latency behaviors across a spectrum of attempted load levels (e.g. at attempted loads
* between 2% to 100%+ of max established thoughout).</li>
* <li>DO measure the response time / latency spectrum for each tested load (even for max throughout, for which response
* time should linearly grow with test length, or the test is wrong). HdrHistogram is one good way to capture this
* information.</li>
* <li>DO make sure you are measuring response time correctly and labeling it right. If you also measure and report
* service time, label it as such (don't call it "latency").</li>
* <li>DO compare response time / latency spectrum at given loads.</li>
* <li>DO [repeatedly] sanity check and calibrate the benchmark setup to verify that it produces expected results for
* known forced scenarios. E.g. forced pauses of known size via ^Z or SIGSTOP/SIGCONT should produce expected response
* time percentile levels. Attempting to load at >100% than achieved throughput should result in response time / latency
* measurements that grow with benchmark run length, while service time (if measured) should remain fairly flat well
* past saturation.</li>
* <li>DON'T use or report standard deviation for latency. Ever. Except if you mean it as a joke.</li>
* <li>DON'T use average latency as a way to compare things with one another. [use median or 90%'ile instead, if what
* you want to compare is "common case" latencies]. Consider not reporting avg. at all.</li>
* <li>DON'T compare results of different setups or loads from short runs (< 20-30 minutes).</li>
* <li>DON'T include process warmup behavior (e.g. 1st minute and 1st 50K messages) in compared or reported
* results.</li>
* </ol>
* <p.
* See <a href="https://groups.google.com/d/msg/mechanical-sympathy/0gaBXxFm4hE/O9QomwHIJAAJ">https://groups.google.com/d/msg/mechanical-sympathy/0gaBXxFm4hE/O9QomwHIJAAJ</a>
* for some concrete visual examples.
*/
// RUN
// java -XX:+UnlockDiagnosticVMOptions -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintTenuringDistribution
// -XX:+PrintGCApplicationConcurrentTime -XX:+PrintGCApplicationStoppedTime -XX:GuaranteedSafepointInterval=500000
// -XX:CompileCommand=dontinline,org.apache.logging.log4j.core.async.perftest.NoOpIdleStrategy::idle
// -cp HdrHistogram-2.1.8.jar:disruptor-3.3.4.jar:log4j-api-2.6-SNAPSHOT.jar:log4j-core-2.6-SNAPSHOT.jar:log4j-core-2.6-SNAPSHOT-tests.jar
// -DAsyncLogger.WaitStrategy=busyspin -DLog4jContextSelector=org.apache.logging.log4j.core.async.AsyncLoggerContextSelector
// -Dlog4j2.enable.threadlocals=true -Dlog4j2.enable.direct.encoders=true
// -Xms1G -Xmx1G org.apache.logging.log4j.core.async.perftest.ResponseTimeTest 1 100000
//
// RUN recording in Java Flight Recorder:
// %JAVA_HOME%\bin\java -XX:+UnlockCommercialFeatures -XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints -XX:+FlightRecorder -XX:StartFlightRecording=duration=10m,filename=replayStats-2.6-latency.jfr -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationConcurrentTime -XX:+PrintGCApplicationStoppedTime -XX:CompileCommand=dontinline,org.apache.logging.log4j.core.async.perftest.NoOpIdleStrategy::idle -DAsyncLogger.WaitStrategy=yield -Dorg.apache.logging.log4j.simplelog.StatusLogger.level=TRACE -cp .;HdrHistogram-2.1.8.jar;disruptor-3.3.4.jar;log4j-api-2.6-SNAPSHOT.jar;log4j-core-2.6-SNAPSHOT.jar;log4j-core-2.6-SNAPSHOT-tests.jar org.apache.logging.log4j.core.async.perftest.ResponseTimeTest 1 50000
public class ResponseTimeTest {
private static final String LATENCY_MSG = new String(new char[64]);
public static void main(final String[] args) throws Exception {
if (args.length < 2) {
System.out.println("Please specify thread count, target throughput (msg/sec) " +
"and logger library (Log4j1, Log4j2, Logback, JUL)");
return;
}
final int threadCount = Integer.parseInt(args[0]);
final double loadMessagesPerSec = Double.parseDouble(args[1]);
final String loggerLib = args.length > 2 ? args[2] : "Log4j2";
// print to console if ringbuffer is full
System.setProperty("log4j2.AsyncQueueFullPolicy", PrintingAsyncQueueFullPolicy.class.getName());
System.setProperty("AsyncLogger.RingBufferSize", String.valueOf(256 * 1024));
//System.setProperty("Log4jContextSelector", AsyncLoggerContextSelector.class.getName());
//System.setProperty("log4j.configurationFile", "perf3PlainNoLoc.xml");
if (System.getProperty("AsyncLogger.WaitStrategy") == null) {
System.setProperty("AsyncLogger.WaitStrategy", "Yield");
}
//for (Object key : System.getProperties().keySet()) {
// System.out.println(key + "=" + System.getProperty((String) key));
//}
// initialize the logger
final String wrapper = loggerLib.startsWith("Run") ? loggerLib : "Run" + loggerLib;
final String loggerWrapperClass = "org.apache.logging.log4j.core.async.perftest." + wrapper;
final IPerfTestRunner logger = Loader.newCheckedInstanceOf(loggerWrapperClass, IPerfTestRunner.class);
logger.log("Starting..."); // ensure initialized
Thread.sleep(100);
final int requiredProcessors = threadCount + 1 + 1; // producers + 1 consumer + 1 for OS
final IdleStrategy idleStrategy = Runtime.getRuntime().availableProcessors() > requiredProcessors
? new NoOpIdleStrategy()
: new YieldIdleStrategy();
System.out.printf("%s: %d threads, load is %,f msg/sec, using %s%n", loggerLib, threadCount,
loadMessagesPerSec, idleStrategy.getClass().getSimpleName());
// Warmup: run as many iterations of 50,000 calls to logger.log as we can in 1 minute
final long WARMUP_DURATION_MILLIS = TimeUnit.MINUTES.toMillis(1);
final List<Histogram> warmupServiceTmHistograms = new ArrayList<>(threadCount);
final List<Histogram> warmupResponseTmHistograms = new ArrayList<>(threadCount);
final int WARMUP_COUNT = 50000 / threadCount;
runLatencyTest(logger, WARMUP_DURATION_MILLIS, WARMUP_COUNT, loadMessagesPerSec, idleStrategy,
warmupServiceTmHistograms, warmupResponseTmHistograms, threadCount);
System.out.println("-----------------Warmup done. load=" + loadMessagesPerSec);
if (!Constants.ENABLE_DIRECT_ENCODERS || !Constants.ENABLE_THREADLOCALS) {
//System.gc();
//Thread.sleep(5000);
}
System.out.println("-----------------Starting measured run. load=" + loadMessagesPerSec);
final long start = System.currentTimeMillis();
final List<Histogram> serviceTmHistograms = new ArrayList<>(threadCount);
final List<Histogram> responseTmHistograms = new ArrayList<>(threadCount);
PrintingAsyncQueueFullPolicy.ringbufferFull.set(0);
// Actual test: run as many iterations of 1,000,000 calls to logger.log as we can in 4 minutes.
final long TEST_DURATION_MILLIS = TimeUnit.MINUTES.toMillis(4);
final int COUNT = (1000 * 1000) / threadCount;
runLatencyTest(logger, TEST_DURATION_MILLIS, COUNT, loadMessagesPerSec, idleStrategy, serviceTmHistograms,
responseTmHistograms, threadCount);
logger.shutdown();
final long end = System.currentTimeMillis();
// ... and report the results
final Histogram resultServiceTm = createResultHistogram(serviceTmHistograms, start, end);
resultServiceTm.outputPercentileDistribution(System.out, 1000.0);
writeToFile("s", resultServiceTm, (int) (loadMessagesPerSec / 1000), 1000.0);
final Histogram resultResponseTm = createResultHistogram(responseTmHistograms, start, end);
resultResponseTm.outputPercentileDistribution(System.out, 1000.0);
writeToFile("r", resultResponseTm, (int) (loadMessagesPerSec / 1000), 1000.0);
System.out.printf("%n%s: %d threads, load %,f msg/sec, ringbuffer full=%d%n", loggerLib, threadCount,
loadMessagesPerSec, PrintingAsyncQueueFullPolicy.ringbufferFull.get());
System.out.println("Test duration: " + (end - start) / 1000.0 + " seconds");
}
private static void writeToFile(final String suffix, final Histogram hist, final int thousandMsgPerSec,
final double scale) throws IOException {
try (PrintStream pout = new PrintStream(new FileOutputStream(thousandMsgPerSec + "k" + suffix))) {
hist.outputPercentileDistribution(pout, scale);
}
}
private static Histogram createResultHistogram(final List<Histogram> list, final long start, final long end) {
final Histogram result = new Histogram(TimeUnit.SECONDS.toNanos(10), 3);
result.setStartTimeStamp(start);
result.setEndTimeStamp(end);
for (final Histogram hist : list) {
result.add(hist);
}
return result;
}
public static void runLatencyTest(final IPerfTestRunner logger, final long durationMillis, final int samples,
final double loadMessagesPerSec, final IdleStrategy idleStrategy,
final List<Histogram> serviceTmHistograms, final List<Histogram> responseTmHistograms,
final int threadCount) throws InterruptedException {
final Thread[] threads = new Thread[threadCount];
final CountDownLatch LATCH = new CountDownLatch(threadCount);
for (int i = 0; i < threadCount; i++) {
final Histogram serviceTmHist = new Histogram(TimeUnit.SECONDS.toNanos(10), 3);
final Histogram responseTmHist = new Histogram(TimeUnit.SECONDS.toNanos(10), 3);
serviceTmHistograms.add(serviceTmHist);
responseTmHistograms.add(responseTmHist);
threads[i] = new Thread("latencytest-" + i) {
@Override
public void run() {
LATCH.countDown();
try {
LATCH.await(); // wait until all threads are ready to go
} catch (final InterruptedException e) {
interrupt();
return;
}
final long endTimeMillis = System.currentTimeMillis() + durationMillis;
do {
final Pacer pacer = new Pacer(loadMessagesPerSec, idleStrategy);
runLatencyTest(samples, logger, serviceTmHist, responseTmHist, pacer);
} while (System.currentTimeMillis() < endTimeMillis);
}
};
threads[i].start();
}
for (int i = 0; i < threadCount; i++) {
threads[i].join();
}
}
private static void runLatencyTest(final int samples, final IPerfTestRunner logger, final Histogram serviceTmHist,
final Histogram responseTmHist, final Pacer pacer) {
for (int i = 0; i < samples; i++) {
final long expectedStartTimeNanos = pacer.expectedNextOperationNanoTime();
pacer.acquire(1);
final long actualStartTime = System.nanoTime();
logger.log(LATENCY_MSG);
final long doneTime = System.nanoTime();
serviceTmHist.recordValue(doneTime - actualStartTime);
responseTmHist.recordValue(doneTime - expectedStartTimeNanos);
}
}
public static class PrintingAsyncQueueFullPolicy extends DefaultAsyncQueueFullPolicy {
static AtomicLong ringbufferFull = new AtomicLong();
@Override
public EventRoute getRoute(final long backgroundThreadId, final Level level) {
ringbufferFull.incrementAndGet();
System.out.print('!');
return super.getRoute(backgroundThreadId, level);
}
}
/**
* Pacer determines the pace at which measurements are taken. Sample usage:
* <p/>
* <pre>
* - each thread has a Pacer instance
* - at start of test, call pacer.setInitialStartTime(System.nanoTime());
* - loop:
* - store result of pacer.expectedNextOperationNanoTime() as expectedStartTime
* - pacer.acquire(1);
* - before the measured operation: store System.nanoTime() as actualStartTime
* - perform the measured operation
* - store System.nanoTime() as doneTime
* - serviceTimeHistogram.recordValue(doneTime - actualStartTime);
* - responseTimeHistogram.recordValue(doneTime - expectedStartTime);
* </pre>
* <p>
* Borrowed with permission from Gil Tene's Cassandra stress test:
* https://github.com/LatencyUtils/cassandra-stress2/blob/trunk/tools/stress/src/org/apache/cassandra/stress/StressAction.java#L374
* </p>
*/
static class Pacer {
private long initialStartTime;
private double throughputInUnitsPerNsec;
private long unitsCompleted;
private boolean caughtUp = true;
private long catchUpStartTime;
private long unitsCompletedAtCatchUpStart;
private double catchUpThroughputInUnitsPerNsec;
private double catchUpRateMultiple;
private final IdleStrategy idleStrategy;
public Pacer(final double unitsPerSec, final IdleStrategy idleStrategy) {
this(unitsPerSec, 3.0, idleStrategy); // Default to catching up at 3x the set throughput
}
public Pacer(final double unitsPerSec, final double catchUpRateMultiple, final IdleStrategy idleStrategy) {
this.idleStrategy = idleStrategy;
setThroughout(unitsPerSec);
setCatchupRateMultiple(catchUpRateMultiple);
initialStartTime = System.nanoTime();
}
public void setInitialStartTime(final long initialStartTime) {
this.initialStartTime = initialStartTime;
}
public void setThroughout(final double unitsPerSec) {
throughputInUnitsPerNsec = unitsPerSec / 1000000000.0;
catchUpThroughputInUnitsPerNsec = catchUpRateMultiple * throughputInUnitsPerNsec;
}
public void setCatchupRateMultiple(final double multiple) {
catchUpRateMultiple = multiple;
catchUpThroughputInUnitsPerNsec = catchUpRateMultiple * throughputInUnitsPerNsec;
}
/**
* @return the time for the next operation
*/
public long expectedNextOperationNanoTime() {
return initialStartTime + (long) (unitsCompleted / throughputInUnitsPerNsec);
}
public long nsecToNextOperation() {
final long now = System.nanoTime();
long nextStartTime = expectedNextOperationNanoTime();
boolean sendNow = true;
if (nextStartTime > now) {
// We are on pace. Indicate caught_up and don't send now.}
caughtUp = true;
sendNow = false;
} else {
// We are behind
if (caughtUp) {
// This is the first fall-behind since we were last caught up
caughtUp = false;
catchUpStartTime = now;
unitsCompletedAtCatchUpStart = unitsCompleted;
}
// Figure out if it's time to send, per catch up throughput:
final long unitsCompletedSinceCatchUpStart =
unitsCompleted - unitsCompletedAtCatchUpStart;
nextStartTime = catchUpStartTime +
(long) (unitsCompletedSinceCatchUpStart / catchUpThroughputInUnitsPerNsec);
if (nextStartTime > now) {
// Not yet time to send, even at catch-up throughout:
sendNow = false;
}
}
return sendNow ? 0 : (nextStartTime - now);
}
/**
* Will wait for next operation time. After this the expectedNextOperationNanoTime() will move forward.
*
* @param unitCount
*/
public void acquire(final long unitCount) {
final long nsecToNextOperation = nsecToNextOperation();
if (nsecToNextOperation > 0) {
sleepNs(nsecToNextOperation);
}
unitsCompleted += unitCount;
}
private void sleepNs(final long ns) {
long now = System.nanoTime();
final long deadline = now + ns;
while ((now = System.nanoTime()) < deadline) {
idleStrategy.idle();
}
}
}
}