ResponseTimeTest.java example

Explorer
logging-log4j2-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache license, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the license for the specific language governing permissions and
 * limitations under the license.
 */
package org.apache.logging.log4j.core.async.perftest;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

import org.HdrHistogram.Histogram;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.core.async.DefaultAsyncQueueFullPolicy;
import org.apache.logging.log4j.core.async.EventRoute;
import org.apache.logging.log4j.core.util.Constants;
import org.apache.logging.log4j.core.util.Loader;

/**
 * Latency test showing both service time and response time.
 * <p>Service time = time to perform the desired operation, response time = service time + queueing time.</p>
 *
 * <h4>Gil's DOs and DONTs for latency testing</h4>
 * <p>
 * <a href="https://groups.google.com/d/msg/mechanical-sympathy/0gaBXxFm4hE/O9QomwHIJAAJ">https://groups.google.com/d/msg/mechanical-sympathy/0gaBXxFm4hE/O9QomwHIJAAJ</a>
 * </p>
 * If you are looking at the set of "stacks" below (all of which are queues/transports),
 * I would strongly encourage you to avoid repeating the mistakes of testing methodologies
 * that focus entirely on max achievable throughput and then report some (usually bogus)
 * latency stats at those max throughout modes.
 * The tech empower numbers are a classic example of this in play, and while they do provide some basis
 * for comparing a small aspect of behavior (what I call the "how fast can this thing drive off a cliff"
 * comparison, or "peddle to the metal" testing), those results are not very useful for comparing load
 * carrying capacities for anything that actually needs to maintain some form of responsiveness SLA or
 * latency spectrum requirements.
 * <p>
 * Rules of thumb I'd start with (some simple DOs and DON'Ts):
 * </p>
 * <ol>
 * <li>DO measure max achievable throughput, but DON'T get focused on it as the main or single axis of measurement /
 * comparison.
 * </li>
 * <li>DO measure response time / latency behaviors across a spectrum of attempted load levels (e.g. at attempted loads
 * between 2% to 100%+ of max established thoughout).</li>
 * <li>DO measure the response time / latency spectrum for each tested load (even for max throughout, for which response
 * time should linearly grow with test length, or the test is wrong). HdrHistogram is one good way to capture this
 * information.</li>
 * <li>DO make sure you are measuring response time correctly and labeling it right. If you also measure and report
 * service time, label it as such (don't call it "latency").</li>
 * <li>DO compare response time / latency spectrum at given loads.</li>
 * <li>DO [repeatedly] sanity check and calibrate the benchmark setup to verify that it produces expected results for
 * known forced scenarios. E.g. forced pauses of known size via ^Z or SIGSTOP/SIGCONT should produce expected response
 * time percentile levels. Attempting to load at >100% than achieved throughput should result in response time / latency
 * measurements that grow with benchmark run length, while service time (if measured) should remain fairly flat well
 * past saturation.</li>
 * <li>DON'T use or report standard deviation for latency. Ever. Except if you mean it as a joke.</li>
 * <li>DON'T use average latency as a way to compare things with one another. [use median or 90%'ile instead, if what
 * you want to compare is "common case" latencies]. Consider not reporting avg. at all.</li>
 * <li>DON'T compare results of different setups or loads from short runs (< 20-30 minutes).</li>
 * <li>DON'T include process warmup behavior (e.g. 1st minute and 1st 50K messages) in compared or reported
 * results.</li>
 * </ol>
 * <p.
 * See  <a href="https://groups.google.com/d/msg/mechanical-sympathy/0gaBXxFm4hE/O9QomwHIJAAJ">https://groups.google.com/d/msg/mechanical-sympathy/0gaBXxFm4hE/O9QomwHIJAAJ</a>
 * for some concrete visual examples.
 */
// RUN
// java -XX:+UnlockDiagnosticVMOptions -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintTenuringDistribution
// -XX:+PrintGCApplicationConcurrentTime -XX:+PrintGCApplicationStoppedTime -XX:GuaranteedSafepointInterval=500000
// -XX:CompileCommand=dontinline,org.apache.logging.log4j.core.async.perftest.NoOpIdleStrategy::idle
// -cp HdrHistogram-2.1.8.jar:disruptor-3.3.4.jar:log4j-api-2.6-SNAPSHOT.jar:log4j-core-2.6-SNAPSHOT.jar:log4j-core-2.6-SNAPSHOT-tests.jar
// -DAsyncLogger.WaitStrategy=busyspin -DLog4jContextSelector=org.apache.logging.log4j.core.async.AsyncLoggerContextSelector
// -Dlog4j2.enable.threadlocals=true -Dlog4j2.enable.direct.encoders=true
//  -Xms1G -Xmx1G org.apache.logging.log4j.core.async.perftest.ResponseTimeTest 1 100000
//
// RUN recording in Java Flight Recorder:
// %JAVA_HOME%\bin\java -XX:+UnlockCommercialFeatures -XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints -XX:+FlightRecorder -XX:StartFlightRecording=duration=10m,filename=replayStats-2.6-latency.jfr -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationConcurrentTime -XX:+PrintGCApplicationStoppedTime -XX:CompileCommand=dontinline,org.apache.logging.log4j.core.async.perftest.NoOpIdleStrategy::idle -DAsyncLogger.WaitStrategy=yield  -Dorg.apache.logging.log4j.simplelog.StatusLogger.level=TRACE -cp .;HdrHistogram-2.1.8.jar;disruptor-3.3.4.jar;log4j-api-2.6-SNAPSHOT.jar;log4j-core-2.6-SNAPSHOT.jar;log4j-core-2.6-SNAPSHOT-tests.jar org.apache.logging.log4j.core.async.perftest.ResponseTimeTest 1 50000
public class ResponseTimeTest {
    private static final String LATENCY_MSG = new String(new char[64]);

    public static void main(final String[] args) throws Exception {
        if (args.length < 2) {
            System.out.println("Please specify thread count, target throughput (msg/sec) " +
                    "and logger library (Log4j1, Log4j2, Logback, JUL)");
            return;
        }
        final int threadCount = Integer.parseInt(args[0]);
        final double loadMessagesPerSec = Double.parseDouble(args[1]);
        final String loggerLib = args.length > 2 ? args[2] : "Log4j2";

        // print to console if ringbuffer is full
        System.setProperty("log4j2.AsyncQueueFullPolicy", PrintingAsyncQueueFullPolicy.class.getName());
        System.setProperty("AsyncLogger.RingBufferSize", String.valueOf(256 * 1024));
        //System.setProperty("Log4jContextSelector", AsyncLoggerContextSelector.class.getName());
        //System.setProperty("log4j.configurationFile", "perf3PlainNoLoc.xml");
        if (System.getProperty("AsyncLogger.WaitStrategy") == null) {
            System.setProperty("AsyncLogger.WaitStrategy", "Yield");
        }
        //for (Object key : System.getProperties().keySet()) {
        //    System.out.println(key + "=" + System.getProperty((String) key));
        //}

        // initialize the logger
        final String wrapper = loggerLib.startsWith("Run") ? loggerLib : "Run" + loggerLib;
        final String loggerWrapperClass = "org.apache.logging.log4j.core.async.perftest." + wrapper;
        final IPerfTestRunner logger = Loader.newCheckedInstanceOf(loggerWrapperClass, IPerfTestRunner.class);
        logger.log("Starting..."); // ensure initialized
        Thread.sleep(100);

        final int requiredProcessors = threadCount + 1 + 1; // producers + 1 consumer + 1 for OS
        final IdleStrategy idleStrategy = Runtime.getRuntime().availableProcessors() > requiredProcessors
                ? new NoOpIdleStrategy()
                : new YieldIdleStrategy();

        System.out.printf("%s: %d threads, load is %,f msg/sec, using %s%n", loggerLib, threadCount,
                loadMessagesPerSec, idleStrategy.getClass().getSimpleName());

        // Warmup: run as many iterations of 50,000 calls to logger.log as we can in 1 minute
        final long WARMUP_DURATION_MILLIS = TimeUnit.MINUTES.toMillis(1);
        final List<Histogram> warmupServiceTmHistograms = new ArrayList<>(threadCount);
        final List<Histogram> warmupResponseTmHistograms = new ArrayList<>(threadCount);

        final int WARMUP_COUNT = 50000 / threadCount;
        runLatencyTest(logger, WARMUP_DURATION_MILLIS, WARMUP_COUNT, loadMessagesPerSec, idleStrategy,
                warmupServiceTmHistograms, warmupResponseTmHistograms, threadCount);
        System.out.println("-----------------Warmup done. load=" + loadMessagesPerSec);
        if (!Constants.ENABLE_DIRECT_ENCODERS || !Constants.ENABLE_THREADLOCALS) {
            //System.gc();
            //Thread.sleep(5000);
        }
        System.out.println("-----------------Starting measured run. load=" + loadMessagesPerSec);

        final long start = System.currentTimeMillis();
        final List<Histogram> serviceTmHistograms = new ArrayList<>(threadCount);
        final List<Histogram> responseTmHistograms = new ArrayList<>(threadCount);
        PrintingAsyncQueueFullPolicy.ringbufferFull.set(0);

        // Actual test: run as many iterations of 1,000,000 calls to logger.log as we can in 4 minutes.
        final long TEST_DURATION_MILLIS = TimeUnit.MINUTES.toMillis(4);
        final int COUNT = (1000 * 1000) / threadCount;
        runLatencyTest(logger, TEST_DURATION_MILLIS, COUNT, loadMessagesPerSec, idleStrategy, serviceTmHistograms,
                responseTmHistograms, threadCount);
        logger.shutdown();
        final long end = System.currentTimeMillis();

        // ... and report the results
        final Histogram resultServiceTm = createResultHistogram(serviceTmHistograms, start, end);
        resultServiceTm.outputPercentileDistribution(System.out, 1000.0);
        writeToFile("s", resultServiceTm, (int) (loadMessagesPerSec / 1000), 1000.0);

        final Histogram resultResponseTm = createResultHistogram(responseTmHistograms, start, end);
        resultResponseTm.outputPercentileDistribution(System.out, 1000.0);
        writeToFile("r", resultResponseTm, (int) (loadMessagesPerSec / 1000), 1000.0);

        System.out.printf("%n%s: %d threads, load %,f msg/sec, ringbuffer full=%d%n", loggerLib, threadCount,
                loadMessagesPerSec, PrintingAsyncQueueFullPolicy.ringbufferFull.get());
        System.out.println("Test duration: " + (end - start) / 1000.0 + " seconds");
    }

    private static void writeToFile(final String suffix, final Histogram hist, final int thousandMsgPerSec,
            final double scale) throws IOException {
        try (PrintStream pout = new PrintStream(new FileOutputStream(thousandMsgPerSec + "k" + suffix))) {
            hist.outputPercentileDistribution(pout, scale);
        }
    }

    private static Histogram createResultHistogram(final List<Histogram> list, final long start, final long end) {
        final Histogram result = new Histogram(TimeUnit.SECONDS.toNanos(10), 3);
        result.setStartTimeStamp(start);
        result.setEndTimeStamp(end);
        for (final Histogram hist : list) {
            result.add(hist);
        }
        return result;
    }

    public static void runLatencyTest(final IPerfTestRunner logger, final long durationMillis, final int samples,
            final double loadMessagesPerSec, final IdleStrategy idleStrategy,
            final List<Histogram> serviceTmHistograms, final List<Histogram> responseTmHistograms,
            final int threadCount) throws InterruptedException {

        final Thread[] threads = new Thread[threadCount];
        final CountDownLatch LATCH = new CountDownLatch(threadCount);
        for (int i = 0; i < threadCount; i++) {
            final Histogram serviceTmHist = new Histogram(TimeUnit.SECONDS.toNanos(10), 3);
            final Histogram responseTmHist = new Histogram(TimeUnit.SECONDS.toNanos(10), 3);
            serviceTmHistograms.add(serviceTmHist);
            responseTmHistograms.add(responseTmHist);

            threads[i] = new Thread("latencytest-" + i) {
                @Override
                public void run() {
                    LATCH.countDown();
                    try {
                        LATCH.await(); // wait until all threads are ready to go
                    } catch (final InterruptedException e) {
                        interrupt();
                        return;
                    }
                    final long endTimeMillis = System.currentTimeMillis() + durationMillis;
                    do {
                        final Pacer pacer = new Pacer(loadMessagesPerSec, idleStrategy);
                        runLatencyTest(samples, logger, serviceTmHist, responseTmHist, pacer);
                    } while (System.currentTimeMillis() < endTimeMillis);
                }
            };
            threads[i].start();
        }
        for (int i = 0; i < threadCount; i++) {
            threads[i].join();
        }
    }

    private static void runLatencyTest(final int samples, final IPerfTestRunner logger, final Histogram serviceTmHist,
            final Histogram responseTmHist, final Pacer pacer) {

        for (int i = 0; i < samples; i++) {
            final long expectedStartTimeNanos = pacer.expectedNextOperationNanoTime();
            pacer.acquire(1);
            final long actualStartTime = System.nanoTime();
            logger.log(LATENCY_MSG);
            final long doneTime = System.nanoTime();
            serviceTmHist.recordValue(doneTime - actualStartTime);
            responseTmHist.recordValue(doneTime - expectedStartTimeNanos);
        }
    }

    public static class PrintingAsyncQueueFullPolicy extends DefaultAsyncQueueFullPolicy {
        static AtomicLong ringbufferFull = new AtomicLong();

        @Override
        public EventRoute getRoute(final long backgroundThreadId, final Level level) {
            ringbufferFull.incrementAndGet();
            System.out.print('!');
            return super.getRoute(backgroundThreadId, level);
        }
    }

    /**
     * Pacer determines the pace at which measurements are taken. Sample usage:
     * <p/>
     * <pre>
     * - each thread has a Pacer instance
     * - at start of test, call pacer.setInitialStartTime(System.nanoTime());
     * - loop:
     *   - store result of pacer.expectedNextOperationNanoTime() as expectedStartTime
     *   - pacer.acquire(1);
     *   - before the measured operation: store System.nanoTime() as actualStartTime
     *   - perform the measured operation
     *   - store System.nanoTime() as doneTime
     *   - serviceTimeHistogram.recordValue(doneTime - actualStartTime);
     *   - responseTimeHistogram.recordValue(doneTime - expectedStartTime);
     * </pre>
     * <p>
     * Borrowed with permission from Gil Tene's Cassandra stress test:
     * https://github.com/LatencyUtils/cassandra-stress2/blob/trunk/tools/stress/src/org/apache/cassandra/stress/StressAction.java#L374
     * </p>
     */
    static class Pacer {
        private long initialStartTime;
        private double throughputInUnitsPerNsec;
        private long unitsCompleted;

        private boolean caughtUp = true;
        private long catchUpStartTime;
        private long unitsCompletedAtCatchUpStart;
        private double catchUpThroughputInUnitsPerNsec;
        private double catchUpRateMultiple;
        private final IdleStrategy idleStrategy;

        public Pacer(final double unitsPerSec, final IdleStrategy idleStrategy) {
            this(unitsPerSec, 3.0, idleStrategy); // Default to catching up at 3x the set throughput
        }

        public Pacer(final double unitsPerSec, final double catchUpRateMultiple, final IdleStrategy idleStrategy) {
            this.idleStrategy = idleStrategy;
            setThroughout(unitsPerSec);
            setCatchupRateMultiple(catchUpRateMultiple);
            initialStartTime = System.nanoTime();
        }

        public void setInitialStartTime(final long initialStartTime) {
            this.initialStartTime = initialStartTime;
        }

        public void setThroughout(final double unitsPerSec) {
            throughputInUnitsPerNsec = unitsPerSec / 1000000000.0;
            catchUpThroughputInUnitsPerNsec = catchUpRateMultiple * throughputInUnitsPerNsec;
        }

        public void setCatchupRateMultiple(final double multiple) {
            catchUpRateMultiple = multiple;
            catchUpThroughputInUnitsPerNsec = catchUpRateMultiple * throughputInUnitsPerNsec;
        }

        /**
         * @return the time for the next operation
         */
        public long expectedNextOperationNanoTime() {
            return initialStartTime + (long) (unitsCompleted / throughputInUnitsPerNsec);
        }

        public long nsecToNextOperation() {

            final long now = System.nanoTime();

            long nextStartTime = expectedNextOperationNanoTime();

            boolean sendNow = true;

            if (nextStartTime > now) {
                // We are on pace. Indicate caught_up and don't send now.}
                caughtUp = true;
                sendNow = false;
            } else {
                // We are behind
                if (caughtUp) {
                    // This is the first fall-behind since we were last caught up
                    caughtUp = false;
                    catchUpStartTime = now;
                    unitsCompletedAtCatchUpStart = unitsCompleted;
                }

                // Figure out if it's time to send, per catch up throughput:
                final long unitsCompletedSinceCatchUpStart =
                        unitsCompleted - unitsCompletedAtCatchUpStart;

                nextStartTime = catchUpStartTime +
                        (long) (unitsCompletedSinceCatchUpStart / catchUpThroughputInUnitsPerNsec);

                if (nextStartTime > now) {
                    // Not yet time to send, even at catch-up throughout:
                    sendNow = false;
                }
            }

            return sendNow ? 0 : (nextStartTime - now);
        }

        /**
         * Will wait for next operation time. After this the expectedNextOperationNanoTime() will move forward.
         *
         * @param unitCount
         */
        public void acquire(final long unitCount) {
            final long nsecToNextOperation = nsecToNextOperation();
            if (nsecToNextOperation > 0) {
                sleepNs(nsecToNextOperation);
            }
            unitsCompleted += unitCount;
        }

        private void sleepNs(final long ns) {
            long now = System.nanoTime();
            final long deadline = now + ns;
            while ((now = System.nanoTime()) < deadline) {
                idleStrategy.idle();
            }
        }
    }
}