/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.tdunning; import com.tdunning.math.stats.MergingDigest; import com.tdunning.math.stats.TDigest; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.results.format.ResultFormatType; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; import java.util.Random; import java.util.concurrent.TimeUnit; /** * Explores the value of using a large buffer for the MergingDigest. The rationale is that the internal * sort is extremely fast while the merging function in the t-digest can be quite slow, if only because * computing the asin function involved in the merge is expensive. This argues for collecting more samples * before sorting and merging them into the digest. */ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @Warmup(iterations = 3, time = 3, timeUnit = TimeUnit.SECONDS) @Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS) @Fork(1) @Threads(1) @State(Scope.Thread) public class MergeBench { private Random gen = new Random(); private double[] data; @Param({"50", "100", "200", "500"}) public int compression; @Param({"2", "5", "10", "20"}) public int factor; private TDigest td; @Setup public void setup() { data = new double[10000000]; for (int i = 0; i < data.length; i++) { data[i] = gen.nextDouble(); } td = new MergingDigest(compression, (factor + 1) * compression, 2 * compression); // First values are very cheap to add, we are more interested in the steady state, // when the summary is full. Summaries are expected to contain about 5*compression // centroids, hence the 5 factor for (int i = 0; i < 5 * compression; ++i) { td.add(gen.nextDouble()); } } @State(Scope.Thread) public static class ThreadState { int index = 0; } @Benchmark @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MICROSECONDS) public void add(ThreadState state) { if (state.index >= data.length) { state.index = 0; } td.add(data[state.index++]); } public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder() .include(MergeBench.class.getSimpleName()) .warmupIterations(5) .measurementIterations(5) .forks(1) .resultFormat(ResultFormatType.CSV) .build(); new Runner(opt).run(); } }