/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.basis.util; import java.util.concurrent.TimeUnit; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.Threads; import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; /** * Simple benchmark that mostly serves as an example. It measures the throughput of several possible * methods of bit reversal. The default settings provided in the annotations are super-fast/ rough, * and it is also worth noting that the added memory requirements of the Bytes.reverseBits() method * are not accounted for -- the CPU cache misses and flushes produced by it are outside of the scope * of these benchmarks. That said, depending on your JVM, hardware, etc, you may discover that a few * of the other methods that do not impose ambiguous memory costs also perform better. This is an * operation that already performs insanely fast, but it is a good lesson in unexpected performance * properties. */ @BenchmarkMode(Mode.Throughput) // measure as ops/ time_unit @OutputTimeUnit(TimeUnit.MICROSECONDS) // time_unit is microseconds @Warmup(iterations = 2, time = 1, timeUnit = TimeUnit.SECONDS) // how long to warm up the jvm @Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) // how many runs to average over @Fork(1) // how many JVM forks per test; measurements are run per fork @Threads(1) // how many threads to run concurrently; thread count is per test -- not shared @State(Scope.Thread) // treat this enclosing class as a State object that can be used in tests public class BitReversals { /** * To run this benchmark, do 'mvn clean package' from the bench directory, and then either * * use the default JMH main class (it takes a regex of benchmark names): * 'java -jar target/microbenchmarks.jar ".*BitReversals.*"' * * call this main method instead or use the code therein to start it programmatically * eg. 'java -cp target/microbenchmarks.jar com.addthis.basis.util.BitReversals' */ public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder() .include(".*" + BitReversals.class.getSimpleName() + ".*") .build(); new Runner(opt).run(); } /** our only actual state is this int we will increment to provide a bit of input variety */ int inty = 0; /** * Each method marked with @Benchmark becomes a separate benchmark with its own warm ups, * measurements, forks, threads, and states. The static methods could be inlined -- the * extra method call in the first two would likely be inlined by the jit near instantly * anyway. However, this was easier to test and organize at the time. */ @Benchmark public int reverseJdk() { inty += 1; return Integer.reverse(inty); } @Benchmark public int reverseBasis() { inty += 1; return Bytes.reverseBits(inty); } @Benchmark public int stackReverse() { inty += 1; return stackOverflowMethod(inty); } @Benchmark public int stanfordParallelReverse() { inty += 1; return stanfordParallelMethod(inty); } @Benchmark public int stanfordSevenOpReverse() { inty += 1; return stanfordSevenOpMethod(inty); } @Benchmark public int stanfordMultiplyReverse() { inty += 1; return stanfordMultiplyMethod(inty); } @Benchmark public int loopReverse() { inty += 1; return loopMethod(inty); } public static int loopMethod(int v1) { int s = 32; int mask = ~0; while ((s >>= 1) > 0) { mask ^= (mask << s); v1 = ((v1 >> s) & mask) | ((v1 << s) & ~mask); } return v1; } public static int stanfordSevenOpMethod(int v1) { int out = ((( (((v1 & 0xFF) * 0x0802) & 0x22110) | (((v1 & 0xFF) * 0x08020) & 0x88440)) * 0x10101) & 0xFF0000) << 8; out |= (( ((((v1 >>> 8) & 0xFF) * 0x0802) & 0x22110) | ((((v1 >>> 8) & 0xFF) * 0x08020) & 0x88440)) * 0x10101) & 0xFF0000; out |= ((( ((((v1 >>> 16) & 0xFF) * 0x0802) & 0x22110) | ((((v1 >>> 16) & 0xFF) * 0x08020) & 0x88440)) * 0x10101) & 0xFF0000) >>> 8; out |= ((( ((((v1 >>> 24) & 0xFF) * 0x0802) & 0x22110) | ((((v1 >>> 24) & 0xFF) * 0x0802) & 0x88440)) * 0x10101) & 0xFF0000) >>> 16; return out; } public static int stanfordMultiplyMethod(long v1) { return (int) (((((((v1 & 0xFF) * 0x80200802L) & 0x0884422110L) * 0x0101010101L) & 0xFF00000000L) >>> 8) | (((((((v1 >>> 8) & 0xFF) * 0x80200802L) & 0x0884422110L) * 0x0101010101L) & 0xFF00000000L) >>> 16) | (((((((v1 >>> 16) & 0xFF) * 0x80200802L) & 0x0884422110L) * 0x0101010101L) & 0xFF00000000L) >>> 24) | (((((((v1 >>> 24) & 0xFF) * 0x80200802L) & 0x0884422110L) * 0x0101010101L) & 0xFF00000000L) >>> 32)); } public static int stanfordParallelMethod(int v) { // swap odd and even bits v = ((v >>> 1) & 0x55555555) | ((v & 0x55555555) << 1); // swap consecutive pairs v = ((v >>> 2) & 0x33333333) | ((v & 0x33333333) << 2); // swap nibbles ... v = ((v >>> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); // swap bytes v = ((v >>> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8); // swap 2-byte long pairs v = (v >>> 16) | (v << 16); return v; } public static int stackOverflowMethod(int x) { x = (x & 0x55555555) << 1 | (x & 0xAAAAAAAA) >>> 1; x = (x & 0x33333333) << 2 | (x & 0xCCCCCCCC) >>> 2; x = (x & 0x0F0F0F0F) << 4 | (x & 0xF0F0F0F0) >>> 4; x = (x & 0x00FF00FF) << 8 | (x & 0xFF00FF00) >>> 8; x = (x & 0x0000FFFF) << 16 | (x & 0xFFFF0000) >>> 16; return x; } }