/* * Copyright 2014 Ben Manes. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.github.benmanes.caffeine; import java.util.Arrays; import java.util.concurrent.ThreadLocalRandom; import net.openhft.koloboke.collect.impl.hash.LHashSeparateKVLongIntMapFactoryImpl; import net.openhft.koloboke.collect.map.hash.HashLongIntMap; import net.openhft.koloboke.collect.map.hash.HashLongIntMapFactory; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import com.github.benmanes.caffeine.base.UnsafeAccess; /** * A comparison of different lookup approaches for indexes for a slot in a fixed-sized shared array. * This approach is used for elimination (threads backoff and rendezvous) and striping (reduced * contention when recording an update). * <p> * The obvious approach is to store a slot in a ThreadLocal, which is a hashmap stored within a * {@link java.lang.Thread} object. The current implementation has a poor ops/s, making it a poor * choice if accessed frequently in tight code. * <p> * The next approach might be to use a table lookup keyed by the thread's id or the thread * instance's hashCode. A trick used by Java's concurrent adders is to use the thread local * random's probe and update it after each usage to avoid pinning to a slot. These approaches try * to be fast and provide a good distribution. However, according to the jemalloc paper a * round-robin assignment provides the best load balancing strategy. * <p> * A round-robin assignment requires a mapping of the thread's id to its slot. This can be * implemented multiple ways such as using binary search, a hash table, or a sparse array. * * @author ben.manes@gmail.com (Ben Manes) */ @State(Scope.Benchmark) public class SlotLookupBenchmark { static final int ARENA_SIZE = 2 << 6; static final int SPARSE_SIZE = 2 << 14; ThreadLocal<Integer> threadLocal; long element; long[] array; long probeOffset; long index; HashLongIntMap mapping; int[] sparse; @Setup public void setupThreadLocal() { threadLocal = ThreadLocal.withInitial(() -> { for (int i = 0; i < ARENA_SIZE; i++) { // Populates the internal hashmap to emulate other thread local usages ThreadLocal.withInitial(Thread.currentThread()::getId); } return selectSlot(ThreadLocalRandom.current().nextInt()); }); } @Setup public void setupBinarySearch() { array = new long[ARENA_SIZE]; element = ThreadLocalRandom.current().nextLong(ARENA_SIZE); for (int i = 0; i < ARENA_SIZE; i++) { array[i] = selectSlot(i); } Arrays.sort(array); } @Setup public void setupStriped64() { probeOffset = UnsafeAccess.objectFieldOffset(Thread.class, "threadLocalRandomProbe"); } @Setup public void setupHashing() { long[] keys = new long[ARENA_SIZE]; int[] values = new int[ARENA_SIZE]; for (int i = 0; i < ARENA_SIZE; i++) { keys[i] = i; values[i] = selectSlot(i); } HashLongIntMapFactory factory = new LHashSeparateKVLongIntMapFactoryImpl(); mapping = factory.newImmutableMap(keys, values); index = ThreadLocalRandom.current().nextInt(ARENA_SIZE); } @Setup public void setupSparseArray() { sparse = new int[SPARSE_SIZE]; for (int i = 0; i < SPARSE_SIZE; i++) { sparse[i] = selectSlot(i); } } @Benchmark public int threadLocal() { // Emulates holding the arena slot in a thread-local return threadLocal.get(); } @Benchmark public int binarySearch() { // Emulates finding the arena slot by a COW mapping of thread ids return Arrays.binarySearch(array, element); } @Benchmark public int hashing() { // Emulates finding the arena slot by a COW mapping the thread id to a slot index return mapping.get(index); } @Benchmark public int sparseArray() { // Emulates having a COW sparse array mapping the thread id to a slot location return sparse[(int) Thread.currentThread().getId()]; } @Benchmark public int threadIdHash() { // Emulates finding the arena slot by hashing the thread id long id = Thread.currentThread().getId(); int hash = (((int) (id ^ (id >>> 32))) ^ 0x811c9dc5) * 0x01000193; return selectSlot(hash); } @Benchmark public int threadHashCode() { // Emulates finding the arena slot by the thread's hashCode long id = Thread.currentThread().hashCode(); int hash = (((int) (id ^ (id >>> 32))) ^ 0x811c9dc5) * 0x01000193; return selectSlot(hash); } @Benchmark public long striped64() { // Emulates finding the arena slot by reusing the thread-local random seed (j.u.c.a.Striped64) int hash = getProbe(); if (hash == 0) { ThreadLocalRandom.current(); // force initialization hash = getProbe(); } advanceProbe(hash); int index = selectSlot(hash); return array[index]; } private int getProbe() { return UnsafeAccess.UNSAFE.getInt(Thread.currentThread(), probeOffset); } private void advanceProbe(int probe) { probe ^= probe << 13; // xorshift probe ^= probe >>> 17; probe ^= probe << 5; UnsafeAccess.UNSAFE.putInt(Thread.currentThread(), probeOffset, probe); } private static int selectSlot(int i) { return i & (ARENA_SIZE - 1); } }