HashUtil.java example

Explorer
haze-master
- hazelcast-master
/*
 * Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.util;

import com.hazelcast.internal.memory.ByteAccessStrategy;
import com.hazelcast.internal.memory.MemoryAccessor;
import com.hazelcast.internal.memory.impl.EndiannessUtil;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

import java.util.Arrays;

import static com.hazelcast.internal.memory.GlobalMemoryAccessorRegistry.MEM;
import static com.hazelcast.util.Preconditions.checkPositive;
import static java.lang.Math.abs;

/**
 * Utility methods related to hash tables.
 * <p>
 * Provides MurmurHash3-style hash functions over byte arrays and over memory reached through a
 * {@link MemoryAccessor}, cheap multiplicative mixers for {@code int} and {@code long} keys, and helpers
 * that turn a hash into an array index or pick a key perturbation value for a given capacity.
 */
@SuppressFBWarnings({"SF_SWITCH_FALLTHROUGH", "SF_SWITCH_NO_DEFAULT"})
@SuppressWarnings({
        "checkstyle:magicnumber",
        "checkstyle:methodname",
        "checkstyle:fallthrough",
        "checkstyle:cyclomaticcomplexity",
        "checkstyle:booleanexpressioncomplexity",
        "checkstyle:methodlength"})
public final class HashUtil {

    private static final int MURMUR32_BLOCK_SIZE = 4;
    private static final int MURMUR64_BLOCK_SIZE = 16;
    private static final int DEFAULT_MURMUR_SEED = 0x01000193;

    /**
     * Perturbation values indexed by {@link Integer#numberOfLeadingZeros(int)} of a capacity. That method returns
     * values in the range {@code [0, 32]} (32 for a zero argument), so the table needs {@code Integer.SIZE + 1}
     * slots; with only {@code Integer.SIZE} slots a capacity of zero would index past the end of the array.
     */
    private static final int[] PERTURBATIONS = new int[Integer.SIZE + 1];

    private static final LoadStrategy<byte[]> BYTE_ARRAY_LOADER = new ByteArrayLoadStrategy();
    private static final LoadStrategy<MemoryAccessor> WIDE_DIRECT_LOADER = new WideDirectLoadStrategy();
    private static final LoadStrategy<MemoryAccessor> NARROW_DIRECT_LOADER = new NarrowDirectLoadStrategy();

    static {
        // Each slot gets the finalization mix of a distinct small integer (17, 18, 19, ...).
        final int primeDisplacement = 17;
        for (int i = 0; i < PERTURBATIONS.length; i++) {
            PERTURBATIONS[i] = MurmurHash3_fmix(primeDisplacement + i);
        }
    }

    private HashUtil() {
    }

    /**
     * Returns the MurmurHash3_x86_32 hash of a block inside a byte array.
     *
     * @param data   the array containing the block
     * @param offset index of the first byte of the block
     * @param len    number of bytes in the block
     * @return the 32-bit hash, computed with the default seed
     */
    public static int MurmurHash3_x86_32(byte[] data, int offset, int len) {
        // Only checked when assertions are enabled: the last index of the block must still be a valid int.
        final long endIndex = (long) offset + len - 1;
        assert endIndex >= Integer.MIN_VALUE && endIndex <= Integer.MAX_VALUE
                : String.format("offset %,d len %,d would cause int overflow", offset, len);
        return MurmurHash3_x86_32(BYTE_ARRAY_LOADER, data, offset, len, DEFAULT_MURMUR_SEED);
    }

    /**
     * Returns the {@code MurmurHash3_x86_32} hash of the memory block starting at {@code (base + offset)}, read
     * through the statically imported {@code GlobalMemoryAccessorRegistry.MEM} accessor.
     *
     * @param base   base address of the memory block
     * @param offset offset added to {@code base} to find the first byte of the block
     * @param len    number of bytes in the block
     * @return the 32-bit hash, computed with the default seed
     */
    public static int MurmurHash3_x86_32_direct(long base, int offset, int len) {
        return MurmurHash3_x86_32_direct(MEM, base, offset, len);
    }

    /**
     * Returns the {@code MurmurHash3_x86_32} hash of a memory block accessed by the provided {@link MemoryAccessor}.
     * The {@code MemoryAccessor} will be used to access {@code int}-sized data at addresses {@code (base + offset)},
     * {@code (base + offset + 4)}, etc. The caller must ensure that the {@code MemoryAccessor} supports it, especially
     * when {@code (base + offset)} is not guaranteed to be 4 byte-aligned.
     *
     * @param mem    the accessor used to read the memory block
     * @param base   base address of the memory block
     * @param offset offset added to {@code base} to find the first byte of the block
     * @param len    number of bytes in the block
     * @return the 32-bit hash, computed with the default seed
     */
    public static int MurmurHash3_x86_32_direct(MemoryAccessor mem, long base, int offset, int len) {
        // A big-endian accessor goes through the loader that assembles values via EndiannessUtil.read*L;
        // otherwise the accessor's own int/long loads are used directly.
        return MurmurHash3_x86_32(mem.isBigEndian() ? NARROW_DIRECT_LOADER : WIDE_DIRECT_LOADER,
                mem, base + offset, len, DEFAULT_MURMUR_SEED);
    }

    /**
     * Core of the 32-bit hash: consumes the input in 4-byte blocks, then the 1-3 remaining tail bytes, then applies
     * the finalization mix.
     *
     * @param loader   strategy used to read bytes and ints from {@code resource}
     * @param resource the object holding the data (passed through to {@code loader})
     * @param offset   position of the first byte of the block, as understood by {@code loader}
     * @param len      number of bytes in the block
     * @param seed     initial value of the hash state
     * @return the 32-bit hash
     */
    private static <R> int MurmurHash3_x86_32(LoadStrategy<R> loader, R resource, long offset, int len, int seed) {
        // (len & ~(MURMUR32_BLOCK_SIZE - 1)) is the length rounded down to the Murmur32 block size boundary
        final long tailStart = offset + (len & ~(MURMUR32_BLOCK_SIZE - 1));

        final int c1 = 0xcc9e2d51;
        final int c2 = 0x1b873593;

        int h1 = seed;

        for (long blockAddr = offset; blockAddr < tailStart; blockAddr += MURMUR32_BLOCK_SIZE) {
            // little-endian load order
            int k1 = loader.getInt(resource, blockAddr);
            k1 *= c1;
            k1 = Integer.rotateLeft(k1, 15);
            k1 *= c2;

            h1 ^= k1;
            h1 = Integer.rotateLeft(h1, 13);
            h1 = h1 * 5 + 0xe6546b64;
        }

        // tail: the cases intentionally fall through so that every remaining byte is folded into k1
        int k1 = 0;

        switch (len & 0x03) {
            case 3:
                k1 = (loader.getByte(resource, tailStart + 2) & 0xff) << 16;
                // fallthrough
            case 2:
                k1 |= (loader.getByte(resource, tailStart + 1) & 0xff) << 8;
                // fallthrough
            case 1:
                k1 |= loader.getByte(resource, tailStart) & 0xff;
                k1 *= c1;
                k1 = Integer.rotateLeft(k1, 15);
                k1 *= c2;
                h1 ^= k1;
                // fallthrough
            default:
        }

        // finalization
        h1 ^= len;
        h1 = MurmurHash3_fmix(h1);
        return h1;
    }

    /**
     * Returns the MurmurHash3_x64_64 hash of a block inside a byte array.
     *
     * @param data   the array containing the block
     * @param offset index of the first byte of the block
     * @param len    number of bytes in the block
     * @return the 64-bit hash, computed with the default seed
     */
    public static long MurmurHash3_x64_64(byte[] data, int offset, int len) {
        return MurmurHash3_x64_64(BYTE_ARRAY_LOADER, data, offset, len, DEFAULT_MURMUR_SEED);
    }

    /**
     * Returns the {@code MurmurHash3_x64_64} hash of the memory block starting at {@code (base + offset)}, read
     * through the statically imported {@code GlobalMemoryAccessorRegistry.MEM} accessor.
     *
     * @param base   base address of the memory block
     * @param offset offset added to {@code base} to find the first byte of the block
     * @param len    number of bytes in the block
     * @return the 64-bit hash, computed with the default seed
     */
    public static long MurmurHash3_x64_64_direct(long base, int offset, int len) {
        return MurmurHash3_x64_64_direct(MEM, base, offset, len);
    }

    /**
     * Returns the {@code MurmurHash3_x64_64} hash of a memory block accessed by the provided {@link MemoryAccessor}.
     * The {@code MemoryAccessor} will be used to access {@code long}-sized data at addresses {@code (base + offset)},
     * {@code (base + offset + 8)}, etc. The caller must ensure that the {@code MemoryAccessor} supports it, especially
     * when {@code (base + offset)} is not guaranteed to be 8 byte-aligned.
     *
     * @param mem    the accessor used to read the memory block
     * @param base   base address of the memory block
     * @param offset offset added to {@code base} to find the first byte of the block
     * @param len    number of bytes in the block
     * @return the 64-bit hash, computed with the default seed
     */
    public static long MurmurHash3_x64_64_direct(MemoryAccessor mem, long base, int offset, int len) {
        // Same loader selection as the 32-bit variant: big-endian accessors use the EndiannessUtil-based loader.
        return MurmurHash3_x64_64(mem.isBigEndian() ? NARROW_DIRECT_LOADER : WIDE_DIRECT_LOADER,
                mem, base + offset, len, DEFAULT_MURMUR_SEED);
    }

    /**
     * Computes the 64-bit hash with the default seed; see
     * {@link #MurmurHash3_x64_64(LoadStrategy, Object, long, int, int)}.
     */
    static <R> long MurmurHash3_x64_64(LoadStrategy<R> loader, R resource, long offset, int len) {
        return MurmurHash3_x64_64(loader, resource, offset, len, DEFAULT_MURMUR_SEED);
    }

    /**
     * Core of the 64-bit hash: consumes the input in 16-byte blocks (two {@code long}s each), then the 1-15
     * remaining tail bytes, then mixes and combines the two halves of the state.
     * <p>
     * Note that, unlike the 32-bit variant, the tail bytes are not masked with {@code 0xff}, so negative bytes are
     * sign-extended before being shifted into place. This is left untouched on purpose: changing it would change
     * the hash values this method produces.
     *
     * @param loader   strategy used to read bytes and longs from {@code resource}
     * @param resource the object holding the data (passed through to {@code loader})
     * @param offset   position of the first byte of the block, as understood by {@code loader}
     * @param len      number of bytes in the block
     * @param seed     value XOR-ed into both halves of the initial hash state
     * @return the 64-bit hash
     */
    static <R> long MurmurHash3_x64_64(LoadStrategy<R> loader, R resource, long offset, int len, final int seed) {

        // (len & ~(MURMUR64_BLOCK_SIZE - 1)) is the length rounded down to the Murmur64 block boundary
        final long tailStart = offset + (len & ~(MURMUR64_BLOCK_SIZE - 1));

        long h1 = 0x9368e53c2f6af274L ^ seed;
        long h2 = 0x586dcd208f7cd3fdL ^ seed;

        // Not constants: both multipliers are updated after every full block.
        long c1 = 0x87c37b91114253d5L;
        long c2 = 0x4cf5ad432745937fL;

        long k1;
        long k2;

        for (long blockAddr = offset; blockAddr < tailStart; blockAddr += MURMUR64_BLOCK_SIZE) {
            k1 = loader.getLong(resource, blockAddr);
            k2 = loader.getLong(resource, blockAddr + 8);
            // bmix(state);
            k1 *= c1;
            k1 = Long.rotateLeft(k1, 23);
            k1 *= c2;
            h1 ^= k1;
            h1 += h2;

            h2 = Long.rotateLeft(h2, 41);

            k2 *= c2;
            k2 = Long.rotateLeft(k2, 23);
            k2 *= c1;
            h2 ^= k2;
            h2 += h1;

            h1 = h1 * 3 + 0x52dce729;
            h2 = h2 * 3 + 0x38495ab5;

            c1 = c1 * 5 + 0x7b7d159c;
            c2 = c2 * 5 + 0x6bce6396;
        }

        k1 = 0;
        k2 = 0;

        // tail: every case intentionally falls through to the next one, so that all remaining bytes are folded
        // into k2 (bytes 8-14) and k1 (bytes 0-7) before the final block mix under "case 1"
        switch (len & 15) {
            case 15:
                k2 ^= (long) loader.getByte(resource, tailStart + 14) << 48;
                // fallthrough
            case 14:
                k2 ^= (long) loader.getByte(resource, tailStart + 13) << 40;
                // fallthrough
            case 13:
                k2 ^= (long) loader.getByte(resource, tailStart + 12) << 32;
                // fallthrough
            case 12:
                k2 ^= (long) loader.getByte(resource, tailStart + 11) << 24;
                // fallthrough
            case 11:
                k2 ^= (long) loader.getByte(resource, tailStart + 10) << 16;
                // fallthrough
            case 10:
                k2 ^= (long) loader.getByte(resource, tailStart + 9) << 8;
                // fallthrough
            case 9:
                k2 ^= loader.getByte(resource, tailStart + 8);
                // fallthrough

            case 8:
                k1 ^= (long) loader.getByte(resource, tailStart + 7) << 56;
                // fallthrough
            case 7:
                k1 ^= (long) loader.getByte(resource, tailStart + 6) << 48;
                // fallthrough
            case 6:
                k1 ^= (long) loader.getByte(resource, tailStart + 5) << 40;
                // fallthrough
            case 5:
                k1 ^= (long) loader.getByte(resource, tailStart + 4) << 32;
                // fallthrough
            case 4:
                k1 ^= (long) loader.getByte(resource, tailStart + 3) << 24;
                // fallthrough
            case 3:
                k1 ^= (long) loader.getByte(resource, tailStart + 2) << 16;
                // fallthrough
            case 2:
                k1 ^= (long) loader.getByte(resource, tailStart + 1) << 8;
                // fallthrough
            case 1:
                k1 ^= loader.getByte(resource, tailStart);

                // bmix();
                k1 *= c1;
                k1 = Long.rotateLeft(k1, 23);
                k1 *= c2;
                h1 ^= k1;
                h1 += h2;

                h2 = Long.rotateLeft(h2, 41);

                k2 *= c2;
                k2 = Long.rotateLeft(k2, 23);
                k2 *= c1;
                h2 ^= k2;
                h2 += h1;

                h1 = h1 * 3 + 0x52dce729;
                h2 = h2 * 3 + 0x38495ab5;
                // fallthrough
            default:
        }

        h2 ^= len;

        h1 += h2;
        h2 += h1;

        h1 = MurmurHash3_fmix(h1);
        h2 = MurmurHash3_fmix(h2);

        return h1 + h2;
    }

    /**
     * Applies the 32-bit finalization mix used at the end of {@code MurmurHash3_x86_32}: alternating
     * xor-shift and multiply steps.
     *
     * @param k the value to mix
     * @return the mixed value
     */
    public static int MurmurHash3_fmix(int k) {
        k ^= k >>> 16;
        k *= 0x85ebca6b;
        k ^= k >>> 13;
        k *= 0xc2b2ae35;
        k ^= k >>> 16;
        return k;
    }

    /**
     * Applies the 64-bit finalization mix used at the end of {@code MurmurHash3_x64_64}: alternating
     * xor-shift and multiply steps.
     *
     * @param k the value to mix
     * @return the mixed value
     */
    public static long MurmurHash3_fmix(long k) {
        k ^= k >>> 33;
        k *= 0xff51afd7ed558ccdL;
        k ^= k >>> 33;
        k *= 0xc4ceb9fe1a85ec53L;
        k ^= k >>> 33;
        return k;
    }

    /**
     * Hash function based on Knuth's multiplicative method. This version is faster than using Murmur hash but provides
     * acceptable behavior.
     *
     * @param k the long for which the hash will be calculated
     * @return the hash
     */
    public static long fastLongMix(long k) {
        // phi = 2^64 / goldenRatio
        final long phi = 0x9E3779B97F4A7C15L;
        long h = k * phi;
        h ^= h >>> 32;
        return h ^ (h >>> 16);
    }

    /**
     * Hash function based on Knuth's multiplicative method. This version is faster than using Murmur hash but provides
     * acceptable behavior.
     *
     * @param k the integer for which the hash will be calculated
     * @return the hash
     */
    public static int fastIntMix(int k) {
        // phi = 2^32 / goldenRatio
        final int phi = 0x9E3779B9;
        final int h = k * phi;
        return h ^ (h >>> 16);
    }

    /**
     * Hash code for multiple objects using {@link Arrays#hashCode(Object[])}.
     *
     * @param objects the objects to hash
     * @return the combined hash code
     */
    public static int hashCode(Object... objects) {
        return Arrays.hashCode(objects);
    }

    /**
     * A function that calculates the index (e.g. to be used in an array/list) for a given hash. The returned value will always
     * be equal or larger than 0 and will always be smaller than 'length'.
     *
     * The reason this function exists is to deal correctly with negative and especially the Integer.MIN_VALUE; since that can't
     * be used safely with a Math.abs function.
     *
     * @param hash   the hash to map to an index; may be any {@code int}, including negative values
     * @param length the length of the array/list
     * @return the mod of the hash
     * @throws IllegalArgumentException if length is smaller than 1.
     */
    public static int hashToIndex(int hash, int length) {
        checkPositive(length, "mod must be larger than 0");

        // Math.abs(Integer.MIN_VALUE) is still negative, so that single value is mapped to 0 explicitly
        final int nonNegativeHash = hash == Integer.MIN_VALUE ? 0 : abs(hash);
        return nonNegativeHash % length;
    }

    /**
     * Computes the key perturbation value applied before hashing. The returned value
     * should be non-zero and different for each capacity. This matters because
     * keys are nearly ordered by hashcode so when adding one container's
     * values to the other, the number of collisions can skyrocket into the worst case
     * possible.
     * <p>
     * If it is known that hash containers will not be added to each other
     * (will be used for counting only, for example) then some speed can be gained by
     * not perturbing keys before hashing and returning a value of zero for all possible
     * capacities. The speed gain is a result of faster rehash operation (keys are mostly
     * in order).
     * <p>
     * The value is looked up by the number of leading zero bits of {@code capacity}, so all capacities with the
     * same highest set bit share one perturbation value; a capacity of zero has its own slot.
     *
     * @param capacity the capacity of the hash container
     * @return the perturbation value for that capacity
     */
    public static int computePerturbationValue(int capacity) {
        return PERTURBATIONS[Integer.numberOfLeadingZeros(capacity)];
    }

    /**
     * Read-side abstraction over the data being hashed: on top of the byte access defined by
     * {@link ByteAccessStrategy} it adds {@code int}- and {@code long}-sized loads.
     *
     * @param <R> type of the resource the data is read from
     */
    abstract static class LoadStrategy<R> implements ByteAccessStrategy<R> {

        /** Loads the {@code int} located at {@code offset} in {@code resource}. */
        abstract int getInt(R resource, long offset);

        /** Loads the {@code long} located at {@code offset} in {@code resource}. */
        abstract long getLong(R resource, long offset);

        @Override
        public final void putByte(R resource, long offset, byte value) {
            // Intentionally empty: load strategies are only used for reading, so writes are silently ignored.
        }
    }

    /** Reads from a {@code byte[]}; multi-byte values are assembled by {@code EndiannessUtil.readIntL/readLongL}. */
    private static final class ByteArrayLoadStrategy extends LoadStrategy<byte[]> {

        @Override
        public int getInt(byte[] buf, long offset) {
            return EndiannessUtil.readIntL(this, buf, offset);
        }

        @Override
        public long getLong(byte[] buf, long offset) {
            return EndiannessUtil.readLongL(this, buf, offset);
        }

        @Override
        public byte getByte(byte[] buf, long offset) {
            return buf[(int) offset];
        }
    }

    /** Reads through a {@link MemoryAccessor} using the accessor's own {@code int}/{@code long}/{@code byte} loads. */
    private static final class WideDirectLoadStrategy extends LoadStrategy<MemoryAccessor> {

        @Override
        public int getInt(MemoryAccessor mem, long offset) {
            return mem.getInt(offset);
        }

        @Override
        public long getLong(MemoryAccessor mem, long offset) {
            return mem.getLong(offset);
        }

        @Override
        public byte getByte(MemoryAccessor mem, long offset) {
            return mem.getByte(offset);
        }
    }

    /**
     * Reads through a {@link MemoryAccessor} one byte at a time; multi-byte values are assembled by
     * {@code EndiannessUtil.readIntL/readLongL} from this strategy's {@link #getByte(MemoryAccessor, long)}.
     */
    private static final class NarrowDirectLoadStrategy extends LoadStrategy<MemoryAccessor> {

        @Override
        public int getInt(MemoryAccessor mem, long offset) {
            return EndiannessUtil.readIntL(this, mem, offset);
        }

        @Override
        public long getLong(MemoryAccessor mem, long offset) {
            return EndiannessUtil.readLongL(this, mem, offset);
        }

        @Override
        public byte getByte(MemoryAccessor mem, long offset) {
            return mem.getByte(offset);
        }
    }
}