/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ignite.ml.math; import java.nio.ByteBuffer; import java.nio.ByteOrder; /** * This is a very fast, non-cryptographic hash suitable for general hash-based lookup. * <p> * See http://murmurhash.googlepages.com/ for mre details.</p> */ public class MurmurHash { /** Hide it. */ private MurmurHash() { } /** * This produces exactly the same hash values as the final C+ version of MurmurHash3 and is * thus suitable for producing the same hash values across platforms. * <p> * The 32 bit x86 version of this hash should be the fastest variant for relatively short keys like IDs.</p> * <p> * Note - The x86 and x64 versions do _not_ produce the same results, as the algorithms are * optimized for their respective platforms.</p> * <p> * See also http://github.com/yonik/java_util for future updates to this method.</p> * * @param data Data to hash. * @param off Where to start munging. * @param len How many bytes to process. * @param seed The seed to start with. * @return 32 bit hash platform compatible with C++ MurmurHash3 implementation on x86. */ public static int hash3X86(byte[] data, int off, int len, int seed) { int c1 = 0xcc9e2d51; int c2 = 0x1b873593; int h1 = seed; int roundedEnd = off + (len & 0xfffffffc); // Round down to 4 byte block. for (int i = off; i < roundedEnd; i += 4) { int k1 = (data[i] & 0xff) | ((data[i + 1] & 0xff) << 8) | ((data[i + 2] & 0xff) << 16) | (data[i + 3] << 24); k1 *= c1; k1 = (k1 << 15) | (k1 >>> 17); k1 *= c2; h1 ^= k1; h1 = (h1 << 13) | (h1 >>> 19); h1 = h1 * 5 + 0xe6546b64; } // Tail. int k1 = 0; switch (len & 0x03) { case 3: k1 = (data[roundedEnd + 2] & 0xff) << 16; // Fallthrough - WTF? case 2: k1 |= (data[roundedEnd + 1] & 0xff) << 8; // Fallthrough - WTF? case 1: k1 |= data[roundedEnd] & 0xff; k1 *= c1; k1 = (k1 << 15) | (k1 >>> 17); k1 *= c2; h1 ^= k1; default: } // Finalization. h1 ^= len; h1 ^= h1 >>> 16; h1 *= 0x85ebca6b; h1 ^= h1 >>> 13; h1 *= 0xc2b2ae35; h1 ^= h1 >>> 16; return h1; } /** * Hashes an int. * * @param data The int to hash. * @param seed The seed to start with. * @return The 32 bit hash of the bytes in question. */ public static int hash(int data, int seed) { byte[] arr = new byte[] { (byte)(data >>> 24), (byte)(data >>> 16), (byte)(data >>> 8), (byte)data }; return hash(ByteBuffer.wrap(arr), seed); } /** * Hashes bytes in an array. * * @param data The bytes to hash. * @param seed The seed to start with. * @return The 32 bit hash of the bytes in question. */ public static int hash(byte[] data, int seed) { return hash(ByteBuffer.wrap(data), seed); } /** * Hashes bytes in part of an array. * * @param data The data to hash. * @param off Where to start munging. * @param len How many bytes to process. * @param seed The seed to start with. * @return The 32-bit hash of the data in question. */ public static int hash(byte[] data, int off, int len, int seed) { return hash(ByteBuffer.wrap(data, off, len), seed); } /** * Hashes the bytes in a buffer from the current position to the limit. * * @param buf The bytes to hash. * @param seed The seed to start with. * @return The 32 bit murmur hash of the bytes in the buffer. */ public static int hash(ByteBuffer buf, int seed) { ByteOrder byteOrder = buf.order(); buf.order(ByteOrder.LITTLE_ENDIAN); int m = 0x5bd1e995; int r = 24; int h = seed ^ buf.remaining(); while (buf.remaining() >= 4) { int k = buf.getInt(); k *= m; k ^= k >>> r; k *= m; h *= m; h ^= k; } if (buf.remaining() > 0) { ByteBuffer finish = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN); finish.put(buf).rewind(); h ^= finish.getInt(); h *= m; } h ^= h >>> 13; h *= m; h ^= h >>> 15; buf.order(byteOrder); return h; } /** * @param data The data to hash. * @param seed The seed to start with. * @return Hash value for given data and seed. */ public static long hash64A(byte[] data, int seed) { return hash64A(ByteBuffer.wrap(data), seed); } /** * @param data The data to hash. * @param off Where to start munging. * @param len How many bytes to process. * @param seed The seed to start with. */ public static long hash64A(byte[] data, int off, int len, int seed) { return hash64A(ByteBuffer.wrap(data, off, len), seed); } /** * @param buf The data to hash. * @param seed The seed to start with. */ public static long hash64A(ByteBuffer buf, int seed) { ByteOrder byteOrder = buf.order(); buf.order(ByteOrder.LITTLE_ENDIAN); long m = 0xc6a4a7935bd1e995L; int r = 47; long h = seed ^ (buf.remaining() * m); while (buf.remaining() >= 8) { long k = buf.getLong(); k *= m; k ^= k >>> r; k *= m; h ^= k; h *= m; } if (buf.remaining() > 0) { ByteBuffer finish = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN); finish.put(buf).rewind(); h ^= finish.getLong(); h *= m; } h ^= h >>> r; h *= m; h ^= h >>> r; buf.order(byteOrder); return h; } }