/**
 *  Copyright 2011 LiveRamp
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package com.liveramp.hank.hasher;

import com.liveramp.commons.util.BytesUtils;

import java.nio.ByteBuffer;

/**
 * This is a very fast, non-cryptographic hash suitable for general hash-based
 * lookup. See http://murmurhash.googlepages.com/ for more details.
 */
public final class Murmur64Hasher implements Hasher {

  /**
   * Note that we use a random initial seed here so that we are unlikely to get
   * the "same" MurmurHash as anyone else. (For a while, we were mistakenly
   * using the same exact hash in the Partitioner as the Hasher, leading to very
   * strange behavior.)
   */
  private static final int INITIAL_SEED = 952336;

  /**
   * Computes a 64-bit hash of {@code length} bytes of {@code data}, starting
   * at index {@code off}.
   *
   * @param data   array holding the bytes to hash
   * @param off    index of the first byte to hash
   * @param length number of bytes to hash
   * @param seed   seed mixed into the initial hash state
   * @return the 64-bit hash value
   */
  public static long murmurHash64(final byte[] data, final int off, final int length, final int seed) {
    final long m = 0xc6a4a7935bd1e995L;
    final int r = 47;

    // The int seed is widened to long (sign-extended) before mixing. This is
    // kept as-is on purpose: altering it would change every produced hash.
    long h = seed ^ (length * m);

    // The input is consumed in 8-byte groups; the 0-7 leftover bytes are
    // folded in separately below.
    final int remainder = length & 7;
    final int end = off + length - remainder;

    for (int i = off; i < end; i += 8) {
      // Assemble the 8 bytes into a long, least significant byte first. The
      // sign extension of the first byte read is shifted out by the end.
      long k = data[i + 7];
      k = k << 8;
      k = k | (data[i + 6] & 0xff);
      k = k << 8;
      k = k | (data[i + 5] & 0xff);
      k = k << 8;
      k = k | (data[i + 4] & 0xff);
      k = k << 8;
      k = k | (data[i + 3] & 0xff);
      k = k << 8;
      k = k | (data[i + 2] & 0xff);
      k = k << 8;
      k = k | (data[i + 1] & 0xff);
      k = k << 8;
      k = k | (data[i + 0] & 0xff);

      k *= m;
      k ^= k >>> r;
      k *= m;

      h ^= k;
      h *= m;
    }

    // Every case intentionally falls through to the next, so all leftover
    // bytes are mixed in; the final multiply runs only when remainder > 0.
    switch (remainder) {
      case 7:
        h ^= (long) (data[end + 6] & 0xff) << 48;
        // fall through
      case 6:
        h ^= (long) (data[end + 5] & 0xff) << 40;
        // fall through
      case 5:
        h ^= (long) (data[end + 4] & 0xff) << 32;
        // fall through
      case 4:
        h ^= (long) (data[end + 3] & 0xff) << 24;
        // fall through
      case 3:
        h ^= (long) (data[end + 2] & 0xff) << 16;
        // fall through
      case 2:
        h ^= (long) (data[end + 1] & 0xff) << 8;
        // fall through
      case 1:
        h ^= (long) (data[end] & 0xff);
        h *= m;
    }

    // Final avalanche.
    h ^= h >>> r;
    h *= m;
    h ^= h >>> r;

    return h;
  }

  /**
   * Hashes the whole array using the class's default seed.
   *
   * @param data bytes to hash
   * @return the 64-bit hash value
   */
  public static long murmurHash64(final byte[] data) {
    return murmurHash64(data, 0, data.length, INITIAL_SEED);
  }

  /**
   * Hashes the remaining bytes of {@code data} using the class's default seed.
   * The buffer's position and limit are not modified.
   *
   * @param data buffer whose remaining bytes are hashed
   * @return the 64-bit hash value
   */
  public static long murmurHash64(final ByteBuffer data) {
    return murmurHash64(data, INITIAL_SEED);
  }

  /**
   * Hashes the remaining bytes of {@code data} (from its position up to its
   * limit) with the given seed. The buffer's position and limit are not
   * modified.
   *
   * <p>Buffers with an accessible backing array are hashed in place. Buffers
   * without one (for example direct or read-only buffers) are hashed through a
   * temporary copy of their remaining bytes.
   *
   * @param data buffer whose remaining bytes are hashed
   * @param seed seed mixed into the initial hash state
   * @return the 64-bit hash value
   */
  public static long murmurHash64(final ByteBuffer data, final int seed) {
    if (data.hasArray()) {
      return murmurHash64(data.array(), data.arrayOffset() + data.position(), data.remaining(), seed);
    }
    // array() would throw here, so read the bytes through a duplicate, which
    // has its own position and leaves the caller's buffer untouched.
    final byte[] copy = new byte[data.remaining()];
    data.duplicate().get(copy);
    return murmurHash64(copy, 0, copy.length, seed);
  }

  /**
   * Writes a {@code hashSize}-byte hash of {@code value} into the first
   * {@code hashSize} bytes of {@code hashBytes}.
   *
   * <p>Each complete 8-byte group is the big-endian encoding of a 64-bit hash
   * of {@code value}. The first group is seeded with the class's default seed,
   * and each later hash is seeded with the low 32 bits of the previous one. If
   * {@code hashSize} is not a multiple of 8, the trailing bytes are the most
   * significant bytes of one more hash in the chain.
   *
   * @param value     buffer whose remaining bytes are hashed
   * @param hashSize  number of hash bytes to produce
   * @param hashBytes destination array; must hold at least {@code hashSize} bytes
   */
  @Override
  public void hash(ByteBuffer value, int hashSize, byte[] hashBytes) {
    int seed = INITIAL_SEED;
    long hashValue;

    for (int i = 0; i <= hashSize - 8; i += 8) {
      hashValue = murmurHash64(value, seed);
      seed = (int) hashValue;
      // Big-endian: most significant byte goes to the lowest index.
      for (int b = 0; b < 8; ++b) {
        hashBytes[i + b] = (byte) (hashValue >> (56 - 8 * b));
      }
    }

    int shortHashBytes = hashSize % 8;
    if (shortHashBytes > 0) {
      hashValue = murmurHash64(value, seed);
      // Keep only the top shortHashBytes bytes of the hash, filling from the
      // last output position backwards.
      final int base = hashSize - shortHashBytes;
      for (int b = shortHashBytes - 1; b >= 0; --b) {
        hashBytes[base + b] = (byte) (hashValue >> (56 - 8 * b));
      }
    }
  }

  @Override
  public String toString() {
    return Murmur64Hasher.class.getSimpleName();
  }

  /**
   * Command-line entry point: hashes the value given as a hex string in
   * {@code args[0]} to {@code args[1]} bytes and prints the result as a hex
   * string on standard output.
   *
   * @param args {@code args[0]} is the hex-encoded input, {@code args[1]} the
   *             hash size in bytes
   */
  public static void main(String[] args) {
    if (args.length < 2) {
      System.err.println("Usage: " + Murmur64Hasher.class.getSimpleName() + " <hex string> <hash size in bytes>");
      System.exit(1);
    }
    int hashSize = Integer.parseInt(args[1]);
    byte[] result = new byte[hashSize];
    new Murmur64Hasher().hash(BytesUtils.hexStringToBytes(args[0]), hashSize, result);
    System.out.println(BytesUtils.bytesToHexString(ByteBuffer.wrap(result)));
  }
}