package org.streaminer.util.hash; import java.io.UnsupportedEncodingException; /** * * @author Maycon Viana Bordin <mayconbordin@gmail.com> */ public class HashUtils { public static final int MOD = 2147483647; public static final int HL = 31; /** * return a hash of x using a and b mod (2^31 - 1) may need to do another mod * afterwards, or drop high bits depending on d, number of bad guys * 2^31 - 1 = 2147483647 * @param a * @param b * @param x * @return */ public static long hash31(long a, long b, long x) { long result = (a * x) + b; result = ((result >> HL) + result) & MOD; return result; } /** * returns values that are 4-wise independent by repeated calls to the * pairwise indpendent routine. * @param a * @param b * @param c * @param d * @param x * @return */ public static long fourwise(long a, long b, long c, long d, long x) { long result = hash31(hash31(hash31(x,a,b),x,c),x,d); return result; } // Murmur is faster than an SHA-based approach and provides as-good collision // resistance. The combinatorial generation approach described in // https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf // does prove to work in actual tests, and is obviously faster // than performing further iterations of murmur. public static int[] getHashBuckets(String key, int hashCount, int max) { byte[] b; try { b = key.getBytes("UTF-16"); } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } return getHashBuckets(b, hashCount, max); } static int[] getHashBuckets(byte[] b, int hashCount, int max) { int[] result = new int[hashCount]; int hash1 = MurmurHash.getInstance().hash(b, b.length, 0); int hash2 = MurmurHash.getInstance().hash(b, b.length, hash1); for (int i = 0; i < hashCount; i++) { result[i] = Math.abs((hash1 + i * hash2) % max); } return result; } }