/** * Copyright 2014 Sunny Gleason and original author or authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package io.kazuki.v0.internal.hash; import static io.kazuki.v0.internal.hash.LongHashMethods.gatherIntLE; import static io.kazuki.v0.internal.hash.LongHashMethods.gatherLongLE; import static io.kazuki.v0.internal.hash.LongHashMethods.gatherPartialIntLE; import static io.kazuki.v0.internal.hash.LongHashMethods.gatherPartialLongLE; import java.io.UnsupportedEncodingException; /** * MurmurHash implementation suitable for Bloom Filter usage. * * This is a very fast, non-cryptographic hash suitable for general hash-based lookup. See * http://murmurhash.googlepages.com/ for more details. * * <p> * The C version of MurmurHash 2.0 by Austin Appleby found at that site was ported to Java by * Andrzej Bialecki (ab at getopt org). * </p> * * HISTORY: Updated 2010/05/01 by TS; performance enhancements (use int arithmetic instead of long). * Functionality should be exactly the same, but created new class name for conservatism. */ public class MurmurHash implements LongHash { private final static long M_LONG = 0xc6a4a7935bd1e995L; private final static int R_LONG = 47; private final static int M_INT = 0x5bd1e995; private final static int R_INT = 24; private final static int R1_INT = 13; private final static int R2_INT = 15; /** @see LongHash#getMagic() */ @Override public byte[] getMagic() { return "__MRMR__".getBytes(); } /** @see LongHash#getName() */ @Override public String getName() { return this.getClass().getName(); } /** @see LongHash#getLongHashCode(String) */ @Override public long getLongHashCode(String object) { try { return computeMurmurLongHash(object.getBytes("UTF-8"), 0L); } catch (UnsupportedEncodingException e) { throw new IllegalStateException("Java doesn't recognize UTF-8?!"); } } /** @see LongHash#getLongHashCode(byte[]) */ @Override public long getLongHashCode(byte[] data) { return computeMurmurLongHash(data, 0L); } /** @see LongHash#getIntHashCode(String) */ @Override public int getIntHashCode(String object) { try { return computeMurmurIntHash(object.getBytes("UTF-8"), 0); } catch (UnsupportedEncodingException e) { throw new IllegalStateException("Java doesn't recognize UTF-8?!"); } } /** @see LongHash#getIntHashCode(byte[]) */ @Override public int getIntHashCode(byte[] data) { return computeMurmurIntHash(data, 0); } /** @see LongHash#getLongHashCodes(String, int) */ @Override public long[] getLongHashCodes(String object, int k) { if (k < 1) { throw new IllegalArgumentException("k must be >= 1"); } try { long[] hashCodes = new long[k]; byte[] representation = object.getBytes("UTF-8"); for (int i = 0; i < k; i++) { hashCodes[i] = computeMurmurLongHash(representation, i); } return hashCodes; } catch (UnsupportedEncodingException e) { throw new IllegalStateException("Java doesn't recognize UTF-8?!"); } } /** @see LongHash#getIntHashCodes(String, int) */ @Override public int[] getIntHashCodes(String object, int k) { if (k < 1) { throw new IllegalArgumentException("k must be >= 1"); } int[] hashCodes = new int[k]; try { byte[] representation = object.getBytes("UTF-8"); for (int i = 0; i < k; i++) { hashCodes[i] = computeMurmurIntHash(representation, i); } return hashCodes; } catch (UnsupportedEncodingException e) { throw new IllegalStateException("Java doesn't recognize UTF-8?!"); } } /** * Implementation of Murmur Hash, ported from 64-bit version. * * @param data * @param seed * @return */ public long computeMurmurLongHash(byte[] data, long seed) { final int len = data.length; long h = seed ^ len; int i = 0; for (int end = len - 8; i <= end; i += 8) { long k = gatherLongLE(data, i); k *= M_LONG; k ^= k >> R_LONG; k *= M_LONG; h ^= k; h *= M_LONG; } if (i < len) { h ^= gatherPartialLongLE(data, i, (len - i)); h *= M_LONG; } h ^= h >> R_LONG; h *= M_LONG; h ^= h >> R_LONG; return h; } /** * Implementation of Murmur Hash, ported from 32-bit version. * * @param data * @param seed * @return */ public int computeMurmurIntHash(byte[] data, int seed) { final int len = data.length; int h = seed ^ len; int i = 0; for (int end = len - 4; i <= end; i += 4) { int k = gatherIntLE(data, i); k *= M_INT; k ^= k >> R_INT; k *= M_INT; h *= M_INT; h ^= k; } if (i < len) { h ^= gatherPartialIntLE(data, i, (len - i)); h *= M_INT; } h ^= h >> R1_INT; h *= M_INT; h ^= h >> R2_INT; return h; } }