/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.utils;
import java.nio.ByteBuffer;
import java.util.Random;
import sun.misc.Unsafe;
/**
* This is a very fast, non-cryptographic hash suitable for general hash-based
* lookup. See http://murmurhash.googlepages.com/ for more details.
*
* <p>
* The C version of MurmurHash 2.0 found at that site was ported to Java by
* Andrzej Bialecki (ab at getopt org).
* </p>
*/
public class MurmurHash
{
public static int hash32(ByteBuffer data, int offset, int length, int seed)
{
int m = 0x5bd1e995;
int r = 24;
int h = seed ^ length;
int len_4 = length >> 2;
for (int i = 0; i < len_4; i++)
{
int i_4 = i << 2;
int k = data.get(offset + i_4 + 3);
k = k << 8;
k = k | (data.get(offset + i_4 + 2) & 0xff);
k = k << 8;
k = k | (data.get(offset + i_4 + 1) & 0xff);
k = k << 8;
k = k | (data.get(offset + i_4 + 0) & 0xff);
k *= m;
k ^= k >>> r;
k *= m;
h *= m;
h ^= k;
}
// avoid calculating modulo
int len_m = len_4 << 2;
int left = length - len_m;
if (left != 0)
{
if (left >= 3)
{
h ^= (int) data.get(offset + length - 3) << 16;
}
if (left >= 2)
{
h ^= (int) data.get(offset + length - 2) << 8;
}
if (left >= 1)
{
h ^= (int) data.get(offset + length - 1);
}
h *= m;
}
h ^= h >>> 13;
h *= m;
h ^= h >>> 15;
return h;
}
public static long hash64(ByteBuffer key, int offset, int length, long seed)
{
long m64 = 0xc6a4a7935bd1e995L;
int r64 = 47;
long h64 = (seed & 0xffffffffL) ^ (m64 * length);
int lenLongs = length >> 3;
for (int i = 0; i < lenLongs; ++i)
{
int i_8 = i << 3;
long k64 = ((long) key.get(offset+i_8+0) & 0xff) + (((long) key.get(offset+i_8+1) & 0xff)<<8) +
(((long) key.get(offset+i_8+2) & 0xff)<<16) + (((long) key.get(offset+i_8+3) & 0xff)<<24) +
(((long) key.get(offset+i_8+4) & 0xff)<<32) + (((long) key.get(offset+i_8+5) & 0xff)<<40) +
(((long) key.get(offset+i_8+6) & 0xff)<<48) + (((long) key.get(offset+i_8+7) & 0xff)<<56);
k64 *= m64;
k64 ^= k64 >>> r64;
k64 *= m64;
h64 ^= k64;
h64 *= m64;
}
int rem = length & 0x7;
switch (rem)
{
case 0:
break;
case 7:
h64 ^= (long) key.get(offset + length - rem + 6) << 48;
case 6:
h64 ^= (long) key.get(offset + length - rem + 5) << 40;
case 5:
h64 ^= (long) key.get(offset + length - rem + 4) << 32;
case 4:
h64 ^= (long) key.get(offset + length - rem + 3) << 24;
case 3:
h64 ^= (long) key.get(offset + length - rem + 2) << 16;
case 2:
h64 ^= (long) key.get(offset + length - rem + 1) << 8;
case 1:
h64 ^= (long) key.get(offset + length - rem);
h64 *= m64;
}
h64 ^= h64 >>> r64;
h64 *= m64;
h64 ^= h64 >>> r64;
return h64;
}
private static final Unsafe unsafe = JavaInternals.getUnsafe();
private static final int byteBase = unsafe.arrayBaseOffset(byte[].class);
public static long hash64u(byte[] key, int offset, int length, long seed)
{
long m64 = 0xc6a4a7935bd1e995L;
int r64 = 47;
long h64 = (seed & 0xffffffffL) ^ (m64 * length);
int lenLongs = length >> 3;
long ofs = byteBase + offset;
for (int i = 0; i < lenLongs; ++i)
{
long k64 = unsafe.getLong(key, ofs);
ofs+=8;
k64 *= m64;
k64 ^= k64 >>> r64;
k64 *= m64;
h64 ^= k64;
h64 *= m64;
}
int rem = length & 0x7;
if (rem!=0) {
h64 = xorBytes(key, rem, h64, rem << 3);
h64 *= m64;
}
h64 ^= h64 >>> r64;
h64 *= m64;
h64 ^= h64 >>> r64;
return h64;
}
private static final int charBase = unsafe.arrayBaseOffset(char[].class);
private static final int charScale = unsafe.arrayIndexScale(char[].class);
private static final long stringValueFieldOffset = JavaInternals.fieldOffset(String.class, "value");
public static long hash64u(String key, long seed)
{
return hash64u(key, ByteBufferUtil.EMPTY_BYTES, seed);
}
public static long hash64u(String key1,byte[] key2, long seed)
{
return hash64u( (char[]) unsafe.getObject(key1, stringValueFieldOffset), key2, seed);
}
public static long hash64u(char[] key1, byte[] key2, long seed)
{
long m64 = 0xc6a4a7935bd1e995L;
int r64 = 47;
final int length1 = key1.length * charScale;
int length2 = key2.length;
long h64 = (seed & 0xffffffffL) ^ (m64 * (length1 + length2) );
int lenLongs = length1 >> 3;
long ofs = charBase;
long k64;
for (int i = 0; i < lenLongs; ++i)
{
k64 = unsafe.getLong(key1, ofs);
// fixing wrong byte order of generated char
k64 = ( (k64 << 8) & 0xFF00FF00FF00FF00l ) | ( ( k64 >> 8 ) & 0x00FF00FF00FF00FFl ) ;
ofs+=8;
k64 *= m64;
k64 ^= k64 >>> r64;
k64 *= m64;
h64 ^= k64;
h64 *= m64;
}
int rem = length1 & 0x7;
if (rem != 0) {
if (rem+length2 > 7) {
// combining long from first and then second arrays
k64 = orBytes(key2, byteBase, 8-rem, 0);
k64 = ( (k64 << 8) & 0xFF00FF00FF00FF00l ) | ( ( k64 >> 8 ) & 0x00FF00FF00FF00FFl ) ;
k64 = orBytes(key1, ofs, rem, k64);
// fixing wrong byte order of generated char
k64 = ( (k64 << 8) & 0xFF00FF00FF00FF00l ) | ( ( k64 >> 8 ) & 0x00FF00FF00FF00FFl ) ;
rem = 8 - rem;
k64 *= m64;
k64 ^= k64 >>> r64;
k64 *= m64;
h64 ^= k64;
h64 *= m64;
} else {
// last long
h64 = xorBytes(key2, length2, h64, (rem + length2 ) << 3);
h64 = xorChars(key1, rem, h64, rem << 3);
h64 *= m64;
h64 ^= h64 >>> r64;
h64 *= m64;
h64 ^= h64 >>> r64;
return h64;
}
}
ofs = byteBase + rem;
length2-=rem;
lenLongs = length2 >> 3;
for (int i = 0; i < lenLongs; ++i)
{
k64 = unsafe.getLong(key2, ofs);
ofs+=8;
k64 *= m64;
k64 ^= k64 >>> r64;
k64 *= m64;
h64 ^= k64;
h64 *= m64;
}
rem = length2 & 0x7;
if (rem!=0) {
h64 = xorBytes(key2, rem, h64, rem << 3);
h64 *= m64;
}
h64 ^= h64 >>> r64;
h64 *= m64;
h64 ^= h64 >>> r64;
return h64;
}
private static long orBytes(Object o, long ofs, int rem, long r) {
ofs+=rem;
while (rem-->0) {
r = ( r << 8 ) | ( unsafe.getByte(o,--ofs) & 0xFFl );
}
return r;
}
private static long xorBytes(byte[] o, int rem, long r, int shift) {
int ofs= o.length ;
while (rem-->0)
{
shift -= 8;
r ^= (long) o[--ofs] << shift;
}
return r;
}
private static long xorChars(char[] o, int rem, long r, int shift) {
int i = o.length << 1;
while (rem -->0) {
char c = o[ --i >> 1 ];
long b = (i & 1) == 0 ? (byte) ( (c & 0xFF00) >>8) : (byte) (c & 0xFF);
shift -= 8;
r ^= b << shift;
}
return r;
}
public static ByteBuffer toByteBuffer(String s, ByteBuffer byteBuffer)
{
int strLen=s.length()*2;
for (int i=s.length(),j=strLen;i-->0;)
{
char c = s.charAt(i);
byte b1 = (byte) (c & 0xFF);
byteBuffer.put(--j,b1);
byte b2 = (byte) ( (c & 0xFF00) >>8);
byteBuffer.put(--j,b2);
}
return byteBuffer;
}
public static void main(String[] args) {
Random random = new Random();
String das = "а роза лежала на шапке азора, lazy dog jumps over the greedy fox";
// String das = "0123456789012345";//6789012345678901234567890123456789";
int i;
for (i=0;i<100000;i++) {
int l;
l = random.nextInt(2560);
byte[] b = new byte[l];
if (l>0)
random.nextBytes(b);
// byte b[] = { (byte)-1 };
long l1 = MurmurHash.hash64(ByteBuffer.wrap(b), 0, b.length, 0);
long l2 = MurmurHash.hash64u("",b, 0);
if (l1!=l2) {
System.out.println(String.format("Fuckshit %s!=%s",l1,l2));
}
l = random.nextInt(das.length());
String s = das.substring(l);
ByteBuffer bb = ByteBuffer.allocate(s.length()*2 + b.length);
toByteBuffer(s, bb);
bb.position( s.length() * 2);
bb.put(b);
bb.limit(bb.position());
bb.position(0);
l1 = MurmurHash.hash64(bb, bb.position(), bb.remaining(), 0);
l2 = MurmurHash.hash64u(s,b, 0);
if (l1!=l2) {
System.out.println(String.format("%s: Fuckstring %s!=%s. strlen = %s, bytelen = %s",i,l1,l2,s.length(), Integer.toHexString( b.length )));
}
}
System.out.println(String.format("i=%s",i));
}
}