package brickhouse.udf.collect;
/**
* This code is public domain.
*
* MurmurHash3 was written by Austin Appleby and put into the
* public domain. The author hereby disclaims copyright to this
* source code. See http://code.google.com/p/smhasher
*
* The java port for MurmurHash3 found here was authored by
* Yonik Seeley and was placed into the public domain per
* https://github.com/yonik/java_util/blob/master/src/util/hash/MurmurHash3.java
*
* This MurmurHash3 Hive UDF was authored by Vangie Shue
* and is placed in the public domain.
*
**/
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
/**
* Evaluates the 32 bit x86 version of MurmurHash3 of Text input.
* Passing a seed value is optional, the default seed used is 1.
* Offset is set to 0.
*/
public class MurmurHash3UDF extends UDF {
public Integer evaluate(Text input) {
if (input == null) {
return null;
}
return hash_str(input.toString());
}
public Integer evaluate(Text input, IntWritable seed) {
if (input == null) {
return null;
}
return hash_str(input.toString(), seed.get());
}
private static int hash_str(String item) {
// Offset: 0
// Seed: 1
return mhash(item.getBytes(), 0, item.length(), 1);
}
private static int hash_str(String item, int seed) {
// Offset: 0
return mhash(item.getBytes(), 0, item.length(), seed);
}
private static int mhash(byte[] data, int offset, int len, int seed) {
int c1 = 0xcc9e2d51;
int c2 = 0x1b873593;
int h1 = seed;
int roundedEnd = offset + (len & 0xfffffffc); // round down to 4 byte block
for (int i = offset; i < roundedEnd; i += 4) {
// little endian load order
int k1 = (data[i] & 0xff) | ((data[i + 1] & 0xff) << 8) | ((data[i + 2] & 0xff) << 16) | (data[i + 3] << 24);
k1 *= c1;
k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = (h1 << 13) | (h1 >>> 19); // ROTL32(h1,13);
h1 = h1 * 5 + 0xe6546b64;
}
// tail
int k1 = 0;
switch (len & 0x03) {
case 3:
k1 = (data[roundedEnd + 2] & 0xff) << 16;
// fallthrough
case 2:
k1 |= (data[roundedEnd + 1] & 0xff) << 8;
// fallthrough
case 1:
k1 |= data[roundedEnd] & 0xff;
k1 *= c1;
k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
default:
}
// finalization
h1 ^= len;
// fmix(h1);
h1 ^= h1 >>> 16;
h1 *= 0x85ebca6b;
h1 ^= h1 >>> 13;
h1 *= 0xc2b2ae35;
h1 ^= h1 >>> 16;
return h1;
}
}