/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.math;
import com.google.common.base.Charsets;
import org.junit.Assert;
import org.junit.Test;
import java.io.UnsupportedEncodingException;
/**
 * Tests for {@link MurmurHash}: checks the 32-bit {@code hash} and 64-bit {@code hash64A}
 * functions against fixed expected values, and checks that appending a single character to the
 * input changes a large number of output bits.
 */
public class MurmurHashTest extends org.apache.mahout.math.MahoutTestCase {

  /**
   * Appending one space to the input must flip more than 25 of the 64 bits of the
   * {@code hash64A} result.
   */
  @Test
  public void testForLotsOfChange64() {
    long h1 = MurmurHash.hash64A(utf8("abc"), 0);
    long h2 = MurmurHash.hash64A(utf8("abc "), 0);
    // XOR leaves a 1 in every bit position where the two hashes disagree
    int flipCount = Long.bitCount(h1 ^ h2);
    Assert.assertTrue("Small changes should result in lots of bit flips, only found " + flipCount, flipCount > 25);
  }

  /**
   * Compares {@code hash64A} (seed 0) against fixed expected values for three inputs of
   * different lengths.
   */
  @Test
  public void testHash64() {
    // test data generated by running MurmurHash2_64.cpp
    Assert.assertEquals(0x9cc9c33498a95efbL, MurmurHash.hash64A(utf8("abc"), 0));
    Assert.assertEquals(0xd2c8c9b470122bddL, MurmurHash.hash64A(utf8("abc def ghi jkl "), 0));
    Assert.assertEquals(0xcd37895736a81cbcL, MurmurHash.hash64A(utf8("abc def ghi jkl moreGoo"), 0));
  }

  /**
   * Appending one space to the input must flip more than 14 of the 32 bits of the
   * {@code hash} result.
   */
  @Test
  public void testForLotsOfChange32() {
    int h1 = MurmurHash.hash(utf8("abc"), 0);
    int h2 = MurmurHash.hash(utf8("abc "), 0);
    // XOR leaves a 1 in every bit position where the two hashes disagree
    int flipCount = Integer.bitCount(h1 ^ h2);
    Assert.assertTrue("Small changes should result in lots of bit flips, only found " + flipCount, flipCount > 14);
  }

  // tests lifted from http://dmy999.com/article/50/murmurhash-2-java-port
  // code was marked with this notice:
  // released to the public domain - dmy999@gmail.com
  // expected values are generated from the output of a C driver that
  // ran against the same input

  /**
   * Hashes one fixed 7-byte key with the seeds 0, 1, 2, ... and compares each result with the
   * expected value stored at the index equal to the seed.
   */
  @Test
  public void testChangingSeed() {
    // use a fixed key
    byte[] key = {0x4E, (byte) 0xE3, (byte) 0x91, 0x00, 0x10, (byte) 0x8F, (byte) 0xFF};
    int[] expected = {0xeef8be32, 0x8109dec6, 0x9aaf4192, 0xc1bcaf1c,
                      0x821d2ce4, 0xd45ed1df, 0x6c0357a7, 0x21d4e845,
                      0xfa97db50, 0x2f1985c8, 0x5d69782a, 0x0d6e4b85,
                      0xe7d9cf6b, 0x337e6b49, 0xe1606944, 0xccc18ae8};
    for (int seed = 0; seed < expected.length; seed++) {
      Assert.assertEquals("i = " + seed, expected[seed], MurmurHash.hash(key, seed));
    }
  }

  /**
   * Hashes a 133-byte key with the constant seed {@code 0x1234ABCD}; before each hash the key
   * is refilled with an incrementing pattern that starts at the loop index.
   */
  @Test
  public void testChangingKey() {
    byte[] key = new byte[133];
    int[] expected = {0xd743ae0b, 0xf1b461c6, 0xa45a6ceb, 0xdb15e003,
                      0x877721a4, 0xc30465f1, 0xfb658ba4, 0x1adf93b2,
                      0xe40a7931, 0x3da52db0, 0xbf523511, 0x1efaf273,
                      0xe628c1dd, 0x9a0344df, 0x901c99fc, 0x5ae1aa44};
    // bound by the table itself so the loop and the expected values cannot drift apart
    for (int i = 0; i < expected.length; i++) {
      // keep seed constant, generate a known key pattern
      setKey(key, i);
      Assert.assertEquals("i = " + i, expected[i], MurmurHash.hash(key, 0x1234ABCD));
    }
  }

  /**
   * Hashes keys of length 0, 1, 2, ... with the constant seed {@code 0x7870AAFF}; the key of
   * length {@code i} is filled with an incrementing pattern that starts at {@code i}.
   */
  @Test
  public void testChangingKeyLength() {
    int[] expected = {0xa0c72f8e, 0x29c2f97e, 0x00ca8bba, 0x88387876,
                      0xe203ce49, 0x58d75952, 0xab84febe, 0x98153c65,
                      0xcbb38375, 0x6ea1a28b, 0x9afa8f55, 0xfb890eb6,
                      0x9516cc49, 0x6408a8eb, 0xbb12d3e6, 0x00fb7519};
    // vary the key and the length
    for (int i = 0; i < expected.length; i++) {
      byte[] key = new byte[i];
      setKey(key, i);
      Assert.assertEquals("i = " + i, expected[i], MurmurHash.hash(key, 0x7870AAFF));
    }
  }

  /**
   * Encodes a string as UTF-8 bytes.
   *
   * @param text the string to encode
   * @return the UTF-8 encoding of {@code text}
   */
  private static byte[] utf8(String text) {
    // getBytes(Charset) declares no checked exception, unlike getBytes(String charsetName)
    return text.getBytes(Charsets.UTF_8);
  }

  /**
   * Fill a key with a known pattern (incrementing numbers): the byte at index {@code i}
   * becomes the low 8 bits of {@code start + i}.
   *
   * @param key   the array to overwrite in place
   * @param start the value written to the first byte
   */
  private static void setKey(byte[] key, int start) {
    for (int i = 0; i < key.length; i++) {
      key[i] = (byte) ((start + i) & 0xFF);
    }
  }
}