/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.math;

import com.google.common.base.Charsets;
import org.junit.Assert;
import org.junit.Test;

import java.io.UnsupportedEncodingException;

/**
 * Tests for {@link MurmurHash}: the 32-bit {@code hash} and the 64-bit {@code hash64A}
 * variants are checked against fixed reference values, and a one-character change of
 * the input is required to flip a minimum number of output bits.
 */
public class MurmurHashTest extends MahoutTestCase {

  /** Seed held constant while the key contents vary in {@link #testChangingKey()}. */
  private static final int FIXED_KEY_SEED = 0x1234ABCD;

  /** Seed held constant while the key length varies in {@link #testChangingKeyLength()}. */
  private static final int FIXED_LENGTH_SEED = 0x7870AAFF;

  /** Encodes {@code text} as UTF-8 bytes. */
  private static byte[] utf8(String text) {
    return text.getBytes(Charsets.UTF_8);
  }

  /** Appending a single space to the input must flip more than 25 of the 64 hash bits. */
  @Test
  public void testForLotsOfChange64() throws UnsupportedEncodingException {
    long base = MurmurHash.hash64A(utf8("abc"), 0);
    long padded = MurmurHash.hash64A(utf8("abc "), 0);
    int flipCount = Long.bitCount(base ^ padded);
    boolean enoughFlips = flipCount > 25;
    Assert.assertTrue("Small changes should result in lots of bit flips, only found " + flipCount,
        enoughFlips);
  }

  /** Checks {@code hash64A} with seed 0 against known 64-bit reference values. */
  @Test
  public void testHash64() {
    // test data generated by running MurmurHash2_64.cpp
    String[] inputs = {"abc", "abc def ghi jkl ", "abc def ghi jkl moreGoo"};
    long[] expected = {0x9cc9c33498a95efbL, 0xd2c8c9b470122bddL, 0xcd37895736a81cbcL};
    for (int n = 0; n < inputs.length; n++) {
      Assert.assertEquals(expected[n], MurmurHash.hash64A(utf8(inputs[n]), 0));
    }
  }

  /** Appending a single space to the input must flip more than 14 of the 32 hash bits. */
  @Test
  public void testForLotsOfChange32() throws UnsupportedEncodingException {
    int base = MurmurHash.hash(utf8("abc"), 0);
    int padded = MurmurHash.hash(utf8("abc "), 0);
    int flipCount = Integer.bitCount(base ^ padded);
    boolean enoughFlips = flipCount > 14;
    Assert.assertTrue("Small changes should result in lots of bit flips, only found " + flipCount,
        enoughFlips);
  }

  // tests lifted from http://dmy999.com/article/50/murmurhash-2-java-port
  // code was marked with this notice:
  // released to the public domain - dmy999@gmail.com
  // expected values are generated from the output of a C driver that
  // ran against the same input

  /** Hashes one fixed key with seeds 0..15 and compares each result to its reference value. */
  @Test
  public void testChangingSeed() {
    // use a fixed key
    byte[] key = {0x4E, (byte) 0xE3, (byte) 0x91, 0x00, 0x10, (byte) 0x8F, (byte) 0xFF};

    int[] expected = {
      0xeef8be32, 0x8109dec6, 0x9aaf4192, 0xc1bcaf1c,
      0x821d2ce4, 0xd45ed1df, 0x6c0357a7, 0x21d4e845,
      0xfa97db50, 0x2f1985c8, 0x5d69782a, 0x0d6e4b85,
      0xe7d9cf6b, 0x337e6b49, 0xe1606944, 0xccc18ae8
    };

    for (int seed = 0; seed < expected.length; seed++) {
      int actual = MurmurHash.hash(key, seed);
      Assert.assertEquals("i = " + seed, expected[seed], actual);
    }
  }

  /** Hashes a 133-byte key refilled with a shifted pattern each round, using a constant seed. */
  @Test
  public void testChangingKey() {
    byte[] key = new byte[133];

    int[] expected = {
      0xd743ae0b, 0xf1b461c6, 0xa45a6ceb, 0xdb15e003,
      0x877721a4, 0xc30465f1, 0xfb658ba4, 0x1adf93b2,
      0xe40a7931, 0x3da52db0, 0xbf523511, 0x1efaf273,
      0xe628c1dd, 0x9a0344df, 0x901c99fc, 0x5ae1aa44
    };

    for (int round = 0; round < expected.length; round++) {
      // keep seed constant, generate a known key pattern
      setKey(key, round);
      int actual = MurmurHash.hash(key, FIXED_KEY_SEED);
      Assert.assertEquals("i = " + round, expected[round], actual);
    }
  }

  /** Hashes keys of length 0..15, each filled with a pattern starting at its own length. */
  @Test
  public void testChangingKeyLength() {
    int[] expected = {
      0xa0c72f8e, 0x29c2f97e, 0x00ca8bba, 0x88387876,
      0xe203ce49, 0x58d75952, 0xab84febe, 0x98153c65,
      0xcbb38375, 0x6ea1a28b, 0x9afa8f55, 0xfb890eb6,
      0x9516cc49, 0x6408a8eb, 0xbb12d3e6, 0x00fb7519
    };

    // vary the key and the length
    for (int length = 0; length < expected.length; length++) {
      byte[] key = new byte[length];
      setKey(key, length);
      int actual = MurmurHash.hash(key, FIXED_LENGTH_SEED);
      Assert.assertEquals("i = " + length, expected[length], actual);
    }
  }

  /**
   * Fill a key with a known pattern (incrementing numbers).
   *
   * @param key   array overwritten in place
   * @param start value stored at index 0; each following index holds the next value
   */
  private static void setKey(byte[] key, int start) {
    for (int offset = 0; offset < key.length; offset++) {
      // the narrowing cast keeps only the low 8 bits of the sum
      key[offset] = (byte) (start + offset);
    }
  }
}