/*
 * Copyright © 2014 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.hbase.wd;

import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.junit.Test;

import java.util.Arrays;
import java.util.Random;

/**
 * Tests for {@link RowKeyDistributorByHashPrefix.OneByteSimpleHash}: verifies that hashed
 * prefixes stay within the advertised prefix set, that hashing is deterministic, and that
 * keys are spread evenly across buckets.
 */
public class OneByteSimpleHashTest {

  @Test
  public void testMaxDistribution() {
    // 256 is the maximum bucket count representable by a one-byte prefix
    testPrefixSetAndDeterminism(256);
  }

  @Test
  public void testLimitedDistribution() {
    testPrefixSetAndDeterminism(10);
  }

  /**
   * Asserts that:
   * <ul>
   *   <li>the hasher advertises exactly {@code maxBuckets} possible prefixes;</li>
   *   <li>the prefix of every randomly generated key is one of the advertised prefixes;</li>
   *   <li>hashing the same key twice yields the same prefix (determinism).</li>
   * </ul>
   *
   * @param maxBuckets number of buckets the hasher is configured with
   */
  private void testPrefixSetAndDeterminism(int maxBuckets) {
    RowKeyDistributorByHashPrefix.OneByteSimpleHash hasher =
      new RowKeyDistributorByHashPrefix.OneByteSimpleHash(maxBuckets);
    byte[][] allPrefixes = hasher.getAllPossiblePrefixes();
    Assert.assertEquals(maxBuckets, allPrefixes.length);

    Random r = new Random();
    for (int i = 0; i < 1000; i++) {
      byte[] originalKey = new byte[3];
      r.nextBytes(originalKey);
      byte[] hash = hasher.getHashPrefix(originalKey);
      boolean found = false;
      for (byte[] prefix : allPrefixes) {
        if (Arrays.equals(prefix, hash)) {
          found = true;
          break;
        }
      }
      Assert.assertTrue("Hashed prefix wasn't found in all possible prefixes, val: " + Arrays.toString(hash),
                        found);
    }

    // Determinism: the same key must always hash to the same prefix.
    Assert.assertArrayEquals(hasher.getHashPrefix(new byte[] {123, 12, 11}),
                             hasher.getHashPrefix(new byte[] {123, 12, 11}));
  }

  /**
   * Tests that records are well spread over buckets.
   * In fact this test-case verifies *even* distribution across buckets, which may be broken with changing the hashing
   * algorithm.
   */
  @Test
  public void testHashPrefixDistribution() {
    testDistribution(32, 55);
    testDistribution(37, 13);
    testDistribution(255, 20);
    testDistribution(256, 20);
    testDistribution(256, 1);
    testDistribution(1, 200);
    testDistribution(1, 1);
  }

  /**
   * Distributes {@code maxBuckets * countForEachBucket} sequential integer keys and asserts
   * that every bucket received within 10% of the expected average count.
   *
   * @param maxBuckets         number of buckets configured on the distributor
   * @param countForEachBucket expected average number of keys per bucket
   */
  private void testDistribution(int maxBuckets, int countForEachBucket) {
    RowKeyDistributorByHashPrefix distributor =
      new RowKeyDistributorByHashPrefix(new RowKeyDistributorByHashPrefix.OneByteSimpleHash(maxBuckets));
    int[] bucketCounts = new int[maxBuckets];
    for (int i = 0; i < maxBuckets * countForEachBucket; i++) {
      byte[] original = Bytes.toBytes(i);
      byte[] distributed = distributor.getDistributedKey(original);
      // The one-byte prefix identifies the bucket; mask to treat it as unsigned.
      bucketCounts[distributed[0] & 0xff]++;
    }

    byte[][] allKeys = distributor.getAllDistributedKeys(new byte[0]);
    Assert.assertEquals(maxBuckets, allKeys.length);

    for (int bucketCount : bucketCounts) {
      // i.e. all buckets expected to have similar amount of values (+- 10%).
      // BUGFIX: the original expression used integer division, which truncated the ratio to 0
      // for any deviation smaller than countForEachBucket — the 10% tolerance was effectively
      // a 100% tolerance. Cast to double so the ratio is a true fraction.
      // NOTE(review): if the underlying hash is not this even in practice, the tolerance (not
      // the division) is what should be revisited.
      Assert.assertTrue("Unexpected values count in bucket: " + bucketCount + ", avg: " + countForEachBucket,
                        Math.abs((countForEachBucket - bucketCount) / (double) countForEachBucket) < 0.10);
    }
  }
}