/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. */ package org.apache.hadoop.hbase.util.test; import java.util.Random; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.MD5Hash; /** * A generator of random keys and values for load testing. Keys are generated * by converting numeric indexes to strings and prefixing them with an MD5 * hash. Values are generated by selecting value size in the configured range * and generating a pseudo-random sequence of bytes seeded by key, column * qualifier, and value size. */ @InterfaceAudience.Private public class LoadTestKVGenerator { private static final Log LOG = LogFactory.getLog(LoadTestKVGenerator.class); private static int logLimit = 10; /** A random number generator for determining value size */ private Random randomForValueSize = new Random(); private final int minValueSize; private final int maxValueSize; public LoadTestKVGenerator(int minValueSize, int maxValueSize) { if (minValueSize <= 0 || maxValueSize <= 0) { throw new IllegalArgumentException("Invalid min/max value sizes: " + minValueSize + ", " + maxValueSize); } this.minValueSize = minValueSize; this.maxValueSize = maxValueSize; } /** * Verifies that the given byte array is the same as what would be generated * for the given seed strings (row/cf/column/...). We are assuming that the * value size is correct, and only verify the actual bytes. However, if the * min/max value sizes are set sufficiently high, an accidental match should be * extremely improbable. */ public static boolean verify(byte[] value, byte[]... seedStrings) { byte[] expectedData = getValueForRowColumn(value.length, seedStrings); boolean equals = Bytes.equals(expectedData, value); if (!equals && LOG.isDebugEnabled() && logLimit > 0) { LOG.debug("verify failed, expected value: " + Bytes.toStringBinary(expectedData) + " actual value: "+ Bytes.toStringBinary(value)); logLimit--; // this is not thread safe, but at worst we will have more logging } return equals; } /** * Converts the given key to string, and prefixes it with the MD5 hash of * the index's string representation. */ public static String md5PrefixedKey(long key) { String stringKey = Long.toString(key); String md5hash = MD5Hash.getMD5AsHex(Bytes.toBytes(stringKey)); // flip the key to randomize return md5hash + "-" + stringKey; } /** * Generates a value for the given key index and column qualifier. Size is * selected randomly in the configured range. The generated value depends * only on the combination of the strings passed (key/cf/column/...) and the selected * value size. This allows to verify the actual value bytes when reading, as done * in {#verify(byte[], byte[]...)} * This method is as thread-safe as Random class. It appears that the worst bug ever * found with the latter is that multiple threads will get some duplicate values, which * we don't care about. */ public byte[] generateRandomSizeValue(byte[]... seedStrings) { int dataSize = minValueSize; if(minValueSize != maxValueSize) { dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize)); } return getValueForRowColumn(dataSize, seedStrings); } /** * Generates random bytes of the given size for the given row and column * qualifier. The random seed is fully determined by these parameters. */ private static byte[] getValueForRowColumn(int dataSize, byte[]... seedStrings) { long seed = dataSize; for (byte[] str : seedStrings) { final String bytesString = Bytes.toString(str); if (bytesString != null) { seed += bytesString.hashCode(); } } Random seededRandom = new Random(seed); byte[] randomBytes = new byte[dataSize]; seededRandom.nextBytes(randomBytes); return randomBytes; } }