/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.hadoop.io.file.tfile;

import java.util.Random;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.io.file.tfile.RandomDistribution.DiscreteRNG;

/**
 * Generate random &lt;key, value&gt; pairs.
 *
 * <p>Key and value payloads are assembled by concatenating words picked at
 * random from a dictionary of random byte strings that is built once in the
 * constructor. Every key is at least {@code MIN_KEY_LEN} bytes long and its
 * first {@code MIN_KEY_LEN} bytes hold a prefix counter maintained by this
 * class; the remainder of the key is dictionary content.
 */
class KVGenerator {
  /** Minimum key length; also the number of leading key bytes used by the prefix. */
  private static final int MIN_KEY_LEN = 4;

  private final Random random;
  /** Dictionary of random words that key and value payloads are copied from. */
  private final byte[][] dict;
  /** When set, the prefix is bumped whenever a new key body compares lower than the last. */
  private final boolean sorted;
  private final DiscreteRNG keyLenRNG;
  private final DiscreteRNG valLenRNG;
  /** Counter written into the first {@code MIN_KEY_LEN} bytes of every key. */
  private final byte[] prefix = new byte[MIN_KEY_LEN];
  /** Copy of the most recently generated key. */
  private final BytesWritable lastKey;

  /**
   * Builds the word dictionary and generates an initial key.
   *
   * @param random source of randomness for word contents and word selection
   * @param sorted whether the key prefix should be advanced when a newly
   *          generated key body compares lower than the previous one
   * @param keyLenRNG supplies the length of each generated key
   * @param valLenRNG supplies the length of each generated value
   * @param wordLenRNG supplies the length of each dictionary word
   * @param dictSize number of words in the dictionary
   */
  public KVGenerator(Random random, boolean sorted, DiscreteRNG keyLenRNG,
      DiscreteRNG valLenRNG, DiscreteRNG wordLenRNG, int dictSize) {
    this.random = random;
    this.sorted = sorted;
    this.keyLenRNG = keyLenRNG;
    this.valLenRNG = valLenRNG;

    this.dict = new byte[dictSize][];
    for (int idx = 0; idx < dictSize; idx++) {
      dict[idx] = new byte[wordLenRNG.nextInt()];
      random.nextBytes(dict[idx]);
    }

    // Generate one key up front so lastKey always holds a generated key.
    this.lastKey = new BytesWritable();
    fillKey(lastKey);
  }

  /**
   * Copies randomly chosen dictionary words into {@code target}, starting at
   * offset {@code from} and stopping at offset {@code to} (exclusive). The
   * final word is truncated so the copy never runs past {@code to}.
   */
  private void appendWords(BytesWritable target, int from, int to) {
    final byte[] buf = target.get();
    for (int pos = from, chunk; pos < to; pos += chunk) {
      final byte[] word = dict[random.nextInt(dict.length)];
      chunk = Math.min(word.length, to - pos);
      System.arraycopy(word, 0, buf, pos, chunk);
    }
  }

  /**
   * Generates a new key into {@code o} and records a copy of it as the last
   * key. The key length comes from {@code keyLenRNG}, raised to
   * {@code MIN_KEY_LEN} if smaller.
   */
  private void fillKey(BytesWritable o) {
    final int keyLen = Math.max(keyLenRNG.nextInt(), MIN_KEY_LEN);
    o.setSize(keyLen);
    appendWords(o, MIN_KEY_LEN, keyLen);

    if (sorted) {
      // Compare only the bytes after the prefix: previous key vs. new key.
      final int order = WritableComparator.compareBytes(
          lastKey.get(), MIN_KEY_LEN, lastKey.getSize() - MIN_KEY_LEN,
          o.get(), MIN_KEY_LEN, o.getSize() - MIN_KEY_LEN);
      if (order > 0) {
        incrementPrefix();
      }
    }

    System.arraycopy(prefix, 0, o.get(), 0, MIN_KEY_LEN);
    lastKey.set(o);
  }

  /**
   * Generates a new value into {@code o}; the length comes from
   * {@code valLenRNG} and the content from the dictionary.
   */
  private void fillValue(BytesWritable o) {
    final int valLen = valLenRNG.nextInt();
    o.setSize(valLen);
    appendWords(o, 0, valLen);
  }

  /**
   * Adds one to the prefix, carrying from the last byte towards the first.
   *
   * @throws RuntimeException if every prefix byte wraps around to zero
   */
  private void incrementPrefix() {
    int idx = MIN_KEY_LEN;
    while (--idx >= 0) {
      if (++prefix[idx] != 0) {
        return;
      }
    }
    throw new RuntimeException("Prefix overflown");
  }

  /**
   * Produces the next key/value pair.
   *
   * @param key receives the key
   * @param value receives a newly generated value
   * @param dupKey if true, {@code key} is set from the most recently
   *          generated key instead of generating a new one
   */
  public void next(BytesWritable key, BytesWritable value, boolean dupKey) {
    if (!dupKey) {
      fillKey(key);
    } else {
      key.set(lastKey);
    }
    fillValue(value);
  }
}