/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package edu.brown.rand; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.Random; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import edu.brown.statistics.Histogram; import edu.brown.statistics.ObjectHistogram; /** * A class that generates random numbers that follow some distribution. * <p> * Copied from <a * href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 * tfile</a>. Remove after tfile is committed and use the tfile version of this * class instead. * </p> */ public class RandomDistribution { /** * Interface for discrete (integer) random distributions. */ public static abstract class DiscreteRNG extends Random { private static final long serialVersionUID = 1L; protected final long min; protected final long max; protected final Random random; protected final double mean; protected final long range_size; private ObjectHistogram<Long> history; public DiscreteRNG(Random random, long min, long max) { if (min >= max) throw new IllegalArgumentException("Invalid range [" + min + " >= " + max + "]"); this.random = random; this.min = min; this.max = max; this.range_size = (max - min) + 1; this.mean = this.range_size / 2.0; } protected abstract long nextLongImpl(); /** * Enable keeping track of the values that the RNG generates */ public void enableHistory() { assert (this.history == null) : "Trying to enable history tracking more than once"; this.history = new ObjectHistogram<Long>(); } public boolean isHistoryEnabled() { return (this.history != null); } /** * Return the histogram of the values that have been generated * * @return */ public ObjectHistogram<Long> getHistory() { assert (this.history != null) : "Trying to get value history but tracking wasn't enabled"; return (this.history); } /** * Return the count for the number of values that have been generated * Only works if history tracking is enabled * * @return */ public long getSampleCount() { return (this.history.getSampleCount()); } public long getRange() { return this.range_size; } public double getMean() { return this.mean; } public long getMin() { return this.min; } public long getMax() { return this.max; } public Random getRandom() { return (this.random); } public Set<Integer> getRandomIntSet(int cnt) { assert (cnt < this.range_size); Set<Integer> ret = new HashSet<Integer>(); do { ret.add(this.nextInt()); } while (ret.size() < cnt); return (ret); } public Set<Integer> getRandomLongSet(int cnt) { assert (cnt < this.range_size); Set<Integer> ret = new HashSet<Integer>(); do { ret.add(this.nextInt()); } while (ret.size() < cnt); return (ret); } public double calculateMean(int num_samples) { long total = 0l; for (int i = 0; i < num_samples; i++) { total += this.nextLong(); } // FOR return (total / (double) num_samples); } /** * Get the next random number as an int * * @return the next random number. */ @Override public final int nextInt() { long val = (int) this.nextLongImpl(); if (this.history != null) this.history.put(val); return ((int) val); } /** * Get the next random number as a long * * @return the next random number. */ @Override public final long nextLong() { long val = this.nextLongImpl(); if (this.history != null) this.history.put(val); return (val); } @Override public String toString() { return String.format("%s[min=%d, max=%d]", this.getClass().getSimpleName(), this.min, this.max); } public static long nextLong(Random rng, long n) { // error checking and 2^x checking removed for simplicity. long bits, val; do { bits = (rng.nextLong() << 1) >>> 1; val = bits % n; } while (bits - val + (n - 1) < 0L); return val; } } /** * P(i)=1/(max-min) */ public static class Flat extends DiscreteRNG { private static final long serialVersionUID = 1L; /** * Generate random integers from min (inclusive) to max (exclusive) * following even distribution. * * @param random * The basic random number generator. * @param min * Minimum integer * @param max * maximum integer (exclusive). */ public Flat(Random random, long min, long max) { super(random, min, max); } /** * @see DiscreteRNG#nextInt() */ @Override protected long nextLongImpl() { // error checking and 2^x checking removed for simplicity. long bits, val; do { bits = (random.nextLong() << 1) >>> 1; val = bits % (this.range_size - 1); } while (bits - val + (this.range_size - 1) < 0L); val += this.min; assert (val >= min); assert (val < max); return val; } } /** * P(i)=1/(max-min) */ public static class FlatHistogram<T> extends DiscreteRNG { private static final long serialVersionUID = 1L; private final Flat inner; private final Histogram<T> histogram; private final SortedMap<Long, T> value_rle = new TreeMap<Long, T>(); private Histogram<T> history; /** * Generate a run-length of the values of the histogram */ public FlatHistogram(Random random, Histogram<T> histogram) { super(random, 0, (int) histogram.getSampleCount()); this.histogram = histogram; this.inner = new Flat(random, 0, (int) histogram.getSampleCount()); long total = 0; for (T k : this.histogram.values()) { long v = this.histogram.get(k); total += v; this.value_rle.put(total, k); } // FOR } @Override public void enableHistory() { this.history = new ObjectHistogram<T>(); } @Override public boolean isHistoryEnabled() { return (this.history != null); } public Histogram<T> getHistogramHistory() { if (this.history != null) { return (this.history); } return (null); } public T nextValue() { int idx = this.inner.nextInt(); Long total = this.value_rle.tailMap((long) idx).firstKey(); T val = this.value_rle.get(total); if (this.history != null) this.history.put(val); return (val); // assert(false) : "Went beyond our expected total '" + idx + "'"; // return (null); } /** * @see DiscreteRNG#nextLong() */ @Override protected long nextLongImpl() { Object val = this.nextValue(); if (val instanceof Integer) { return ((Integer) val); } return ((Long) val); } } /** * Gaussian Distribution */ public static class Gaussian extends DiscreteRNG { private static final long serialVersionUID = 1L; public Gaussian(Random random, long min, long max) { super(random, min, max); } @Override protected long nextLongImpl() { int value = -1; while (value < 0 || value >= this.range_size) { double gaussian = (this.random.nextGaussian() + 2.0) / 4.0; value = (int) Math.round(gaussian * this.range_size); } return (value + this.min); } } public static class HotWarmCold extends DiscreteRNG { int hot_data_access_skew; int warm_data_access_skew; int hot_data_size; int warm_data_size; // the max of the hot/warm/cold ranges, where hot_data_max < warm_data_max < max int min; int max; int hot_data_max; // integers in the range 0 < x < hot_data_max will represent the "hot" numbers getting hot_data_access_skew% of the accesses int warm_data_max; // integers in the range hot_data_max < x < warm_data_max will represent the "warm" numbers public HotWarmCold(Random r, int _min, int _max, int _hot_data_access_skew, int _hot_data_size, int _warm_data_access_skew, int _warm_data_size) { super(r, _min, _max); assert(_hot_data_access_skew + _warm_data_access_skew <= 100) : "Workload skew cannot be more than 100%."; hot_data_access_skew = _hot_data_access_skew; warm_data_access_skew = _warm_data_access_skew; hot_data_size = _hot_data_size; warm_data_size = _warm_data_size; min = _min; max = _max; hot_data_max = (int)(max * (hot_data_size / (double)100)) + min; warm_data_max = (int)(max * (warm_data_size / (double)100)) + hot_data_max; } @Override protected long nextLongImpl() { int key = 0; int access_skew_rand = random.nextInt(100); if(access_skew_rand < hot_data_access_skew) // generate a number in the "hot" data range, 0 < x < hot_data_max { key = random.nextInt(hot_data_max) + min; } else if(access_skew_rand < hot_data_access_skew + warm_data_access_skew) // generate a key in the "warm" data range, hot_data_max < x < warm_data_max { key = random.nextInt(warm_data_max - hot_data_max + 1) + hot_data_max; } else // generate a number in the "cold" data range, warm_data_max < x < max { key = random.nextInt(max - warm_data_max + 1) + warm_data_max; } return key; } } /** * Zipf distribution. The ratio of the probabilities of integer i and j is * defined as follows: P(i)/P(j)=((j-min+1)/(i-min+1))^sigma. */ public static class Zipf extends DiscreteRNG { private static final long serialVersionUID = 1L; private static final double DEFAULT_EPSILON = 0.001; private final ArrayList<Long> k; private final ArrayList<Double> v; /** * Constructor * * @param r * The random number generator. * @param min * minimum integer (inclusvie) * @param max * maximum integer (exclusive) * @param sigma * parameter sigma. (sigma > 1.0) */ public Zipf(Random r, long min, long max, double sigma) { this(r, min, max, sigma, DEFAULT_EPSILON); } /** * Constructor. * * @param r * The random number generator. * @param min * minimum integer (inclusvie) * @param max * maximum integer (exclusive) * @param sigma * parameter sigma. (sigma > 1.0) * @param epsilon * Allowable error percentage (0 < epsilon < 1.0). */ public Zipf(Random r, long min, long max, double sigma, double epsilon) { super(r, min, max); if ((max <= min) || (sigma <= 1) || (epsilon <= 0) || (epsilon >= 0.5)) { throw new IllegalArgumentException("Invalid arguments [min=" + min + ", max=" + max + ", sigma=" + sigma + ", epsilon=" + epsilon + "]"); } k = new ArrayList<Long>(); v = new ArrayList<Double>(); double sum = 0; long last = -1; for (long i = min; i < max; ++i) { sum += Math.exp(-sigma * Math.log(i - min + 1)); if ((last == -1) || i * (1 - epsilon) > last) { k.add(i); v.add(sum); last = i; } } // FOR if (last != max - 1) { k.add(max - 1); v.add(sum); } v.set(v.size() - 1, 1.0); for (int i = v.size() - 2; i >= 0; --i) { v.set(i, v.get(i) / sum); } } /** * @see DiscreteRNG#nextInt() */ @Override protected long nextLongImpl() { double d = random.nextDouble(); int idx = Collections.binarySearch(v, d); if (idx > 0) { ++idx; } else { idx = -(idx + 1); } if (idx >= v.size()) { idx = v.size() - 1; } if (idx == 0) { return k.get(0); } long ceiling = k.get(idx); long lower = k.get(idx - 1); return ceiling - DiscreteRNG.nextLong(random, ceiling - lower); } } /** * Binomial distribution. P(k)=select(n, k)*p^k*(1-p)^(n-k) (k = 0, 1, ..., * n) P(k)=select(max-min-1, k-min)*p^(k-min)*(1-p)^(k-min)*(1-p)^(max-k-1) */ public static final class Binomial extends DiscreteRNG { private static final long serialVersionUID = 1L; private final double[] v; private final long n; private static double select(long n, long k) { double ret = 1.0; for (long i = k + 1; i <= n; ++i) { ret *= (double) i / (i - k); } return ret; } private static double power(double p, long k) { return Math.exp(k * Math.log(p)); } /** * Generate random integers from min (inclusive) to max (exclusive) * following Binomial distribution. * * @param random * The basic random number generator. * @param min * Minimum integer * @param max * maximum integer (exclusive). * @param p * parameter. */ public Binomial(Random random, long min, long max, double p) { super(random, min, max); this.n = max - min - 1; if (n > 0) { v = new double[(int) n + 1]; double sum = 0.0; for (int i = 0; i <= n; ++i) { sum += select(n, i) * power(p, i) * power(1 - p, n - i); v[i] = sum; } for (int i = 0; i <= n; ++i) { v[i] /= sum; } } else { v = null; } } /** * @see DiscreteRNG#nextInt() */ @Override protected long nextLongImpl() { if (v == null) { return min; } double d = random.nextDouble(); int idx = Arrays.binarySearch(v, d); if (idx > 0) { ++idx; } else { idx = -(idx + 1); } if (idx >= v.length) { idx = v.length - 1; } return idx + min; } } /** * * Power Law distribution. * * k = 1 + alpha * x = [(max^k - min^k) * y - min^k]^(1/k) * where y ~ uniform(0, 1) */ public static class PowerLaw extends DiscreteRNG { private static final long serialVersionUID = 1L; private final double alpha_; /** * Constructor. * * @param r * The random number generator. * @param min * minimum integer (inclusvie) * @param max * maximum integer (exclusive) * @param alpha * parameter alpha. (alpha < 0) */ public PowerLaw(Random r, long min_x, long max_x, double alpha) { super(r, min_x, max_x); if ((max <= min) || (alpha >= 0)) { throw new IllegalArgumentException("Invalid arguments [min=" + min + ", max=" + max + ", alpha=" + alpha + "]"); } alpha_ = alpha; } /** * @see DiscreteRNG#nextInt() */ @Override protected long nextLongImpl() { double y = random.nextDouble(); long x = 1; double k = 1 + alpha_; x = Math.round(Math.pow((Math.pow(max, k) - Math.pow(min, k)) * y + Math.pow(min, k), 1/k)); return x; } } }