package org.cache2k.benchmark.util;
/*
* #%L
* Benchmarks: utilities
* %%
* Copyright (C) 2013 - 2017 headissue GmbH, Munich
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
/**
* This code is adapted from the YCSB benchmark,
*
* original source:
* https://github.com/brianfrankcooper/YCSB/blob/master/core/src/main/java/com/yahoo/ycsb/generator/ZipfianGenerator.java
*
* original license:
*
* Copyright (c) 2010 Yahoo! Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You
* may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License. See accompanying
* LICENSE file.
*/
import it.unimi.dsi.util.XorShift1024StarRandomGenerator;
import java.util.HashMap;
import java.util.Map;
/**
* A generator of a zipfian distribution. It produces a sequence of items, such that some items are more popular than others, according
* to a zipfian distribution. When you construct an instance of this class, you specify the number of items in the set to draw from, either
* by specifying an itemcount (so that the sequence is of items from 0 to itemcount-1) or by specifying a min and a max (so that the sequence is of
* items from min to max inclusive). Unlike the original YCSB generator, which allowed changing the number of items after construction by calling nextInt(itemcount) or nextLong(itemcount), this class fixes the number of items at construction time.
*
* Note that the popular items will be clustered together, e.g. item 0 is the most popular, item 1 the second most popular, and so on (or min is the most
* popular, min+1 the next most popular, etc.) If you don't want this clustering, and instead want the popular items scattered throughout the
* item space, then use ScrambledZipfianPattern instead.
*
* Be aware: initializing this generator may take a long time if there are lots of items to choose from (e.g. over a minute
* for 100 million objects). This is because certain mathematical values need to be computed to properly generate a zipfian
* skew, and one of those values (zeta) is a sum sequence from 1 to n, where n is the itemcount. To save setup time, the zeta values
* for some common item counts are precomputed, and every newly computed zeta value is remembered in a static map, so constructing the
* same pattern again (e.g. in multiple threads) does not repeat the computation. The incremental recomputation of zeta described for the
* original YCSB generator is not part of this class: a zeta value that is not yet known is always computed from scratch, which can take a long time.
*
* The algorithm used here is from "Quickly Generating Billion-Record Synthetic Databases", Jim Gray et al, SIGMOD 1994.
*/
@SuppressWarnings("unused")
public class ZipfianPattern extends AbstractEternalAccessPattern {

  /** Zipfian constant used by the constructors that do not take one explicitly. */
  public static final double ZIPFIAN_CONSTANT = 0.99;

  /**
   * Number of items.
   */
  long items;

  /**
   * Min item to generate.
   */
  long base;

  /**
   * The zipfian constant to use.
   */
  double zipfianconstant;

  /**
   * Computed parameters for generating the distribution. All of them are
   * derived once in the constructor and only read afterwards by {@link #nextLong()}.
   */
  double alpha,zetan,eta,theta,zeta2theta;

  /** Source of the uniformly distributed doubles that get skewed in {@link #nextLong()}. */
  XorShift1024StarRandomGenerator randomGenerator;

  /******************************* Constructors **************************************/

  /**
   * Create a zipfian generator for the specified number of items, producing
   * values from 0 to {@code _items - 1} with the default {@link #ZIPFIAN_CONSTANT}.
   *
   * @param _randomSeed Seed passed to the random generator.
   * @param _items The number of items in the distribution.
   */
  public ZipfianPattern(long _randomSeed, long _items) {
    this(_randomSeed, 0,_items-1);
  }

  /**
   * Create a zipfian generator for items between min and max (inclusive),
   * using the default {@link #ZIPFIAN_CONSTANT}.
   *
   * @param _randomSeed Seed passed to the random generator.
   * @param _min The smallest integer to generate in the sequence.
   * @param _max The largest integer to generate in the sequence.
   */
  public ZipfianPattern(long _randomSeed, long _min, long _max) {
    this(_randomSeed, _min, _max, ZIPFIAN_CONSTANT);
  }

  /**
   * Create a zipfian generator for the specified number of items using the specified zipfian constant.
   *
   * @param _randomSeed Seed passed to the random generator.
   * @param _items The number of items in the distribution.
   * @param _zipfianconstant The zipfian constant to use.
   */
  public ZipfianPattern(long _randomSeed, long _items, double _zipfianconstant) {
    this(_randomSeed, 0, _items-1,_zipfianconstant);
  }

  /**
   * Create a zipfian generator for items between min and max (inclusive) for the specified zipfian constant.
   * The zeta constant is looked up or computed via {@link #zeta(long, double)}.
   *
   * @param _randomSeed Seed passed to the random generator.
   * @param min The smallest integer to generate in the sequence.
   * @param max The largest integer to generate in the sequence.
   * @param _zipfianconstant The zipfian constant to use.
   */
  public ZipfianPattern(long _randomSeed, long min, long max, double _zipfianconstant) {
    this(_randomSeed, min, max, _zipfianconstant, zeta(max - min + 1 , _zipfianconstant));
  }

  /**
   * Create a zipfian generator for items between min and max (inclusive) for the specified zipfian constant, using the precomputed value of zeta.
   *
   * @param _randomSeed Seed passed to the random generator.
   * @param min The smallest integer to generate in the sequence.
   * @param max The largest integer to generate in the sequence.
   * @param _zipfianconstant The zipfian constant to use.
   * @param _zetan The precomputed zeta constant.
   */
  public ZipfianPattern(long _randomSeed, long min, long max, double _zipfianconstant, double _zetan) {
    items = max - min + 1;
    base = min;
    zipfianconstant = _zipfianconstant;
    theta = zipfianconstant;
    zeta2theta = zeta(2,theta);
    alpha = 1.0 / (1.0 - theta);
    zetan = _zetan;
    eta = (1 - Math.pow(2.0/items,1-theta))/(1-zeta2theta/zetan);
    randomGenerator = new XorShift1024StarRandomGenerator(_randomSeed);
  }

  /**************************************************************************/

  /**
   * Known zeta constants, keyed by {@code "<n>|<theta>"}. Doubles as the lock
   * object guarding lookups and insertions in {@link #zeta(long, double)}.
   */
  final static Map<String, Double> zetaStaticMap = new HashMap<>();

  /**
   * Precomputed zeta constants to save setup time.
   */
  static {
    zetaStaticMap.put("2|0.99", 1.5034777750283594);
    zetaStaticMap.put("1000000|0.99", 15.391849746037371);
    zetaStaticMap.put("8000000|0.99", 17.80436406783243);
    zetaStaticMap.put("10000000|0.99", 18.066242574968303);
    zetaStaticMap.put("80000000|0.99", 20.534952035464187);
    zetaStaticMap.put("100000000|0.99", 20.80293049002014);
    zetaStaticMap.put("800000000|0.99", 23.329143628120455);
    zetaStaticMap.put("200000000|0.99", 21.639171532673963);
  }

  /**
   * Compute the zeta constant needed for the distribution. Remember computed constants
   * in a hash map, since we may initialize the same pattern in multiple threads.
   *
   * <p>The lock on the map is held for the whole computation, so a second thread asking
   * for the same constant waits and then finds the cached value instead of recomputing it.
   * Every newly computed constant is also printed to standard output.
   *
   * @param n The number of items to compute zeta over.
   * @param theta The zipfian constant.
   * @return The sum of {@code 1 / i^theta} for i from 1 to n.
   */
  static double zeta(long n, double theta) {
    synchronized (zetaStaticMap) {
      String k = n + "|" + theta;
      Double d = zetaStaticMap.get(k);
      if (d != null) {
        return d;
      }
      double sum = 0;
      for (long i = 0; i < n; i++) {
        sum += 1 / (Math.pow(i + 1, theta));
      }
      zetaStaticMap.put(k, sum);
      System.out.println("new zeta constant: " + k + " -> " + sum);
      return sum;
    }
  }

  /****************************************************************************************/

  /**
   * Generate the next item as a long.
   *
   * @return The next item in the sequence, between {@code base} and {@code base + items - 1}.
   */
  long nextLong() {
    double u = randomGenerator.nextDouble();
    double uz = u * zetan;
    if (uz < 1.0) {
      return base;
    }
    if (uz< 1.0 + Math.pow(0.5,theta)) {
      return base + 1;
    }
    long offset = (long)((items) * Math.pow(eta*u - eta + 1, alpha));
    // For u extremely close to 1.0 the term eta*u - eta + 1 can round to exactly 1.0,
    // which would yield offset == items, i.e. one past the largest valid item.
    // Clamp to the last item; Math.max keeps degenerate item counts (<= 0) unchanged.
    long lastOffset = Math.max(items - 1, 0);
    return base + Math.min(offset, lastOffset);
  }

  /**
   * Return the next value, skewed by the Zipfian distribution. The 0th item will be the most popular, followed by the 1st, followed
   * by the 2nd, etc. (Or, if min != 0, the min-th item is the most popular, the min+1th item the next most popular, etc.) If you want the
   * popular items scattered throughout the item space, use ScrambledZipfianPattern instead.
   *
   * <p>The value is obtained by a narrowing cast of {@link #nextLong()}; items outside the
   * {@code int} range are therefore not represented faithfully.
   */
  int nextInt() {
    return (int)nextLong();
  }

  /**
   * Return the next value, skewed by the Zipfian distribution. The 0th item will be the most popular, followed by the 1st, followed
   * by the 2nd, etc. (Or, if min != 0, the min-th item is the most popular, the min+1th item the next most popular, etc.) If you want the
   * popular items scattered throughout the item space, use ScrambledZipfianPattern instead.
   *
   * @return The same value as {@link #nextInt()}.
   */
  @Override
  public int next() {
    return nextInt();
  }
}