// =================================================================================================
// Copyright 2013 Twitter, Inc.
// -------------------------------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this work except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file, or at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =================================================================================================
package com.twitter.common.stats;
import java.util.Arrays;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.twitter.common.quantity.Amount;
import com.twitter.common.quantity.Data;
/**
* <p>
* Implements Histogram structure for computing approximate quantiles.
* The implementation is based on the following paper:
*
* <pre>
* [MP80] Munro & Paterson, "Selection and Sorting with Limited Storage",
* Theoretical Computer Science, Vol 12, p 315-323, 1980.
* </pre>
* </p>
* <p>
* You could read a detailed description of the same algorithm here:
*
* <pre>
* [MRL98] Manku, Rajagopalan & Lindsay, "Approximate Medians and other
* Quantiles in One Pass and with Limited Memory", Proc. 1998 ACM
* SIGMOD, Vol 27, No 2, p 426-435, June 1998.
* </pre>
* </p>
* <p>
* There's a good explanation of the algorithm in the Sawzall source code
* See: http://szl.googlecode.com/svn-history/r36/trunk/src/emitters/szlquantile.cc
* </p>
* Here's a schema of the tree:
* <pre>
* [4] level 3, weight=rootWeight=8
* |
* [3] level 2, weight=4
* |
* [2] level 1, weight=2
* / \
* [0] [1] level 0, weight=1
* </pre>
* <p>
* {@code [i]} represents {@code buffer[i]}
* The depth of the tree is limited to a maximum value
* Every buffer has the same size
* </p>
* <p>
* We add elements to {@code [0]} or {@code [1]}.
* When {@code [0]} and {@code [1]} are full, we collapse them; this generates a temporary buffer
* of weight 2. If {@code [2]} is empty, we put the collapsed buffer into {@code [2]}; otherwise
* we collapse {@code [2]} with the temporary buffer and put the result in {@code [3]} if it's
* empty, and so on...
* </p>
*/
public final class ApproximateHistogram implements Histogram {
  // Default precision target: ~2% error over an expected 100K elements.
  @VisibleForTesting
  public static final Precision DEFAULT_PRECISION = new Precision(0.02, 100 * 1000);
  // Memory budget used by the no-argument constructor.
  @VisibleForTesting
  public static final Amount<Long, Data> DEFAULT_MAX_MEMORY = Amount.of(12L, Data.KB);
  @VisibleForTesting static final long ELEM_SIZE = 8; // sizeof long
  // Tree of buffers (see the class-level diagram): buffer[0] and buffer[1] are
  // the leaves; upper levels are lazily allocated by recCollapse.
  @VisibleForTesting long[][] buffer;
  // Total number of elements ever inserted (not the number currently stored).
  @VisibleForTesting long count = 0L;
  @VisibleForTesting int leafCount = 0; // number of elements in the bottom two leaves
  // Highest tree level currently in use; starts at 1 and grows as collapses
  // push data upward.
  @VisibleForTesting int currentTop = 1;
  @VisibleForTesting int[] indices; // member for optimization reason (scratch cursors for queries)
  // True while buffer[0]/buffer[1] are sorted; add() invalidates this and
  // queries re-sort on demand.
  private boolean leavesSorted = true;
  // Weight of the root buffer; increases each time a collapse reaches maxDepth.
  private int rootWeight = 1;
  private long[][] bufferPool; // pool of 2 buffers (used for merging)
  // Capacity (in elements) of each buffer.
  private int bufferSize;
  // Maximum depth of the tree of buffers; the root lives at this level.
  private int maxDepth;
/**
 * Private init method that is called only by constructors.
 * All allocations are done in this method.
 *
 * @param bufSize size of each buffer
 * @param depth maximum depth of the tree of buffers
 */
@VisibleForTesting
void init(int bufSize, int depth) {
  bufferSize = bufSize;
  maxDepth = depth;
  bufferPool = new long[2][bufferSize];
  indices = new int[depth + 1];
  // Allocate only the spine of the tree here. The original code allocated a
  // full long[depth + 1][bufferSize] and then nulled out everything above the
  // leaves, wasting (depth - 1) * bufferSize * 8 bytes of immediately
  // discarded garbage; allocating an array of null rows is equivalent and
  // matches the lazy-allocation contract assumed by memoryUsage().
  buffer = new long[depth + 1][];
  // Only allocate the first 2 (leaf) buffers; the others are allocated
  // lazily by recCollapse on first use.
  allocate(0);
  allocate(1);
  clear();
}
/**
 * Builds a histogram with an explicit buffer size and tree depth; exposed
 * for tests only.
 *
 * @param bufSize size of each buffer
 * @param depth maximum depth of the tree of buffers
 */
@VisibleForTesting
ApproximateHistogram(int bufSize, int depth) {
  init(bufSize, depth);
}
/**
 * Creates a histogram honoring the given precision constraint; memory is
 * allocated as required to meet that precision.
 *
 * @param precision the requested precision
 */
public ApproximateHistogram(Precision precision) {
  Preconditions.checkNotNull(precision);
  final int n = precision.getN();
  final int depth = computeDepth(precision.getEpsilon(), n);
  init(computeBufferSize(depth, n), depth);
}
/**
 * Constructor with a memory constraint: finds the best precision that
 * satisfies the given memory budget.
 *
 * @param maxMemory the maximum amount of memory that the instance will take
 * @param expectedSize the expected number of elements to be inserted
 */
public ApproximateHistogram(Amount<Long, Data> maxMemory, int expectedSize) {
  Preconditions.checkNotNull(maxMemory);
  Preconditions.checkArgument(1024 <= maxMemory.as(Data.BYTES),
      "at least 1KB is required for an Histogram");
  double epsilon = DEFAULT_PRECISION.getEpsilon();
  int n = expectedSize;
  int depth = computeDepth(epsilon, n);
  int bufSize = computeBufferSize(depth, n);
  long maxBytes = maxMemory.as(Data.BYTES);
  // Increase precision if maxMemory allows it, otherwise reduce precision (by 5% steps).
  boolean tooMuchMem = memoryUsage(bufSize, depth) > maxBytes;
  double multiplier = tooMuchMem ? 1.05 : 0.95;
  // Step epsilon until the configuration crosses the memory boundary: the
  // loop runs while we remain on the same side (over/under budget) of the
  // budget that we started on.
  while((maxBytes < memoryUsage(bufSize, depth)) == tooMuchMem) {
    epsilon *= multiplier;
    if (epsilon < 0.00001) {
      // For a very generous memory budget, grow N instead of shrinking
      // epsilon indefinitely.
      n *= 10;
      epsilon = DEFAULT_PRECISION.getEpsilon();
    }
    depth = computeDepth(epsilon, n);
    bufSize = computeBufferSize(depth, n);
  }
  if (!tooMuchMem) {
    // We started under budget and stopped on the first configuration that
    // exceeds it, so step back once.
    // It's ok to consume less memory than the constraint,
    // but we never have to consume more!
    depth = computeDepth(epsilon / multiplier, n);
    bufSize = computeBufferSize(depth, n);
  }
  init(bufSize, depth);
}
/**
 * Constructor with a memory constraint, using the default expected size.
 *
 * @param maxMemory the maximum amount of memory that the instance will take
 * @see #ApproximateHistogram(Amount, int)
 */
public ApproximateHistogram(Amount<Long, Data> maxMemory) {
  this(maxMemory, DEFAULT_PRECISION.getN());
}
/**
 * Default constructor, using the default memory budget
 * ({@link #DEFAULT_MAX_MEMORY}).
 *
 * @see #ApproximateHistogram(Amount)
 */
public ApproximateHistogram() {
  this(DEFAULT_MAX_MEMORY);
}
/**
 * Records a value in the histogram.
 *
 * When both leaf buffers are full, they are first sorted and recursively
 * collapsed up the tree (see recCollapse), which frees the leaves.
 *
 * @param x the value to record
 */
@Override
public synchronized void add(long x) {
  if (leafCount == 2 * bufferSize) {
    // Both leaves are full: sort them and push their contents up the tree.
    Arrays.sort(buffer[0]);
    Arrays.sort(buffer[1]);
    recCollapse(buffer[0], 1);
    leafCount = 0;
  }
  // There is now room in a leaf; buffer[0] fills first, then buffer[1].
  final int leaf = (leafCount < bufferSize) ? 0 : 1;
  buffer[leaf][leafCount - (leaf * bufferSize)] = x;
  leafCount++;
  count++;
  // A single element is trivially sorted; anything more invalidates the flag.
  leavesSorted = (leafCount == 1);
}
/**
 * Returns an estimate of the q-quantile of all values added so far, or 0 if
 * the histogram is empty.
 *
 * The estimate is found by consuming elements from the largest downward
 * across all buffer levels (each element counted with its level's weight)
 * until ceil(count * (1 - q)) weighted elements have been seen.
 *
 * @param q the quantile, in [0.0, 1.0] inclusive
 * @throws IllegalArgumentException if q is outside [0.0, 1.0]
 */
@Override
public synchronized long getQuantile(double q) {
  Preconditions.checkArgument(0.0 <= q && q <= 1.0,
      "quantile must be in the range 0.0 to 1.0 inclusive");
  if (count == 0) {
    return 0L;
  }
  // The two leaves are the only buffers that can be partially filled.
  int buf0Size = Math.min(bufferSize, leafCount);
  int buf1Size = Math.max(0, leafCount - buf0Size);
  long sum = 0;
  // Rank of the quantile, counted from the largest element downward.
  long target = (long) Math.ceil(count * (1.0 - q));
  int i;
  // Queries require sorted leaves; add() clears this flag.
  if (! leavesSorted) {
    Arrays.sort(buffer[0], 0, buf0Size);
    Arrays.sort(buffer[1], 0, buf1Size);
    leavesSorted = true;
  }
  // Point every level's cursor at its last (largest) element; biggest()
  // skips the cursors of levels that currently hold no data.
  Arrays.fill(indices, bufferSize - 1);
  indices[0] = buf0Size - 1;
  indices[1] = buf1Size - 1;
  do {
    // Consume the largest remaining element and accumulate its weight.
    i = biggest(indices);
    indices[i]--;
    sum += weight(i);
  } while (sum < target);
  // The cursor was decremented past the answer, hence the + 1.
  return buffer[i][indices[i] + 1];
}
/**
 * Returns the estimated values of multiple quantiles in one call.
 *
 * @param quantiles the quantiles to estimate, each in [0.0, 1.0]
 */
@Override
public synchronized long[] getQuantiles(double[] quantiles) {
  return Histograms.extractQuantiles(this, quantiles);
}
/**
 * Resets the histogram to its freshly-constructed state.
 *
 * Buffer contents are left untouched: stale data in upper levels is ignored
 * because buffer emptiness is derived from the counters (see isBufferEmpty).
 */
@Override
public synchronized void clear() {
  leavesSorted = true;
  rootWeight = 1;
  currentTop = 1;
  leafCount = 0;
  count = 0L;
}
/**
 * MergedHistogram is a wrapper on top of multiple histograms; it gives a view
 * of all the underlying histograms as if they were just one.
 * Note: should only be used for querying the underlying histograms.
 */
private static class MergedHistogram implements Histogram {
  private final ApproximateHistogram[] histograms;

  private MergedHistogram(ApproximateHistogram[] histograms) {
    this.histograms = histograms;
  }

  /** Not supported on a merged view; asserts false (no-op when assertions are disabled). */
  @Override
  public void add(long x) {
    /* Ignore, Shouldn't be used */
    assert(false);
  }

  /** Not supported on a merged view; asserts false (no-op when assertions are disabled). */
  @Override
  public void clear() {
    /* Ignore, Shouldn't be used */
    assert(false);
  }

  /**
   * Returns a quantile estimate over the union of all underlying histograms,
   * by repeatedly consuming the globally largest unconsumed element until the
   * accumulated weight reaches the target rank.
   *
   * NOTE(review): unlike ApproximateHistogram#getQuantile this method is not
   * synchronized, yet it reads and mutates each histogram's scratch
   * {@code indices} array and sorts its leaves (via initIndices); concurrent
   * add() calls on the underlying histograms during a query look racy --
   * confirm callers quiesce writers first.
   *
   * @param quantile the quantile to estimate, in [0.0, 1.0] inclusive
   */
  @Override
  public long getQuantile(double quantile) {
    Preconditions.checkArgument(0.0 <= quantile && quantile <= 1.0,
        "quantile must be in the range 0.0 to 1.0 inclusive");
    long count = initIndices();
    if (count == 0) {
      return 0L;
    }
    long sum = 0;
    // Rank of the quantile, counted from the largest element downward.
    long target = (long) Math.ceil(count * (1.0 - quantile));
    int iHist = -1;    // histogram holding the current largest element
    int iBiggest = -1; // buffer level of that element inside histograms[iHist]
    do {
      // Find the largest not-yet-consumed element across all histograms.
      long biggest = Long.MIN_VALUE;
      for (int i = 0; i < histograms.length; i++) {
        ApproximateHistogram hist = histograms[i];
        int indexBiggest = hist.biggest(hist.indices);
        if (indexBiggest >= 0) {
          long value = hist.buffer[indexBiggest][hist.indices[indexBiggest]];
          if (iBiggest == -1 || biggest <= value) {
            iBiggest = indexBiggest;
            biggest = value;
            iHist = i;
          }
        }
      }
      // Consume that element and accumulate its weight.
      histograms[iHist].indices[iBiggest]--;
      sum += histograms[iHist].weight(iBiggest);
    } while (sum < target);
    // The cursor was decremented past the answer, hence the + 1.
    ApproximateHistogram hist = histograms[iHist];
    int i = hist.indices[iBiggest];
    return hist.buffer[iBiggest][i + 1];
  }

  /**
   * Returns the estimated values of multiple quantiles in one call.
   *
   * @param quantiles the quantiles to estimate, each in [0.0, 1.0]
   */
  @Override
  public synchronized long[] getQuantiles(double[] quantiles) {
    return Histograms.extractQuantiles(this, quantiles);
  }

  /**
   * Initializes the indices array for each histogram (sorting leaves when
   * needed, mirroring the setup in ApproximateHistogram#getQuantile) and
   * returns the global count.
   */
  private long initIndices() {
    long count = 0L;
    for (int i = 0; i < histograms.length; i++) {
      ApproximateHistogram h = histograms[i];
      int[] indices = h.indices;
      count += h.count;
      // The two leaves are the only buffers that can be partially filled.
      int buf0Size = Math.min(h.bufferSize, h.leafCount);
      int buf1Size = Math.max(0, h.leafCount - buf0Size);
      if (! h.leavesSorted) {
        Arrays.sort(h.buffer[0], 0, buf0Size);
        Arrays.sort(h.buffer[1], 0, buf1Size);
        h.leavesSorted = true;
      }
      // Point every level's cursor at its last (largest) element.
      Arrays.fill(indices, h.bufferSize - 1);
      indices[0] = buf0Size - 1;
      indices[1] = buf1Size - 1;
    }
    return count;
  }
}
/**
 * Returns a read-only Histogram view over the union of the given histograms.
 * Note: queries on the returned view reuse (and reset) each underlying
 * histogram's scratch indices array.
 *
 * @param histograms array of histograms to merge together
 * @return a new Histogram view over all of them
 */
public static Histogram merge(ApproximateHistogram[] histograms) {
  return new MergedHistogram(histograms);
}
/**
 * Computes the required depth of the tree of buffers: the smallest
 * {@code b} such that {@code (b - 2) * 2^(b - 2) + 0.5 > epsilon * n}.
 *
 * For the derivation of this bound, read the Munro-Paterson or the
 * Manku-Rajagopalan-Lindsay papers cited in the class documentation.
 *
 * @param epsilon the maximum acceptable error
 * @param n the expected number of elements
 */
@VisibleForTesting static int computeDepth(double epsilon, long n) {
  final double errorBudget = epsilon * n;
  int b = 2;
  // Advance b until the inequality fails; 1L << (b - 2) is 2^(b - 2).
  for (; (b - 2) * (1L << (b - 2)) + 0.5 <= errorBudget; b++) {
    // empty -- all the work happens in the loop condition
  }
  return b;
}
/**
 * Computes the buffer size needed to store {@code n} elements in a tree of
 * the given depth (a tree of depth d holds about bufferSize * 2^(d - 1)
 * elements).
 *
 * @param depth the depth of the tree of buffers
 * @param n the expected number of elements
 */
@VisibleForTesting static int computeBufferSize(int depth, long n) {
  final long slots = 1L << (depth - 1);
  return (int) (n / slots);
}
/**
 * Returns an estimation of the memory used by an instance, in bytes.
 * The size accounts for:
 * - a fixed cost for the object and its scalar fields
 * - bufferPool: 2 buffers of bufferSize elements (plus headers)
 * - indices: depth + 1 ints (plus header)
 * - buffer: depth + 1 buffers of bufferSize elements (plus headers)
 *
 * Note: this method is covered by a unit test and will break if new fields
 * are added to the class.
 *
 * @param bufferSize the size of a buffer
 * @param depth the depth of the tree of buffers (depth + 1 buffers)
 */
@VisibleForTesting
static long memoryUsage(int bufferSize, int depth) {
  // (depth + 3) payloads: depth + 1 tree buffers plus the 2 pool buffers.
  final long payloadBytes = bufferSize * ELEM_SIZE * (depth + 3);
  final long perLevelOverhead = 24 * depth;
  return 176 + perLevelOverhead + payloadBytes;
}
/**
 * Returns the level holding the largest not-yet-consumed element, using the
 * cursor array {@code ids} to track which elements have already been
 * returned per level. Every buffer must already be sorted at this point.
 *
 * @param ids per-level cursors (index of each level's largest unconsumed element)
 * @return the level of the largest element, or -1 if no element was found
 */
@VisibleForTesting
int biggest(final int[] ids) {
  long best = Long.MIN_VALUE;
  int bestLevel = -1;
  // Leaf 0 participates only if it stores at least one element.
  if (leafCount > 0 && ids[0] >= 0) {
    best = buffer[0][ids[0]];
    bestLevel = 0;
  }
  // Leaf 1 participates only once leaf 0 has overflowed into it.
  if (leafCount > bufferSize && ids[1] >= 0) {
    final long candidate = buffer[1][ids[1]];
    if (candidate > best) {
      best = candidate;
      bestLevel = 1;
    }
  }
  // Upper levels participate only while they currently hold data.
  for (int level = 2; level <= currentTop; level++) {
    if (!isBufferEmpty(level) && ids[level] >= 0) {
      final long candidate = buffer[level][ids[level]];
      if (candidate > best) {
        best = candidate;
        bestLevel = level;
      }
    }
  }
  return bestLevel;
}
/**
 * Tells whether the buffer at {@code level} currently holds data, derived
 * purely from the number of elements inserted: (count - leafCount) /
 * bufferSize measures, in units of one buffer, how much data has been
 * pushed above the leaves, and bit (level - 1) of that counter corresponds
 * to a buffer of weight 2^(level - 1).
 */
@VisibleForTesting
boolean isBufferEmpty(int level) {
  if (level == currentTop) {
    // The root buffer (when present) is always full.
    return false;
  }
  final long pushedUp = (count - leafCount) / bufferSize;
  return (pushedUp & (1L << (level - 1))) == 0;
}
/**
 * Returns the weight of a level: 2^(level - 1), except for the tree leaves
 * (weight 1) and for the root (whose weight grows as collapses reach it).
 */
private int weight(int level) {
  if (level == 0) {
    return 1;
  }
  return (level == maxDepth) ? rootWeight : (1 << (level - 1));
}
/** Allocates the buffer at level {@code i} if it has not been allocated yet. */
private void allocate(int i) {
  if (buffer[i] != null) {
    return;
  }
  buffer[i] = new long[bufferSize];
}
/**
 * Recursively collapse the buffers of the tree.
 * Upper buffers will be allocated on first access in this method.
 *
 * @param buf a sorted buffer of weight 2^(level - 1) to merge into {@code level}
 * @param level the level to merge into
 */
private void recCollapse(long[] buf, int level) {
  // If we reach the root, we can't add more buffers: merge into the root.
  if (level == maxDepth) {
    // weight() would return the weight of the root; here we need the
    // weight of the merge input instead.
    int mergeWeight = 1 << (level - 1);
    // Alternate pool buffers by level parity so the merge target is never
    // the pool buffer that 'buf' itself arrived in (the recursion below
    // hands bufferPool[level % 2] up as 'buf' for level + 1).
    int idx = level % 2;
    long[] merged = bufferPool[idx];
    long[] tmp = buffer[level];
    collapse(buf, mergeWeight, buffer[level], rootWeight, merged);
    // Swap the merged result into the tree and recycle the old root
    // buffer into the pool -- no allocation needed.
    buffer[level] = merged;
    bufferPool[idx] = tmp;
    rootWeight += mergeWeight;
  } else {
    allocate(level + 1); // lazy allocation (if needed)
    if (level == currentTop) {
      // If we reach the top, add a new buffer: the tree grows one level
      // and the root's weight doubles accordingly.
      collapse1(buf, buffer[level], buffer[level + 1]);
      currentTop += 1;
      rootWeight *= 2;
    } else if (isBufferEmpty(level + 1)) {
      // If the upper buffer is empty, use it.
      collapse1(buf, buffer[level], buffer[level + 1]);
    } else {
      // If the upper buffer isn't empty, collapse into a pool buffer and
      // recurse one level up.
      long[] merged = bufferPool[level % 2];
      collapse1(buf, buffer[level], merged);
      recCollapse(merged, level + 1);
    }
  }
}
/**
 * Collapses two sorted arrays of different weights into {@code output}.
 *
 * Conceptually, every value is repeated 'weight' times, the two expanded
 * sequences are merged, and evenly spaced elements of the merged sequence
 * are kept. For example:
 *   [2,5,7] with weight 2 and [3,8,9] with weight 3
 *   expand + merge = [2,2,3,3,3,5,5,7,7,8,8,8,9,9,9]
 *   keep one element per totalWeight = [3,7,9]
 * The expanded sequence is never materialized: output positions are
 * computed directly from the running weighted count.
 */
@VisibleForTesting
static void collapse(
    long[] left,
    int leftWeight,
    long[] right,
    int rightWeight,
    long[] output) {
  final int totalWeight = leftWeight + rightWeight;
  final int offset = (totalWeight / 2) - 1;
  int li = 0;
  int ri = 0;
  int out = 0;
  int weightSoFar = 0;
  while (li < left.length || ri < right.length) {
    // Standard two-way merge step: pick the smaller head.
    final long value;
    final int valueWeight;
    if (li < left.length && (ri == right.length || left[li] < right[ri])) {
      value = left[li];
      valueWeight = leftWeight;
      li++;
    } else {
      value = right[ri];
      valueWeight = rightWeight;
      ri++;
    }
    // Emit the value once for each multiple of totalWeight its weighted
    // span [weightSoFar, weightSoFar + valueWeight) covers.
    int emitted = (weightSoFar + offset) / totalWeight;
    weightSoFar += valueWeight;
    final int toEmit = (weightSoFar + offset) / totalWeight;
    while (emitted < toEmit) {
      output[out] = value;
      out++;
      emitted++;
    }
  }
}
/**
 * Optimized version of {@link #collapse} for two arrays of identical
 * weight, which is the common case: keep every second element (the odd
 * positions) of the merged sequence.
 */
private static void collapse1(
    long[] left,
    long[] right,
    long[] output) {
  int li = 0;
  int ri = 0;
  int out = 0;
  final int total = left.length + right.length;
  for (int pos = 0; pos < total; pos++) {
    // Standard two-way merge step: pick the smaller head.
    final long value;
    if (li < left.length && (ri == right.length || left[li] < right[ri])) {
      value = left[li];
      li++;
    } else {
      value = right[ri];
      ri++;
    }
    if (pos % 2 == 1) {
      output[out] = value;
      out++;
    }
  }
}
}