/*
* Copyright 2010-2012 Coda Hale and Yammer, Inc.
* Copyright 2014 LinkedIn, Corp. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Retrieved from https://github.com/codahale/metrics/ on 2013-12-20. Modified
* to support aggregation, which requires read/write access both to the full
* '_values' object (including rescaling and bulk updating) and to '_startTime'.
* Also modified to support use of the StatUtils package in Apache Commons' Math
* project for percentile/quantile calculations; in particular, use doubles rather
* than longs for data values, and omit sorting the data values since StatUtils
* necessarily does so itself.
*
* Ultimately the plan is to contribute this back to the public metrics-core repo on
* github, but at present the changes don't fit terribly well with the existing class
* design. For example, the Reservoir interface currently assumes long data values;
* several of the internal variables specific to the ExponentiallyDecayingReservoir
* implementation are exposed; and the mergeability feature isn't sufficiently abstracted
* to be immediately extensible to the other kinds of metrics-core reservoirs.
*/
package com.codahale.metrics;
import java.util.ArrayList;
import java.util.Collection;
import java.util.concurrent.ConcurrentNavigableMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import static java.lang.Math.exp;
import static java.lang.Math.min;
import com.codahale.metrics.Clock;
import com.codahale.metrics.ThreadLocalRandom;
/**
* An exponentially-decaying random reservoir of {@code double}s. Uses Cormode et al.'s
* forward-decaying priority reservoir sampling method to produce a statistically
* representative sampling reservoir, exponentially biased toward newer entries.
*
* @see <a href="http://dimacs.rutgers.edu/~graham/pubs/papers/fwddecay.pdf">
* Cormode et al., "Forward Decay: A Practical Time Decay Model for Streaming Systems."
* ICDE '09: Proceedings of the 2009 IEEE International Conference on Data Engineering (2009)</a>
*/
public class MergeableExponentiallyDecayingReservoir // implements Reservoir [no longer implements update(long)]
{
  public static final int DEFAULT_SIZE = 1028;
  public static final double DEFAULT_ALPHA = 0.015; // units = 1/sec

  // Minimum clock-tick interval (nanoseconds) between automatic rescales.
  private static final long RESCALE_THRESHOLD = TimeUnit.HOURS.toNanos(1);
  // Sentinel for rescale(now, next, startTime): "use whatever _nextScaleTime currently holds".
  private static final long CURRENT_NEXT_SCALE_TIME = Long.MIN_VALUE;

  // priority -> data value, sorted by ascending priority; firstKey() is the eviction candidate
  private final ConcurrentSkipListMap<Double, Double> _values;
  // read lock: shared by regular updates and reads; write lock: exclusive, held while rescaling
  private final ReentrantReadWriteLock _lock;
  private final double _alpha;
  private final int _size;
  private final AtomicLong _count; // accurate only until reservoir size is reached; thereafter mostly inaccurate
  private volatile long _startTime; // landmark, in epoch seconds; written only under the write lock
  private final AtomicLong _nextScaleTime; // clock tick at which the next automatic rescale is due
  private final Clock _clock;

  /**
   * Creates a new {@link MergeableExponentiallyDecayingReservoir} of 1028 elements, which
   * offers a 99.9% confidence level with a 5% margin of error assuming a normal distribution,
   * and an alpha factor of 0.015, which heavily biases the reservoir to the past 5 minutes of
   * measurements.
   */
  public MergeableExponentiallyDecayingReservoir() {
    this(DEFAULT_SIZE, DEFAULT_ALPHA);
  }

  /**
   * Creates a new {@link MergeableExponentiallyDecayingReservoir} using the default clock.
   *
   * @param size the number of samples to keep in the sampling reservoir
   * @param alpha the exponential decay factor; the higher this is, the more biased the reservoir
   * will be towards newer values
   */
  public MergeableExponentiallyDecayingReservoir(int size, double alpha) {
    this(size, alpha, Clock.defaultClock());
  }

  /**
   * Creates a new {@link MergeableExponentiallyDecayingReservoir}.
   *
   * @param size the number of samples to keep in the sampling reservoir
   * @param alpha the exponential decay factor; the higher this is, the more biased the reservoir
   * will be towards newer values
   * @param clock the clock supplying both wall-clock time (landmark) and ticks (rescale schedule)
   */
  public MergeableExponentiallyDecayingReservoir(int size, double alpha, Clock clock) {
    _values = new ConcurrentSkipListMap<Double, Double>();
    _lock = new ReentrantReadWriteLock();
    _alpha = alpha;
    _size = size;
    _clock = clock;
    _count = new AtomicLong(0);
    _startTime = currentTimeInSeconds();
    _nextScaleTime = new AtomicLong(clock.getTick() + RESCALE_THRESHOLD);
  }

  /**
   * Returns the smaller of the configured reservoir size and the internal sample counter.
   */
  // @Override
  public int size() {
    return (int) min(_size, _count.get());
  }

  /**
   * Adds a value to the reservoir, stamped with the clock's current time.
   *
   * @param value the value to be added
   */
  // @Override
  public void update(double value) {
    update(value, currentTimeInSeconds());
  }

  /**
   * Adds an old value with a fixed timestamp to the reservoir. This method appears to use the
   * "priority sampling" approach from page 7 of the Cormode et al. paper. (The only weird part
   * is that the paper talks about sampling <i>without</i> replacement, yet the code is clearly
   * replacing lower-priority values. Perhaps the distinction is the non-replacement of values
   * with the exact same priority? The code does avoid doing that (via putIfAbsent()).)
   *
   * @param value the value to be added
   * @param timestamp the epoch timestamp of {@code value} in seconds
   */
  public void update(double value, long timestamp) {
    rescaleIfNeeded();
    lockForRegularUsage();
    try {
      final double priority =
          weight(timestamp - _startTime) / ThreadLocalRandom.current().nextDouble();
      // TODO/FIXME: why is this unconditional? if newCount > size, should be decremented again...
      final long newCount = _count.incrementAndGet();
      if (newCount <= _size) {
        _values.put(priority, value);
      } else {
        final Double first = _values.firstKey();
        if (first < priority && _values.putIfAbsent(priority, value) == null) {
          // we added an entry, so ensure we always remove one
          removeLowest(first);
        }
        // _count.set(_size); ? [TODO/FIXME: cheap; shouldn't hurt; better than not setting]
      }
    } finally {
      unlockForRegularUsage();
    }
  }

  /**
   * Merges another reservoir into this one by (1) choosing consistent landmark (_startTime) value;
   * (2) rescaling the reservoir with the older landmark value to the newer one; and (3) adding the
   * other's higher-priority key/value pairs to this one's _values map, throwing out lower-priority
   * entries as in the normal update() method above.
   *
   * <p>Known limitation: the rescale in step (2) is skipped when a concurrent rescale of the same
   * reservoir wins the compare-and-set on its next-scale time; in that case the entries are still
   * merged, possibly against differing landmarks.
   *
   * @param other the reservoir whose samples are merged into this one; {@code null} is a no-op
   */
  public void merge(MergeableExponentiallyDecayingReservoir other)
  {
    if (other == null) {
      return;
    }

    final ConcurrentNavigableMap<Double, Double> otherReversedMap; // alternatively, array of <K,V> entries?
    final long now = _clock.getTick();
    final long otherStartTime = other.getLandmark(); // 1-second granularity
    if (otherStartTime < _startTime) {
      // other is older, so need to rescale other's data to match ours
      otherReversedMap = other.rescale(now, CURRENT_NEXT_SCALE_TIME, _startTime);
    } else {
      // get other's data here (as close as possible to time of getLandmark() call): small race condition, but
      // given one-hour rescale granularity and fact that other is more recently rescaled, shouldn't actually be
      // a problem
      otherReversedMap = other.getDescendingMap(); // _values.descendingMap()
      if (otherStartTime > _startTime) {
        // other is newer; need to rescale our data to match, even if haven't yet reached _nextScaleTime
        rescale(now, _nextScaleTime.get(), otherStartTime);
      }
    }

    // both halves of merge now have same landmark (startTime) value, and we have a reverse-iterable
    // view of other's map => can do actual merge on apples-to-apples basis
    lockForRegularUsage();
    try {
      long approxCount = _count.get();
      // Iterate over other's entries from highest priority to lowest. Entries (rather than keys
      // followed by get()) are used because the view is live: a key evicted from 'other' between
      // the iteration step and the lookup would yield a null value, which put() rejects with an
      // NPE. Entries handed out by the iterator are key/value snapshots.
      for (java.util.Map.Entry<Double, Double> entry : otherReversedMap.entrySet()) {
        final Double priority = entry.getKey();
        final Double value = entry.getValue();
        if (++approxCount <= _size) {
          _values.put(priority, value);
        } else {
          final Double lowestPriority = _values.firstKey();
          if (lowestPriority < priority) {
            if (_values.putIfAbsent(priority, value) == null) {
              // we added an entry, so ensure we always remove one
              removeLowest(lowestPriority);
            }
          } else {
            // hit break-even point: this and all future "other" priorities are equal to or lower than
            // lowest already present in map, so no point in continuing to iterate: discard in bulk
            break;
          }
        }
      }

      // in principle, other threads might have been updating _values concurrently, so set _count to the
      // true value, and trim the map if we've exceeded our target size
      long trueCount = _values.size(); // O(n)
      if (trueCount > _size) {
        for (long i = _size; i < trueCount; ++i) {
          removeLowest(_values.firstKey());
        }
        trueCount = _size;
      }
      _count.set(trueCount);
    } finally {
      unlockForRegularUsage();
    }
  }

  /**
   * Returns the current landmark ({@code _startTime}) in epoch seconds. Takes the read lock, so
   * the call waits for any in-progress rescale to finish.
   */
  public long getLandmark()
  {
    lockForRegularUsage();
    try {
      return _startTime;
    } finally {
      unlockForRegularUsage();
    }
  }

  /**
   * Returns a descending-priority view of the priority/value map. The returned map is a live
   * view of this reservoir's internal state, not a copy; the read lock is held only while the
   * view is created, not while the caller uses it.
   */
  public ConcurrentNavigableMap<Double, Double> getDescendingMap()
  {
    lockForRegularUsage();
    try {
      return _values.descendingMap();
    } finally {
      unlockForRegularUsage();
    }
  }

  /**
   * Returns a copy of the sampled data values, in ascending-priority (not value) order.
   */
  // for use with StatUtils.percentile() and StatUtils.max(), which presumably do their own sorting
  public double[] getUnsortedValues()
  {
    lockForRegularUsage();
    try {
      // Updaters share the read lock, so the map may grow or shrink while we read it. Snapshot
      // the values in a single pass and size the array from the snapshot; sizing it from
      // values().size() and iterating separately could overrun the array or leave trailing zeros.
      final ArrayList<Double> snapshot = new ArrayList<Double>();
      for (Double dataValue : _values.values()) {
        snapshot.add(dataValue);
      }
      final double[] result = new double[snapshot.size()];
      int j = 0;
      for (Double dataValue : snapshot) {
        result[j++] = dataValue.doubleValue();
      }
      return result;
    } finally {
      unlockForRegularUsage();
    }
  }

  // @Override
  // public Snapshot getSnapshot() {
  //   lockForRegularUsage();
  //   try {
  //     return new Snapshot(_values.values());
  //   } finally {
  //     unlockForRegularUsage();
  //   }
  // }

  private long currentTimeInSeconds() {
    return TimeUnit.MILLISECONDS.toSeconds(_clock.getTime());
  }

  // Forward-decay weight g(t) = exp(alpha * t), with t in seconds relative to the landmark.
  private double weight(long t) {
    return exp(_alpha * t);
  }

  // Removes 'candidate' from the map; if another thread already removed it, removes whatever
  // key is currently lowest instead, retrying until exactly one entry has been removed.
  private void removeLowest(Double candidate) {
    Double lowest = candidate;
    while (_values.remove(lowest) == null) {
      lowest = _values.firstKey();
    }
  }

  // TODO: if getTime() is comparably cheap to getTick(), use currentTimeInSeconds() for both next/now AND
  //       _startTime: 1-second granularity more than sufficient if rescaling only once per hour, and
  //       should help synch up landmark values during multiple merges => avoid rescale churn
  // [getTime() == System.currentTimeMillis()] Windoze: "5-6 CPU clocks"
  // [getTick() == System.nanoTime() with default clock == Clock.UserTimeClock] "relatively expensive; can be 100+ CPU clocks"
  // [https://blogs.oracle.com/dholmes/entry/inside_the_hotspot_vm_clocks]
  // [not much detailed info on Linux timings...]
  // [AHA, Linux: http://bugs.sun.com/view_bug.do?bug_id=6876279 Aug 2010]
  // [currentTimeMillis(): ~7 ns on 1.6.0_04 64-bit with AggressiveOpts, ~1600 ns on 1.6.0_14 64-bit]
  // [1.6.0_04 included an experimental gettimeofday() cache in AggressiveOpts; later removed due to problems]
  // [nanoTime(): ~1700 ns on both 1.6 versions; should be much better with reliable-TSC support in hw]
  private void rescaleIfNeeded() {
    final long now = _clock.getTick();
    final long next = _nextScaleTime.get();
    if (now >= next) {
      rescale(now, next);
    }
  }

  /* "A common feature of the above techniques—indeed, the key technique that
   * allows us to track the decayed weights efficiently—is that they maintain
   * counts and other quantities based on g(ti − L), and only scale by g(t − L)
   * at query time. But while g(ti −L)/g(t−L) is guaranteed to lie between zero
   * and one, the intermediate values of g(ti − L) could become very large. For
   * polynomial functions, these values should not grow too large, and should be
   * effectively represented in practice by floating point values without loss of
   * precision. For exponential functions, these values could grow quite large as
   * new values of (ti − L) become large, and potentially exceed the capacity of
   * common floating point types. However, since the values stored by the
   * algorithms are linear combinations of g values (scaled sums), they can be
   * rescaled relative to a new landmark. That is, by the analysis of exponential
   * decay in Section III-A, the choice of L does not affect the final result. We
   * can therefore multiply each value based on L by a factor of exp(−α(L′ − L)),
   * and obtain the correct value as if we had instead computed relative to a new
   * landmark L′ (and then use this new L′ at query time). This can be done with
   * a linear pass over whatever data structure is being used."
   */
  // Periodic rescale to "now"; only the thread that wins the CAS on _nextScaleTime does the work.
  private void rescale(long now, long next) {
    if (_nextScaleTime.compareAndSet(next, now + RESCALE_THRESHOLD)) {
      lockForRescale();
      try {
        rescaleLocked(currentTimeInSeconds());
      } finally {
        unlockForRescale();
      }
    }
  }

  /**
   * Public variant of the periodic rescale, for merging (aggregating) histograms: rescales all
   * priorities relative to the given landmark instead of the current time.
   *
   * <p>The rescale is performed only if this call wins the compare-and-set of the next-scale time
   * from {@code next} to {@code now} plus the rescale threshold; otherwise the map is returned
   * as-is, without rescaling.
   *
   * @param now the current clock tick (same units as {@code Clock.getTick()})
   * @param next the expected current next-scale tick, or {@code Long.MIN_VALUE} to use whatever
   * value is current at the time of the call
   * @param startTime the new landmark, in epoch seconds
   * @return a live, descending-priority view of this reservoir's priority/value map
   */
  public ConcurrentNavigableMap<Double, Double> rescale(long now, long next, long startTime)
  {
    final long expectedNext = (next == CURRENT_NEXT_SCALE_TIME) ? _nextScaleTime.get() : next;
    if (_nextScaleTime.compareAndSet(expectedNext, now + RESCALE_THRESHOLD)) {
      lockForRescale();
      try {
        rescaleLocked(startTime);
        return _values.descendingMap();
      } finally {
        unlockForRescale();
      }
    }

    lockForRegularUsage();
    try {
      return _values.descendingMap();
    } finally {
      unlockForRegularUsage();
    }
  }

  // Moves the landmark to newStartTime and multiplies every stored priority by
  // exp(-alpha * (newStartTime - oldStartTime)). Caller must hold the write lock.
  private void rescaleLocked(long newStartTime) {
    final long oldStartTime = _startTime;
    _startTime = newStartTime;
    final double factor = exp(-_alpha * (newStartTime - oldStartTime));
    // Process keys in the order that moves each one away from the not-yet-processed keys:
    // ascending when shrinking, descending when growing (landmark moved backwards). Otherwise a
    // rescaled key could land exactly on an unprocessed key and overwrite its value.
    final ArrayList<Double> keys = new ArrayList<Double>(
        factor > 1.0 ? _values.descendingKeySet() : _values.keySet());
    for (Double key : keys) {
      final Double value = _values.remove(key);
      _values.put(key * factor, value);
    }
    // Make sure the counter is in sync with the number of stored samples (distinct keys can
    // collapse into one after scaling). size() is O(n), but so is the pass above.
    _count.set(_values.size());
  }

  private void lockForRescale() {
    _lock.writeLock().lock();
  }

  private void unlockForRescale() {
    _lock.writeLock().unlock();
  }

  private void lockForRegularUsage() {
    _lock.readLock().lock();
  }

  private void unlockForRegularUsage() {
    _lock.readLock().unlock();
  }
}