/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.cache;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.diqube.util.DiqubeIterables;
import org.diqube.util.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
/**
* Default implementation of a cache that counts usage of elements and caches the most used objects, but adhering to a
* memory cap.
*
* <p>
* Assumes that values identified by a key pair (K1,K2) do not change, not even when re-inserted into the cache.
*
* <p>
* As this implementation counts the offers, these counts are never cleared internally. This means that the memory
* used by this cache will increase steadily over time if the keys keep changing. <b>Therefore take special care of
* what to use as keys! Do NOT use any randomly generated IDs etc (e.g. {@link UUID#randomUUID()}) without specifying a
* meaningful {@link CountCleanupStrategy}!</b> By default counts will never be cleaned up.
*
* <p>
* It caches those values that were used most often - it therefore counts the usages of these on calls to
* {@link #offer(Comparable, Comparable, Object)}. This cache caches up to a maximum memory size, which is calculated
* using a {@link MemoryConsumptionProvider}. If there are multiple values used the same number of times and they are
* at the max-memory border, the values are ordered by their key2 (of type K2) - the values with the "lesser" key2s
* may be cached.
*
* <p>
* This cache maintains itself and does execute cleanup actions on internally used data structures at its own
* discretion.
*
* <p>
* This cache is a {@link FlaggingCache}, which means that it is capable of prohibiting specific elements from being
* evicted from the cache for a certain amount of time. This cache implements this behavior without an additional
* thread. Therefore it might take a while to actually evict values that have been flagged: This will happen on calls to
* {@link #offer(Comparable, Comparable, Object)} and {@link #offerAndFlag(Comparable, Comparable, Object, long)};
* additionally one can trigger it using {@link #consolidate()}.
*
* <p>
* The flagged elements memory size does <b>NOT</b> count towards the memory cap! This means that this cache might
* actually take up "maxMemory + x" memory, where x is the sum of sizes of all flagged entries. On the other hand, if
* there is free memory (up to the cap), but there are not-longer-flagged entries, these are not evicted from the cache
* until the cap is reached (this is obviously true, because since we're under the cap, all entries have an entry in
* {@link #topCounts} and are therefore regular entries in the cache, although that flagged entry was just additionally
* flagged).
*
* @author Bastian Gloeckle
*/
public class CountingCache<K1 extends Comparable<K1>, K2 extends Comparable<K2>, V>
implements WritableFlaggingCache<K1, K2, V> {
private static final Logger logger = LoggerFactory.getLogger(CountingCache.class);

/**
 * The default internal cleanup strategy cleans up randomly in approx. 3% of cases.
 *
 * Use 128 as base so random is implemented faster. Scale "3" (%) equally (* 1.28).
 */
private static final InternalCleanupStrategy DEFAULT_CLEANUP_STRATEGY =
    () -> ThreadLocalRandom.current().nextInt(128) < 4;

/** The cached values: key1 to (key2 to value). Changed by {@link #addToCache} and {@link #removeFromCache}. */
private final ConcurrentMap<K1, ConcurrentMap<K2, V>> caches = new ConcurrentHashMap<>();

/** Number of usages that were offered for each {@link CacheId}. */
private final ConcurrentMap<CacheId, AtomicLong> counts = new ConcurrentHashMap<>();

/**
 * The counts in the order defined by {@link CacheIdCount} (highest count first). A {@link CacheId} may be contained
 * multiple times for a short time, as new counts are added before the old ones are removed.
 */
private final ConcurrentSkipListSet<CacheIdCount> topCounts = new ConcurrentSkipListSet<>();

/** Memory consumption in bytes as reported by the {@link MemoryConsumptionProvider}, by {@link CacheId}. */
private final ConcurrentMap<CacheId, Long> memoryConsumption = new ConcurrentHashMap<>();

/** The {@link CacheId}s whose values are currently available in {@link #caches}. */
private final ConcurrentSkipListSet<CacheId> currentlyCachedCacheIds = new ConcurrentSkipListSet<>();

/**
 * CacheIds which are flagged currently to the current information about the flag.
 *
 * Items are removed from here by {@link #removeOldFlaggedCacheIds()}; see that method for the rules concurrent
 * modifications of this map have to follow.
 */
private final ConcurrentMap<CacheId, FlagInfo> flaggedChacheIds = new ConcurrentHashMap<>();

/**
 * Timeout time in nanos to the CacheId that was flagged.
 */
private final ConcurrentSkipListMap<Long, CacheId> flaggedTimes = new ConcurrentSkipListMap<>();

/** Monitor that is held while {@link #caches} and {@link #currentlyCachedCacheIds} are changed. */
private final Object updateCacheSync = new Object();

/** gets write-locked when cleanup is running. Use read-lock to execute something while not cleaning up. */
private final ReentrantReadWriteLock cleanupLock = new ReentrantReadWriteLock();

/** Decides when to run {@link #intermediaryCleanup()}. Not final, as tests may replace it. */
private InternalCleanupStrategy cleanupStrategy;

/** The memory cap in bytes which the values selected through {@link #topCounts} have to adhere to. */
private final long maxMemoryBytes;

/** Calculates the memory consumption of offered values. */
private final MemoryConsumptionProvider<V> memoryConsumptionProvider;

/** Decides which entries of {@link #counts} are removed during {@link #intermediaryCleanup()}. */
private final CountCleanupStrategy<K1, K2> countCleanupStrategy;
/**
 * Creates a cache with the default internal cleanup strategy that never cleans up any counts.
 *
 * @param maxMemoryBytes
 *          The memory cap in bytes.
 * @param memoryConsumptionProvider
 *          Calculates the memory consumption of offered values.
 */
public CountingCache(long maxMemoryBytes, MemoryConsumptionProvider<V> memoryConsumptionProvider) {
  this(maxMemoryBytes, DEFAULT_CLEANUP_STRATEGY, memoryConsumptionProvider);
}

/**
 * Creates a cache with the default internal cleanup strategy and a custom strategy on when to clean up counts.
 *
 * @param maxMemoryBytes
 *          The memory cap in bytes.
 * @param memoryConsumptionProvider
 *          Calculates the memory consumption of offered values.
 * @param countCleanupStrategy
 *          Decides which counts are removed on internal cleanups.
 */
public CountingCache(long maxMemoryBytes, MemoryConsumptionProvider<V> memoryConsumptionProvider,
    CountCleanupStrategy<K1, K2> countCleanupStrategy) {
  this(maxMemoryBytes, DEFAULT_CLEANUP_STRATEGY, memoryConsumptionProvider, countCleanupStrategy);
}

/**
 * Creates a cache with a custom internal cleanup strategy that never cleans up any counts.
 *
 * @param maxMemoryBytes
 *          The memory cap in bytes.
 * @param cleanupStrategy
 *          Decides when an internal cleanup is executed.
 * @param memoryConsumptionProvider
 *          Calculates the memory consumption of offered values.
 */
public CountingCache(long maxMemoryBytes, InternalCleanupStrategy cleanupStrategy,
    MemoryConsumptionProvider<V> memoryConsumptionProvider) {
  // the count cleanup strategy answers "null" = keep all counts forever.
  this(maxMemoryBytes, cleanupStrategy, memoryConsumptionProvider, (cleanupCandidates, everyCount) -> null);
}

/**
 * Creates a cache, specifying all strategies explicitly.
 *
 * @param maxMemoryBytes
 *          The memory cap in bytes.
 * @param cleanupStrategy
 *          Decides when an internal cleanup is executed.
 * @param memoryConsumptionProvider
 *          Calculates the memory consumption of offered values.
 * @param countCleanupStrategy
 *          Decides which counts are removed on internal cleanups.
 */
public CountingCache(long maxMemoryBytes, InternalCleanupStrategy cleanupStrategy,
    MemoryConsumptionProvider<V> memoryConsumptionProvider, CountCleanupStrategy<K1, K2> countCleanupStrategy) {
  this.countCleanupStrategy = countCleanupStrategy;
  this.memoryConsumptionProvider = memoryConsumptionProvider;
  this.cleanupStrategy = cleanupStrategy;
  this.maxMemoryBytes = maxMemoryBytes;
}
/**
 * @return The value cached for (key1, key2) or <code>null</code> if there is no such value in the cache.
 */
@Override
public V get(K1 key1, K2 key2) {
  ConcurrentMap<K2, V> valuesOfKey1 = caches.get(key1);
  return (valuesOfKey1 != null) ? valuesOfKey1.get(key2) : null;
}
/**
 * Returns the value cached for (key1, key2) and flags it, if there is such a value.
 *
 * @param flagUntilNanos
 *          The timeout of the flag, passed on to {@link #flag(CacheId, long)}.
 * @return The cached value or <code>null</code> if nothing is cached for (key1, key2). In the latter case nothing is
 *         flagged.
 */
@Override
public V flagAndGet(K1 key1, K2 key2, long flagUntilNanos) {
  CacheId cacheId = new CacheId(key1, key2);
  while (true) {
    V res = get(key1, key2);
    if (res == null)
      return null;
    flag(cacheId, flagUntilNanos);
    // re-check that the element we got is still in cache.
    if (res == get(key1, key2))
      return res;
    // Here: If an offer of the same CacheId than this method happens exactly here, we might keep something in the
    // cache although we would remove the flaggedCacheId right away again. But that is not as bad, since in the next
    // call to #consolidate this mistake will be corrected.
    // element changed, remove flag and retry.
    // Use computeIfPresent: the flag we just added might have been removed by a concurrent
    // #removeOldFlaggedCacheIds already (e.g. if flagUntilNanos has passed in the meantime), in which case there
    // is nothing left to undo and we must not dereference a missing FlagInfo.
    flaggedChacheIds.computeIfPresent(cacheId,
        (id, flagInfo) -> (flagInfo.getFlagCount().decrementAndGet() == 0) ? null : flagInfo);
  }
}
/**
 * Add a given cacheId to {@link #flaggedTimes} and {@link #flaggedChacheIds}.
 *
 * Will adjust the given flagUntilNanos so we do not get collisions in {@link #flaggedTimes}.
 *
 * <p>
 * If the cacheId is flagged already, its {@link FlagInfo} is replaced by a new instance which carries the sum of the
 * flag counts and the newest of the timeouts; the previous instance is only read, never changed.
 *
 * @return The flagUntilNanos value actually used.
 */
private long flag(CacheId cacheId, long flagUntilNanos) {
  // find a timeout value that is not used yet, each timeout can identify exactly one CacheId.
  while (flaggedTimes.putIfAbsent(flagUntilNanos, cacheId) != null)
    flagUntilNanos++;

  // Do NOT take the monitor of the old FlagInfo in here: the remapping function is executed while the map holds
  // its internal lock for that key. #removeOldFlaggedCacheIds calls into this map, too - acquiring a FlagInfo
  // monitor inside the map operation while another thread holds that monitor and waits for the map would be a
  // lock-order inversion. The old FlagInfo is only read (from atomics) here, so no monitor is needed.
  flaggedChacheIds.merge(cacheId, new FlagInfo(flagUntilNanos), (oldValue, newValue) -> {
    newValue.getFlagCount().addAndGet(oldValue.getFlagCount().get());
    newValue.getNewestTimeoutNanos().getAndAccumulate(oldValue.getNewestTimeoutNanos().get(), Long::max);
    return newValue;
  });
  return flagUntilNanos;
}
/**
 * @return A new collection holding all values that are currently cached for the given key1. Empty if there are
 *         none.
 */
@Override
public Collection<V> getAll(K1 key1) {
  List<V> res = new ArrayList<>();
  ConcurrentMap<K2, V> valuesOfKey1 = caches.get(key1);
  if (valuesOfKey1 != null)
    res.addAll(valuesOfKey1.values());
  return res;
}
/**
 * Offers a value that was used once and flags its key pair, see
 * {@link #offerAndFlag(Comparable, Comparable, Object, long, long)}.
 */
@Override
public V offerAndFlag(K1 key1, K2 key2, V value, long flagUntilNanos) {
  // a plain offer counts as exactly one usage.
  final long singleUsage = 1L;
  return offerAndFlag(key1, key2, value, flagUntilNanos, singleUsage);
}
/**
 * Just like {@link #offerAndFlag(Comparable, Comparable, Object, long)}, but specify the number of times this element
 * has been "used".
 *
 * @param countDelta
 *          Number of times the offered element has been "used".
 * @return Whatever {@link #get(Comparable, Comparable)} returns for the key pair after flagging and offering.
 */
public V offerAndFlag(K1 key1, K2 key2, V value, long flagUntilNanos, long countDelta) {
  // flag first: the consolidation triggered by the offer inspects the flagged ids.
  CacheId flaggedId = new CacheId(key1, key2);
  flag(flaggedId, flagUntilNanos);

  offer(key1, key2, value, countDelta);

  V cachedValue = get(key1, key2);
  return cachedValue;
}
/**
 * Offers a value that was used once, see {@link #offer(Comparable, Comparable, Object, long)}.
 */
@Override
public boolean offer(K1 key1, K2 key2, V value) {
  // a plain offer counts as exactly one usage.
  final long singleUsage = 1L;
  return offer(key1, key2, value, singleUsage);
}
/**
 * Just like {@link #offer(Comparable, Comparable, Object)}, but specify the number of times this element has been
 * "used".
 *
 * @param countDelta
 *          Number of times the offered element has been "used". A value of 0 leaves the count untouched.
 * @return <code>true</code> if the value was added to the cache by this call.
 */
public boolean offer(K1 key1, K2 key2, V value, long countDelta) {
  // Implementation details: We adjust internal data structures here, but leave #caches unchanged. Based on this,
  // this method then calls #consolidate to consolidate the caches, too.
  removeOldFlaggedCacheIds();

  boolean addedToCache = false;
  cleanupLock.readLock().lock(); // do not execute cleanup while we're inside this block!
  try {
    CacheId cacheId = new CacheId(key1, key2);
    AtomicLong count = counts.computeIfAbsent(cacheId, id -> new AtomicLong(0L));
    long oldCount = count.getAndAdd(countDelta);
    long newCount = oldCount + countDelta;

    // the memory consumption is calculated only if it is not known (anymore) for this id.
    memoryConsumption.computeIfAbsent(cacheId, ci -> memoryConsumptionProvider.getMemoryConsumptionBytes(value));

    // we add our value with its count to "topCounts". Note: If this value now is used often enough to get into the
    // cache, actually another thread calling this method might expect our value to be added to the cache already
    // (and remove other values for its decision accordingly). We do not really care about this, though -> in that
    // case the cache might be smaller than needed for a short time.
    topCounts.add(new CacheIdCount(cacheId, newCount));
    // Remove the outdated entry only if the count actually changed: for an unchanged count (countDelta == 0) the
    // "old" entry equals the one we just added, removing it would drop this id from topCounts completely.
    if (newCount != oldCount)
      topCounts.remove(new CacheIdCount(cacheId, oldCount));

    addedToCache = consolidate(cacheId, value);
  } finally {
    cleanupLock.readLock().unlock();
  }

  if (cleanupStrategy.executeCleanup())
    intermediaryCleanup();

  return addedToCache;
}
/**
 * Consolidates the {@link #caches} based on current values of {@link #topCounts}, {@link #memoryConsumption},
 * {@link #flaggedChacheIds} and {@link #currentlyCachedCacheIds}. {@link #memoryConsumption} should contain values
 * for all elements in {@link #topCounts}; an element without a known memory consumption is treated like an element
 * that does not fit into the cache.
 *
 * <p>
 * This method optionally adds a new element to the cache if eligible. An element is eligible if it is not cached yet
 * and it either is within the memory cap according to {@link #topCounts} or is flagged currently.
 *
 * <p>
 * This method must only be called, if {@link #cleanupLock}s readLock is acquired already!
 *
 * @param addCacheId
 *          The ID of the element to add or <code>null</code> if nothing should be added.
 * @param addValue
 *          The value of the element to add or <code>null</code> if nothing should be added.
 * @return <code>true</code> in case the value was added successfully.
 */
private boolean consolidate(CacheId addCacheId, V addValue) {
  boolean addedToCache = false;

  // we retry to identify if we have to add the new value/what other values to remove to/from the cache. This is
  // because while inspecting the situation we might have multiple threads doing the same simultaneously, even with
  // the same CacheId! There we find a decision what we'd like to do, then enter a sync-block and validate if the data
  // we based our decision on is still valid and only if it is, we execute our decision.
  boolean retry = true;
  while (retry) {
    // we collect the CacheIdCounts that we inspected in "topCounts" in the right order.
    List<CacheIdCount> curInspectedCountCacheIds = new ArrayList<>();
    Set<CacheId> cacheIdsVisited = new HashSet<>();
    Set<CacheId> cacheIdsThatShouldBeCached = new HashSet<>();
    long memory = 0L;
    for (CacheIdCount cacheIdCount : topCounts) {
      curInspectedCountCacheIds.add(cacheIdCount);

      if (!cacheIdsVisited.add(cacheIdCount.getLeft()))
        // CacheIds might be available multiple times in "topCounts" (they are first added, then removed, see
        // #offer). Therefore make sure we just take the maximum "count" into account of a CacheId.
        continue;

      // Do not unbox blindly: if there is no memory information for an entry, we cannot tell whether it fits.
      // Handle it just like the first entry that does not fit and stop here - we do not cache anything that has
      // a lower count than the first entry that is not cacheable.
      Long nextMemory = memoryConsumption.get(cacheIdCount.getLeft());
      if (nextMemory == null || memory + nextMemory > maxMemoryBytes)
        break;

      memory += nextMemory;
      cacheIdsThatShouldBeCached.add(cacheIdCount.getLeft());
    }

    // decide what we have to do
    Set<CacheId> curCurrentlyCachedCacheIds = new HashSet<>(currentlyCachedCacheIds);
    Set<CacheId> cacheIdsToBeRemovedFromCache =
        new HashSet<>(Sets.difference(curCurrentlyCachedCacheIds, cacheIdsThatShouldBeCached));
    boolean addCacheIdIsWithinCap = addCacheId != null && cacheIdsThatShouldBeCached.contains(addCacheId);
    boolean shouldAddNewCacheIdToCache = addCacheId != null && !curCurrentlyCachedCacheIds.contains(addCacheId)
        && (cacheIdsThatShouldBeCached.contains(addCacheId) || flaggedChacheIds.containsKey(addCacheId));

    // We have to act also if the new element should be added only because it is flagged (= it is not within the
    // memory cap): otherwise a flagged element would not be added as long as nothing needs to be evicted.
    if (!cacheIdsToBeRemovedFromCache.isEmpty() || addCacheIdIsWithinCap || shouldAddNewCacheIdToCache) {
      synchronized (updateCacheSync) {
        if (!curCurrentlyCachedCacheIds.equals(currentlyCachedCacheIds)
            || !DiqubeIterables.startsWith(topCounts, curInspectedCountCacheIds))
          // retry as the data structures we based our decisions on have changed.
          continue;

        // do not remove flagged cache Ids.
        Sets.difference(cacheIdsToBeRemovedFromCache, flaggedChacheIds.keySet()).forEach(id -> removeFromCache(id));

        if (shouldAddNewCacheIdToCache) {
          addToCache(addCacheId, addValue);
          addedToCache = true;
        }

        // we succeeded!
        retry = false;
      }
    } else
      // we do not need to take any action, so we're done!
      retry = false;
  }

  return addedToCache;
}
/**
 * Consolidate the cache.
 *
 * <p>
 * Removes the flags whose timeout has passed, evicts the elements which should not be cached any more and might
 * execute an internal cleanup afterwards (at the discretion of the cache).
 */
public void consolidate() {
  removeOldFlaggedCacheIds();

  // hold the read lock: the internal cleanup must not run while we consolidate.
  ReentrantReadWriteLock.ReadLock noCleanupLock = cleanupLock.readLock();
  noCleanupLock.lock();
  try {
    // nothing to add, only evict.
    consolidate(null, null);
  } finally {
    noCleanupLock.unlock();
  }

  boolean cleanupIsDue = cleanupStrategy.executeCleanup();
  if (cleanupIsDue)
    intermediaryCleanup();
}
/**
 * Executes a cleanup of data structures of this instance - execute every now and then to not pollute memory!
 *
 * <p>
 * Holds the write lock of {@link #cleanupLock} while running. Cleans up {@link #topCounts}, then
 * {@link #memoryConsumption} and finally asks the {@link CountCleanupStrategy} which {@link #counts} to remove.
 */
private void intermediaryCleanup() {
  cleanupLock.writeLock().lock();
  try {
    logger.trace("Executing cache cleanup...");

    // clean topCounts: Remove any CacheIdCounts of CacheIds where we have bigger counts (=earlier in topCounts)
    // and/or which are not currently cached.
    boolean lastTopCountWasCached = true;
    Set<CacheId> cacheIdsVisited = new HashSet<>();
    // The not-cached CacheIds that we leave in topCounts. There can be more than one of them, e.g. if a flagged
    // (= cached) entry with a low count is located between two entries that are not cached.
    Set<CacheId> notCachedInterestingCacheIds = new HashSet<>();
    for (Iterator<CacheIdCount> it = topCounts.iterator(); it.hasNext();) {
      CacheIdCount curCnt = it.next();
      if (!cacheIdsVisited.add(curCnt.getLeft())) {
        // we've seen a bigger count of that CacheId already.
        it.remove();
        continue;
      }

      boolean curCntIsCached = currentlyCachedCacheIds.contains(curCnt.getLeft());
      if (!curCntIsCached) {
        if (lastTopCountWasCached)
          // Leave the not-cached entry right after a cached one. It is needed to make sure not new entries are
          // cached (= are in topCount and memory does match), but would actually have a lower CacheIdCount value
          // than the one following the cached topCounts (this can happen e.g. if the one after the cached
          // topCounts is a very memory-intensive value, does therefore not get cached, but the next value with a
          // lower count would have a memory consumption that would fit in the cache). We do not cache any
          // CacheIdCounts that are lower than the first one that does not fit into memory any more.
          notCachedInterestingCacheIds.add(curCnt.getLeft());
        else
          it.remove();
      }

      lastTopCountWasCached = curCntIsCached;
    }

    // Remove memory information of all cacheIds that are not interesting. Interesting are the cached values and
    // ALL not-cached values we left in topCounts: each entry of topCounts needs its memory information when the
    // cache is consolidated. Next time someone wants to insert a value we remove now, we can recalculate the size
    // without problems.
    Set<CacheId> retainCacheIds = new HashSet<>(currentlyCachedCacheIds);
    retainCacheIds.addAll(notCachedInterestingCacheIds);
    memoryConsumption.keySet().retainAll(retainCacheIds);

    // check if we should clean up any "counts"
    Set<? extends Pair<K1, K2>> countCleanups = countCleanupStrategy
        .getCountsForCleanup(Sets.difference(counts.keySet(), currentlyCachedCacheIds), counts.keySet());
    if (countCleanups != null && !countCleanups.isEmpty()) {
      for (Pair<K1, K2> p : countCleanups)
        counts.remove(new CacheId(p.getLeft(), p.getRight()));
    }
  } finally {
    cleanupLock.writeLock().unlock();
  }
}
/**
 * Removes data of a specific {@link CacheId} from the cache. Call only when synced on {@link #updateCacheSync} and
 * inside a lock of {@link #cleanupLock}!
 *
 * <p>
 * Does not fail if there is no value cached for the given {@link CacheId}.
 */
private void removeFromCache(CacheId colId) {
  logger.trace("Removing from cache: {}", colId);
  currentlyCachedCacheIds.remove(colId);
  // Remove the value and drop the map of key1 in one step as soon as it is empty. If there is no map for key1
  // (= nothing is cached for it), there is nothing to do.
  caches.computeIfPresent(colId.getLeft(), (k1, valuesOfKey1) -> {
    valuesOfKey1.remove(colId.getRight());
    return valuesOfKey1.isEmpty() ? null : valuesOfKey1;
  });
}
/**
 * Puts the given value into the cache and registers its {@link CacheId} as being cached. Call only when synced on
 * {@link #updateCacheSync} and inside a lock of {@link #cleanupLock}!
 */
private void addToCache(CacheId cacheId, V value) {
  logger.trace("Adding to cache: {}", cacheId);
  currentlyCachedCacheIds.add(cacheId);
  // create the map of key1 on demand.
  caches.computeIfAbsent(cacheId.getLeft(), k1 -> new ConcurrentHashMap<>()).put(cacheId.getRight(), value);
}
/**
 * @return The number of key pairs that are registered as being cached currently.
 */
@Override
public int size() {
  final int numberOfCachedIds = currentlyCachedCacheIds.size();
  return numberOfCachedIds;
}
/**
 * @return The count that is currently stored for (key1, key2) or <code>null</code> if no count is available.
 */
public Long getCount(K1 key1, K2 key2) {
  AtomicLong counter = counts.get(new CacheId(key1, key2));
  return (counter == null) ? null : Long.valueOf(counter.get());
}
/**
 * Internal method to check for flagged elements whose timeout has passed - will remove those elements from
 * {@link #flaggedChacheIds} and will cleanup {@link #flaggedTimes}.
 *
 * <p>
 * The cached values themselves are not touched: they might still be in the regular {@link #topCounts}. They are
 * evicted on the next consolidation, if needed.
 */
private void removeOldFlaggedCacheIds() {
  Iterator<Entry<Long, CacheId>> cacheIdEntryIt = flaggedTimes.headMap(System.nanoTime()).entrySet().iterator();
  while (cacheIdEntryIt.hasNext()) {
    Entry<Long, CacheId> cacheIdEntry = cacheIdEntryIt.next();
    CacheId cacheId = cacheIdEntry.getValue();
    long timeoutTime = cacheIdEntry.getKey();
    cacheIdEntryIt.remove();

    FlagInfo flagInfo = flaggedChacheIds.get(cacheId);
    // validate that there is a flagInfo and that the timeout we process is the newest one of that flag.
    if (flagInfo != null && flagInfo.getNewestTimeoutNanos().get() == timeoutTime) {
      // Remove the flag only if the map still holds the very FlagInfo instance we inspected (FlagInfo does not
      // override equals, so this compares identity). #flag replaces the FlagInfo by a new instance when an id is
      // flagged again, therefore a concurrent re-flag makes this removal a no-op and the new flag survives.
      // This is a single atomic map operation; no monitor is held while calling into the map, so this cannot
      // block against a thread that is inside a map operation on the same key.
      flaggedChacheIds.remove(cacheId, flagInfo);
    }
  }
}
/**
 * For testing only!
 *
 * @return The memory cap in bytes this cache was created with.
 */
protected long getMaxMemoryBytes() {
  return this.maxMemoryBytes;
}
/**
 * For testing only! Removes the value of (key1, key2) from the cache while holding the write lock of
 * {@link #cleanupLock} and the monitor of {@link #updateCacheSync}.
 */
protected void removeFromCache(K1 key1, K2 key2) {
  final CacheId idToRemove = new CacheId(key1, key2);
  ReentrantReadWriteLock.WriteLock exclusiveLock = cleanupLock.writeLock();
  exclusiveLock.lock();
  try {
    synchronized (updateCacheSync) {
      removeFromCache(idToRemove);
    }
  } finally {
    exclusiveLock.unlock();
  }
}
/**
 * For tests: Replaces the strategy that decides when to execute the internal cleanup.
 */
/* package */ void setCleanupStrategy(InternalCleanupStrategy cleanupStrategy) {
  this.cleanupStrategy = cleanupStrategy;
}
/**
 * The key pair (K1, K2) that identifies a (cached) entity within the cache.
 *
 * <p>
 * Left: key part 1 <br/>
 * Right: key part 2
 */
private class CacheId extends Pair<K1, K2> {
  /**
   * @param left
   *          key part 1
   * @param right
   *          key part 2
   */
  public CacheId(K1 left, K2 right) {
    super(left, right);
  }
}
/**
 * A {@link CacheId} (left) combined with the number of times that element was offered (right).
 *
 * <p>
 * This is {@link Comparable}: bigger counts are sorted first, equal counts are ordered by their {@link CacheId}.
 */
private class CacheIdCount extends Pair<CacheId, Long> {
  public CacheIdCount(CacheId left, Long right) {
    super(left, right);
  }

  @Override
  public int compareTo(Pair<CacheId, Long> o) {
    // compare the other count to ours (and not vice versa) to get a descending order on the counts.
    int descendingCountCmp = o.getRight().compareTo(getRight());
    return (descendingCountCmp != 0) ? descendingCountCmp : getLeft().compareTo(o.getLeft());
  }
}
/**
 * Mainly for testing: Extracted strategy that decides when to do an internal cleanup.
 */
/* package */ interface InternalCleanupStrategy {
  /**
   * @return true if cleanup should be executed, false otherwise.
   */
  boolean executeCleanup();
}
/**
 * Information on the flag of a single cache entry: a counter of flags (starts at 1) and the newest timeout.
 */
private static class FlagInfo {
  private final AtomicInteger flagCount = new AtomicInteger(1);

  private final AtomicLong newestTimeoutNanos;

  FlagInfo(long timeoutNanos) {
    this.newestTimeoutNanos = new AtomicLong(timeoutNanos);
  }

  /** @return The modifiable flag counter. */
  public AtomicInteger getFlagCount() {
    return this.flagCount;
  }

  /** @return The modifiable newest timeout in nanos. */
  public AtomicLong getNewestTimeoutNanos() {
    return this.newestTimeoutNanos;
  }
}
/**
 * Calculates how much memory a value takes up.
 */
public interface MemoryConsumptionProvider<V> {
  /**
   * @param value
   *          The value to inspect.
   * @return Number of bytes the given value takes up.
   */
  long getMemoryConsumptionBytes(V value);
}
/**
 * Strategy to decide which collected "count" values should be cleaned up.
 *
 * <p>
 * This is called every now and then by {@link CountingCache}.
 */
public interface CountCleanupStrategy<K1 extends Comparable<K1>, K2 extends Comparable<K2>> {
  /**
   * Identifies which "count" values are to be cleaned up.
   *
   * <p>
   * Note that only "count" values of cache entries which will never again be "offered" to the cache should be cleaned
   * up, otherwise the counting of such cache elements will re-start at 0 (and therefore probably always lose when
   * compared against the currently-cached elements).
   *
   * @param countsUpForCleanup
   *          A set of (K1,K2) pairs of cache entries which are currently not cached, but of which a "count" is
   *          available.
   * @param allCounts
   *          A set of (K1, K2) pairs of all counts available. Note that if you return (K1, K2)s that are only
   *          available in this set (and not in countsUpForCleanup), you might remove the counts of currently cached
   *          elements which will lead to these entries being most probably removed from the cache in the next call
   *          to {@link CountingCache#offer(Comparable, Comparable, Object)} etc.
   * @return Those (K1,K2) pairs which should be cleaned up or <code>null</code> if nothing should be cleaned up and
   *         all counts should be kept.
   */
  Set<? extends Pair<K1, K2>> getCountsForCleanup(Set<? extends Pair<K1, K2>> countsUpForCleanup,
      Set<? extends Pair<K1, K2>> allCounts);
}
}