/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.executionenv;
import java.util.Deque;
import java.util.HashSet;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BiFunction;
import javax.annotation.PostConstruct;
import org.diqube.cache.CountingCache;
import org.diqube.cache.CountingCache.CountCleanupStrategy;
import org.diqube.cache.CountingCache.MemoryConsumptionProvider;
import org.diqube.cache.FlaggingCache;
import org.diqube.config.Config;
import org.diqube.config.ConfigKey;
import org.diqube.context.AutoInstatiate;
import org.diqube.data.flatten.FlattenedTable;
import org.diqube.util.Holder;
import org.diqube.util.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
/**
* Manages various {@link FlattenedTable}s that are available in main memory (=instances) locally.
* <p>
* Flattened tables that are no longer needed, are not used often enough, or exceed the memory cap will be evicted.
* The implementation is based on a {@link CountingCache}.
*
* <p>
* The counts are basically calls to {@link #getFlattenedTable(UUID, String, String)}.
*
* @author Bastian Gloeckle
*/
@AutoInstatiate
public class FlattenedTableInstanceManager {
private static final Logger logger = LoggerFactory.getLogger(FlattenedTableInstanceManager.class);

/**
 * Number of nanoseconds a flattened table gets flagged by
 * {@link #getNewestFlattenedTableVersionAndFlagIt(String, String)} and therefore saves it from being evicted.
 */
public static final long FLATTENED_TABLE_FLAG_NANOSECONDS = 120 * 1_000_000_000L; /* 2 min */

/** Flag duration actually used; starts out as {@link #FLATTENED_TABLE_FLAG_NANOSECONDS}, tests may override it. */
private long flagNanoseconds = FLATTENED_TABLE_FLAG_NANOSECONDS;

/**
 * Execute cache consolidation in approx. 10% of cases (a random draw out of 128 values being below 13).
 */
private CacheConsolidateStrategy cacheConsolidateStrategy = () -> ThreadLocalRandom.current().nextInt(128) < 13;

/** Memory cap of {@link #cache} in MB; populated through the {@link Config} annotation. */
@Config(ConfigKey.FLATTEN_MEMORY_CACHE_SIZE_MB)
private int flattenedTableCacheSizeMb;

/**
 * {@link FlaggingCache} we use to store the {@link FlattenedTable}s.
 *
 * <p>
 * Key1: Pair of tableName, flattenBy <br/>
 * Key2: versionId.
 *
 * <p>
 * This cache typically holds only the newest versions of the FlattenedTables, although old versions might still be
 * available if they were flagged.
 *
 * <p>
 * Not final: it is (re-)created in {@link #initialize()}.
 */
private CountingCache<Pair<String, String>, UUID, FlattenedTableInfo> cache;

/**
 * Usage counts for table/flatten-by pairs.
 *
 * This field will never be cleaned up, it therefore counts usages even when the corresponding table gets evicted from
 * {@link #cache} in the meantime (the counts inside the cache get evicted, because the cache key is based on the
 * version Id additionally).
 */
private final ConcurrentMap<Pair<String, String>, AtomicLong> usageCounts = new ConcurrentHashMap<>();

/**
 * ID of the newest version of a table/flatten-by pair. Access to this map and to {@link #cache} is synchronized on
 * the UUID object that is stored as value in this map.
 */
private final ConcurrentMap<Pair<String, String>, UUID> newestVersionIds = new ConcurrentHashMap<>();

/** Elements whose counts should be removed in {@link #cache} on the next opportunity. */
private final Deque<Pair<Pair<String, String>, UUID>> countCleanupCacheEntries = new ConcurrentLinkedDeque<>();
/**
 * Creates {@link #cache} with the configured memory cap, a memory consumption provider based on
 * {@link FlattenedTable#calculateApproximateSizeInBytes()} and a count-cleanup strategy.
 *
 * <p>
 * Each call constructs a new cache instance that replaces the one held before.
 */
@PostConstruct
public void initialize() {
  // Use a CountCleanupStrategy that cleans up everything that was already evicted from the cache: If something was
  // evicted from the cache, we definitely won't offer it again, since we will not use that same versionId again.
  // Therefore we can free up the count memory of those.
  // Additionally we remove the counts of every version that is in #countCleanupCacheEntries. These are old versions.
  // If anybody still needs those versions, they must have flagged those elements in the cache, otherwise their
  // entries will have count 0 and that will most probably lead to them being evicted from the cache on the next run.
  CountCleanupStrategy<Pair<String, String>, UUID> cacheCountCleanupStrategy = (countsForCleanup, allCounts) -> {
    Set<Pair<Pair<String, String>, UUID>> curCountCleanupCacheEntries = new HashSet<>();

    // Drain the queue. pollFirst() returns null on an empty deque (which cannot contain null elements), so a
    // concurrent drain by another thread simply ends this loop instead of raising NoSuchElementException.
    Pair<Pair<String, String>, UUID> cleanupEntry;
    while ((cleanupEntry = countCleanupCacheEntries.pollFirst()) != null)
      curCountCleanupCacheEntries.add(cleanupEntry);

    Set<Pair<Pair<String, String>, UUID>> res =
        Sets.union(countsForCleanup, Sets.intersection(allCounts, curCountCleanupCacheEntries));

    logger.trace("Evicting old usage counts (limit): {}", Iterables.limit(res, 100));
    return res;
  };

  MemoryConsumptionProvider<FlattenedTableInfo> cacheMemoryConsumptionProvider =
      info -> info.getFlattenedTable().calculateApproximateSizeInBytes();

  // The configured size is in MB; the long literals keep the multiplication from overflowing int.
  cache = new CountingCache<>(flattenedTableCacheSizeMb * 1024L * 1024L, cacheMemoryConsumptionProvider,
      cacheCountCleanupStrategy);
}
/**
 * Register a newly created {@link FlattenedTable} from a Flattener.
 *
 * <p>
 * This version will automatically be the newest version available, so it is likely that
 * {@link #getNewestFlattenedTableVersion(String, String)} will return this flattened table version if called right
 * after registering the new version.
 *
 * <p>
 * The new flattenedTable will be available through this table manager at least for
 * {@link #FLATTENED_TABLE_FLAG_NANOSECONDS}.
 *
 * <p>
 * If an older version of the same table/flatten-by pair is still retrievable from the cache, the usage count
 * collected so far is handed to the cache for the new version and the old version is queued so its count gets removed
 * on the next count cleanup. If no older version is retrievable, the new version is offered without an initial count.
 *
 * <p>
 * This method is not fully thread safe: concurrent registrations for the same table/flatten-by pair may lose a few
 * counts.
 *
 * @param versionId
 *          The version ID of the flattened table.
 * @param flattenedTable
 *          The flattened table itself.
 * @param origTableName
 *          The table the flattened table was based on.
 * @param flattenBy
 *          The field which the table was flattened by.
 */
public void registerFlattenedTableVersion(UUID versionId, FlattenedTable flattenedTable, String origTableName,
    String flattenBy) {
  Pair<String, String> keyPair = new Pair<>(origTableName, flattenBy);

  // We manually manage the "counts" here: Get the count from usageCounts. Then manually force the cache to remove the
  // count on the old version (which will lead to eviction of the old entity from the cache).
  // This counting is not 100% thread safe, as we might lose a few "counts" if this method is called simultaneously
  // for the same flattening with different flattenedTables - but that is not a big problem, since typically only one
  // Flattener will be running for one table/flatten-by pair anyway.
  FlattenedTableInfo newInfo = new FlattenedTableInfo(versionId, flattenedTable);

  // Holder, because the value is set after the lambda below has been created.
  Holder<Long> oldCountHolder = new Holder<>(null);

  Runnable update = () -> {
    // flag new flattened table to make sure it definitely ends up being in the cache.
    long flagUntilNanos = System.nanoTime() + flagNanoseconds;
    if (oldCountHolder.getValue() != null)
      cache.offerAndFlag(keyPair, versionId, newInfo, flagUntilNanos, oldCountHolder.getValue() + 1L);
    else
      cache.offerAndFlag(keyPair, versionId, newInfo, flagUntilNanos);

    // Increment the existing counter in place instead of swapping in a new AtomicLong: a swapped-out instance could
    // still be incremented concurrently by #getFlattenedTable and those increments would be lost.
    usageCounts.computeIfAbsent(keyPair, k -> new AtomicLong()).incrementAndGet();

    newestVersionIds.put(keyPair, versionId);
  };

  Pair<UUID, FlattenedTable> oldNewestVersionPair = getNewestFlattenedTableVersion(origTableName, flattenBy);
  if (oldNewestVersionPair != null && !oldNewestVersionPair.getLeft().equals(versionId)) {
    // sync on previous UUID, since that'd be used by anyone who wants to query the newest UUID.
    synchronized (oldNewestVersionPair.getLeft()) {
      // Guard against a missing counter instead of dereferencing blindly.
      AtomicLong oldCounter = usageCounts.get(keyPair);
      if (oldCounter != null)
        oldCountHolder.setValue(oldCounter.get());

      logger.info(
          "Registering new flattened table {} from table '{}' flattened by '{}' of which an "
              + "old version was in the cache already ({}). Using cached usageCounts for new table: {}",
          versionId, origTableName, flattenBy, oldNewestVersionPair.getLeft(), oldCountHolder.getValue());

      update.run();
    }
    // Have the count of the now outdated version removed on the next cleanup run.
    countCleanupCacheEntries.add(new Pair<>(keyPair, oldNewestVersionPair.getLeft()));
  } else {
    logger.info("Registering new flattened table {} from table '{}' flattened by '{}'", versionId, origTableName,
        flattenBy);
    update.run();
  }
}
/**
 * Looks up the newest registered version of a flattened table without flagging it.
 *
 * @param origTableName
 *          Name of the original table.
 * @param flattenBy
 *          Field by which the orig table was flattened.
 * @return Pair of version ID and flattened table, or <code>null</code> in case there is no flattened version of that
 *         table.
 */
public Pair<UUID, FlattenedTable> getNewestFlattenedTableVersion(String origTableName, String flattenBy) {
  logger.trace("Getting newest version of flattened table '{}' by '{}'", origTableName, flattenBy);

  // Plain lookup: the entry is read from the cache as-is.
  BiFunction<Pair<String, String>, UUID, FlattenedTableInfo> plainLookup =
      (tableAndField, version) -> cache.get(tableAndField, version);

  return getNewestFlattenedTableVersion(origTableName, flattenBy, plainLookup);
}
/**
 * Looks up the newest registered version of a flattened table and flags that version in the cache, so it is not
 * evicted for {@link #FLATTENED_TABLE_FLAG_NANOSECONDS}.
 *
 * @param origTableName
 *          Name of the original table.
 * @param flattenBy
 *          Field by which the orig table was flattened.
 * @return Pair of version ID and flattened table, or <code>null</code> in case there is no flattened version of that
 *         table.
 */
public Pair<UUID, FlattenedTable> getNewestFlattenedTableVersionAndFlagIt(String origTableName, String flattenBy) {
  logger.trace("Flagging and getting newest version of flattened table '{}' by '{}'", origTableName, flattenBy);

  // The flag deadline is computed when the lookup actually runs, not when this lambda is created.
  BiFunction<Pair<String, String>, UUID, FlattenedTableInfo> flaggingLookup =
      (tableAndField, version) -> cache.flagAndGet(tableAndField, version, System.nanoTime() + flagNanoseconds);

  return getNewestFlattenedTableVersion(origTableName, flattenBy, flaggingLookup);
}
/**
 * Resolves the newest version of a table/flatten-by pair while holding the monitor of the version's UUID object.
 *
 * <p>
 * If the newest version ID changes between reading it and acquiring its monitor, the lookup is retried with the
 * changed ID.
 *
 * @param origTableName
 *          Name of the original table.
 * @param flattenBy
 *          Field by which the orig table was flattened.
 * @param flattenTableInfoResolver
 *          Fetches the cache entry for the given key pair and version ID; called while the monitor is held.
 * @return Pair of version ID and flattened table, or <code>null</code> if no version is registered or the resolver
 *         returned <code>null</code>.
 */
private Pair<UUID, FlattenedTable> getNewestFlattenedTableVersion(String origTableName, String flattenBy,
    BiFunction<Pair<String, String>, UUID, FlattenedTableInfo> flattenTableInfoResolver) {
  Pair<String, String> tableAndField = new Pair<>(origTableName, flattenBy);

  UUID candidate = newestVersionIds.get(tableAndField);
  while (true) {
    if (candidate == null)
      return null;

    FlattenedTableInfo resolved = null;
    boolean stillNewest;
    UUID current;
    synchronized (candidate) {
      current = newestVersionIds.get(tableAndField);
      stillNewest = candidate.equals(current);
      if (stillNewest)
        resolved = flattenTableInfoResolver.apply(tableAndField, candidate);
    }

    if (!stillNewest) {
      // The newest version changed, so we synced on an outdated object -> retry with the current one.
      candidate = current;
      continue;
    }

    if (resolved == null)
      return null;
    return new Pair<>(candidate, resolved.getFlattenedTable());
  }
}
/**
 * Get a specific version of a flattened table and increase its usage count.
 *
 * <p>
 * Independent of whether the table was found, the cache is consolidated if the current
 * {@link CacheConsolidateStrategy} says so.
 *
 * @param versionId
 *          The version of the flattened table.
 * @param origTableName
 *          The table name the flattening was based on.
 * @param flattenBy
 *          The field by which the original table was flattened.
 * @return The {@link FlattenedTable} or <code>null</code> if it is not available.
 */
public FlattenedTable getFlattenedTable(UUID versionId, String origTableName, String flattenBy) {
  Pair<String, String> keyPair = new Pair<>(origTableName, flattenBy);

  FlattenedTableInfo info = cache.get(keyPair, versionId);
  if (info != null) {
    logger.trace("Using version {} of flattened table '{}' by '{}'.", versionId, origTableName, flattenBy);
    // increase usage count in cache and in our cache-evict-safe map.
    // Note that this will also happen if a version of a flattened table is fetched which is about to be removed from
    // the cache (= whose count was/will be removed). This is not that nice, since old versions get another "used"
    // count although they should have been deleted right away, but there is no simple way around this unfortunately.
    // The old versions might get another count, but that is not as bad either, as the count will always be very low
    // and the entry will be evicted soon again (together with its count being removed).
    cache.offer(keyPair, versionId, info);

    // #registerFlattenedTableVersion puts the table into the cache before it creates the usageCounts entry, so the
    // counter may not exist yet when we get here concurrently. Create it on demand instead of risking an NPE.
    usageCounts.computeIfAbsent(keyPair, k -> new AtomicLong()).incrementAndGet();
  }

  // Consolidate cache. This will evict all not-any-more-flagged entries. We need to do this, as we might not call
  // #offer on the cache that often (which would execute the same). But if we do not do this here, we might end up
  // leaving unneeded objects in the cache for a longer time.
  if (cacheConsolidateStrategy.consolidateCache())
    cache.consolidate();

  return (info != null) ? info.getFlattenedTable() : null;
}
/**
 * For tests: overrides the duration used when flagging flattened tables.
 *
 * @param flagNanoseconds
 *          Flag duration in nanoseconds; it is added to {@link System#nanoTime()} when a table gets flagged.
 */
/* package */ void setFlagNanoseconds(long flagNanoseconds) {
this.flagNanoseconds = flagNanoseconds;
}
/**
 * For tests: sets the memory cap of the cache and re-runs {@link #initialize()}.
 *
 * <p>
 * Note that {@link #initialize()} constructs a new cache instance, so the cache held before this call is replaced.
 *
 * @param flattenedTableCacheSizeMb
 *          Memory cap in MB.
 */
/* package */ void setFlattenedTableCacheSizeMb(int flattenedTableCacheSizeMb) {
this.flattenedTableCacheSizeMb = flattenedTableCacheSizeMb;
// Rebuild the cache so the new cap takes effect.
initialize();
}
/**
 * For tests: exposes the internal cache.
 *
 * @return The cache instance currently in use (the same object, not a copy).
 */
/* package */ CountingCache<Pair<String, String>, UUID, FlattenedTableInfo> getCache() {
return cache;
}
/**
 * For tests: replaces the strategy that decides whether {@link #getFlattenedTable(UUID, String, String)}
 * consolidates the cache.
 *
 * @param cacheConsolidateStrategy
 *          The strategy to use from now on.
 */
/* package */void setCacheConsolidateStrategy(CacheConsolidateStrategy cacheConsolidateStrategy) {
this.cacheConsolidateStrategy = cacheConsolidateStrategy;
}
private class FlattenedTableInfo {
private UUID versionId;
private FlattenedTable flattenedTable;
public FlattenedTableInfo(UUID versionId, FlattenedTable flattenedTable) {
this.versionId = versionId;
this.flattenedTable = flattenedTable;
}
public UUID getVersionId() {
return versionId;
}
public FlattenedTable getFlattenedTable() {
return flattenedTable;
}
}
/**
 * Decides whether the cache should be consolidated during the current call.
 *
 * <p>
 * Has a single abstract method so it can be implemented with a lambda.
 */
@FunctionalInterface
/* package */ interface CacheConsolidateStrategy {
  /**
   * @return <code>true</code> if the cache should be consolidated now.
   */
  boolean consolidateCache();
}
}