/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.data.tree.concurrent;
import javax.annotation.concurrent.GuardedBy;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BooleanSupplier;
import java.nio.charset.StandardCharsets;
import com.addthis.basis.concurrentlinkedhashmap.MediatedEvictionConcurrentHashMap;
import com.addthis.basis.util.ClosableIterator;
import com.addthis.basis.util.LessBytes;
import com.addthis.basis.util.LessFiles;
import com.addthis.basis.util.Meter;
import com.addthis.basis.util.Parameter;
import com.addthis.hydra.common.Configuration;
import com.addthis.hydra.data.tree.CacheKey;
import com.addthis.hydra.data.tree.DataTree;
import com.addthis.hydra.data.tree.DataTreeNode;
import com.addthis.hydra.data.tree.DataTreeNodeActor;
import com.addthis.hydra.data.tree.DataTreeNodeInitializer;
import com.addthis.hydra.data.tree.DataTreeNodeUpdater;
import com.addthis.hydra.data.tree.TreeCommonParameters;
import com.addthis.hydra.data.tree.TreeDataParent;
import com.addthis.hydra.data.tree.TreeNodeData;
import com.addthis.hydra.store.common.PageFactory;
import com.addthis.hydra.store.db.CloseOperation;
import com.addthis.hydra.store.db.DBKey;
import com.addthis.hydra.store.db.IPageDB;
import com.addthis.hydra.store.db.PageDB;
import com.addthis.hydra.store.kv.PagedKeyValueStore;
import com.addthis.hydra.store.skiplist.ConcurrentPage;
import com.addthis.hydra.store.skiplist.SkipListCache;
import com.addthis.hydra.store.util.MeterFileLogger;
import com.addthis.hydra.store.util.MeterFileLogger.MeterDataSource;
import com.addthis.hydra.store.util.NamedThreadFactory;
import com.addthis.hydra.store.util.Raw;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.io.Files;
import com.google.common.util.concurrent.AtomicDouble;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Gauge;
import org.apache.commons.lang3.mutable.MutableLong;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Concurrent, disk-backed tree of nodes. Node data is persisted in a paged
 * key/value store ({@code PageDB}) and fronted by an eviction-mediated cache;
 * deleted subtrees are parked under a hidden "trash" node and reaped
 * asynchronously by background deletion threads.
 *
 * @user-reference
 */
public final class ConcurrentTree implements DataTree, MeterDataSource {

    static final Logger log = LoggerFactory.getLogger(ConcurrentTree.class);

    // number of background deletion threads
    @Configuration.Parameter
    static final int defaultNumDeletionThreads = Parameter.intValue("hydra.tree.clean.threads", 1);

    // sleep interval of deletion threads in between polls of deletion queue
    @Configuration.Parameter
    static final int deletionThreadSleepMillis = Parameter.intValue("hydra.tree.clean.interval", 10);

    // number of nodes in between trash removal logging messages
    @Configuration.Parameter
    static final int deletionLogInterval = Parameter.intValue("hydra.tree.clean.logging", 100000);

    private static final AtomicInteger scopeGenerator = new AtomicInteger();

    // unique per-instance scope used to name metrics gauges and deletion threads
    private final String scope = "ConcurrentTree" + scopeGenerator.getAndIncrement();

    public static enum METERTREE {
        CACHE_HIT, CACHE_MISS, NODE_PUT, NODE_CREATE, NODE_DELETE, SOURCE_MISS
    }

    static final String keyCacheGet = METERTREE.CACHE_HIT.toString();
    static final String keyCacheMiss = METERTREE.CACHE_MISS.toString();

    private final File root;
    // persists the value of nextDBID across restarts (written by sync())
    private final File idFile;
    // backing key/value store for all tree nodes
    final IPageDB<DBKey, ConcurrentTreeNode> source;
    private final ConcurrentTreeNode treeRootNode;
    // hidden node whose children are subtrees awaiting background deletion
    final ConcurrentTreeNode treeTrashNode;
    // monotonically increasing id generator for per-node child databases
    private final AtomicLong nextDBID;
    final AtomicBoolean closed = new AtomicBoolean(false);
    private final Meter<METERTREE> meter;
    private final MeterFileLogger logger;
    private final AtomicDouble cacheHitRate = new AtomicDouble(0.0);
    // eviction cache in front of {@code source}; eviction is mediated so dirty
    // nodes are written back to the store before being dropped
    private final MediatedEvictionConcurrentHashMap<CacheKey, ConcurrentTreeNode> cache;
    private final ScheduledExecutorService deletionThreadPool;

    @GuardedBy("treeTrashNode")
    private IPageDB.Range<DBKey, ConcurrentTreeNode> trashIterator;

    @SuppressWarnings("unused")
    final Gauge<Integer> treeTrashNodeCount = Metrics.newGauge(SkipListCache.class,
            "treeTrashNodeCount", scope,
            new Gauge<Integer>() {
                @Override
                public Integer value() {
                    return treeTrashNode == null ? -1 : treeTrashNode.getNodeCount();
                }
            });

    @SuppressWarnings("unused")
    final Gauge<Long> treeTrashHitsCount = Metrics.newGauge(SkipListCache.class,
            "treeTrashHitsCount", scope,
            new Gauge<Long>() {
                @Override
                public Long value() {
                    return treeTrashNode == null ? -1 : treeTrashNode.getCounter();
                }
            });

    /**
     * Opens (or creates) a tree rooted at the given directory.
     *
     * @param root               directory holding the backing page db and id file
     * @param numDeletionThreads number of background trash-deletion tasks to schedule
     * @param cleanQSize         maximum weighted capacity of the node cache
     * @param maxCacheSize       page db cache size
     * @param maxPageSize        page db page size
     * @param factory            page implementation factory for the backing store
     */
    ConcurrentTree(File root, int numDeletionThreads, int cleanQSize, int maxCacheSize,
                   int maxPageSize, PageFactory factory) throws Exception {
        LessFiles.initDirectory(root);
        this.root = root;
        long start = System.currentTimeMillis();
        // setup metering
        meter = new Meter<>(METERTREE.values());
        for (METERTREE m : METERTREE.values()) {
            meter.addCountMetric(m, m.toString());
        }
        // create meter logging thread
        if (TreeCommonParameters.meterLogging > 0) {
            logger = new MeterFileLogger(this, root, "tree-metrics",
                    TreeCommonParameters.meterLogging, TreeCommonParameters.meterLogLines);
        } else {
            logger = null;
        }
        source = new PageDB.Builder<>(root, ConcurrentTreeNode.class, maxPageSize, maxCacheSize)
                .pageFactory(factory)
                .build();
        source.setCacheMem(TreeCommonParameters.maxCacheMem);
        source.setPageMem(TreeCommonParameters.maxPageMem);
        source.setMemSampleInterval(TreeCommonParameters.memSample);
        // create cache
        cache = new MediatedEvictionConcurrentHashMap.Builder<CacheKey, ConcurrentTreeNode>()
                .mediator(new CacheMediator(source))
                .maximumWeightedCapacity(cleanQSize)
                .build();
        // restore the persisted next db id if present, otherwise start at 1
        idFile = new File(root, "nextID");
        if (idFile.exists() && idFile.isFile() && idFile.length() > 0) {
            nextDBID = new AtomicLong(Long.parseLong(Files.toString(idFile, StandardCharsets.UTF_8)));
        } else {
            nextDBID = new AtomicLong(1);
        }
        // get tree root
        ConcurrentTreeNode dummyRoot = ConcurrentTreeNode.getTreeRoot(this);
        treeRootNode = dummyRoot.getOrCreateEditableNode("root");
        treeTrashNode = dummyRoot.getOrCreateEditableNode("trash");
        treeTrashNode.requireNodeDB();
        deletionThreadPool = Executors.newScheduledThreadPool(numDeletionThreads,
                new NamedThreadFactory(scope + "-deletion-", true));
        // stagger the initial delay (i) so tasks do not all poll in lockstep
        for (int i = 0; i < numDeletionThreads; i++) {
            deletionThreadPool.scheduleAtFixedRate(
                    new ConcurrentTreeDeletionTask(this, closed::get, LoggerFactory.getLogger(
                            ConcurrentTreeDeletionTask.class.getName() + ".Background")),
                    i, deletionThreadSleepMillis, TimeUnit.MILLISECONDS);
        }
        long openTime = System.currentTimeMillis() - start;
        log.info("dir={} root={} trash={} cache={} nextdb={} openms={}",
                root, treeRootNode, treeTrashNode, TreeCommonParameters.cleanQMax, nextDBID, openTime);
    }

    /** Opens a tree at {@code root} using the default sizing parameters. */
    public ConcurrentTree(File root) throws Exception {
        this(root, defaultNumDeletionThreads, TreeCommonParameters.cleanQMax,
             TreeCommonParameters.maxCacheSize, TreeCommonParameters.maxPageSize,
             ConcurrentPage.ConcurrentPageFactory.singleton);
    }

    /** Increments the given metric. */
    public void meter(METERTREE meterval) {
        meter.inc(meterval);
    }

    /**
     * This method is only for testing purposes.
     * It has a built in safeguard but nonetheless
     * it should not be invoked for other purposes.
     *
     * @return false if {@code id} is behind the current counter, true once the
     *         counter has been advanced to {@code id} (CAS loop for concurrent callers)
     */
    @VisibleForTesting
    boolean setNextNodeDB(long id) {
        while (true) {
            long current = nextDBID.get();
            if (current > id) {
                return false;
            } else if (nextDBID.compareAndSet(current, id)) {
                return true;
            }
        }
    }

    /** Allocates and returns a fresh node db id. */
    long getNextNodeDB() {
        return nextDBID.incrementAndGet();
    }

    // when lease==true the node must be successfully leased to be returned
    private static boolean setLease(final ConcurrentTreeNode node, final boolean lease) {
        return (!lease || node.tryLease());
    }

    /**
     * Fetches the named child of {@code parent}, consulting the cache first and
     * falling back to the backing store. Returns null if the child does not exist
     * or the parent has no child database.
     *
     * @param lease if true, the returned node has been leased by this caller
     */
    public ConcurrentTreeNode getNode(final ConcurrentTreeNode parent, final String child, final boolean lease) {
        long nodedb = parent.nodeDB();
        if (nodedb <= 0) {
            log.trace("[node.get] {} --> {} NOMAP --> null", parent, child);
            return null;
        }
        CacheKey key = new CacheKey(nodedb, child);
        /**
         * (1) First check the cache for the (key, value) pair. If the value
         * is found and the value is successfully leased then return it.
         * (2) Otherwise the value is not found in the cache. Check the backing store
         * for the value. If the value is not found in the backing store then (3) return
         * null. Otherwise (4) if the value is found in the backing store and
         * successfully inserted into the cache and the value is leased then
         * return the value. If all of these steps are unsuccessful then repeat.
         */
        while (true) {
            ConcurrentTreeNode node = cache.get(key);
            if (node != null) {
                if (node.isDeleted()) {
                    // a deleted node may linger in the cache; evict our instance and retry
                    cache.remove(key, node);
                } else if (setLease(node, lease)) {
                    reportCacheHit();
                    return node; // (1)
                }
            } else {// (2)
                DBKey dbkey = key.dbkey();
                reportCacheMiss();
                node = source.get(dbkey);
                if (node == null) {
                    meter.inc(METERTREE.SOURCE_MISS);
                    return null; // (3)
                }
                if (node.isDeleted()) {
                    // lazily purge tombstones discovered in the backing store
                    source.remove(dbkey);
                } else {
                    node.initIfDecoded(this, dbkey, key.name);
                    ConcurrentTreeNode prev = cache.putIfAbsent(key, node);
                    if (prev == null) {
                        node.reactivate();
                        if (setLease(node, lease)) {
                            return node; // (4)
                        }
                    }
                }
            }
        }
    }

    /**
     * Fetches the named child of {@code parent}, creating it if it does not exist.
     * The returned node is always leased by the caller. Mirrors the cache/store
     * protocol of {@link #getNode} with an additional creation path.
     *
     * @param creator optional callback invoked once on a newly created node
     */
    public ConcurrentTreeNode getOrCreateNode(final ConcurrentTreeNode parent, final String child,
                                              final DataTreeNodeInitializer creator) {
        parent.requireNodeDB();
        CacheKey key = new CacheKey(parent.nodeDB(), child);
        ConcurrentTreeNode newNode = null;
        while (true) {
            ConcurrentTreeNode node = cache.get(key);
            if (node != null) {
                if (node.isDeleted()) {
                    cache.remove(key, node);
                } else if (setLease(node, true)) {
                    reportCacheHit();
                    return node;
                }
            } else {
                DBKey dbkey = key.dbkey();
                reportCacheMiss();
                node = source.get(dbkey);
                if (node != null) {
                    if (node.isDeleted()) {
                        source.remove(dbkey);
                    } else {
                        node.initIfDecoded(this, dbkey, key.name);
                        ConcurrentTreeNode prev = cache.putIfAbsent(key, node);
                        if (prev == null) {
                            node.reactivate();
                            if (setLease(node, true)) {
                                return node;
                            }
                        }
                    }
                } else { // create a new node
                    // build the candidate lazily and at most once per call; it is
                    // reused across retries of the CAS-style insertion loop
                    if (newNode == null) {
                        newNode = new ConcurrentTreeNode();
                        newNode.init(this, dbkey, key.name);
                        newNode.tryLease();
                        newNode.markChanged();
                        if (creator != null) {
                            creator.onNewNode(newNode);
                        }
                    }
                    node = newNode;
                    if (cache.putIfAbsent(key, node) == null) {
                        /**
                         * We must insert the new node into the external storage
                         * because our iterators traverse this data
                         * structure to search for nodes.
                         */
                        source.put(dbkey, node);
                        parent.updateNodeCount(1);
                        return node;
                    }
                }
            }
        }
    }

    /**
     * Deletes the named child of {@code parent}. Subtrees of the deleted node are
     * moved to the trash node for asynchronous deletion.
     *
     * @return true if this caller performed the deletion
     */
    boolean deleteNode(final ConcurrentTreeNode parent, final String child) {
        log.trace("[node.delete] {} --> {}", parent, child);
        long nodedb = parent.nodeDB();
        if (nodedb <= 0) {
            log.debug("parent has no children on delete : {} --> {}", parent, child);
            return false;
        }
        CacheKey key = new CacheKey(nodedb, child);
        // lease node to prevent eviction from cache and thereby disrupting our {@code source.remove()}
        ConcurrentTreeNode node = getNode(parent, child, true);
        if (node != null) {
            // first ensure no one can rehydrate into a different instance
            source.remove(key.dbkey());
            // "markDeleted" causes other threads to remove the node at will, so it is semantically the same
            // as removing it from the cache ourselves. Since this is the last and only instance, we can safely
            // coordinate concurrent deletion attempts with the lease count (-2 is used as a special flag) even
            // though most other code stops bothering with things like "thread safety" around this stage.
            if (node.markDeleted()) {
                // node could have already been dropped from the cache, and then re-created (sharing the same cache
                // key equality). That is a fresh node that needs its own deletion, so only try to remove our instance.
                cache.remove(key, node);
                parent.updateNodeCount(-1);
                if (node.hasNodes() && !node.isAlias()) {
                    markForChildDeletion(node);
                }
                return true;
            }
        }
        return false;
    }

    /** Parks {@code node} under the trash node so its subtree is deleted in the background. */
    private void markForChildDeletion(final ConcurrentTreeNode node) {
        /*
         * only put nodes in the trash if they have children because they've
         * otherwise already been purged from backing store by release() in the
         * TreeCache.
         */
        assert node.hasNodes();
        assert !node.isAlias();
        long nodeDB = treeTrashNode.nodeDB();
        // the trash child key is just a sequence number; names are irrelevant here
        int next = treeTrashNode.incrementNodeCount();
        DBKey key = new DBKey(nodeDB, Raw.get(LessBytes.toBytes(next)));
        source.put(key, node);
        log.trace("[trash.mark] {} --> {}", next, treeTrashNode);
    }

    /** Returns a range over every child stored under node database {@code db}. */
    @SuppressWarnings("unchecked")
    IPageDB.Range<DBKey, ConcurrentTreeNode> fetchNodeRange(long db) {
        return source.range(new DBKey(db), new DBKey(db + 1));
    }

    /** Returns a range over the children of {@code db} starting at name {@code from}. */
    @SuppressWarnings({"unchecked", "unused"})
    private IPageDB.Range<DBKey, ConcurrentTreeNode> fetchNodeRange(long db, String from) {
        return source.range(new DBKey(db, Raw.get(from)), new DBKey(db + 1));
    }

    /**
     * Returns a range over the children of {@code db} in [{@code from}, {@code to});
     * a null {@code to} means "until the end of the database".
     */
    @SuppressWarnings("unchecked")
    IPageDB.Range<DBKey, ConcurrentTreeNode> fetchNodeRange(long db, String from, String to) {
        return source.range(new DBKey(db, Raw.get(from)),
                to == null ? new DBKey(db + 1, (Raw) null) : new DBKey(db, Raw.get(to)));
    }

    @Override public ConcurrentTreeNode getRootNode() {
        return treeRootNode;
    }

    /**
     * Package-level visibility is for testing purposes only.
     */
    @VisibleForTesting
    void waitOnDeletions() {
        shutdownDeletionThreadPool();
        synchronized (treeTrashNode) {
            if (trashIterator != null) {
                trashIterator.close();
                trashIterator = null;
            }
        }
    }

    /**
     * Package-level visibility is for testing purposes only.
     */
    @VisibleForTesting
    ConcurrentMap<CacheKey, ConcurrentTreeNode> getCache() {
        return cache;
    }

    /** Shuts down the deletion pool and blocks until outstanding deletions finish. */
    private void shutdownDeletionThreadPool() {
        if (deletionThreadPool == null) {
            return;
        }
        deletionThreadPool.shutdown();
        try {
            if (!deletionThreadPool.awaitTermination(10, TimeUnit.SECONDS)) {
                log.warn("Waiting on outstanding node deletions to complete.");
                deletionThreadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
            }
        } catch (InterruptedException e) {
            // restore the interrupt status rather than silently swallowing it
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Delete from the backing storage all nodes that have been moved to be
     * children of the trash node where they are waiting deletion. Also delete
     * all subtrees of these nodes. After deleting each subtree then test
     * the provided {@param terminationCondition}. If it returns true then
     * stop deletion.
     *
     * @param terminationCondition invoked between subtree deletions to
     *                             determine whether to return from method.
     */
    @Override
    public void foregroundNodeDeletion(BooleanSupplier terminationCondition) {
        ConcurrentTreeDeletionTask deletionTask = new ConcurrentTreeDeletionTask(this, terminationCondition, log);
        deletionTask.run();
    }

    /** Flushes all dirty live cache entries to the backing store and persists the id counter. */
    @Override
    public void sync() throws IOException {
        log.debug("[sync] start");
        for (ConcurrentTreeNode node : cache.values()) {
            if (!node.isDeleted() && node.isChanged()) {
                source.put(node.getDbkey(), node);
            }
        }
        log.debug("[sync] end nextdb={}", nextDBID);
        // persist the counter so node db ids are never reissued after a restart
        Files.write(nextDBID.toString(), idFile, StandardCharsets.UTF_8);
    }

    @Override
    public long getDBCount() {
        return nextDBID.get();
    }

    @Override
    public int getCacheSize() {
        return cache.size();
    }

    @Override
    public double getCacheHitRate() {
        // without a meter logger nothing refreshes the rate periodically, so do it here
        if (logger == null) {
            getIntervalData();
        }
        return cacheHitRate.get();
    }

    /**
     * Close the tree.
     *
     * @param cleanLog  if true then wait for the BerkeleyDB clean thread to finish.
     * @param operation optionally test or repair the berkeleyDB.
     */
    @Override
    public void close(boolean cleanLog, CloseOperation operation) throws IOException {
        if (!closed.compareAndSet(false, true)) {
            log.trace("already closed");
            return;
        }
        log.debug("closing {}", this);
        waitOnDeletions();
        if (treeRootNode != null) {
            treeRootNode.markChanged();
            treeRootNode.release();
            if (treeRootNode.getLeaseCount() != 0) {
                throw new IllegalStateException("invalid root state on shutdown : " + treeRootNode);
            }
        }
        if (treeTrashNode != null) {
            treeTrashNode.markChanged();
            treeTrashNode.release();
        }
        sync();
        if (source != null) {
            int status = source.close(cleanLog, operation);
            if (status != 0) {
                throw new RuntimeException("page db close returned a non-zero exit code : " + status);
            }
        }
        if (logger != null) {
            logger.terminate();
        }
    }

    @Override
    public void close() throws IOException {
        close(false, CloseOperation.NONE);
    }

    /** Samples the meters and refreshes {@link #cacheHitRate} from the hit/miss counts. */
    @Override
    public Map<String, Long> getIntervalData() {
        Map<String, Long> mark = meter.mark();
        Long gets = mark.get(keyCacheGet);
        Long miss = mark.get(keyCacheMiss);
        if ((gets == null) || (gets == 0)) {
            cacheHitRate.set(0.0);
        } else if ((miss == null) || (miss == 0)) {
            cacheHitRate.set(1.0);
        } else {
            cacheHitRate.set(1.0d - ((miss * 1.0d) / ((gets + miss) * 1.0d)));
        }
        return mark;
    }

    private void reportCacheHit() {
        meter.inc(METERTREE.CACHE_HIT);
    }

    private void reportCacheMiss() {
        meter.inc(METERTREE.CACHE_MISS);
    }

    @Override
    public String toString() {
        return "Tree@" + root;
    }

    // DataTree methods below delegate to the root node

    @Override
    public DataTreeNode getLeasedNode(String name) {
        return getRootNode().getLeasedNode(name);
    }

    @Override
    public DataTreeNode getOrCreateNode(String name, DataTreeNodeInitializer init) {
        return getRootNode().getOrCreateNode(name, init);
    }

    @Override
    public boolean deleteNode(String node) {
        return getRootNode().deleteNode(node);
    }

    @Override
    public ClosableIterator<DataTreeNode> getIterator() {
        ConcurrentTreeNode rootNode = getRootNode();
        if (rootNode != null) {
            // use the already-fetched node instead of calling getRootNode() again
            return rootNode.getIterator();
        }
        return null;
    }

    @Override
    public ClosableIterator<DataTreeNode> getIterator(String begin) {
        return getRootNode().getIterator(begin);
    }

    @Override
    public ClosableIterator<DataTreeNode> getIterator(String from, String to) {
        return getRootNode().getIterator(from, to);
    }

    @Override
    public Iterator<DataTreeNode> iterator() {
        return getRootNode().iterator();
    }

    @Override
    public String getName() {
        return getRootNode().getName();
    }

    @Override
    public int getNodeCount() {
        return getRootNode().getNodeCount();
    }

    @Override
    public long getCounter() {
        return getRootNode().getCounter();
    }

    @Override
    public void incrementCounter() {
        getRootNode().incrementCounter();
    }

    @Override
    public long incrementCounter(long val) {
        return getRootNode().incrementCounter(val);
    }

    @Override
    public void writeLock() {
        getRootNode().writeLock();
    }

    @Override
    public void writeUnlock() {
        getRootNode().writeUnlock();
    }

    @Override
    public void setCounter(long val) {
        getRootNode().setCounter(val);
    }

    @Override
    public void updateChildData(DataTreeNodeUpdater state, TreeDataParent path) {
        getRootNode().updateChildData(state, path);
    }

    @Override
    public void updateParentData(DataTreeNodeUpdater state, DataTreeNode child, boolean isnew) {
        getRootNode().updateParentData(state, child, isnew);
    }

    @Override
    public boolean aliasTo(DataTreeNode target) {
        throw new RuntimeException("root node cannot be an alias");
    }

    @Override
    public void release() {
        getRootNode().release();
    }

    @Override
    public DataTreeNodeActor getData(String key) {
        return getRootNode().getData(key);
    }

    @Override
    public Map<String, TreeNodeData> getDataMap() {
        return getRootNode().getDataMap();
    }

    /**
     * Decodes a raw db key into a node name. UTF-8 is guaranteed to be supported
     * by every JVM, so using the {@link StandardCharsets} constant avoids the
     * impossible {@code UnsupportedEncodingException} of the String-name overload.
     */
    private static String keyName(DBKey dbkey) {
        return new String(dbkey.key(), StandardCharsets.UTF_8);
    }

    /**
     * Recursively delete all the children of the input node.
     * Use a non-negative value for the counter parameter to
     * tally the nodes that have been deleted. Use a negative
     * value to disable logging of the number of deleted nodes.
     *
     * @param rootNode root of the subtree to delete
     */
    void deleteSubTree(ConcurrentTreeNode rootNode,
                       MutableLong totalCount,
                       MutableLong nodeCount,
                       BooleanSupplier terminationCondition,
                       Logger deletionLogger) {
        long nodeDB = rootNode.nodeDB();
        IPageDB.Range<DBKey, ConcurrentTreeNode> range = fetchNodeRange(nodeDB);
        DBKey endRange;
        boolean reschedule;
        try {
            while (range.hasNext() && !terminationCondition.getAsBoolean()) {
                totalCount.increment();
                if ((totalCount.longValue() % deletionLogInterval) == 0) {
                    deletionLogger.info("Deleted {} total nodes in {} trash nodes from the trash.",
                            totalCount.longValue(), nodeCount.longValue());
                }
                Map.Entry<DBKey, ConcurrentTreeNode> entry = range.next();
                ConcurrentTreeNode next = entry.getValue();
                if (next.hasNodes() && !next.isAlias()) {
                    deleteSubTree(next, totalCount, nodeCount, terminationCondition, deletionLogger);
                }
                String name = entry.getKey().rawKey().toString();
                CacheKey key = new CacheKey(nodeDB, name);
                ConcurrentTreeNode cacheNode = cache.remove(key);
                /* Mark the node as deleted so that it will not be
                 * pushed to disk when removed from the eviction queue.
                 */
                if (cacheNode != null) {
                    cacheNode.markDeleted();
                }
            }
            // if we stopped early, remember where we got to and re-enqueue the
            // remainder of the subtree; otherwise remove the entire child range
            if (range.hasNext()) {
                endRange = range.next().getKey();
                reschedule = true;
            } else {
                endRange = new DBKey(nodeDB + 1);
                reschedule = false;
            }
        } finally {
            range.close();
        }
        source.remove(new DBKey(nodeDB), endRange);
        if (reschedule) {
            markForChildDeletion(rootNode);
        }
    }

    /**
     * Returns the next trash entry to delete, or null when the trash is empty.
     * Synchronized on the trash node because the shared iterator is stateful.
     */
    Map.Entry<DBKey, ConcurrentTreeNode> nextTrashNode() {
        synchronized (treeTrashNode) {
            if (trashIterator == null) {
                return recreateTrashIterator();
            } else if (trashIterator.hasNext()) {
                return trashIterator.next();
            } else {
                return recreateTrashIterator();
            }
        }
    }

    @GuardedBy("treeTrashNode")
    private Map.Entry<DBKey, ConcurrentTreeNode> recreateTrashIterator() {
        if (trashIterator != null) {
            trashIterator.close();
        }
        trashIterator = fetchNodeRange(treeTrashNode.nodeDB());
        if (trashIterator.hasNext()) {
            return trashIterator.next();
        } else {
            trashIterator.close();
            trashIterator = null;
            return null;
        }
    }

    /**
     * For testing purposes only.
     */
    @VisibleForTesting
    ConcurrentTreeNode getTreeTrashNode() {
        return treeTrashNode;
    }

    /** Runs an integrity check on the backing store if it supports one. */
    public void repairIntegrity() {
        PagedKeyValueStore store = source.getEps();
        if (store instanceof SkipListCache) {
            ((SkipListCache) store).testIntegrity(true);
        }
    }
}