// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.semantic.recognizer;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
/**
 * LFU cache implementation based on http://dhruvbird.com/lfu.pdf, with some notable differences:
 * <ul>
 * <li>
 * Frequency list is stored as an array with no next/prev pointers between nodes: looping over the array should be faster and more
 * CPU-cache friendly than using an ad-hoc linked-pointers structure.
 * </li>
 * <li>
 * The max frequency is capped at the cache size to avoid creating more and more frequency list entries, and all elements residing
 * in the max frequency entry are re-positioned in the frequency entry linked set in order to put most recently accessed elements
 * ahead of less recent ones, which will be collected sooner.
 * </li>
 * <li>
 * The eviction factor determines how many elements (more specifically, the percentage of) will be evicted.
 * </li>
 * </ul>
 * As a consequence, this cache runs in *amortized* O(1) time (considering the worst case of having the lowest frequency at 0 and
 * having to evict all elements).
 * <p>
 * Notes on the {@link Map} contract:
 * <ul>
 * <li>Only {@link #get(Object)} increases the access frequency of an entry; {@link #put(Object, Object)} on an existing key
 * replaces the value and leaves its frequency untouched.</li>
 * <li>{@link #keySet()} is a read-only view, while {@link #values()} and {@link #entrySet()} are read-only snapshots; none of
 * them touches access frequencies.</li>
 * <li>This class performs no synchronization of its own.</li>
 * </ul>
 *
 * @author Sergio Bossa
 */
public class LFUCache<K, V> implements Map<K, V> {

    /** Key to node lookup; the node carries the value and its current frequency bucket index. */
    private final Map<K, CacheNode<K, V>> cache;

    /** Bucket {@code i} holds, in insertion order, every node whose frequency is {@code i}. */
    private final LinkedHashSet<CacheNode<K, V>>[] frequencyList;

    /** Index of the lowest non-empty bucket, or 0 when the cache is empty. */
    private int lowestFrequency;

    /** Highest bucket index; frequencies are capped at this value. */
    private final int maxFrequency;

    private final int maxCacheSize;

    private final float evictionFactor;

    /**
     * Creates an empty cache.
     *
     * @param initialCacheSize initial capacity of the backing hash map
     * @param maxCacheSize maximum number of entries; reaching it triggers an eviction on the next insertion of a new key
     * @param evictionFactor fraction of {@code maxCacheSize} evicted at once, in the range (0, 1]
     * @throws IllegalArgumentException if {@code maxCacheSize} is not positive or {@code evictionFactor} is outside (0, 1]
     * (including NaN)
     */
    public LFUCache(int initialCacheSize, int maxCacheSize, float evictionFactor) {
        if (maxCacheSize <= 0) {
            // A zero-sized frequency array would make the very first put() fail with an index error.
            throw new IllegalArgumentException("Max cache size must be greater than 0");
        }
        // Written as a negated range check so that NaN is rejected too: a NaN factor would silently disable eviction.
        if (!(evictionFactor > 0 && evictionFactor <= 1)) {
            throw new IllegalArgumentException("Eviction factor must be greater than 0 and lesser than or equal to 1");
        }
        this.cache = new HashMap<K, CacheNode<K, V>>(initialCacheSize);
        // Generic array creation is unavoidable here; only LinkedHashSet<CacheNode<K, V>> instances are ever stored in it.
        @SuppressWarnings("unchecked")
        final LinkedHashSet<CacheNode<K, V>>[] buckets = new LinkedHashSet[maxCacheSize];
        this.frequencyList = buckets;
        this.lowestFrequency = 0;
        this.maxFrequency = maxCacheSize - 1;
        this.maxCacheSize = maxCacheSize;
        this.evictionFactor = evictionFactor;
        initFrequencyList();
    }

    /**
     * Associates the value with the key. A new key enters the cache at the lowest frequency (evicting entries first if the
     * cache is full); an existing key only gets its value replaced.
     *
     * @return the previous value of an existing key, or {@code null} if the key was not cached
     */
    @Override
    public V put(K k, V v) {
        V oldValue = null;
        CacheNode<K, V> currentNode = cache.get(k);
        if (currentNode == null) {
            if (cache.size() >= maxCacheSize) {
                doEviction();
            }
            currentNode = new CacheNode<K, V>(k, v, 0);
            frequencyList[0].add(currentNode);
            cache.put(k, currentNode);
            // Bucket 0 is now guaranteed non-empty.
            lowestFrequency = 0;
        } else {
            oldValue = currentNode.v;
            currentNode.v = v;
        }
        return oldValue;
    }

    /** Puts every entry of the given map, one by one, through {@link #put(Object, Object)}. */
    @Override
    public void putAll(Map<? extends K, ? extends V> map) {
        for (Map.Entry<? extends K, ? extends V> me : map.entrySet()) {
            put(me.getKey(), me.getValue());
        }
    }

    /**
     * Returns the cached value and records the access: the entry moves up one frequency bucket, or, when already at the
     * maximum frequency, to the tail of that bucket.
     *
     * @return the cached value, or {@code null} if the key is not cached
     */
    @Override
    public V get(Object k) {
        CacheNode<K, V> currentNode = cache.get(k);
        if (currentNode == null) {
            return null;
        }
        int currentFrequency = currentNode.frequency;
        if (currentFrequency < maxFrequency) {
            int nextFrequency = currentFrequency + 1;
            LinkedHashSet<CacheNode<K, V>> currentNodes = frequencyList[currentFrequency];
            LinkedHashSet<CacheNode<K, V>> newNodes = frequencyList[nextFrequency];
            moveToNextFrequency(currentNode, nextFrequency, currentNodes, newNodes);
            // The node object is mutated in place, so the key -> node mapping does not need to be refreshed.
            if (lowestFrequency == currentFrequency && currentNodes.isEmpty()) {
                lowestFrequency = nextFrequency;
            }
        } else {
            // Hybrid with LRU: put most recently accessed ahead of others:
            LinkedHashSet<CacheNode<K, V>> nodes = frequencyList[currentFrequency];
            nodes.remove(currentNode);
            nodes.add(currentNode);
        }
        return currentNode.v;
    }

    /**
     * Removes the entry for the key, if any.
     *
     * @return the removed value, or {@code null} if the key was not cached
     */
    @Override
    public V remove(Object k) {
        CacheNode<K, V> currentNode = cache.remove(k);
        if (currentNode == null) {
            return null;
        }
        frequencyList[currentNode.frequency].remove(currentNode);
        if (lowestFrequency == currentNode.frequency) {
            // The lowest bucket may have just become empty.
            findNextLowestFrequency();
        }
        return currentNode.v;
    }

    /**
     * Returns the 1-based frequency of the key: 1 for an entry that was put but never read, 0 if the key is not cached.
     */
    public int frequencyOf(K k) {
        CacheNode<K, V> node = cache.get(k);
        return node != null ? node.frequency + 1 : 0;
    }

    /** Removes every entry and resets all frequency buckets. */
    @Override
    public void clear() {
        for (int i = 0; i <= maxFrequency; i++) {
            frequencyList[i].clear();
        }
        cache.clear();
        lowestFrequency = 0;
    }

    /**
     * Returns a read-only view of the cached keys. The view is not modifiable because removing keys behind the cache's back
     * would leave stale nodes in the frequency buckets.
     */
    @Override
    public Set<K> keySet() {
        return Collections.unmodifiableSet(cache.keySet());
    }

    /** Returns a read-only snapshot of the cached values; access frequencies are not affected. */
    @Override
    public Collection<V> values() {
        final Collection<V> snapshot = new ArrayList<V>(cache.size());
        for (CacheNode<K, V> node : cache.values()) {
            snapshot.add(node.v);
        }
        return Collections.unmodifiableCollection(snapshot);
    }

    /** Returns a read-only snapshot of the cached entries; access frequencies are not affected. */
    @Override
    public Set<Entry<K, V>> entrySet() {
        final Set<Entry<K, V>> snapshot = new LinkedHashSet<Entry<K, V>>();
        for (CacheNode<K, V> node : cache.values()) {
            snapshot.add(new AbstractMap.SimpleImmutableEntry<K, V>(node.k, node.v));
        }
        return Collections.unmodifiableSet(snapshot);
    }

    @Override
    public int size() {
        return cache.size();
    }

    @Override
    public boolean isEmpty() {
        return cache.isEmpty();
    }

    @Override
    public boolean containsKey(Object o) {
        return cache.containsKey(o);
    }

    /** Returns whether at least one cached value equals the argument (null-safe); access frequencies are not affected. */
    @Override
    public boolean containsValue(Object o) {
        for (CacheNode<K, V> node : cache.values()) {
            if (o == null ? node.v == null : o.equals(node.v)) {
                return true;
            }
        }
        return false;
    }

    /** Allocates one empty bucket per possible frequency. */
    private void initFrequencyList() {
        for (int i = 0; i <= maxFrequency; i++) {
            frequencyList[i] = new LinkedHashSet<CacheNode<K, V>>();
        }
    }

    /**
     * Evicts entries, least frequently used first (oldest first within a bucket), until the number of evicted entries
     * reaches {@code maxCacheSize * evictionFactor}.
     *
     * @throws IllegalStateException if the bucket designated by {@code lowestFrequency} is empty while entries still have to
     * be evicted
     */
    private void doEviction() {
        final float target = maxCacheSize * evictionFactor;
        int evicted = 0;
        while (evicted < target) {
            LinkedHashSet<CacheNode<K, V>> nodes = frequencyList[lowestFrequency];
            if (nodes.isEmpty()) {
                throw new IllegalStateException("Lowest frequency constraint violated!");
            }
            Iterator<CacheNode<K, V>> it = nodes.iterator();
            while (it.hasNext() && evicted < target) {
                CacheNode<K, V> node = it.next();
                it.remove();
                cache.remove(node.k);
                evicted++;
            }
            if (nodes.isEmpty()) {
                findNextLowestFrequency();
            }
        }
    }

    /** Moves the node from its current bucket to the given one and records its new frequency. */
    private void moveToNextFrequency(CacheNode<K, V> currentNode, int nextFrequency,
            LinkedHashSet<CacheNode<K, V>> currentNodes, LinkedHashSet<CacheNode<K, V>> newNodes) {
        currentNodes.remove(currentNode);
        newNodes.add(currentNode);
        currentNode.frequency = nextFrequency;
    }

    /** Advances {@code lowestFrequency} to the next non-empty bucket, or resets it to 0 when every bucket is empty. */
    private void findNextLowestFrequency() {
        while (lowestFrequency <= maxFrequency && frequencyList[lowestFrequency].isEmpty()) {
            lowestFrequency++;
        }
        if (lowestFrequency > maxFrequency) {
            lowestFrequency = 0;
        }
    }

    /** Cache entry: immutable key, replaceable value and the index of the frequency bucket currently holding it. */
    private static class CacheNode<Key, Value> {

        private final Key k;

        private Value v;

        private int frequency;

        public CacheNode(Key k, Value v, int frequency) {
            this.k = k;
            this.v = v;
            this.frequency = frequency;
        }

        @Override
        public String toString() {
            return "[" + v + "->" + frequency + "]";
        }
    }

    @Override
    public String toString() {
        return cache.toString();
    }
}