/*******************************************************************************
* Copyright (c) 2009 Centrum Wiskunde en Informatica (CWI)
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Arnold Lankamp - interfaces and implementation
*******************************************************************************/
package org.rascalmpl.value.impl.util.sharing;
import java.lang.ref.WeakReference;
/**
* This is a 'weak' constant pool for uniquely represented objects.
* <br />
* <br />
* This implementation is thread-safe, but tries to avoid (contended) locking as much as is
* reasonably achievable. As a result this implementation scales fairly well on multi-core /
* multi-processor systems.
* <br />
* <br />
* NOTE: It is highly recommended to leave this class alone; concurrency bugs can be very subtle,
* so please don't edit this class unless you know exactly what you're doing.
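* <br />
* <br />
* A minimal usage sketch (assuming {@code MyValue} is a hypothetical IShareable implementation
* whose hashCode() and equivalent(...) are based on its content):
* <pre>{@code
* ShareableValuesFactory<MyValue> factory = new ShareableValuesFactory<>();
* MyValue a = factory.build(new MyValue("x"));
* MyValue b = factory.build(new MyValue("x"));
* // a == b holds, since equivalent values are reduced to a single shared instance.
* }</pre>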
*
* @author Arnold Lankamp
*/
public final class ShareableValuesFactory<E extends IShareable>{
private final static int DEFAULT_LOG_NR_OF_SEGMENTS = 5;
private final int logNrOfSegments;
private final Segment<E>[] segments;
/**
* Default constructor.
*/
@SuppressWarnings("unchecked")
public ShareableValuesFactory(){
super();
logNrOfSegments = DEFAULT_LOG_NR_OF_SEGMENTS;
segments = (Segment<E>[]) new Segment[1 << logNrOfSegments];
for(int i = segments.length - 1; i >= 0; i--){
segments[i] = new Segment<>(logNrOfSegments);
}
}
/**
* Constructor.
*
* @param logNrOfSegments
* Sets the amount of striping (the number of segments) to: (2 ^ logNrOfSegments).
* @throws IllegalArgumentException
* Thrown when logNrOfSegments is too high (logNrOfSegments >= 32 - 5).
*/
@SuppressWarnings("unchecked")
public ShareableValuesFactory(int logNrOfSegments){
super();
if((32 - logNrOfSegments) <= 5) throw new IllegalArgumentException("logNrOfSegments must be smaller than (32 - 5).");
this.logNrOfSegments = logNrOfSegments;
segments = (Segment<E>[]) new Segment[1 << logNrOfSegments];
for(int i = segments.length - 1; i >= 0; i--){
segments[i] = new Segment<>(logNrOfSegments);
}
}
/**
* Removes stale entries from the set (if any).
*/
public void cleanup(){
int nrOfSegments = segments.length;
for(int i = 0; i < nrOfSegments; i++){
Segment<E> segment = segments[i];
synchronized(segment){
segment.cleanup();
}
}
}
/**
* Returns statistics.
*
* @see java.lang.Object#toString()
*/
public String toString(){
StringBuilder sb = new StringBuilder();
int nrOfSegments = segments.length;
for(int i = 0; i < nrOfSegments; i++){
Segment<E> segment = segments[i];
int maxSegmentBitSize = segment.maxSegmentBitSize;
int startHash = i << maxSegmentBitSize;
int endHash = ((i + 1) << maxSegmentBitSize) - 1;
sb.append("Segment hash range: ");
sb.append(startHash);
sb.append(" till ");
sb.append(endHash);
sb.append(" | ");
sb.append(segment.toString());
sb.append("\n");
}
return sb.toString();
}
/**
* Returns a unique version of the given shareable.
*
* @param shareable
* The shareable we want the unique reference to.
* @return The reference to the unique version of the given shareable.
*/
public E build(E shareable){
int hash = shareable.hashCode();
int segmentNr = hash >>> (32 - logNrOfSegments); // Use the top logNrOfSegments bits of the hash to select the segment.
return segments[segmentNr].get(shareable, hash);
}
/**
* Segments are hash sets, each responsible for the elements in a certain hash range.
*
* @author Arnold Lankamp
*/
private final static class Segment<E extends IShareable>{
private final static int DEFAULT_LOG_SEGMENT_SIZE = 5;
private final static float DEFAULT_LOAD_FACTOR = 2f;
private final int maxSegmentBitSize; // The maximal number of bits this segment's table may use (32 - logNrOfSegments).
private volatile Entry<E>[] entries; // The bucket array; read lock-free, modified only while holding the segment lock.
private volatile int hashMask; // Mask used to map a hash code to a bucket (table length - 1).
private int bitSize; // The current log2 of the table size.
private int threshold; // The load above which the table is rehashed.
private int load; // The number of entries currently in the table (stale entries are subtracted during cleanup / rehash).
private volatile boolean flaggedForCleanup; // Set by the garbage collection detector's finalizer after a collection has run.
private volatile WeakReference<GarbageCollectionDetector<E>> garbageCollectionDetector; // Weak reference to the GC detector; set to null by its finalizer.
private int cleanupScaler; // Weighted average of the percentage of entries cleared per cleanup.
private int cleanupThreshold; // Throttles how often cleanups are executed.
/**
* Constructor.
*
* @param logNrOfSegments
* Determines the maximal size this segment can grow to (at most 2 ^ (32 - logNrOfSegments) buckets).
*/
@SuppressWarnings("unchecked")
public Segment(int logNrOfSegments){
super();
maxSegmentBitSize = 32 - logNrOfSegments;
bitSize = DEFAULT_LOG_SEGMENT_SIZE;
int nrOfEntries = 1 << bitSize;
hashMask = nrOfEntries - 1;
entries = (Entry<E>[]) new Entry[nrOfEntries];
threshold = (int) (nrOfEntries * DEFAULT_LOAD_FACTOR);
load = 0;
flaggedForCleanup = false;
garbageCollectionDetector = new WeakReference<>(new GarbageCollectionDetector<>(this)); // Allocate a (unreachable) GC detector.
cleanupScaler = 50; // Initially we set the average cleanup percentage to 50%, to make sure the cleanup can and will be executed the first time.
cleanupThreshold = cleanupScaler;
}
/**
* Removes stale entries from this segment (if any).
*/
private void cleanup(){
Entry<E>[] table = entries;
int newLoad = load;
for(int i = hashMask; i >= 0; i--){
Entry<E> e = table[i];
if(e != null){
Entry<E> previous = null;
do{
Entry<E> next = e.next;
if(e.get() == null){
if(previous == null){
table[i] = next;
}else{
previous.next = next;
}
newLoad--;
}else{
previous = e;
}
e = next;
}while(e != null);
}
}
load = newLoad;
entries = table; // Volatile write.
}
/**
* Rehashes this set. All the entries will remain in the same order as they were; this
* should improve look-up time in the general case and is more garbage collector friendly
* (it prevents old -> young references, contrary to the 'default' rehashing algorithm,
* which makes a complete mess of it).
*/
@SuppressWarnings("unchecked")
private void rehash(){
int nrOfEntries = 1 << (++bitSize);
int newHashMask = nrOfEntries - 1;
Entry<E>[] oldEntries = entries;
Entry<E>[] newEntries = (Entry<E>[]) new Entry[nrOfEntries];
// Construct temporary entries that function as roots for the entries that remain in
// the current bucket and those that are being shifted.
Entry<E> currentEntryRoot = new Entry<>(null, 0);
Entry<E> shiftedEntryRoot = new Entry<>(null, 0);
int newLoad = load;
int oldSize = oldEntries.length;
for(int i = oldSize - 1; i >= 0; i--){
Entry<E> e = oldEntries[i];
if(e != null){
Entry<E> lastCurrentEntry = currentEntryRoot;
Entry<E> lastShiftedEntry = shiftedEntryRoot;
do{
if(e.get() != null){ // Cleared entries should not be copied.
int position = e.hash & newHashMask;
if(position == i){
lastCurrentEntry.next = e;
lastCurrentEntry = e;
}else{
lastShiftedEntry.next = e;
lastShiftedEntry = e;
}
}else{
newLoad --;
}
e = e.next;
}while(e != null);
// Set the next pointers of the last entries in the buckets to null.
lastCurrentEntry.next = null;
lastShiftedEntry.next = null;
newEntries[i] = currentEntryRoot.next;
newEntries[i | oldSize] = shiftedEntryRoot.next; // The entries got shifted by the size of the old table.
}
}
load = newLoad;
threshold <<= 1;
entries = newEntries; // Volatile write.
hashMask = newHashMask; // Volatile write.
}
/**
* Ensures the load in this segment will not exceed a certain threshold.
*/
private void ensureCapacity(){
// Rehash if the load exceeds the threshold, unless the segment has already been stretched to
// its maximum (since rehashing would then be pointless).
if(load > threshold && bitSize < maxSegmentBitSize){
rehash();
}
}
/**
* Attempts to run a cleanup if the garbage collector ran before the invocation of this
* function. This ensures that, in most cases, the buckets will contain few (if any)
* cleared entries. This speeds up lookups significantly.
* <br />
* <br />
* Note that we automatically throttle the frequency of the cleanups. In case we hardly
* ever collect anything (either because there is little garbage or because collections occur
* very frequently) cleanups will be slowed down to as little as once every four minor garbage
* collections. When a lot of entries are being cleared the cleanup will run after every
* collection.
* <br />
* This is done in an attempt to balance the overhead a cleanup induces with the
* performance gains it has on lookups; while keeping the probability of long-lived stale
* entries relatively low. Additionally, linking the cleanup 'trigger' to garbage
* collections ensures that we clean the segment exactly when it is both needed and
* possible.
* <br />
* <br />
* NOTE: This may seem a bit 'dodgy' to some, but this does work on (at least) every major
* JVM since 1.2, and will keep working properly as long as weak references keep functioning
* as intended. Apart from that, this way of handling it is significantly faster than any
* of the alternatives (i.e. using reference queues is slow as *bleep* in comparison).
* Additionally, it saves us from synchronizing on every single call.
*/
private void tryCleanup(){
if(flaggedForCleanup){
flaggedForCleanup = false;
synchronized(this){
if(garbageCollectionDetector == null){ // This being 'null' indicates the same thing as 'flaggedForCleanup' being 'true' (but we check it like this to guard against optimizations the VM probably doesn't, but in theory could, apply); and yes, DCL-like constructs work on volatiles.
if(cleanupThreshold > 8){ // The 'magic' number 8 is chosen, so the cleanup will be done at least once after every four garbage collections.
int oldLoad = load;
cleanup();
int cleanupPercentage;
if(oldLoad == 0) cleanupPercentage = 50; // This prevents division by zero errors in case the table is empty (keep the cleanup percentage at 50% in this case).
else cleanupPercentage = 100 - ((load * 100) / oldLoad); // Calculate the percentage of entries that have been cleaned.
cleanupScaler = (((cleanupScaler * 25) + (cleanupPercentage * 7)) >> 5); // Modify the scaler, depending on the history (weight = 25) and how much we cleaned up this time (weight = 7).
if(cleanupScaler > 0){
cleanupThreshold = cleanupScaler;
}else{
cleanupThreshold = 1; // If the scaler value became 0 (when we hardly ever collect anything), set the threshold to 1, so we only skip the next four garbage collections.
}
}else{
cleanupThreshold <<= 1;
}
garbageCollectionDetector = new WeakReference<>(new GarbageCollectionDetector<>(this)); // Allocate a new (unreachable) GC detector.
}
}
}
}
/**
* Inserts the given shareable into this set.
*
* @param shareable
* The shareable to insert.
* @param hash
* The hash code that is associated with the given shareable.
*/
private void put(E shareable, int hash){
Entry<E> e = new Entry<>(shareable, hash);
Entry<E>[] table = entries;
int position = hash & hashMask;
e.next = table[position];
table[position] = e;
load++;
entries = table; // Volatile write (publishes the new entry to the lock-free readers).
}
/**
* Returns a reference to the unique version of the given shareable.
*
* @param shareable
* The shareable of which we want to obtain the reference to the unique version.
* @param hash
* The hash code that is associated with the given shareable.
* @return The reference to the unique version of the given shareable.
*/
public final E get(E shareable, int hash){
// Cleanup if necessary.
tryCleanup();
// Find the object (lock free).
int position = hash & hashMask;
Entry<E> e = entries[position]; // Volatile read.
if(e != null){
do{
if(hash == e.hash){
E object = e.get();
if(object != null){
if(shareable.equivalent(object)){
return object;
}
}
}
e = e.next;
}while(e != null);
}
synchronized(this){
// Try again while holding the global lock for this segment.
position = hash & hashMask;
e = entries[position];
if(e != null){
do{
if(hash == e.hash){
E object = e.get();
if(object != null){
if(shareable.equivalent(object)){
return object;
}
}
}
e = e.next;
}while(e != null);
}
// If we still can't find it, add it.
ensureCapacity();
E result = shareable;
put(result, hash);
return result;
}
}
/**
* Returns statistics.
*
* @see java.lang.Object#toString()
*/
public String toString(){
StringBuilder sb = new StringBuilder();
synchronized(this){
Entry<E>[] table = entries;
int tableSize = table.length;
sb.append("Table size: ");
sb.append(tableSize);
sb.append(", ");
sb.append("Number of entries: ");
sb.append(load);
sb.append(", ");
sb.append("Threshold: ");
sb.append(threshold);
sb.append(", ");
int nrOfFilledBuckets = 0;
int totalNrOfCollisions = 0;
int maxBucketLength = 0;
for(int i = 0; i < tableSize; i++){
Entry<E> e = table[i];
if(e != null){
nrOfFilledBuckets++;
int bucketLength = 1;
while((e = e.next) != null){
bucketLength++;
}
if(bucketLength > maxBucketLength) maxBucketLength = bucketLength;
totalNrOfCollisions += bucketLength - 1;
}
}
// Do some voodoo to round the results to a certain number of decimals (3 and 1
// respectively); or at least attempt to do so ....
double averageBucketLength = 0;
double distribution = 100;
if(nrOfFilledBuckets != 0){
averageBucketLength = (((double) ((totalNrOfCollisions * 1000) / nrOfFilledBuckets)) / 1000) + 1;
distribution = 100 - (((double) (((totalNrOfCollisions * 1000) / nrOfFilledBuckets) / DEFAULT_LOAD_FACTOR)) / 10);
}
sb.append("Number of filled buckets: ");
sb.append(nrOfFilledBuckets);
sb.append(", ");
sb.append("Load factor: ");
sb.append(DEFAULT_LOAD_FACTOR);
sb.append(", ");
sb.append("Distribution (collisions vs filled buckets): "); // Total number of collisions vs number of filled buckets.
sb.append(distribution);
sb.append("%, ");
sb.append("Total number of collisions: ");
sb.append(totalNrOfCollisions);
sb.append(", ");
sb.append("Average (filled) bucket length: ");
sb.append(averageBucketLength);
sb.append(", ");
sb.append("Maximal bucket length: ");
sb.append(maxBucketLength);
sb.append(", ");
sb.append("Cleanup scaler: ");
sb.append(cleanupScaler);
sb.append("%");
}
return sb.toString();
}
/**
* An entry used for storing shareables in this segment.
*
* @author Arnold Lankamp
*/
private static class Entry<E extends IShareable> extends WeakReference<E>{
public final int hash;
public Entry<E> next;
/**
* Constructor.
*
* @param shareable
* The shareable.
* @param hash
* The hashcode that is associated with the given shareable.
*/
public Entry(E shareable, int hash){
super(shareable);
this.hash = hash;
}
}
/**
* An object that can be used to detect when a garbage collection has been executed.
* Instances of this object must be made weakly reachable for this to work.
*
* @author Arnold Lankamp
*/
private static class GarbageCollectionDetector<E extends IShareable>{
private final Segment<E> segment;
/**
* Constructor.
*
* @param segment
* The segment we need to flag after a garbage collection has occurred.
*/
public GarbageCollectionDetector(Segment<E> segment){
this.segment = segment;
}
/**
* Executed after the garbage collector detects that this object is eligible for
* reclamation. When this happens it will flag the associated segment for cleanup.
*
* @see java.lang.Object#finalize
*/
protected void finalize(){
segment.garbageCollectionDetector = null;
segment.flaggedForCleanup = true;
}
}
}
}