package org.wikibrain.utils;

import gnu.trove.impl.PrimeFinder;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.list.array.TLongArrayList;

import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicIntegerArray;
import java.util.concurrent.atomic.AtomicLongArray;

/**
 * An implementation of a set of ints that uses open addressing.
 *
 * The implementation is intended to be threadsafe and is almost entirely lock-free.
 * A lock is only taken while the underlying array is expanded.
 *
 * If you're careful and provide an accurate guess of the capacity,
 * performance will be dramatically improved.
 *
 * Does not support removals at the moment.
 *
 * NOTE(review): {@link #add(int)} writes into the current array without holding the
 * expansion lock. A value written into the old array while another thread is in the
 * middle of rehashing may therefore not be carried over into the new array. Confirm
 * whether callers can add concurrently with an expansion; if so this needs a
 * stronger scheme (e.g. a read/write lock around add and expand).
 *
 * @author Shilad Sen
 */
public class AtomicIntSet {

    /**
     * Marker stored in empty slots.
     * Hack: pick a value that's unlikely to be used as a real element.
     * This value can never be stored in the set.
     */
    private static final int UNUSED_VALUE = Integer.MIN_VALUE + 1;

    /** Fraction of the slots that may be reserved before the array is expanded. */
    private static final double LOAD_FACTOR = 0.5;

    /** The open-addressing table. Replaced wholesale by a larger array on expansion. */
    private volatile AtomicIntegerArray set;

    /** Number of distinct elements in the set. Also serves as the expansion lock. */
    private final AtomicInteger numElements = new AtomicInteger();

    /**
     * Creates a new set with the smallest supported capacity.
     */
    public AtomicIntSet() {
        this(5);
    }

    /**
     * Creates a new set with the specified capacity.
     * Ideally, the capacity will be at least the expected number of elements
     * times (1 / loadFactor), to prevent future expansions.
     *
     * @param capacity requested number of slots; values below 5 are raised to 5
     */
    public AtomicIntSet(int capacity) {
        set = makeEmptyArray(capacity);
    }

    /**
     * Returns true iff the set contains the specified value.
     * If the value is added while contains is running, it may or may not return true.
     * Will always return false if the set does not contain the value.
     *
     * @param value the value to look up
     * @return true if the value was found in the table snapshot taken at the start of the call
     */
    public boolean contains(int value) {
        // An implementation of Knuth's open addressing algorithm adapted from Trove's TLongHash.
        // Works on the array that was current at the *start* of the call.
        AtomicIntegerArray tmp = set;   // could change out from under us...
        int length = tmp.length();
        int hash = hash(value);
        int probe = 1 + (hash % (length - 2));
        int index = hash % length;
        int firstIndex = index;

        // The home slot itself is examined last: the loop steps before it looks.
        // setInternal() walks the identical sequence, so both agree on slot order.
        do {
            index -= probe;
            if (index < 0) {
                index += length;
            }
            int v = tmp.get(index);
            if (v == UNUSED_VALUE) {
                return false;   // hit an empty slot: the value was never stored on this path
            } else if (v == value) {
                return true;
            }
        } while (index != firstIndex);
        return false;
    }

    /**
     * Adds the specified value to the set.
     * Adding a value that is already present leaves the set and its size unchanged.
     *
     * @param value the value to add
     * @throws IllegalArgumentException if value is the internal empty-slot marker
     */
    public void add(int value) {
        if (value == UNUSED_VALUE) {
            throw new IllegalArgumentException("Value " + value + " is used internally as an unused slot marker!");
        }
        // Reserve a slot first so that the table is grown before we try to claim one.
        numElements.incrementAndGet();
        expandIfNecessary();
        if (!setInternal(set, value)) {
            // Nothing new was stored (duplicate value): give the reservation back so that
            // size() keeps counting distinct elements only.
            numElements.decrementAndGet();
        }
    }

    /**
     * Stores a particular value in an array representing a set.
     *
     * @param array the table to insert into
     * @param value the value to store
     * @return true if this call claimed an empty slot for the value, false if the
     *         value was already present or no empty slot was found
     */
    private boolean setInternal(AtomicIntegerArray array, int value) {
        // An implementation of Knuth's open addressing algorithm adapted from Trove's TLongHash.
        int length = array.length();
        int hash = hash(value);
        int probe = 1 + (hash % (length - 2));
        int index = hash % length;
        int firstIndex = index;
        do {
            index -= probe;
            if (index < 0) {
                index += length;
            }
            int v = array.get(index);
            if (v == value) {
                return false;   // already set
            } else if (v == UNUSED_VALUE && array.compareAndSet(index, UNUSED_VALUE, value)) {
                return true;
            }
            // Either the slot holds a different value, or another thread won the race
            // for this slot: keep probing.
        } while (index != firstIndex);
        return false;
    }

    /**
     * Returns the number of elements stored in the set.
     * While an add is in flight the count may briefly include its reserved slot.
     *
     * @return the number of elements
     */
    public int size() {
        return numElements.get();
    }

    /**
     * Expands the underlying array if the load factor is exceeded.
     * If the load factor is NOT exceeded, no locking is required.
     * If it is exceeded, threads that also need an expansion block while one expands.
     */
    private void expandIfNecessary() {
        // Check if we're safe (usually the case, so no locks typically used!)
        if (numElements.get() < LOAD_FACTOR * set.length()) {
            return;
        }
        synchronized (numElements) {
            AtomicIntegerArray oldSet = set;
            // Maybe somebody expanded while we were waiting for the lock
            if (numElements.get() < LOAD_FACTOR * oldSet.length()) {
                return;
            }

            // expand, rehash
            int newSize = (int) Math.ceil(oldSet.length() / LOAD_FACTOR);
            AtomicIntegerArray newSet = makeEmptyArray(newSize);
            for (int i = 0; i < oldSet.length(); i++) {
                int v = oldSet.get(i);
                if (v != UNUSED_VALUE) {
                    setInternal(newSet, v);
                }
            }
            set = newSet;
        }
    }

    /**
     * Returns the values in the set.
     * This is a relatively expensive O(n) operation over the whole table.
     * It scans the array that is current at the start of the call, and
     * it may return any (or none of) the elements added while it is ongoing.
     *
     * @return the values in the set.
     */
    public int[] toArray() {
        TIntArrayList vals = new TIntArrayList();
        AtomicIntegerArray tmp = set;   // could change out from under us...
        for (int i = 0; i < tmp.length(); i++) {
            int v = tmp.get(i);
            if (v != UNUSED_VALUE) {
                vals.add(v);
            }
        }
        return vals.toArray();
    }

    /**
     * Hash function, from Trove. Clears the sign bit so the result is non-negative.
     *
     * @param value the value to hash
     * @return the value with its sign bit cleared
     */
    public static int hash(int value) {
        return value & 0x7fffffff;
    }

    /**
     * Creates an empty array whose capacity is the prime chosen by
     * {@code PrimeFinder.nextPrime} for the requested size.
     * The array is filled with the unused-slot marker.
     *
     * @param capacity requested number of slots; values below 5 are raised to 5
     * @return a new array in which every slot is marked unused
     */
    private AtomicIntegerArray makeEmptyArray(int capacity) {
        // The probe step is computed modulo (length - 2), so the table must never be tiny.
        capacity = Math.max(capacity, 5);
        capacity = PrimeFinder.nextPrime(capacity);
        AtomicIntegerArray fresh = new AtomicIntegerArray(capacity);
        for (int i = 0; i < capacity; i++) {
            fresh.set(i, UNUSED_VALUE);
        }
        return fresh;
    }

    /**
     * Removes all elements in the set and resets its size to zero.
     * Does not compact it.
     * The slots are wiped one by one, so this is not atomic with respect to
     * concurrent calls to {@link #add(int)}.
     */
    public void clear() {
        AtomicIntegerArray tmp = set;   // could change out from under us...
        for (int i = 0; i < tmp.length(); i++) {
            tmp.set(i, UNUSED_VALUE);
        }
        // Without this, size() would keep reporting the pre-clear count and the
        // table would keep expanding as if the old elements were still there.
        numElements.set(0);
    }
}