/**
* Copyright (C) 2007 Aelitis, All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* AELITIS, SAS au capital de 63.529,40 euros
* 8 Allee Lenotre, La Grille Royale, 78600 Le Mesnil le Roi, France.
*
*/
package org.gudy.azureus2.core3.util;
import java.util.*;
/**
 * A lighter (on memory) hash set.<br>
 *
 * Elements are kept in a single {@code Object[]} using open addressing with
 * triangular-number probing over a power-of-two sized table. {@code null}
 * elements are supported through an internal sentinel object.
 *
 * Advantages over HashSet:
 * <ul>
 * <li>Lower memory footprint
 * <li>Everything is stored in a single array, this might improve cache performance (not verified)
 * <li>Read-only operations on iterators should be concurrency-safe but they might return null values unexpectedly under concurrent modification (not verified)
 * </ul>
 *
 * Disadvantages:
 * <ul>
 * <li>removal is implemented with tombstone keys, this can significantly increase the lookup time if many values are removed. Use compactify() for scrubbing
 * <li>iterators and thus transfers to other collections are slower than comparable implementations
 * <li>the set does not store hashcodes and relies on either the elements themselves caching them (such as strings) or a fast computation of hashcodes
 * </ul>
 *
 * @author Aaron Grunthal
 * @create 28.11.2007
 */
public class LightHashSet extends AbstractSet implements Cloneable {
    /** Marker left in a slot whose element was removed, so probe chains stay intact. */
    private static final Object THOMBSTONE = new Object();
    /** Internal stand-in for a {@code null} element; a {@code null} slot means "empty". */
    private static final Object NULLKEY = new Object();
    private static final float DEFAULT_LOAD_FACTOR = 0.75f;
    private static final int DEFAULT_CAPACITY = 8;
    /** Largest power of two that fits into a positive int. */
    private static final int MAX_CAPACITY = 1 << 30;

    final float loadFactor;
    /** Number of live elements (tombstones are not counted). */
    int size;
    /** Backing table; its length is always a power of two. */
    Object[] data;

    /** Creates an empty set with the default capacity and load factor. */
    public LightHashSet() {
        this(DEFAULT_CAPACITY, DEFAULT_LOAD_FACTOR);
    }

    /**
     * Creates an empty set with the default load factor.
     *
     * @param initialCapacity requested capacity, rounded up to a power of two
     */
    public LightHashSet(final int initialCapacity) {
        this(initialCapacity, DEFAULT_LOAD_FACTOR);
    }

    /**
     * Creates a set holding the elements of {@code c}. If {@code c} is itself a
     * LightHashSet its backing array is copied directly instead of re-adding
     * every element.
     *
     * @param c source collection
     */
    public LightHashSet(final Collection c) {
        this(0);
        if (c instanceof LightHashSet) {
            final LightHashSet other = (LightHashSet) c;
            this.size = other.size;
            this.data = (Object[]) other.data.clone();
        } else {
            addAll(c);
        }
    }

    /**
     * Creates an empty set.
     *
     * @param initialCapacity requested capacity, rounded up to a power of two
     *            (values below 1 yield a capacity of 1)
     * @param loadFactor fill ratio at which the table grows; must be in (0, 1]
     * @throws IllegalArgumentException if the load factor is not in (0, 1]
     */
    public LightHashSet(int initialCapacity, final float loadFactor) {
        if (loadFactor > 1)
            throw new IllegalArgumentException("Load factor must not be > 1");
        // also rejects NaN; a non-positive factor would make checkCapacity() spin forever
        if (!(loadFactor > 0))
            throw new IllegalArgumentException("Load factor must be > 0");
        this.loadFactor = loadFactor;
        int capacity = 1;
        // stop at MAX_CAPACITY, shifting further would overflow to 0 and never terminate
        while (capacity < initialCapacity && capacity < MAX_CAPACITY)
            capacity <<= 1;
        data = new Object[capacity];
    }

    /**
     * Returns a copy of this set with its own backing array; the elements
     * themselves are not cloned.
     */
    public Object clone() {
        try {
            final LightHashSet copy = (LightHashSet) super.clone();
            copy.data = (Object[]) data.clone();
            return copy;
        } catch (CloneNotSupportedException e) {
            // cannot happen, this class implements Cloneable
            throw new RuntimeException(e);
        }
    }

    public Iterator iterator() {
        return new HashIterator();
    }

    /**
     * Iterator walking the backing array captured at creation time and skipping
     * empty slots and tombstones.
     */
    private class HashIterator implements Iterator {
        private int nextIdx = -1;
        private int currentIdx = -1;
        /** Snapshot of the table reference, used to detect a rehash before remove(). */
        private final Object[] itData = data;

        public HashIterator() {
            findNext();
        }

        /** Advances nextIdx to the next occupied slot or to itData.length if there is none. */
        private void findNext() {
            do
                nextIdx++;
            while (nextIdx < itData.length && (itData[nextIdx] == null || itData[nextIdx] == THOMBSTONE));
        }

        public void remove() {
            if (currentIdx == -1)
                throw new IllegalStateException("No entry to delete, use next() first");
            // the set has swapped its table (resize/compactify/clear) since this iterator was created
            if (itData != data)
                throw new ConcurrentModificationException("removal opperation not supported as concurrent structural modification occured");
            LightHashSet.this.removeForIndex(currentIdx);
            currentIdx = -1;
        }

        public boolean hasNext() {
            return nextIdx < itData.length;
        }

        /**
         * @throws NoSuchElementException if the iteration has no more elements
         */
        public Object next() {
            if (!hasNext())
                throw new NoSuchElementException("No more entries");
            currentIdx = nextIdx;
            findNext();
            final Object key = itData[currentIdx];
            return key != NULLKEY ? key : null;
        }
    }

    public boolean add(final Object key) {
        checkCapacity(1);
        return addInternal(key, false);
    }

    public int size() {
        return size;
    }

    public boolean addAll(final Collection c) {
        // grow once up front so the individual inserts never need to resize
        checkCapacity(c.size());
        boolean changed = false;
        for (final Iterator it = c.iterator(); it.hasNext();) {
            changed |= addInternal(it.next(), true);
        }
        return changed;
    }

    /**
     * @return the length of the backing array
     */
    public int capacity() {
        return data.length;
    }

    /**
     * Fetches an element which does equal() the provided object but is not necessarily the same object
     *
     * @param key object to retrieve
     * @return an object fulfilling the equals contract or null if no such
     *         object was found in this set. Looking up {@code null} always
     *         yields null; use {@link #contains(Object)} to test for it.
     */
    public Object get(Object key) {
        if (key == null)
            key = NULLKEY;
        final Object stored = data[nonModifyingFindIndex(key)];
        // never hand out the internal null sentinel
        if (stored != NULLKEY && keysEqual(stored, key))
            return stored;
        return null;
    }

    /**
     * Stores the key if it is not present yet.
     *
     * @param key element to add, may be null
     * @param bulkAdd if true the lookup never triggers a compaction
     * @return true if the key was inserted
     */
    private boolean addInternal(Object key, final boolean bulkAdd) {
        if (key == null)
            key = NULLKEY;
        final int idx = bulkAdd ? nonModifyingFindIndex(key) : findIndex(key);
        if (data[idx] == null || data[idx] == THOMBSTONE) {
            data[idx] = key;
            size++;
            return true;
        }
        return false;
    }

    public boolean remove(Object key) {
        if (size == 0)
            return false;
        if (key == null)
            key = NULLKEY;
        final int idx = findIndex(key);
        if (keysEqual(key, data[idx])) {
            removeForIndex(idx);
            return true;
        }
        return false;
    }

    /** Replaces the slot with a tombstone so that probe chains running through it stay intact. */
    private void removeForIndex(final int idx) {
        data[idx] = THOMBSTONE;
        size--;
    }

    /** Removes all elements and resets the table to the default capacity. */
    public void clear() {
        size = 0;
        data = new Object[DEFAULT_CAPACITY];
    }

    public boolean contains(Object key) {
        if (size == 0)
            return false;
        if (key == null)
            key = NULLKEY;
        return keysEqual(key, data[nonModifyingFindIndex(key)]);
    }

    /** Identity check first, then a hash comparison as a cheap filter before equals(). */
    private boolean keysEqual(final Object o1, final Object o2) {
        return o1 == o2 || (o1 != null && o2 != null && o1.hashCode() == o2.hashCode() && o1.equals(o2));
    }

    /**
     * Locates the slot holding the key or, if it is absent, the slot to insert
     * it into (the first tombstone on the probe path, otherwise the empty slot
     * ending it). May compact the table when too many tombstones are met.
     */
    private int findIndex(final Object keyToFind) {
        final int hash = keyToFind.hashCode();
        int probe = 1;
        int newIndex = hash & (data.length - 1);
        int thombStoneIndex = -1;
        int thombStoneCount = 0;
        final int thombStoneThreshold = Math.min(data.length - size, 100);
        // search until we find a free entry or an entry matching the key to insert
        while (data[newIndex] != null && !keysEqual(data[newIndex], keyToFind)) {
            if (data[newIndex] == THOMBSTONE) {
                if (thombStoneIndex == -1)
                    thombStoneIndex = newIndex;
                thombStoneCount++;
                if (thombStoneCount * 2 > thombStoneThreshold) {
                    // too many tombstones on the path: rebuild the table and restart probing
                    compactify(0.f);
                    thombStoneIndex = -1;
                    probe = 0;
                    thombStoneCount = 0; // not really necessary
                }
            }
            // triangular numbers visit every slot of a power-of-two sized table
            newIndex = (hash + ((probe + probe * probe) >> 1)) & (data.length - 1);
            probe++;
        }
        // if we didn't find an exact match then the first tombstone will do too for insert
        if (thombStoneIndex != -1 && !keysEqual(data[newIndex], keyToFind))
            return thombStoneIndex;
        return newIndex;
    }

    /**
     * Same lookup as {@link #findIndex(Object)} but never restructures the
     * table; the probe count is bounded by the table length instead.
     */
    private int nonModifyingFindIndex(final Object keyToFind) {
        final int hash = keyToFind.hashCode();
        int probe = 1;
        int newIndex = hash & (data.length - 1);
        int thombStoneIndex = -1;
        // search until we find a free entry or an entry matching the key to insert
        while (data[newIndex] != null && !keysEqual(data[newIndex], keyToFind) && probe < data.length) {
            if (data[newIndex] == THOMBSTONE && thombStoneIndex == -1)
                thombStoneIndex = newIndex;
            newIndex = (hash + ((probe + probe * probe) >> 1)) & (data.length - 1);
            probe++;
        }
        if (thombStoneIndex != -1 && !keysEqual(data[newIndex], keyToFind))
            return thombStoneIndex;
        return newIndex;
    }

    /** Doubles the table until n more elements fit below the load factor. */
    private void checkCapacity(final int n) {
        final int currentCapacity = data.length;
        if ((size + n) < currentCapacity * loadFactor)
            return;
        int newCapacity = currentCapacity;
        do
            newCapacity <<= 1;
        while (newCapacity * loadFactor < (size + n));
        adjustCapacity(newCapacity);
    }

    /**
     * will shrink the internal storage size to the least possible amount,
     * should be used after removing many entries for example
     *
     * @param compactingLoadFactor
     *            load factor for the compacting operation. Use 0f to compact
     *            with the load factor specified during instantiation. Use
     *            negative values of the desired load factors to compact only
     *            when it would reduce the storage size. Magnitudes of 1 or
     *            more fall back to the instance's load factor.
     */
    public void compactify(float compactingLoadFactor) {
        int newCapacity = 1;
        float adjustedLoadFactor = Math.abs(compactingLoadFactor);
        if (adjustedLoadFactor <= 0.f || adjustedLoadFactor >= 1.f)
            adjustedLoadFactor = loadFactor;
        while (newCapacity * adjustedLoadFactor < (size + 1))
            newCapacity <<= 1;
        if (newCapacity < data.length || compactingLoadFactor >= 0.f)
            adjustCapacity(newCapacity);
    }

    /** Rehashes all live elements into a fresh table of the given size, dropping tombstones. */
    private void adjustCapacity(final int newSize) {
        final Object[] oldData = data;
        data = new Object[newSize];
        size = 0;
        for (int i = 0; i < oldData.length; i++) {
            if (oldData[i] == null || oldData[i] == THOMBSTONE)
                continue;
            addInternal(oldData[i], true);
        }
    }

    /**
     * Benchmark comparing this class against java.util.HashSet; prints the
     * elapsed milliseconds of each phase to System.out.
     */
    static void test() {
        final Random rnd = new Random();
        final byte[] buffer = new byte[5];
        final String[] fillData = new String[(int) ((1 << 20) * 0.93f)];
        for (int i = 0; i < fillData.length; i++) {
            rnd.nextBytes(buffer);
            fillData[i] = new String(buffer);
            fillData[i].hashCode(); // let the string cache its hash before timing starts
        }
        long time;
        final Set s1 = new HashSet();
        final Set s2 = new LightHashSet();
        System.out.println("fill:");
        time = System.currentTimeMillis();
        for (int i = 0; i < fillData.length; i++)
            s1.add(fillData[i]);
        System.out.println(System.currentTimeMillis() - time);
        time = System.currentTimeMillis();
        for (int i = 0; i < fillData.length; i++)
            s2.add(fillData[i]);
        System.out.println(System.currentTimeMillis() - time);
        System.out.println("replace-fill:");
        time = System.currentTimeMillis();
        for (int i = 0; i < fillData.length; i++)
            s1.add(fillData[i]);
        System.out.println(System.currentTimeMillis() - time);
        time = System.currentTimeMillis();
        for (int i = 0; i < fillData.length; i++)
            s2.add(fillData[i]);
        System.out.println(System.currentTimeMillis() - time);
        System.out.println("get:");
        time = System.currentTimeMillis();
        for (int i = 0; i < fillData.length; i++)
            s1.contains(fillData[i]);
        System.out.println(System.currentTimeMillis() - time);
        time = System.currentTimeMillis();
        for (int i = 0; i < fillData.length; i++)
            s2.contains(fillData[i]);
        System.out.println(System.currentTimeMillis() - time);
        System.out.println("compactify light map");
        time = System.currentTimeMillis();
        ((LightHashSet) s2).compactify(0.95f);
        System.out.println(System.currentTimeMillis() - time);
        System.out.println("transfer to hashmap");
        time = System.currentTimeMillis();
        new HashSet(s1);
        System.out.println(System.currentTimeMillis() - time);
        time = System.currentTimeMillis();
        new HashSet(s2);
        System.out.println(System.currentTimeMillis() - time);
        System.out.println("transfer to lighthashmap");
        time = System.currentTimeMillis();
        new LightHashSet(s1);
        System.out.println(System.currentTimeMillis() - time);
        time = System.currentTimeMillis();
        new LightHashSet(s2);
        System.out.println(System.currentTimeMillis() - time);
        System.out.println("remove entry by entry");
        time = System.currentTimeMillis();
        for (int i = 0; i < fillData.length; i++)
            s1.remove(fillData[i]);
        System.out.println(System.currentTimeMillis() - time);
        time = System.currentTimeMillis();
        for (int i = 0; i < fillData.length; i++)
            s2.remove(fillData[i]);
        System.out.println(System.currentTimeMillis() - time);
    }

    /** Sleeps briefly between benchmark runs; restores the interrupt flag if interrupted. */
    private static void pause(final long millis) {
        try {
            Thread.sleep(millis);
        } catch (final InterruptedException e) {
            e.printStackTrace();
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Runs the benchmark twice and then a series of sanity checks comparing the
     * behaviour of this class against java.util.HashSet.
     */
    public static void main(final String[] args) {
        System.out.println("Call with -Xmx300m -Xcomp -server");
        Thread.currentThread().setPriority(Thread.MAX_PRIORITY);
        // some quadratic probing math test:
        /*
        boolean[] testArr = new boolean[1<<13];
        int hash = 0xc8d3 << 1;
        int position = hash & (testArr.length -1);
        int probe = 0;
        do
        {
        position = (hash + probe + probe * probe) & (testArr.length - 1);
        probe++;
        testArr[position] = true;
        } while (probe < (testArr.length>>1));
        for(int i = 0;i<testArr.length;i+=2)
        {
        if(testArr[i] != true)
        System.out.println("even element failed"+i);
        if(testArr[i+1] != false)
        System.out.println("uneven element failed"+(i+1));
        }
        */
        pause(300);
        test();
        System.out.println("-------------------------------------");
        System.gc();
        pause(300);
        test();
        System.out.println("\n\nPerforming sanity tests");
        final Random rnd = new Random();
        final byte[] buffer = new byte[25];
        final String[] fillData = new String[1048];
        for (int i = 0; i < fillData.length; i++) {
            rnd.nextBytes(buffer);
            fillData[i] = new String(buffer);
            fillData[i].hashCode();
        }
        final Set s1 = new HashSet();
        final Set s2 = new LightHashSet();
        for (int i = 0; i < fillData.length * 10; i++) {
            int random = rnd.nextInt(fillData.length);
            s1.add(null);
            s2.add(null);
            if (!s1.equals(s2))
                System.out.println("Error 0");
            s1.add(fillData[random]);
            s2.add(fillData[random]);
            if (!s1.equals(s2))
                System.out.println("Error 1");
        }
        // create tombstones, test removal
        for (int i = 0; i < fillData.length / 2; i++) {
            int random = rnd.nextInt(fillData.length);
            s1.remove(fillData[random]);
            s2.remove(fillData[random]);
            if (!s1.equals(s2))
                System.out.println("Error 2");
        }
        // do some more inserting, this time with tombstones
        for (int i = 0; i < fillData.length * 10; i++) {
            int random = rnd.nextInt(fillData.length);
            s1.add(fillData[random]);
            s1.add(null);
            s2.add(fillData[random]);
            s2.add(null);
            if (!s1.equals(s2))
                System.out.println("Error 3");
        }
        Iterator i1 = s1.iterator();
        Iterator i2 = s2.iterator();
        // now try removal with iterators
        while (i1.hasNext()) {
            i1.next();
            i1.remove();
            i2.next();
            i2.remove();
        }
        if (!s1.equals(s2))
            System.out.println("Error 4");
        // test churn/tombstones
        s2.clear();
        for (int i = 0; i < 100000; i++) {
            rnd.nextBytes(buffer);
            String s = new String(buffer);
            s2.add(s);
            s2.contains(s);
            s2.remove(s);
        }
        System.out.println("checks done");
    }
}