/*
* Licensed under the Apache License, Version 2.0 (the "License");
*
* You may not use this file except in compliance with the License.
*
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contributions from 2013-2017 were performed either by US government
* employees, or under US Veterans Health Administration contracts.
*
* US Veterans Health Administration contributions by government employees
* are work of the U.S. Government and are not subject to copyright
* protection in the United States. Portions contributed by government
* employees are USGovWork (17USC §105). Not subject to copyright.
*
* Contribution by contractors to the US Veterans Health Administration
* during this period are contractually contributed under the
* Apache License, Version 2.0.
*
* See: https://www.usa.gov/government-works
*
* Contributions prior to 2013:
*
* Copyright (C) International Health Terminology Standards Development Organisation.
* Licensed under the Apache License, Version 2.0.
*
*/
package sh.isaac.api.collections.uuidnidmap;
//~--- JDK imports ------------------------------------------------------------
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
//~--- non-JDK imports --------------------------------------------------------
import org.apache.mahout.math.function.DoubleProcedure;
import org.apache.mahout.math.list.ByteArrayList;
import org.apache.mahout.math.list.IntArrayList;
import org.apache.mahout.math.map.HashFunctions;
import org.apache.mahout.math.map.PrimeFinder;
//~--- classes ----------------------------------------------------------------
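/**
* Open-addressing hash map from UUID keys to {@code int} values. Each key is stored as two consecutive
* {@code long}s (most significant bits first) in a flat key table, and collisions are resolved by double
* hashing.
*
* @author kec
*/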
public class UuidToIntHashMap
extends AbstractUuidToIntHashMap
implements UuidToIntMap {
/** Serialization version identifier. */
private static final long serialVersionUID = -3621266181773866347L;
/** Slot state: the slot is empty; a lookup probe stops when it reaches such a slot. */
protected static final byte FREE = 0;
/** Slot state: the slot currently holds a key and its value. */
protected static final byte FULL = 1;
/** Slot state: the slot held a key that was removed; lookup probes skip over it. */
protected static final byte REMOVED = 2;
//~--- fields --------------------------------------------------------------
/**
* The hash table keys. Slot {@code i} occupies two consecutive elements: {@code table[i * 2]} holds the
* first half of the key and {@code table[i * 2 + 1]} the second half.
*
* @serial
*/
protected long table[];
/**
* The hash table values; {@code values[i]} belongs to the key stored in slot {@code i}.
*
* @serial
*/
protected int values[];
/**
* The state of each hash table entry (FREE, FULL, REMOVED). Its length is the slot capacity of the map.
*
* @serial
*/
protected byte state[];
/**
* The number of table entries in state==FREE.
*
* @serial
*/
protected int freeEntries;
//~--- constructors --------------------------------------------------------
/**
 * Creates an empty map using {@code defaultCapacity} as the initial capacity together with the default
 * minimum and maximum load factors.
 */
public UuidToIntHashMap() {
    this(defaultCapacity);
}
/**
 * Creates an empty map with the requested initial capacity, using {@code defaultMinLoadFactor} and
 * {@code defaultMaxLoadFactor} as load factors.
 *
 * @param initialCapacity the number of slots to start with.
 * @throws IllegalArgumentException if the initial capacity is less than zero.
 */
public UuidToIntHashMap(int initialCapacity) {
    this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
}
/**
 * Creates an empty map with the requested initial capacity and load factors. All initialization is
 * delegated to {@link #setUp(int, double, double)}.
 *
 * @param initialCapacity the initial capacity.
 * @param minLoadFactor the minimum load factor.
 * @param maxLoadFactor the maximum load factor.
 * @throws IllegalArgumentException if
 *
 * {@code initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 ||
 * maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)} .
 */
public UuidToIntHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
    setUp(initialCapacity, minLoadFactor, maxLoadFactor);
}
//~--- methods -------------------------------------------------------------
/**
 * Removes every (key,value) association from the receiver and then calls {@link #trimToSize()}.
 * Only the slot states and counters are reset; the key and value arrays are not overwritten.
 */
@Override
public void clear() {
    // NOTE(review): relies on ByteArrayList operating directly on the array it is given - confirm.
    final ByteArrayList stateView = new ByteArrayList(this.state);
    final int lastSlot = this.state.length - 1;
    stateView.fillFromToWith(0, lastSlot, FREE);

    this.distinct = 0;
    this.freeEntries = this.state.length;
    trimToSize();
}
/**
 * Returns a deep copy of the receiver: the key, value and state arrays of the copy are independent
 * duplicates of the receiver's arrays.
 *
 * @return a deep copy of the receiver.
 */
@Override
public Object clone() {
    final UuidToIntHashMap duplicate = (UuidToIntHashMap) super.clone();

    // Replace each array reference on the duplicate with its own copy.
    duplicate.state = duplicate.state.clone();
    duplicate.values = duplicate.values.clone();
    duplicate.table = duplicate.table.clone();
    return duplicate;
}
/**
 * Tests whether the receiver holds an association for the given key.
 *
 * @param key the key as a two-element array (first half, second half).
 * @return {@code true} if the receiver contains the specified key.
 */
@Override
public boolean containsKey(long[] key) {
    final int slot = indexOfKey(key);
    return slot >= 0;
}
/**
 * Tests whether the receiver holds an association for the given UUID.
 *
 * @param key the key
 * @return {@code true} if the receiver contains the specified key.
 */
@Override
public boolean containsKey(UUID key) {
    final int slot = indexOfKey(key);
    return slot >= 0;
}
/**
 * Tests whether at least one key of the receiver is associated with the given value.
 *
 * @param value the value
 * @return {@code true} if the receiver contains the specified value.
 */
@Override
public boolean containsValue(int value) {
    final int slot = indexOfValue(value);
    return slot >= 0;
}
/**
 * Makes sure the receiver has at least {@code minCapacity} slots. When the current table is smaller,
 * the receiver is rehashed into a table of {@code nextPrime(minCapacity)} slots; otherwise nothing
 * happens. <p> This method never need be called; it is for performance tuning only. Calling it before
 * {@code put()}ing a large number of associations lets the receiver grow once instead of many times.
 *
 * @param minCapacity the desired minimum capacity.
 */
@Override
public void ensureCapacity(int minCapacity) {
    if (minCapacity <= this.state.length) {
        return;  // already large enough
    }
    rehash(nextPrime(minCapacity));
}
/**
 * Applies a procedure to each key of the receiver, if any. Slots are visited from the highest index
 * down to the lowest, which is the same order used by {@link #forEachPair(UuidIntProcedure)}.
 * Each invocation receives a freshly allocated two-element array (first half, second half of the key).
 *
 * @param procedure the procedure to be applied. Stops iteration if the procedure returns {@code false},
 * otherwise continues.
 * @return {@code false} if the procedure stopped before all keys were iterated over, {@code true}
 * otherwise.
 */
@Override
public boolean forEachKey(UuidProcedure procedure) {
    for (int i = this.state.length; i-- > 0; ) {
        if (this.state[i] == FULL) {
            final long[] key = new long[2];

            key[0] = this.table[i * 2];
            key[1] = this.table[i * 2 + 1];

            // Stop only when the procedure asks for it; previously the method returned false
            // unconditionally after the first occupied slot.
            if (!procedure.apply(key)) {
                return false;
            }
        }
    }
    return true;
}
/**
 * Applies a procedure to each (key,value) pair of the receiver, if any. Slots are visited from the
 * highest index down to the lowest. Each invocation receives a freshly allocated two-element key array.
 *
 * @param procedure the procedure to be applied. Stops iteration if the procedure returns {@code false},
 * otherwise continues.
 * @return {@code false} if the procedure stopped before all pairs were iterated over, {@code true}
 * otherwise.
 */
@Override
public boolean forEachPair(final UuidIntProcedure procedure) {
    for (int slot = this.state.length - 1; slot >= 0; slot--) {
        if (this.state[slot] != FULL) {
            continue;
        }
        final long[] key = new long[2];

        key[0] = this.table[slot * 2];
        key[1] = this.table[slot * 2 + 1];

        final boolean keepGoing = procedure.apply(key, this.values[slot]);

        if (!keepGoing) {
            return false;
        }
    }
    return true;
}
/**
 * Returns the first key found that is associated with the given value; other keys may map to the same
 * value. Slots are searched from the highest index down to the lowest.
 *
 * @param value the value to search for.
 * @return a new two-element array (first half, second half) holding the first key for which
 * {@code get(key) == value}; {@code null} if no such key exists.
 */
@Override
public long[] keyOf(int value) {
    final int slot = indexOfValue(value);

    if (slot < 0) {
        return null;
    }

    final int offset = slot * 2;

    return new long[] { this.table[offset], this.table[offset + 1] };
}
/**
 * Fills all keys contained in the receiver into the specified list, starting at index 0. The list is
 * first resized to the number of entries in the receiver; each key then occupies two consecutive
 * elements of the list's backing array. Slots are visited from the highest index down to the lowest.
 * <p> This method can be used to iterate over the keys of the receiver.
 *
 * @param list the list to be filled, can have any size.
 */
@Override
public void keys(UuidArrayList list) {
    list.setSize(this.distinct);

    final long[] target = list.elements();
    final long[] source = this.table;
    final byte[] slotStates = this.state;
    int targetOffset = 0;

    for (int slot = slotStates.length - 1; slot >= 0; slot--) {
        if (slotStates[slot] != FULL) {
            continue;
        }
        final int sourceOffset = slot * 2;

        target[targetOffset] = source[sourceOffset];
        target[targetOffset + 1] = source[sourceOffset + 1];
        targetOffset += 2;
    }
}
/**
 * Collects every key that is associated with the given value.
 *
 * @param value the value to search for.
 * @return a new list with one {@code UUID} per matching slot, in the order reported by
 * {@link #indexesOfValue(int)}; empty if no key maps to the value.
 */
public List<UUID> keysOf(int value) {
    final List<Integer> slots = indexesOfValue(value);
    final List<UUID> matches = new ArrayList<>(slots.size());

    for (final int slot : slots) {
        final int msbOffset = slot * 2;
        final int lsbOffset = msbOffset + 1;

        matches.add(new UUID(this.table[msbOffset], this.table[lsbOffset]));
    }
    return matches;
}
/**
 * Associates the given key with the given value, replacing any value previously associated with the
 * key. Delegates to {@link #privatePut(long[], int)}.
 *
 * @param key the key the value shall be associated with.
 * @param value the value to be associated.
 * @return {@code true} if the receiver did not already contain such a key; {@code false} if the
 * receiver did already contain such a key - the new value has now replaced the formerly associated value.
 */
@Override
public boolean put(long[] key, int value) {
    final boolean added = privatePut(key, value);
    return added;
}
/**
 * Removes the given key with its associated value from the receiver, if present. The slot is marked
 * {@code REMOVED} rather than {@code FREE}; the stored key and value are left in place. If the entry
 * count drops below the low water mark the table is rehashed to a smaller capacity.
 *
 * @param key the key to be removed from the receiver.
 * @return {@code true} if the receiver contained the specified key, {@code false} otherwise.
 */
@Override
public boolean removeKey(long[] key) {
    final int slot = indexOfKey(key);

    if (slot >= 0) {
        this.state[slot] = REMOVED;
        this.distinct--;

        if (this.distinct < this.lowWaterMark) {
            rehash(chooseShrinkCapacity(this.distinct, this.minLoadFactor, this.maxLoadFactor));
        }
        return true;
    }
    return false;  // key not contained
}
/**
 * Shrinks the table so that it is only moderately larger than the current number of entries, releasing
 * superfluous internal memory. The table is never grown by this method.
 */
@Override
public void trimToSize() {
    // Keep 20% headroom: open addressing degrades sharply when the table is nearly full,
    // to the point where even rehashing can take very long.
    final int targetCapacity = nextPrime((int) (1 + 1.2 * size()));

    if (targetCapacity < this.state.length) {
        rehash(targetCapacity);
    }
}
/**
 * Fills all values contained in the receiver into the specified list, starting at index 0. The list is
 * first resized to the number of entries in the receiver. Slots are visited from the highest index down
 * to the lowest, the same order used by {@link #keys(UuidArrayList)}.
 * <p> This method can be used to iterate over the values of the receiver.
 *
 * @param list the list to be filled, can have any size.
 */
@Override
public void values(IntArrayList list) {
    list.setSize(this.distinct);

    final int[] target = list.elements();
    final int[] source = this.values;
    final byte[] slotStates = this.state;
    int next = 0;

    for (int slot = slotStates.length - 1; slot >= 0; slot--) {
        if (slotStates[slot] != FULL) {
            continue;
        }
        target[next] = source[slot];
        next++;
    }
}
/**
* Locates the slot for the given key in the receiver's own table, either the slot that already holds
* the key or the slot a new entry should be written to. Probing uses double hashing and runs in two
* phases: first over FULL slots only, then (if the first phase stopped on a REMOVED slot) over the rest
* of the probe sequence to check whether the key is stored further along.
*
* @param key the key to be added to the receiver.
* @return the index where the key would need to be inserted, if it is not already contained. Returns
* -index-1 if the key is already contained at slot index. Therefore, if the returned index < 0, then it
* is already contained at slot -index-1. If the returned index >= 0, then it is NOT already contained and
* should be inserted at slot index.
*/
protected int indexOfInsertion(long[] key) {
final long tab[] = this.table;
final byte stat[] = this.state;
final int length = this.state.length;
// Hash the sum of both key halves and clear the sign bit so the modulo results below are non-negative.
final int hash = HashFunctions.hash(key[0] + key[1]) & 0x7FFFFFFF;
int i = hash % length;
// NOTE(review): a length of 2 would divide by zero here; presumably nextPrime never yields 2 - confirm.
int decrement = hash % (length - 2); // double hashing, see
// http://www.eece.unm.edu/faculty/heileman/hash/node4.html
// int decrement = (hash / length) % length;
if (decrement == 0) {
// a step of zero would probe the same slot forever
decrement = 1;
}
// stop if we find a removed or free slot, or if we find the key itself
// do NOT skip over removed slots (yes, open addressing is like that...)
while ((stat[i] == FULL) && ((tab[i * 2] != key[0]) || (tab[i * 2 + 1] != key[1]))) {
i -= decrement;
// hashCollisions++;
if (i < 0) {
// wrap around to the end of the table
i += length;
}
}
if (stat[i] == REMOVED) {
// stop if we find a free slot, or if we find the key itself.
// do skip over removed slots (yes, open addressing is like that...)
// assertion: there is at least one FREE slot.
// j remembers the first REMOVED slot so it can be reused if the key turns out to be absent.
final int j = i;
while ((stat[i] != FREE) && ((stat[i] == REMOVED) || ((tab[i * 2] != key[0]) || (tab[i * 2 + 1] != key[1])))) {
i -= decrement;
// hashCollisions++;
if (i < 0) {
i += length;
}
}
if (stat[i] == FREE) {
// reached the end of the probe sequence without finding the key: reuse the REMOVED slot
i = j;
}
}
if (stat[i] == FULL) {
// key already contained at slot i.
// return a negative number identifying the slot.
return -i - 1;
}
// not already contained, should be inserted at slot i.
// return a number >= 0 identifying the slot.
return i;
}
/**
* Same probing logic as {@link #indexOfInsertion(long[])}, but operating on the supplied key and state
* arrays instead of the receiver's fields. {@link #rehash(int)} uses it to place entries into the
* newly allocated table before that table is installed.
*
* @param key the key to locate, as a two-element array.
* @param tab the key table to probe; slot {@code i} occupies {@code tab[i * 2]} and {@code tab[i * 2 + 1]}.
* @param stat the slot states belonging to {@code tab}; its length is the slot capacity.
* @return the slot index (>= 0) where the key should be inserted if it is not contained in {@code tab};
* {@code -index-1} if the key is already contained at slot {@code index}.
*/
protected int indexOfInsertionForRehash(long[] key, long[] tab, byte[] stat) {
final int length = stat.length;
// Hash the sum of both key halves and clear the sign bit so the modulo results below are non-negative.
final int hash = HashFunctions.hash(key[0] + key[1]) & 0x7FFFFFFF;
int i = hash % length;
// NOTE(review): a length of 2 would divide by zero here; presumably nextPrime never yields 2 - confirm.
int decrement = hash % (length - 2); // double hashing, see
// http://www.eece.unm.edu/faculty/heileman/hash/node4.html
// int decrement = (hash / length) % length;
if (decrement == 0) {
// a step of zero would probe the same slot forever
decrement = 1;
}
// stop if we find a removed or free slot, or if we find the key itself
// do NOT skip over removed slots (yes, open addressing is like that...)
while ((stat[i] == FULL) && ((tab[i * 2] != key[0]) || (tab[i * 2 + 1] != key[1]))) {
i -= decrement;
// hashCollisions++;
if (i < 0) {
// wrap around to the end of the table
i += length;
}
}
// When called from rehash the state array only ever contains FREE and FULL entries,
// so this branch is not taken on that path.
if (stat[i] == REMOVED) {
// stop if we find a free slot, or if we find the key itself.
// do skip over removed slots (yes, open addressing is like that...)
// assertion: there is at least one FREE slot.
// j remembers the first REMOVED slot so it can be reused if the key turns out to be absent.
final int j = i;
while ((stat[i] != FREE) && ((stat[i] == REMOVED) || ((tab[i * 2] != key[0]) || (tab[i * 2 + 1] != key[1])))) {
i -= decrement;
// hashCollisions++;
if (i < 0) {
i += length;
}
}
if (stat[i] == FREE) {
// reached the end of the probe sequence without finding the key: reuse the REMOVED slot
i = j;
}
}
if (stat[i] == FULL) {
// key already contained at slot i.
// return a negative number identifying the slot.
return -i - 1;
}
// not already contained, should be inserted at slot i.
// return a number >= 0 identifying the slot.
return i;
}
/**
* Looks up the slot holding the given key. Follows the same double-hashing probe sequence as
* {@link #indexOfInsertion(long[])}, skipping REMOVED slots and stopping at the first FREE slot.
*
* @param key the key to be searched in the receiver.
* @return the index where the key is contained in the receiver, returns -1 if the key was not found.
*/
protected int indexOfKey(long[] key) {
final long tab[] = this.table;
final byte stat[] = this.state;
final int length = stat.length;
// Hash the sum of both key halves and clear the sign bit so the modulo results below are non-negative.
final int hash = HashFunctions.hash(key[0] + key[1]) & 0x7FFFFFFF;
int i = hash % length;
// NOTE(review): a length of 2 would divide by zero here; presumably nextPrime never yields 2 - confirm.
int decrement = hash % (length - 2); // double hashing, see
// http://www.eece.unm.edu/faculty/heileman/hash/node4.html
// int decrement = (hash / length) % length;
if (decrement == 0) {
// a step of zero would probe the same slot forever
decrement = 1;
}
// stop if we find a free slot, or if we find the key itself.
// do skip over removed slots (yes, open addressing is like that...)
// The loop only terminates for an absent key if the table holds at least one FREE slot.
while ((stat[i] != FREE) && ((stat[i] == REMOVED) || ((tab[i * 2] != key[0]) || (tab[i * 2 + 1] != key[1])))) {
i -= decrement;
// hashCollisions++;
if (i < 0) {
// wrap around to the end of the table
i += length;
}
}
if (stat[i] == FREE) {
return -1; // not found
}
return i; // found, return index where key is contained
}
/**
 * Looks up the slot holding the given UUID by splitting it into its most and least significant bits
 * and delegating to {@link #indexOfKey(long[])}.
 *
 * @param key the key
 * @return the index where the key is contained in the receiver, -1 if the key was not found.
 */
protected int indexOfKey(UUID key) {
    final long[] halves = new long[] { key.getMostSignificantBits(), key.getLeastSignificantBits() };

    return indexOfKey(halves);
}
/**
 * Scans the occupied slots, from the highest index down to the lowest, for the given value.
 *
 * @param value the value to be searched in the receiver.
 * @return the index of the first matching slot encountered, -1 if the value was not found.
 */
protected int indexOfValue(int value) {
    final int[] slotValues = this.values;
    final byte[] slotStates = this.state;
    int slot = slotStates.length - 1;

    while (slot >= 0) {
        if ((slotStates[slot] == FULL) && (slotValues[slot] == value)) {
            return slot;
        }
        slot--;
    }
    return -1;  // not found
}
/**
 * Scans the occupied slots, from the highest index down to the lowest, and collects every slot whose
 * value equals the given value.
 *
 * @param value the value to search for.
 * @return a new list of matching slot indexes in scan order; empty if the value was not found.
 */
protected List<Integer> indexesOfValue(int value) {
    final List<Integer> matches = new ArrayList<>();
    final int[] slotValues = this.values;
    final byte[] slotStates = this.state;
    int slot = slotStates.length - 1;

    while (slot >= 0) {
        if ((slotStates[slot] == FULL) && (slotValues[slot] == value)) {
            matches.add(slot);
        }
        slot--;
    }
    return matches;
}
/**
 * Stores the (key,value) association. An existing key only has its value overwritten. For a new key
 * the table is grown first if the entry count already exceeds the high water mark, and grown afterwards
 * if the insertion consumed the last FREE slot.
 *
 * @param key the key as a two-element array (first half, second half).
 * @param value the value to associate with the key.
 * @return {@code true} if the key was newly added; {@code false} if it was already present and only its
 * value was replaced.
 */
protected boolean privatePut(long[] key, int value) {
    final int probe = indexOfInsertion(key);

    if (probe < 0) {
        // A negative result encodes the slot that already holds the key.
        final int existingSlot = -probe - 1;

        this.values[existingSlot] = value;
        return false;
    }

    if (this.distinct > this.highWaterMark) {
        // Grow first, then retry: the insertion slot is different in the new table.
        rehash(chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor));
        return privatePut(key, value);
    }

    final int keyOffset = probe * 2;

    this.table[keyOffset] = key[0];
    this.table[keyOffset + 1] = key[1];
    this.values[probe] = value;

    // Reusing a REMOVED slot does not consume a FREE one.
    if (this.state[probe] == FREE) {
        this.freeEntries--;
    }
    this.state[probe] = FULL;
    this.distinct++;

    if (this.freeEntries < 1) {
        // Probing needs at least one FREE slot to terminate, so grow right away.
        rehash(chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor));
    }
    return true;
}
/**
 * Rehashes the contents of the receiver into a new table with a smaller or larger capacity. This method
 * is called automatically when the number of keys in the receiver exceeds the high water mark or falls
 * below the low water mark. Only FULL slots are carried over, so REMOVED markers are discarded. Does
 * nothing if the requested capacity equals the current one.
 *
 * @param newCapacity the number of slots of the new table.
 * @throws IllegalArgumentException if {@code newCapacity} is not strictly greater than the current
 * number of entries; probing relies on at least one slot staying FREE.
 */
protected void rehash(int newCapacity) {
    final int oldCapacity = this.state.length;

    if (oldCapacity == newCapacity) {
        return;
    }

    // Without a spare FREE slot the probe loops would never terminate.
    if (newCapacity <= this.distinct) {
        throw new IllegalArgumentException("newCapacity (" + newCapacity
                + ") must be greater than the number of entries (" + this.distinct + ")");
    }

    final long oldTable[] = this.table;
    final int oldValues[] = this.values;
    final byte oldState[] = this.state;
    final long newTable[] = new long[newCapacity * 2 + 1];
    final int newValues[] = new int[newCapacity];
    final byte newState[] = new byte[newCapacity];

    for (int i = oldCapacity; i-- > 0; ) {
        if (oldState[i] == FULL) {
            // Allocate the scratch key only for occupied slots.
            final long[] element = new long[2];

            element[0] = oldTable[i * 2];
            element[1] = oldTable[i * 2 + 1];

            final int index = indexOfInsertionForRehash(element, newTable, newState);

            newTable[index * 2] = element[0];
            newTable[index * 2 + 1] = element[1];
            newValues[index] = oldValues[i];
            newState[index] = FULL;
        }
    }
    updateAfterRehash(newCapacity, newTable, newValues, newState);
}
/**
 * Installs the arrays produced by a rehash and recomputes the bookkeeping that depends on the
 * capacity: both water marks and the FREE slot count.
 *
 * @param newCapacity the number of slots of the new table.
 * @param newTable the new key table.
 * @param newValues the new value array.
 * @param newState the new slot state array.
 */
protected void updateAfterRehash(int newCapacity, long[] newTable, int[] newValues, byte[] newState) {
    this.lowWaterMark = chooseLowWaterMark(newCapacity, this.minLoadFactor);
    this.highWaterMark = chooseHighWaterMark(newCapacity, this.maxLoadFactor);

    this.state = newState;
    this.values = newValues;
    this.table = newTable;

    // Every slot that is not occupied is FREE after a rehash.
    final int occupied = this.distinct;

    this.freeEntries = newCapacity - occupied;
}
//~--- get methods ---------------------------------------------------------
/**
 * Returns the tracked count of table entries in state==FREE.
 *
 * @return the number of table entries in state==FREE
 */
public int getFreeEntries() {
    return freeEntries;
}
//~--- set methods ---------------------------------------------------------
/**
 * Overwrites the tracked count of table entries in state==FREE. The value is stored as given; the
 * state array is not inspected.
 *
 * @param freeEntries the new number of table entries in state==FREE
 */
public void setFreeEntries(int freeEntries) {
    this.freeEntries = freeEntries;
}
//~--- get methods ---------------------------------------------------------
/**
 * Returns the value associated with the specified key. Because the "not present" result is an ordinary
 * {@code int}, use {@link #containsKey(long[])} when a stored {@code Integer.MAX_VALUE} must be
 * distinguished from a missing key.
 *
 * @param key the key to be searched for.
 * @return the value associated with the specified key; {@code Integer.MAX_VALUE} if no such key is
 * present.
 */
@Override
public int get(long[] key) {
    final int slot = indexOfKey(key);

    return (slot < 0) ? Integer.MAX_VALUE : this.values[slot];
}
/**
 * Returns the value associated with the specified UUID. Because the "not present" result is an ordinary
 * {@code int}, use {@link #containsKey(UUID)} when a stored {@code Integer.MAX_VALUE} must be
 * distinguished from a missing key.
 *
 * @param key the key to be searched for.
 * @return the value associated with the specified key; {@code Integer.MAX_VALUE} if no such key is
 * present.
 */
@Override
public int get(UUID key) {
    final int slot = indexOfKey(key);

    return (slot < 0) ? Integer.MAX_VALUE : this.values[slot];
}
/**
 * Returns the array holding the state of each hash table entry (FREE, FULL, REMOVED). The internal
 * array itself is returned, not a copy.
 *
 * @return the state of each hash table entry (FREE, FULL, REMOVED)
 */
public byte[] getState() {
    return state;
}
/**
 * Returns the array holding the hash table keys, two consecutive elements per slot. The internal array
 * itself is returned, not a copy.
 *
 * @return the hash table keys
 */
public long[] getTable() {
    return table;
}
//~--- set methods ---------------------------------------------------------
/**
 * Initializes the receiver: lets the superclass process the arguments, rounds the capacity to a prime,
 * allocates the key, value and state arrays and resets all counters and water marks.
 *
 * @param initialCapacity the initial capacity of the receiver.
 * @param minLoadFactor the minLoadFactor of the receiver.
 * @param maxLoadFactor the maxLoadFactor of the receiver.
 * @throws IllegalArgumentException if
 *
 * {@code initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 ||
 * maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)} .
 */
@Override
protected final void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
    super.setUp(initialCapacity, minLoadFactor, maxLoadFactor);

    final int primeCapacity = nextPrime(initialCapacity);

    // Open addressing needs at least one FREE slot at any time.
    final int capacity = (primeCapacity == 0) ? 1 : primeCapacity;

    this.table = new long[capacity * 2 + 1];
    this.values = new int[capacity];
    this.state = new byte[capacity];

    this.minLoadFactor = minLoadFactor;

    // At the largest available prime the table cannot grow any further, so allow it to fill up.
    // Memory will be exhausted long before this pathological case happens, anyway.
    this.maxLoadFactor = (capacity == PrimeFinder.largestPrime) ? 1.0 : maxLoadFactor;

    this.distinct = 0;
    this.freeEntries = capacity;

    // The low water mark is established upon the first expansion. Setting it now would make the
    // table shrink immediately on the first put(...); an explicit initial capacity implies that a
    // young map is allowed to sit below its low water mark. See ensureCapacity(...).
    this.lowWaterMark = 0;
    this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
}
//~--- get methods ---------------------------------------------------------
/**
 * Returns the array holding the hash table values, one element per slot. The internal array itself is
 * returned, not a copy.
 *
 * @return the hash table values
 */
public int[] getValues() {
    return values;
}
}