/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.common;
/**
 * A hash groupify operator. It implements a hash table with chaining that
 * counts how often each distinct element has been added. Every distinct
 * element is represented by one {@link Group}; groups are additionally linked
 * in the order in which they were first inserted, so that they can be iterated
 * via {@link #first()} and {@link Group#next()}.
 * <p>
 * Elements are compared using {@code hashCode()} and {@code equals()}.
 * {@code null} elements are not supported. This class performs no
 * synchronization.
 *
 * @author Fabian Prasser
 * @author Florian Kohlmayer
 *
 * @param <T> the type of the elements being grouped
 */
public class Groupify<T> {

    /**
     * An entry of the table: one distinct element together with the number of
     * times it has been added.
     *
     * @author Fabian Prasser
     * @author Florian Kohlmayer
     *
     * @param <U> the type of the element held by this entry
     */
    public static class Group<U> {

        /** Cached hash code of the element. */
        private final int hashcode;

        /** The element represented by this entry. */
        private final U element;

        /** Number of times the element has been added. */
        private int count = 0;

        /** Next entry in the same bucket (collision chain). */
        private Group<U> next = null;

        /** Next entry in insertion order. */
        private Group<U> nextInOrder = null;

        /**
         * Creates a new instance.
         *
         * @param element  the element represented by this entry
         * @param hashCode the hash code of the element
         */
        private Group(U element, int hashCode) {
            this.hashcode = hashCode;
            this.element = element;
        }

        /**
         * Returns the number of times the element has been added.
         *
         * @return the count
         */
        public int getCount() {
            return count;
        }

        /**
         * Returns the element represented by this entry.
         *
         * @return the element
         */
        public U getElement() {
            return element;
        }

        /**
         * Returns whether another entry follows this one in insertion order.
         *
         * @return {@code true} if {@link #next()} returns a non-null entry
         */
        public boolean hasNext() {
            return nextInOrder != null;
        }

        /**
         * Returns the entry following this one in insertion order.
         *
         * @return the next entry, or {@code null} if this is the last entry
         */
        public Group<U> next() {
            return nextInOrder;
        }
    }

    /** Capacity used when the requested capacity is not positive. */
    private static final int DEFAULT_CAPACITY = 16;

    /** Largest supported number of buckets (largest power of two that fits in an int). */
    private static final int MAX_CAPACITY = 1 << 30;

    /** Load factor. */
    private static final float LOAD_FACTOR = 0.75f;

    /** Current number of entries, i.e. of distinct elements. */
    private int count;

    /** The entry array; its length is always a power of two. */
    private Group<T>[] buckets;

    /** The first entry in insertion order. */
    private Group<T> first;

    /** The last entry in insertion order. */
    private Group<T> last;

    /**
     * Maximum number of entries that can be put in this map before having to
     * rehash.
     */
    private int threshold;

    /**
     * Constructs a new hash groupify operator.
     *
     * @param capacity the requested initial capacity; it is rounded up to the
     *                 next power of two and capped at {@code 1 << 30}. Zero and
     *                 negative values select a default capacity of 16.
     */
    @SuppressWarnings("unchecked")
    public Groupify(int capacity) {
        final int length = calculateCapacity(capacity);
        this.count = 0;
        this.buckets = new Group[length];
        this.threshold = calculateThreshold(buckets.length, LOAD_FACTOR);
    }

    /**
     * Adds an element. If an equal element has been added before, the count of
     * its entry is incremented; otherwise a new entry with count one is
     * appended to the insertion order.
     *
     * @param element the element to add, must not be {@code null}
     * @throws NullPointerException if {@code element} is {@code null}
     */
    public void add(T element) {
        final int hash = element.hashCode();

        // Find or create entry
        int index = hash & (buckets.length - 1);
        Group<T> entry = findEntry(element, index, hash);
        if (entry == null) {
            if (++count > threshold) {
                rehash();
                // The table may have grown, so the bucket index must be recomputed
                index = hash & (buckets.length - 1);
            }
            entry = createEntry(element, index, hash);
        }

        // Track size
        entry.count++;
    }

    /**
     * Returns the first entry in insertion order, for iterations.
     *
     * @return the first entry, or {@code null} if nothing has been added yet
     */
    public Group<T> first() {
        return first;
    }

    /**
     * Returns the entry matching the given element, if any.
     *
     * @param element the element to look up, must not be {@code null}
     * @return the matching entry, or {@code null} if there is none
     * @throws NullPointerException if {@code element} is {@code null}
     */
    public Group<T> get(T element) {
        final int hash = element.hashCode();
        final int index = hash & (buckets.length - 1);
        return findEntry(element, index, hash);
    }

    /**
     * Returns the current number of entries, i.e. of distinct elements.
     *
     * @return the number of entries
     */
    public int size() {
        return count;
    }

    /**
     * Calculates the capacity to use for a requested capacity: the smallest
     * power of two that is greater than or equal to the request.
     *
     * @param x the requested capacity
     * @return {@code 1 << 30} if {@code x} is at least that large, 16 if
     *         {@code x} is zero or negative, otherwise {@code x} rounded up to
     *         a power of two
     */
    private int calculateCapacity(int x) {
        if (x >= MAX_CAPACITY) { return MAX_CAPACITY; }
        // Negative requests would otherwise yield a zero or negative array
        // length, because the arithmetic shifts below smear the sign bit.
        if (x <= 0) { return DEFAULT_CAPACITY; }
        // Smear the highest set bit of (x - 1) into all lower bits, then add one
        x = x - 1;
        x |= x >> 1;
        x |= x >> 2;
        x |= x >> 4;
        x |= x >> 8;
        x |= x >> 16;
        return x + 1;
    }

    /**
     * Computes the threshold for rehashing.
     *
     * @param buckets    the number of buckets
     * @param loadFactor the load factor
     * @return {@code buckets * loadFactor}, truncated to an int
     */
    private int calculateThreshold(final int buckets, final float loadFactor) {
        return (int) (buckets * loadFactor);
    }

    /**
     * Creates a new entry, puts it at the head of the given bucket and appends
     * it to the insertion order. The entry's count is left at zero.
     *
     * @param element  the element
     * @param index    the index of the bucket
     * @param hashcode the hash code of the element
     * @return the new entry
     */
    private Group<T> createEntry(final T element,
                                 final int index,
                                 final int hashcode) {
        final Group<T> entry = new Group<T>(element, hashcode);
        entry.next = buckets[index];
        buckets[index] = entry;
        if (first == null) {
            first = entry;
            last = entry;
        } else {
            last.nextInOrder = entry;
            last = entry;
        }
        return entry;
    }

    /**
     * Returns the entry for the given element in the given bucket.
     *
     * @param element  the element
     * @param index    the index of the bucket
     * @param hashcode the hash code of the element
     * @return the matching entry, or {@code null} if there is none
     */
    private Group<T> findEntry(final T element,
                               final int index,
                               final int hashcode) {
        Group<T> m = buckets[index];
        // Compare the cached hash codes first to avoid unnecessary equals() calls
        while ((m != null) && ((m.hashcode != hashcode) || !element.equals(m.element))) {
            m = m.next;
        }
        return m;
    }

    /**
     * Rehashes this operator: doubles the number of buckets and redistributes
     * all entries. Once the maximum capacity has been reached the table no
     * longer grows.
     */
    private void rehash() {
        if (buckets.length >= MAX_CAPACITY) {
            // Doubling would overflow to a negative array length. Keep the
            // current table and make sure rehashing is not attempted again.
            threshold = Integer.MAX_VALUE;
            return;
        }

        // The length is a power of two below MAX_CAPACITY, so doubling is safe
        final int length = buckets.length << 1;
        @SuppressWarnings("unchecked")
        final Group<T>[] newbuckets = new Group[length];

        // Walk all entries in insertion order and rebuild the collision chains
        Group<T> entry = first;
        while (entry != null) {
            final int index = entry.hashcode & (length - 1);
            entry.next = newbuckets[index];
            newbuckets[index] = entry;
            entry = entry.nextInOrder;
        }
        buckets = newbuckets;
        threshold = calculateThreshold(buckets.length, LOAD_FACTOR);
    }
}