/** * Copyright 2014, Emory University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package edu.emory.clir.clearnlp.collection.map; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import com.carrotsearch.hppc.IntArrayList; import com.carrotsearch.hppc.IntStack; import edu.emory.clir.clearnlp.collection.pair.ObjectIntPair; import edu.emory.clir.clearnlp.util.HashUtils; import edu.emory.clir.clearnlp.util.MathUtils; /** * @since 3.0.0 * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) */ public class StringIntMinimalPerfectHashMap { private ObjectIntHashMap<String> m_key; private int n_index; private int[] g_hashes; private int[] g_values; public StringIntMinimalPerfectHashMap() { m_key = new ObjectIntHashMap<>(); n_index = 0; } public void addkey(String key) { if (!m_key.containsKey(key)) m_key.put(key, n_index++); } public void initHashFunction() { int vsize = (int)MathUtils.nextPrimeNumber((int)(1.25 * m_key.size())); int hsize = vsize / 5; StringList[] patterns = getEmptyList(hsize); int[] hashes = new int[hsize]; int[] values = new int[vsize]; int i, b, d, len, item, slot; IntArrayList slots; StringList pattern; IntStack freelist; // initialize Arrays.fill(values, -1); for (ObjectIntPair<String> p : m_key) patterns[hash(p.o, 0, hsize)].add(p.o); // sort patterns in descending order Arrays.sort(patterns, Collections.reverseOrder()); // |pattern| > 1 for (b=0; b<hsize; b++) { pattern = patterns[b]; len = pattern.size(); if (len <= 1) break; slots = new IntArrayList(); item = 0; d = 1; // rotate patterns and search for suitable displacement while (item < len) { slot = hash(pattern.get(item), d, vsize); if (values[slot] != -1 || slots.contains(slot)) { slots = new IntArrayList(); item = 0; d++; } else { slots.add(slot); item++; } } hashes[hash(pattern.get(0), 0, hsize)] = d; for (i=0; i<len; i++) values[slots.get(i)] = m_key.get(pattern.get(i)); } // process patterns with one key and use a negative value of d freelist = new IntStack(); for (i=0; i<vsize; i++) { if (values[i] == -1) freelist.add(i); } // |pattern| == 1 for (; b<hsize; b++) { pattern = patterns[b]; len = pattern.size(); if (len == 0) break; slot = freelist.pop(); hashes[hash(pattern.get(0), 0, hsize)] = -slot-1; values[slot] = m_key.get(pattern.get(0)); } g_hashes = hashes; g_values = values; } public int lookup(String key) { int d = g_hashes[hash(key, 0, g_hashes.length)]; d = (d < 0) ? -d-1 : hash(key, d, g_values.length); return g_values[d]; } /** Called by {@link #initHashFunction()}. */ private int hash(String key, int basis, int size) { int h = (basis == 0) ? HashUtils.fnv1aHash32(key) : HashUtils.fnv1aHash32(key, basis); return MathUtils.divisor(h, size); } /** Called by {@link #initHashFunction()}. */ private class StringList extends ArrayList<String> implements Comparable<StringList> { private static final long serialVersionUID = -6992653145004684254L; @Override public int compareTo(StringList list) { return size() - list.size(); } } /** Called by {@link #initHashFunction()}. */ private StringList[] getEmptyList(int size) { StringList[] list = new StringList[size]; int i; for (i=0; i<size; i++) list[i] = new StringList(); return list; } }