/**
*
*/
package nuggets;
import java.util.Arrays;
import java.util.Random;
/**
 * Builds a collision-free hash function for a fixed set of string keys.
 * <p>
 * The builder first picks a small set of character positions ("columns") that are
 * sufficient to tell all keys apart, then repeatedly draws random multipliers for
 * those columns until every key lands in its own slot of the lookup table. The
 * resulting function can also be rendered as Java source text via
 * {@link #getHashMethodString()}.
 *
 * @author Andrzej
 */
public class PerfectHash
{
    private static final int DEFAULT_PERFECT_SIZE = 23;

    /** Source of the random multipliers tried while searching for a collision-free table. */
    private static final Random rnd = new Random(System.currentTimeMillis());

    /** The key table; after {@link #setKeys(String[])} it is indexed by hash value. */
    protected String[] keys;
    /** The hash function distributes among [0..n-1]. */
    protected int n;
    /** Random multipliers, one per entry of {@link #hash_cols}. */
    protected int[] rnd_tab;
    /** Character positions that take part in the hash, in ascending order. */
    protected int[] hash_cols;
    /** Length of the shortest key. */
    protected int minlen;
    /** Length of the longest key. */
    protected int maxlen;
    /** Key count from which the table is allowed to be larger than the key set. */
    private int perfect_size = DEFAULT_PERFECT_SIZE;

    /** Creates an empty instance; call {@link #setKeys(String[])} before hashing. */
    public PerfectHash()
    { /* NOP */
    }

    /**
     * Builds the hash for the given keys. Keys must be unique.
     *
     * @param keys the keys to hash
     * @throws IllegalArgumentException if two keys cannot be told apart
     */
    public PerfectHash(String[] keys)
    {
        setKeys(keys);
    }

    /**
     * Builds the hash for the given keys with a custom size threshold.
     *
     * @param keys the keys to hash; must be unique
     * @param perfect_size the maximum keys for perfect hash; with this many keys or
     *        more the table is made larger than the key set
     * @throws IllegalArgumentException if two keys cannot be told apart
     */
    public PerfectHash(String[] keys, int perfect_size)
    {
        this.perfect_size = perfect_size;
        setKeys(keys);
    }

    /**
     * Computes the hash columns, the random multipliers and the inverse key table
     * for the given keys.
     * <p>
     * The search for multipliers repeats until no two keys collide; there is no
     * upper bound on the number of attempts.
     *
     * @param keys the keys to hash; must be unique
     * @throws IllegalArgumentException if two keys are equal, or differ only by
     *         trailing {@code '\0'} characters, so that no column can separate them
     */
    public void setKeys(String[] keys)
    {
        this.keys = keys;
        final int keyCount = keys.length;

        // Determine the shortest and longest key length.
        maxlen = 0;
        minlen = Integer.MAX_VALUE;
        for(int k = 0; k < keyCount; k++)
        {
            int len = keys[k].length();
            if(len > maxlen) maxlen = len;
            if(len < minlen) minlen = len;
        }

        hash_cols = selectColumns(keys, maxlen);
        rnd_tab = new int[hash_cols.length];

        // Small key sets get a table of exactly their size; larger ones get head room.
        n = keyCount;
        if(n >= perfect_size) n = (int)(n * Math.log(n - perfect_size + 3));

        final String[] inv = new String[n];
        do
        {
            for(int j = rnd_tab.length - 1; j >= 0; j--)
            {
                rnd_tab[j] = 0x7fffffff & rnd.nextInt(); // non-negative multiplier
            }
        }
        while(!fillTable(keys, inv));
        this.keys = inv; // set the inverse table
    }

    /**
     * Places every key at its hash slot; stops at the first collision.
     *
     * @return true when all keys were placed without collision
     */
    private boolean fillTable(String[] source, String[] table)
    {
        Arrays.fill(table, null);
        for(int k = source.length - 1; k >= 0; k--)
        {
            int h = hash(source[k]);
            if(table[h] != null) return false;
            table[h] = source[k];
        }
        return true;
    }

    /**
     * Greedily chooses character columns until every key is distinguished by the
     * chosen columns; returns them sorted ascending.
     */
    private static int[] selectColumns(String[] keys, int maxlen)
    {
        final int keyCount = keys.length;
        // Represent each key as a char row padded with '\0' up to maxlen.
        char[][] rows = new char[keyCount][];
        for(int k = 0; k < keyCount; k++)
        {
            rows[k] = new char[maxlen];
            keys[k].getChars(0, keys[k].length(), rows[k], 0);
        }
        int[] chosen = new int[maxlen];
        int chosenCount = 0;
        // Loop as long as there are 2 or more rows that still need a difference column.
        int rowsLeft = keyCount;
        while(rowsLeft > 1)
        {
            // Find an unused column with the fewest duplicated characters; on a tie
            // the lower column index wins because columns are scanned from the right.
            int best = -1;
            int bestDupl = Integer.MAX_VALUE;
            col_loop: for(int col = maxlen - 1; col >= 0; col--)
            {
                for(int i = 0; i < chosenCount; i++)
                {
                    if(chosen[i] == col) continue col_loop;
                }
                int dupl = 0;
                for(int k = rowsLeft - 1; k >= 0; k--)
                {
                    char ch = rows[k][col];
                    for(int k2 = k - 1; k2 >= 0; k2--)
                    {
                        if(rows[k2][col] == ch)
                        {
                            dupl++;
                            break;
                        }
                    }
                    if(dupl > bestDupl) continue col_loop; // already worse than best
                }
                best = col;
                bestDupl = dupl;
            }
            if(best < 0)
            {
                // Every column is used and some rows are still identical.
                throw new IllegalArgumentException(
                    "keys must be unique (and must not differ only by trailing NUL characters)");
            }
            chosen[chosenCount++] = best;

            // Drop every row that is now unique with respect to the chosen columns.
            key_loop: for(int k = rowsLeft - 1; k >= 0; k--)
            {
                for(int k2 = rowsLeft - 1; k2 >= 0; k2--)
                {
                    if(k2 != k && sameOnColumns(rows[k], rows[k2], chosen, chosenCount))
                    {
                        continue key_loop;
                    }
                }
                rows[k] = rows[--rowsLeft]; // move the last live row into the gap
                rows[rowsLeft] = null;
            }
        }
        int[] result = Arrays.copyOf(chosen, chosenCount);
        Arrays.sort(result);
        return result;
    }

    /** Tells whether two rows carry equal characters in the first {@code count} listed columns. */
    private static boolean sameOnColumns(char[] a, char[] b, int[] cols, int count)
    {
        for(int i = 0; i < count; i++)
        {
            int c = cols[i];
            if(a[c] != b[c]) return false;
        }
        return true;
    }

    /**
     * Computes the hash of a name. Columns at or beyond the end of the name are
     * skipped.
     *
     * @param name the string to hash
     * @return the hash based on the table, in [0..n-1]
     * @throws ArithmeticException if the table was built from an empty key array
     *         (the range {@code n} is then 0)
     * @throws NullPointerException if no keys have been set yet
     */
    public int hash(String name)
    {
        final int len = name.length();
        int h = 0;
        for(int i = hash_cols.length - 1; i >= 0; i--)
        {
            int col = hash_cols[i];
            if(col < len) h += rnd_tab[i] * name.charAt(col);
        }
        return (h >>> 15) % n;
    }

    /**
     * Renders the current hash function as Java source text.
     *
     * @return a string of the hash method
     */
    public String getHashMethodString()
    {
        String method = "\nprivate static final int hash(String name) {\n";
        if(hash_cols.length == 0) return method + " return 0;\n}\n";
        // hash_cols is sorted, so the last entry is the largest column. If even that
        // one lies inside the shortest key, no length checks are required.
        if(hash_cols[hash_cols.length - 1] < minlen)
        {
            method += shortHashMethod();
        }
        else
        {
            method += longHashMethod();
        }
        return method + "}\n";
    }

    /**
     * Renders the method body with a length check before every column that may lie
     * beyond the end of a key.
     *
     * @return a long representation of the method
     */
    protected String longHashMethod()
    {
        StringBuilder s = new StringBuilder(" int h=0;\n int c=name.length();\n");
        for(int i = 0; i < hash_cols.length; i++)
        {
            int col = hash_cols[i];
            if(col >= minlen)
            {
                if(i > 0)
                {
                    s.append(" if (c<=").append(col).append(") return (h>>>15)%").append(n).append(";\n");
                }
                else
                {
                    // nothing accumulated yet, so the result is 0
                    s.append(" if (c<=").append(col).append(") return 0;\n");
                }
            }
            s.append(" h += ").append(rnd_tab[i]).append("*name.charAt(").append(col).append(");\n");
        }
        return s.append(" return (h>>>15)%").append(n).append(";\n").toString();
    }

    /**
     * Renders the method body as a single return expression without length checks.
     *
     * @return short representation
     */
    protected String shortHashMethod()
    {
        StringBuilder s = new StringBuilder(" return ((");
        for(int i = hash_cols.length - 1; i > 0; i--)
        {
            s.append(rnd_tab[i]).append("*name.charAt(").append(hash_cols[i]).append(")+");
        }
        return s.append(rnd_tab[0]).append("*name.charAt(").append(hash_cols[0])
                .append("))>>>15)%").append(n).append(";\n").toString();
    }

    /**
     * @return the keys - in lookup order
     */
    public String[] getKeys()
    {
        return keys;
    }

    /**
     * This is the same as the length of the inverse keys table
     *
     * @return the range of the hash
     */
    public int getHashRange()
    {
        return keys.length;
    }
}