/*
* This file is part of the HyperGraphDB source distribution. This is copyrighted
* software. For permitted uses, licensing options and redistribution, please see
* the LicensingInformation file at the root level of the distribution.
*
* Copyright (c) 2005-2010 Kobrix Software, Inc. All rights reserved.
*/
package org.hypergraphdb.atom.impl;
import java.io.ByteArrayOutputStream;
import java.io.ByteArrayInputStream;
import java.util.Iterator;
import java.util.Stack;
import org.hypergraphdb.util.Pair;
/**
* <p>An implementation of a trie for storing UUIDs. This is used to efficiently
* represent persistent handle sets.
* </p>
*
* <p>
* Elements of this trie are assumed to be of fixed 16 byte length. The trie is implemented
* with a 16 length alphabet (each byte is split into two parts of 4 bits). Thus the depth of the
* resulting tree structure is 32. Terminal elements are the ones that reach that depth.
* </p>
*
* <p>
* This implementation also supports a compact and efficient serialization of the whole
* structure for storage purposes.
* </p>
*
* <p>
* Since this is intended as an implementation of a <code>HGAtomSet</code>, which is
* statically typed for persistent handles, no checks are made for nulls or
* correct byte array sizes.
* </p>
*
* <p>
* Purging of branches is implemented during lookup. If the lookup procedure reaches a
* dead branch (i.e. one with all its child pointers set to null), that branch is removed.
* </p>
*
* @author Borislav Iordanov
*/
public final class UUIDTrie
{
//
// The trie structure is represented by a small nested class hierarchy.
//
// We type internal and leaf nodes separately because leaf nodes don't need
// to hold an array of children. We can do this simple optimization since all
// our elements are of fixed length...
//
private static class trie implements Cloneable { public Object clone() { return this; } }
private static final class leaf_trie extends trie
{ public Object clone() { return THE_LEAF; } }
// a single instance representing leafs is enough since they don't store anything.
private static final leaf_trie THE_LEAF = new leaf_trie();
// there is an optimization opportunity here to eliminate the recursive calls.
private static final class node_trie extends trie
{
trie [] children = new trie[16];
byte count = 0; // number of non-null children
public Object clone()
{
node_trie cl = new node_trie();
cl.count = count;
cl.children = new trie[children.length];
for (int i = 0; i < children.length; i++)
cl.children[i] = (trie)(((trie)children[i]).clone());
return cl;
}
//
// The following recursive version of find was forgotten in favor of
// a non-recursive version (below). However, this recursive version has
// the advantage of performing a more aggressive pruning of death nodes.
// If it turns out that atom sets are very volatile, and that aggressive
// freeing up of unused nodes is needed, this recursive version might
// be preferred. Or implement a separate pruning method (called 'compact'
// or something...).
//
/* boolean find(byte [] uuid, int offset)
{
byte current = uuid[offset / 2];
current = (offset % 2 == 0) ? (byte) ((current & 0xF0) >> 4) : (byte)(current & 0x0F);
if (offset == 31)
return children[current] != null;
node_trie child = (node_trie)children[current];
if (child == null)
return false;
boolean result = child.find(uuid, offset + 1);
if (child.count == 0)
{
children[current]= null;
count--;
}
return result;
} */
/* boolean remove(byte [] uuid, int offset)
{
byte current = uuid[offset / 2];
current = (offset % 2 == 0) ? (byte) ((current & 0xF0) >> 4) : (byte)(current & 0x0F);
if (children[current] == null)
return false;
else if (offset == 31)
{
children[current] = null;
count--;
return true;
}
else
return children[current].find(uuid, offset + 1);
} */
/* boolean add(byte [] uuid, int offset)
{
byte current = uuid[offset / 2];
current = (offset % 2 == 0) ? (byte) ((current & 0xF0) >> 4) : (byte)(current & 0x0F);
if (children[current] == null)
{
count++;
if (offset == 31)
{
children[current] = THE_LEAF;
return false;
}
else
{
node_trie child = new node_trie();
children[current] = child;
return child.add(uuid, offset + 1);
}
}
else
{
if (offset == 31)
return true;
else
return children[current].add(uuid, offset + 1);
}
} */
}
private node_trie root = new node_trie();
public void clear()
{
root = new node_trie();
}
public UUIDTrie clone()
{
UUIDTrie trie = new UUIDTrie();
trie.root = (node_trie)root.clone();
return trie;
}
/**
* <p>Add a new element returning <code>true</code> if it wasn't already in the set
* and <code>false</code> otherwise.</p>
*
* @param uuid
* @return
*/
public boolean add(byte [] uuid)
{
node_trie node = root;
byte offset = 0;
byte position;
while (true)
{
if (offset % 2 == 0)
{
position = (byte)(uuid[offset >> 1] >> 4);
if (position < 0)
position = (byte)(-position + 7);
}
else
position = (byte)(uuid[offset >> 1] & 0x0F);
if (node.children[position] == null)
{
node.count++;
if (offset == 31)
{
node.children[position] = THE_LEAF;
return true;
}
else
{
node = (node_trie)(node.children[position] = new node_trie());
offset++;
}
}
else
{
if (offset == 31)
return false;
else
{
node = (node_trie)node.children[position];
offset++;
}
}
}
}
/**
* <p>Return <code>true</code> if the given element is in the set and <code>false</code>
* otherwise.</p>
*
* @param uuid
* @return
*/
public boolean find(byte [] uuid)
{
node_trie node = root;
byte position;
byte offset = 0;
while (true)
{
if (offset % 2 == 0)
{
position = (byte)(uuid[offset >> 1] >> 4);
if (position < 0)
position = (byte)(-position + 7);
}
else
position = (byte)(uuid[offset >> 1] & 0x0F);
if (offset == 31)
return node.children[position] != null;
node_trie child = (node_trie)node.children[position];
if (child == null)
return false;
if (child.count == 0)
{
node.children[position]= null;
node.count--;
return false;
}
node = child;
offset++;
}
}
/**
* <p>Remove an element and return <code>true</code> if it was present, and
* <code>false</code. otherwise.</p>
*
* @param uuid
* @return
*/
public boolean remove(byte [] uuid)
{
node_trie node = root;
byte offset = 0;
byte position;
while (true)
{
if (offset % 2 == 0)
{
position = (byte)(uuid[offset >> 1] >> 4);
if (position < 0)
position = (byte)(-position + 7);
}
else
position = (byte)(uuid[offset >> 1] & 0x0F);
if (node.children[position] == null)
return false;
else if (offset == 31)
{
node.children[position] = null;
node.count--;
return true;
}
else
{
node = (node_trie)node.children[position];
offset++;
}
}
}
private void serialize(ByteArrayOutputStream out, node_trie node, int depth)
{
byte i, layout;
int bit;
for (i = 0, layout = 0, bit = 1; i < 8; i++, bit*=2)
if (node.children[i] != null) layout |= bit;
out.write(layout);
for (i = 8, layout = 0, bit = 1; i < 16; i++, bit*=2)
if (node.children[i] != null) layout |= bit;
out.write(layout);
if (depth < 31)
for (i = 0; i < 16; i++)
if (node.children[i] != null)
serialize(out, (node_trie)node.children[i], depth + 1);
}
public byte [] serialize()
{
ByteArrayOutputStream out = new ByteArrayOutputStream();
serialize(out, root, 0);
return out.toByteArray();
}
private void deserialize(ByteArrayInputStream in, node_trie node, int depth)
{
byte layout1 = (byte)in.read();
byte layout2 = (byte)in.read();
byte i, bit;
if (depth < 31)
{
for (i = 0, bit = 1; i < 8; i++, bit *= 2)
if ((layout1 & bit) != 0)
{
node.count++;
node_trie child = new node_trie();
node.children[i] = child;
deserialize(in, child, depth + 1);
}
for (i = 8, bit = 1; i < 16; i++, bit *= 2)
if ((layout2 & bit) != 0)
{
node.count++;
node_trie child = new node_trie();
node.children[i] = child;
deserialize(in, child, depth + 1);
}
}
else
{
for (i = 0, bit = 1; i < 8; i++, bit *= 2)
if ((layout1 & bit) != 0)
{
node.count++;
node.children[i] = THE_LEAF;
}
for (i = 8, bit = 1; i < 16; i++, bit *= 2)
if ((layout2 & bit) != 0)
{
node.count++;
node.children[i] = THE_LEAF;
}
}
}
public void deserialize(byte [] data)
{
ByteArrayInputStream in = new ByteArrayInputStream(data);
for (byte i = 0; i < 16; i++)
root.children[i] = null;
deserialize(in, root, 0);
}
public Iterator<byte[]> iterator()
{
return new TrieIterator();
}
private class TrieIterator implements Iterator<byte[]>
{
// a stack that holds the state of what would be a recursive traversal
Stack<Pair<node_trie, Integer>> state = new Stack<Pair<node_trie, Integer>>();
void goToNext()
{
while (!state.isEmpty())
{
Pair<node_trie, Integer> top = state.pop();
node_trie n = top.getFirst();
int i = top.getSecond();
do { i++; } while (i < n.children.length && n.children[i] == null);
if (i < n.children.length)
{
state.push(new Pair<node_trie, Integer>(n, i));
if (state.size() < 32)
state.push(new Pair<node_trie, Integer>((node_trie)n.children[i], -1));
else
break;
}
}
}
public TrieIterator()
{
state.push(new Pair<node_trie, Integer>(root, -1));
goToNext();
}
public boolean hasNext()
{
return !state.isEmpty();
}
public byte[] next()
{
// if we have a next element, then we must be already positionned at a leaf
// in the trie, so we just read off the value from the current stack, which must
// be precisely 32 elements deep
byte [] result = null;
if (hasNext())
{
result = new byte[16];
int idx = 0;
for (Iterator<Pair<node_trie, Integer>> i = state.iterator(); i.hasNext(); )
{
byte high = i.next().getSecond().byteValue();
if (high > 7)
high = (byte)(7 - high);
byte low = i.next().getSecond().byteValue();
result[idx++] = (byte)(16*high + low);
}
goToNext();
}
return result;
}
public void remove()
{
if (state.size() < 16)
throw new IllegalStateException("TrieIterator.remove: the iterator has no current object to remove.");
else if (state.peek().getSecond() < 0)
throw new IllegalStateException("TrieIterator.remove: the iterator has no current object to remove.");
else
{
Pair<node_trie,Integer> curr = state.peek();
curr.getFirst().children[curr.getSecond()] = null;
curr.getFirst().count--;
while (curr.getFirst().count == 0)
{
state.pop();
if (state.isEmpty())
break;
curr = state.peek();
curr.getFirst().children[curr.getSecond()] = null;
curr.getFirst().count--;
}
}
}
}
}