package prefuse.data.search;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.NoSuchElementException;
import prefuse.data.Tuple;
/**
 * A trie data structure for fast lookup of words based on their
 * prefixes. The name "Trie" is a play on the words "tree" and
 * "retrieval". This class builds a tree structure representing a set of
 * words by their prefixes. It is useful for performing prefix-based
 * searches over large amounts of text in an efficient manner.
 *
 * <p>Words are stored lazily: a word only occupies as many levels of the
 * tree as are needed to tell it apart from the other stored words. The
 * character value 0 is used internally as the end-of-word marker, so
 * words that themselves contain that character are not distinguished
 * reliably.</p>
 *
 * @version 1.0
 * @author <a href="http://jheer.org">jeffrey heer</a>
 * @see PrefixSearchTupleSet
 */
public class Trie {

    /**
     * Base class for nodes in the trie structure.
     */
    public class TrieNode {
        /** Leaf flag. Nothing in this class assigns it. */
        boolean isLeaf;
        /**
         * Number of leaves reachable from this node. For a leaf this is the
         * leaf itself plus every leaf chained after it.
         */
        int leafCount = 0;
    }

    /**
     * A TrieNode implementation representing a branch in the tree. The
     * class maintains a list of characters (the next character in the
     * prefix) and associated children TrieNodes for each.
     */
    public class TrieBranch extends TrieNode {
        /**
         * Next-character keys, parallel to <code>children</code>. Index 0
         * always holds the end-of-word marker (character value 0).
         */
        char[] chars = new char[] {0};
        /**
         * Child nodes, parallel to <code>chars</code>. Index 0 holds the
         * entries for words ending at this branch and may be null.
         */
        TrieNode[] children = new TrieNode[1];
    }

    /**
     * A TrieNode implementation representing a leaf in the tree. The class
     * stores the word and tuple for the leaf, as well as a reference to the
     * successor leaf node in the trie.
     */
    public class TrieLeaf extends TrieNode {
        /**
         * Create a new, unchained leaf.
         * @param word the indexed word
         * @param t the Tuple associated with the word
         */
        public TrieLeaf(String word, Tuple t) {
            this.word = word;
            tuple = t;
            next = null;
            leafCount = 1;
        }
        String word;
        Tuple tuple;
        /** The next leaf stored for the same word, or null. */
        TrieLeaf next;
    }

    /**
     * An iterator for traversing a subtree of the Trie. The Tuples of all
     * leaves in the subtree are returned depth-first: the entries for words
     * ending at a branch come first, followed by the remaining children in
     * their stored order.
     */
    public class TrieIterator implements Iterator {
        /** Nodes still to visit; the head is visited next. */
        private LinkedList queue;

        /**
         * Create a new iterator over the subtree rooted at the given node.
         * @param node the root of the subtree to traverse. A null value
         * (such as the result of an unsuccessful {@link Trie#find(String)})
         * yields an empty iteration.
         */
        public TrieIterator(TrieNode node) {
            queue = new LinkedList();
            queue.add(node);
        }

        /**
         * Expands pending branches until the head of the queue is a leaf
         * or the queue is exhausted. The expansion order matches the
         * traversal order, so calling this early never changes the sequence
         * of returned Tuples.
         */
        private void advance() {
            while ( !queue.isEmpty() ) {
                Object head = queue.getFirst();
                if ( head instanceof TrieLeaf )
                    return;
                queue.removeFirst();
                if ( head == null )
                    continue; // nothing stored here
                TrieBranch b = (TrieBranch)head;
                // push in reverse so that children[0] ends up at the head
                for ( int i = b.chars.length-1; i >= 0; i-- ) {
                    if ( b.children[i] != null )
                        queue.addFirst(b.children[i]);
                }
            }
        }

        /**
         * Indicates if another Tuple is available.
         * @return true if a call to next() will succeed, false otherwise
         */
        public boolean hasNext() {
            advance();
            return !queue.isEmpty();
        }

        /**
         * Returns the Tuple of the next leaf in the subtree.
         * @return the next Tuple
         * @throws NoSuchElementException if the subtree is exhausted
         */
        public Object next() {
            advance();
            if ( queue.isEmpty() )
                throw new NoSuchElementException();
            TrieLeaf l = (TrieLeaf)queue.removeFirst();
            if ( l.next != null )
                queue.addFirst(l.next); // continue along the leaf chain
            return l.tuple;
        }

        /**
         * Not supported.
         * @throws UnsupportedOperationException always
         */
        public void remove() {
            throw new UnsupportedOperationException();
        }
    } // end of inner class TrieIterator

    private TrieBranch root = new TrieBranch();
    private boolean caseSensitive = false;

    /**
     * Create a new Trie with the specified case-sensitivity.
     * @param caseSensitive true if the index should be case sensitive for
     * indexed words, false otherwise.
     */
    public Trie(boolean caseSensitive) {
        this.caseSensitive = caseSensitive;
    }

    /**
     * Indicates if this Trie's index takes the case of letters
     * into account.
     * @return true if the index is case-sensitive, false otherwise
     */
    public boolean isCaseSensitive() {
        return caseSensitive;
    }

    /**
     * Add a new word to the trie, associated with the given Tuple.
     * @param word the word to add to the Trie
     * @param t the Tuple associated with the word
     */
    public void addString(String word, Tuple t) {
        TrieLeaf leaf = new TrieLeaf(word,t);
        addLeaf(root, leaf, 0);
    }

    /**
     * Remove a word/Tuple pair from the trie. Nothing happens if the pair
     * is not present. The Tuple is matched by identity; the word is matched
     * according to this Trie's case-sensitivity.
     * @param word the word to remove
     * @param t the associate Tuple to remove
     */
    public void removeString(String word, Tuple t) {
        removeLeaf(root, word, t, 0);
    }

    /**
     * Linear search for a character key.
     * @return the index of c in chars, or -1 if it is absent
     */
    private final int getIndex(char[] chars, char c) {
        for ( int i=0; i<chars.length; i++ )
            if ( chars[i] == c ) return i;
        return -1;
    }

    /**
     * Returns the key character of s at position i: the end-of-word marker
     * (0) when i is out of range, otherwise the character itself, converted
     * to lower case unless this Trie is case-sensitive.
     */
    private final char getChar(String s, int i) {
        char c = ( i < 0 || i >= s.length() ? 0 : s.charAt(i) );
        return ( caseSensitive ? c : Character.toLowerCase(c) );
    }

    /**
     * Indicates if two words count as the same entry under this Trie's
     * case-sensitivity setting.
     */
    private final boolean sameWord(String a, String b) {
        return caseSensitive ? a.equals(b) : a.equalsIgnoreCase(b);
    }

    /**
     * Checks whether the given leaf's word starts with the query word.
     * @return the leaf if its word has the query as a prefix, else null
     */
    private final TrieNode equalityCheck(String word, TrieLeaf l) {
        if ( caseSensitive ) {
            return l.word.startsWith(word) ? l : null;
        } else {
            // do our own looping to avoid string allocation for case change
            int len = word.length();
            if ( len > l.word.length() ) return null;
            for ( int i=0; i<len; ++i ) {
                char c1 = Character.toLowerCase(word.charAt(i));
                char c2 = Character.toLowerCase(l.word.charAt(i));
                if ( c1 != c2 ) return null;
            }
            return l;
        }
    }

    /**
     * Removes the leaf for the given word/Tuple pair from the subtree
     * rooted at b, keeping the leaf counts along the path up to date.
     * @param b the branch to search
     * @param word the word to remove
     * @param t the Tuple to remove (compared by identity)
     * @param depth the depth of b, i.e. the index of the character of
     * word that selects the child of b
     * @return true if a leaf was removed, false if none was found
     */
    private boolean removeLeaf(TrieBranch b, String word, Tuple t, int depth) {
        char c = getChar(word, depth);
        int i = getIndex(b.chars, c);
        if ( i == -1 ) {
            // couldn't find leaf
            return false;
        }
        TrieNode n = b.children[i];
        if ( n == null ) {
            // empty end-of-word slot: no word ends at this branch
            return false;
        }
        if ( n instanceof TrieBranch ) {
            TrieBranch tb = (TrieBranch)n;
            boolean rem = removeLeaf(tb, word, t, depth+1);
            if ( rem ) {
                b.leafCount--;
                // a branch left with a single leaf is replaced by that leaf
                if ( tb.leafCount == 1 )
                    b.children[i] = tb.children[tb.children[0]!=null?0:1];
            }
            return rem;
        }

        TrieLeaf head = (TrieLeaf)n;
        // A leaf may sit above the end of its word, so reaching it only
        // proves a shared prefix. Compare the words with the same rule
        // addLeaf uses to chain entries, so another word's entry is never
        // removed by mistake.
        if ( i != 0 && !sameWord(head.word, word) )
            return false;

        if ( head.tuple == t ) {
            b.children[i] = head.next;
            if ( head.next == null )
                repairBranch(b,i);
            b.leafCount--;
            return true;
        }

        // search the rest of the chain for the tuple
        TrieLeaf prev = head;
        TrieLeaf cur = head.next;
        while ( cur != null && cur.tuple != t ) {
            prev = cur;
            cur = cur.next;
        }
        if ( cur == null )
            return false; // couldn't find leaf
        // every leaf ahead of the removed one now has one fewer successor
        for ( TrieLeaf tl = head; tl != cur; tl = tl.next )
            tl.leafCount--;
        prev.next = cur.next;
        b.leafCount--;
        return true;
    }

    /**
     * Clears child slot i of the branch after its last leaf was removed.
     * The end-of-word slot (index 0) is only nulled out; any other slot is
     * cut out of the parallel key and child arrays.
     */
    private void repairBranch(TrieBranch b, int i) {
        if ( i == 0 ) {
            b.children[0] = null;
        } else {
            int len = b.chars.length;
            char[] nchars = new char[len-1];
            TrieNode[] nkids = new TrieNode[len-1];
            System.arraycopy(b.chars,0,nchars,0,i);
            System.arraycopy(b.children,0,nkids,0,i);
            System.arraycopy(b.chars,i+1,nchars,i,len-i-1);
            System.arraycopy(b.children,i+1,nkids,i,len-i-1);
            b.chars = nchars;
            b.children = nkids;
        }
    }

    /**
     * Inserts a leaf (possibly the head of a chain) into the subtree
     * rooted at b.
     * @param b the branch to insert into
     * @param l the leaf to insert
     * @param depth the depth of b, i.e. the index of the character of the
     * leaf's word that selects the child of b
     */
    private void addLeaf(TrieBranch b, TrieLeaf l, int depth) {
        b.leafCount += l.leafCount;
        char c = getChar(l.word, depth);
        int i = getIndex(b.chars, c);
        if ( i == -1 ) {
            addChild(b,l,c);
            return;
        }
        TrieNode n = b.children[i];
        if ( n == null ) {
            // we have completely spelled out the word
            b.children[i] = l;
        } else if ( n instanceof TrieBranch ) {
            // recurse down the tree
            addLeaf((TrieBranch)n,l,depth+1);
        } else {
            // node is a leaf, need to do a split?
            TrieLeaf nl = (TrieLeaf)n;
            if ( i==0 || sameWord(nl.word, l.word) ) {
                // same word, so chain the entries; every leaf already in
                // the chain gains one successor
                for ( ; nl.next != null; nl = nl.next )
                    nl.leafCount++;
                nl.leafCount++;
                nl.next = l;
            } else {
                // different words, need to do a split
                TrieBranch nb = new TrieBranch();
                b.children[i] = nb;
                addLeaf(nb,nl,depth+1);
                addLeaf(nb,l,depth+1);
            }
        }
    }

    /**
     * Appends a new key/child pair to the branch's parallel arrays.
     */
    private void addChild(TrieBranch b, TrieNode n, char c) {
        int len = b.chars.length;
        char[] nchars = new char[len+1];
        TrieNode[] nkids = new TrieNode[len+1];
        System.arraycopy(b.chars,0,nchars,0,len);
        System.arraycopy(b.children,0,nkids,0,len);
        nchars[len] = c;
        nkids[len] = n;
        b.chars = nchars;
        b.children = nkids;
    }

    /**
     * Look up the given word in this Trie. If a match is found, a TrieNode
     * is returned. This node is the root of a subtree containing all the
     * matches to the query.
     * @param word the word to lookup
     * @return the TrieNode root of the subtree containing all matches. A
     * null value is returned if no match is found, or if the word is null
     * or empty.
     */
    public TrieNode find(String word) {
        if ( word == null || word.length() < 1 )
            return null;
        int last = word.length()-1;
        TrieBranch b = root;
        for ( int depth = 0; ; depth++ ) {
            int i = getIndex(b.chars, getChar(word, depth));
            if ( i == -1 )
                return null; // not in trie
            TrieNode child = b.children[i];
            if ( child == null || depth == last )
                return child; // nothing stored here, or end of search
            if ( child instanceof TrieLeaf )
                return equalityCheck(word, (TrieLeaf)child);
            b = (TrieBranch)child; // descend
        }
    }

} // end of class Trie