package com.jwetherell.algorithms.data_structures;
import java.util.Arrays;
import com.jwetherell.algorithms.data_structures.interfaces.ITree;
/**
* A trie, or prefix tree, is an ordered tree data structure that is used to
* store an associative array where the keys are usually strings.
*
* == This is NOT a compact Trie. ==
*
* http://en.wikipedia.org/wiki/Trie
*
* @author Justin Wetherell <phishman3579@gmail.com>
*/
@SuppressWarnings("unchecked")
public class Trie<C extends CharSequence> implements ITree<C> {
private int size = 0;
protected INodeCreator creator = null;
protected Node root = null;
public Trie() {
this.creator = new INodeCreator() {
/**
* {@inheritDoc}
*/
@Override
public Node createNewNode(Node parent, Character character, boolean isWord) {
return (new Node(parent, character, isWord));
}
};
}
/**
* Constructor with external Node creator.
*/
public Trie(INodeCreator creator) {
this.creator = creator;
}
/**
* {@inheritDoc}
*/
@Override
public boolean add(C seq) {
return (this.addSequence(seq) != null);
}
/**
* Add sequence to trie.
*
* @param seq
* to add to the trie.
* @return Node which was added to trie or null if it already exists.
*/
protected Node addSequence(C seq) {
if (root == null)
root = this.creator.createNewNode(null, Node.SENTINAL, false);
int length = (seq.length() - 1);
Node prev = root;
// For each Character in the input, we'll either go to an already define
// child or create a child if one does not exist
for (int i = 0; i < length; i++) {
Node n = null;
Character c = seq.charAt(i);
int index = prev.childIndex(c);
// If 'prev' has a child which starts with Character c
if (index >= 0) {
// Go to the child
n = prev.getChild(index);
} else {
// Create a new child for the character
n = this.creator.createNewNode(prev, c, false);
prev.addChild(n);
}
prev = n;
}
// Deal with the first character of the input string not found in the
// trie
Node n = null;
Character c = seq.charAt(length);
int index = prev.childIndex(c);
// If 'prev' already contains a child with the last Character
if (index >= 0) {
n = prev.getChild(index);
// If the node doesn't represent a string already
if (n.isWord == false) {
// Set the string to equal the full input string
n.character = c;
n.isWord = true;
size++;
return n;
}
// String already exists in Trie
return null;
}
// Create a new node for the input string
n = this.creator.createNewNode(prev, c, true);
prev.addChild(n);
size++;
return n;
}
/**
* {@inheritDoc}
*/
@Override
public boolean contains(C seq) {
Node n = this.getNode(seq);
if (n == null || !n.isWord) return false;
// If the node found in the trie does not have it's string
// field defined then input string was not found
return n.isWord;
}
/**
* {@inheritDoc}
*/
@Override
public C remove(C sequence) {
if (root == null) return null;
// Find the key in the Trie
Node node = getNode(sequence);
if (node==null) return null;
return remove(node);
}
protected C remove(Node node) {
Node previous = node.parent;
if (node.childrenSize > 0) {
// The node which contains the input string and has children, just
// NULL out the string
node.isWord = false;
} else {
// The node which contains the input string does NOT have children
int index = previous.childIndex(node.character);
// Remove node from previous node
previous.removeChild(index);
// Go back up the trie removing nodes until you find a node which
// represents a string
while (previous != null && previous.isWord == false && previous.childrenSize == 0) {
if (previous.parent != null) {
int idx = previous.parent.childIndex(previous.character);
if (idx >= 0)
previous.parent.removeChild(idx);
}
previous = previous.parent;
}
}
size--;
return (C)(String.valueOf(node.character));
}
/**
* {@inheritDoc}
*/
@Override
public void clear() {
root = null;
size = 0;
}
/**
* Get node which represents the sequence in the trie.
*
* @param seq
* to find a node for.
* @return Node which represents the sequence or NULL if not found.
*/
protected Node getNode(C seq) {
if (root == null) return null;
// Find the string in the trie
Node n = root;
int length = (seq.length() - 1);
for (int i = 0; i <= length; i++) {
char c = seq.charAt(i);
int index = n.childIndex(c);
if (index >= 0) {
n = n.getChild(index);
} else {
// string does not exist in trie
return null;
}
}
return n;
}
/**
* {@inheritDoc}
*/
@Override
public int size() {
return size;
}
/**
* {@inheritDoc}
*/
@Override
public boolean validate() {
java.util.Set<C> keys = new java.util.HashSet<C>();
Node node = root;
if (node!=null && !validate(node,"",keys)) return false;
return (keys.size()==size());
}
private boolean validate(Node node, String string, java.util.Set<C> keys) {
StringBuilder builder = new StringBuilder(string);
builder.append(node.character);
String s = builder.toString();
if (node.isWord) {
C c = (C)s;
if (c==null) return false;
if (keys.contains(c)) return false;
keys.add(c);
}
for (int i=0; i<node.childrenSize; i++) {
Node n = node.getChild(i);
if (n==null) return false;
if (n.parent!=node) return false;
if (!validate(n,s,keys)) return false;
}
return true;
}
/**
* {@inheritDoc}
*/
@Override
public java.util.Collection<C> toCollection() {
return (new JavaCompatibleTrie<C>(this));
}
/**
* {@inheritDoc}
*/
@Override
public String toString() {
return TriePrinter.getString(this);
}
protected static class Node {
private static final int MINIMUM_SIZE = 2;
protected static final char SENTINAL = '\0';
protected Node[] children = new Node[MINIMUM_SIZE];
protected int childrenSize = 0;
protected Node parent = null;
protected boolean isWord = false; // Signifies this node represents a word
protected char character = SENTINAL; // First character that is different the parent's string
protected Node(Node parent, Character character, boolean isWord) {
this.parent = parent;
this.character = character;
this.isWord = isWord;
}
protected void addChild(Node node) {
int growSize = children.length;
if (childrenSize >= children.length) {
children = Arrays.copyOf(children, (growSize + (growSize>>1)));
}
children[childrenSize++] = node;
}
protected boolean removeChild(int index) {
if (index >= childrenSize) return false;
children[index] = null;
childrenSize--;
// Shift down the array
System.arraycopy(children, index + 1, children, index, childrenSize - index);
int shrinkSize = childrenSize;
if (childrenSize >= MINIMUM_SIZE && childrenSize < (shrinkSize + (shrinkSize<<1))) {
System.arraycopy(children, 0, children, 0, childrenSize);
}
return true;
}
protected int childIndex(Character parentChar) {
for (int i = 0; i < childrenSize; i++) {
Node childChar = children[i];
if (parentChar.equals(childChar.character)) return i;
}
return Integer.MIN_VALUE;
}
protected Node getChild(int index) {
if (index >= childrenSize) return null;
return children[index];
}
protected int getChildrenSize() {
return childrenSize;
}
/**
* {@inheritDoc}
*/
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
if (isWord == true) builder.append("Node=").append(isWord).append("\n");
for (int i = 0; i < childrenSize; i++) {
Node c = children[i];
builder.append(c.toString());
}
return builder.toString();
}
}
protected static interface INodeCreator {
/**
* Create a new node for sequence.
*
* @param parent
* node of the new node.
* @param character
* which represents this node.
* @param isWord
* signifies if the node represents a word.
* @return Node which was created.
*/
public Node createNewNode(Node parent, Character character, boolean isWord);
}
protected static class TriePrinter {
public static <C extends CharSequence> void print(Trie<C> trie) {
System.out.println(getString(trie));
}
public static <C extends CharSequence> String getString(Trie<C> tree) {
return getString(tree.root, "", null, true);
}
protected static String getString(Node node, String prefix, String previousString, boolean isTail) {
StringBuilder builder = new StringBuilder();
String string = null;
if (node.character != Node.SENTINAL) {
String temp = String.valueOf(node.character);
if (previousString != null)
string = previousString + temp;
else
string = temp;
}
builder.append(prefix + (isTail ? "└── " : "├── ") + ((node.isWord == true) ?
("(" + node.character + ") " + string)
:
node.character) + "\n"
);
if (node.children != null) {
for (int i = 0; i < node.childrenSize - 1; i++) {
builder.append(getString(node.children[i], prefix + (isTail ? " " : "│ "), string, false));
}
if (node.childrenSize >= 1) {
builder.append(getString(node.children[node.childrenSize - 1], prefix + (isTail ? " " : "│ "), string, true));
}
}
return builder.toString();
}
}
public static class JavaCompatibleTrie<C extends CharSequence> extends java.util.AbstractCollection<C> {
private Trie<C> trie = null;
public JavaCompatibleTrie(Trie<C> trie) {
this.trie = trie;
}
/**
* {@inheritDoc}
*/
@Override
public boolean add(C value) {
return trie.add(value);
}
/**
* {@inheritDoc}
*/
@Override
public boolean remove(Object value) {
return (trie.remove((C)value)!=null);
}
/**
* {@inheritDoc}
*/
@Override
public boolean contains(Object value) {
return trie.contains((C)value);
}
/**
* {@inheritDoc}
*/
@Override
public int size() {
return trie.size;
}
/**
* {@inheritDoc}
*
* WARNING: This iterator makes a copy of the trie's contents during it's construction!
*/
@Override
public java.util.Iterator<C> iterator() {
return (new TrieIterator<C>(trie));
}
private static class TrieIterator<C extends CharSequence> implements java.util.Iterator<C> {
private Trie<C> trie = null;
private Trie.Node lastNode = null;
private java.util.Iterator<java.util.Map.Entry<Node, String>> iterator = null;
protected TrieIterator(Trie<C> trie) {
this.trie = trie;
java.util.Map<Trie.Node,String> map = new java.util.LinkedHashMap<Trie.Node,String>();
if (this.trie.root!=null) {
getNodesWhichRepresentsWords(this.trie.root,"",map);
}
iterator = map.entrySet().iterator();
}
private void getNodesWhichRepresentsWords(Trie.Node node, String string, java.util.Map<Trie.Node,String> nodesMap) {
StringBuilder builder = new StringBuilder(string);
if (node.character!=Node.SENTINAL) builder.append(node.character);
if (node.isWord) nodesMap.put(node,builder.toString());
for (int i=0; i<node.childrenSize; i++) {
Node child = node.getChild(i);
getNodesWhichRepresentsWords(child,builder.toString(),nodesMap);
}
}
/**
* {@inheritDoc}
*/
@Override
public boolean hasNext() {
if (iterator!=null && iterator.hasNext()) return true;
return false;
}
/**
* {@inheritDoc}
*/
@Override
public C next() {
if (iterator==null) return null;
java.util.Map.Entry<Trie.Node,String> entry = iterator.next();
lastNode = entry.getKey();
return (C)entry.getValue();
}
/**
* {@inheritDoc}
*/
@Override
public void remove() {
if (iterator==null || trie==null) return;
iterator.remove();
this.trie.remove(lastNode);
}
}
}
}