/******************************************************************************
* Copyright (C) 2014 Yevgeny Krasik *
* *
* Licensed under the Apache License, Version 2.0 (the "License"); *
* you may not use this file except in compliance with the License. *
* You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an "AS IS" BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
******************************************************************************/
package com.github.ykrasik.jaci.util.trie;
import com.github.ykrasik.jaci.util.function.Func;
import com.github.ykrasik.jaci.util.function.Pred;
import com.github.ykrasik.jaci.util.opt.Opt;
import java.util.*;
import java.util.Map.Entry;
/**
 * An implementation of a {@link Trie}.
 * Contains a character, a possible value, and children.
 * <p>
 * Child lookup is case insensitive (see {@link #getChild(char)}), which makes word lookup
 * case insensitive as well.<br>
 * {@link #add(String, Object)}, {@link #subTrie(String)}, {@link #mapValues(Func)}, {@link #filter(Pred)}
 * and {@link #union(Trie)} never modify this node - they return a (possibly new) trie.
 * The only in-place mutators are the package-private {@link #setChild(TrieNode)} and {@link #setValue(Object)}.
 *
 * @author Yevgeny Krasik
 */
// FIXME: Test this class thoroughly!
public class TrieNode<T> implements Trie<T> {
    /** The character this node represents. The root node uses the character 0. */
    private final char c;

    /** Child nodes. Normally keyed by the child's own character. */
    private final Map<Character, TrieNode<T>> children;

    /** The value stored under the word that ends at this node, if this node is a word. */
    private Opt<T> value = Opt.absent();

    /** Whether {@link #numWords} currently holds a valid cached value. */
    private volatile boolean numWordsCalculated;

    /** Cached number of words reachable from this node. Only meaningful when {@link #numWordsCalculated} is true. */
    private int numWords;

    /**
     * Create a new node with the given character and no children.
     *
     * @param c Character to assign to this node.
     */
    public TrieNode(char c) {
        this(c, new HashMap<Character, TrieNode<T>>(1));
    }

    /**
     * Create a new node with the given character, backed directly by the given children map (no copy is made).
     *
     * @param c Character to assign to this node.
     * @param children Map to use as this node's children.
     */
    private TrieNode(char c, Map<Character, TrieNode<T>> children) {
        this.c = c;
        this.children = children;
    }

    /**
     * @return The number of words reachable from this node (including this node itself, if it is a word).
     *         The result is calculated lazily and cached.
     */
    @Override
    public int size() {
        if (!numWordsCalculated) {
            // TODO: Am I overdoing this? Thread safety? For what?
            // calcNumWords() re-checks the flag, so the count is not re-calculated
            // if another thread finished calculating it while we waited for the monitor.
            synchronized (this) {
                calcNumWords();
            }
        }
        return numWords;
    }

    /**
     * Calculates and caches the number of words reachable from this node.<br>
     * Does nothing if the amount is already cached. The cache of this node is reset by
     * {@link #setChild(TrieNode)} and {@link #setValue(Object)}.
     */
    private void calcNumWords() {
        if (numWordsCalculated) {
            return;
        }

        // The number of words reachable from this node is considered to be the number of words
        // reachable from its children, and +1 if the node itself is a word.
        int words = 0;
        if (value.isPresent()) {
            words++;
        }
        for (TrieNode<T> child : children.values()) {
            child.calcNumWords();
            words += child.numWords;
        }

        // Write the count before the volatile flag, so that a reader that sees the flag also sees the count.
        numWords = words;
        numWordsCalculated = true;
    }

    /**
     * @return true if no words are reachable from this node.
     */
    @Override
    public boolean isEmpty() {
        return size() == 0;
    }

    /**
     * @param word Word to look for.
     * @return true if a node exists under the given word and that node has a value.
     */
    @Override
    public boolean contains(String word) {
        final Opt<TrieNode<T>> node = getNode(word);
        return node.exists(IS_WORD_PREDICATE);
    }

    /**
     * @param word Word to look for.
     * @return The value of the node reached by the given word, or an absent value if there is no such node
     *         or the node has no value.
     */
    @Override
    public Opt<T> get(String word) {
        final Opt<TrieNode<T>> node = getNode(word);
        if (!node.isPresent()) {
            return Opt.absent();
        }
        return node.get().value;
    }

    /**
     * @return The characters along the path that starts at this node and continues for as long as
     *         the current node has exactly 1 child and is not a word. Empty if this trie is empty.
     */
    @Override
    public String getLongestPrefix() {
        if (isEmpty()) {
            return "";
        }

        // Keep going down the tree, until a node has more than 1 child or is a word.
        final StringBuilder prefixBuilder = new StringBuilder();
        TrieNode<T> currentNode = this;
        while (currentNode.children.size() == 1 && !currentNode.isWord()) {
            // currentNode only has 1 child and is not a word.
            // Move on to currentNode's only child.
            currentNode = currentNode.children.values().iterator().next();

            // Append child's character to prefix.
            prefixBuilder.append(currentNode.c);
        }
        return prefixBuilder.toString();
    }

    /**
     * Returns a trie that contains everything this trie contains, and in addition maps the given word
     * to the given value. This trie is not modified.
     *
     * @param word Word to add.
     * @param value Value to associate with the word. A null value results in the word's node having no value.
     * @return A new trie containing the word.
     */
    @Override
    public Trie<T> add(String word, T value) {
        return getOrCreate(word, 0, value);
    }

    /**
     * Recursively re-creates the path described by {@code word} (starting at {@code index}) below this node,
     * copying every node along the way and leaving the existing nodes untouched.
     *
     * @param word Word being added.
     * @param index Index of the next character of the word to process.
     * @param value Value to set on the node at which the word ends.
     * @return A copy of this node which also contains the rest of the word.
     */
    private TrieNode<T> getOrCreate(String word, int index, T value) {
        // Recursive stop condition - end of string.
        if (index == word.length()) {
            // The word ends at this node. The replacement node must keep this node's children,
            // otherwise all longer words that pass through this node would be lost.
            final Map<Character, TrieNode<T>> childrenCopy = new HashMap<>(children);
            final TrieNode<T> newNode = new TrieNode<>(c, childrenCopy);
            newNode.setValue(value);
            return newNode;
        }

        final char nextChar = word.charAt(index);
        final Opt<TrieNode<T>> child = getChild(nextChar);
        final TrieNode<T> newChild;
        if (child.isPresent()) {
            newChild = child.get().getOrCreate(word, index + 1, value);
        } else {
            // TODO: As an optimization, can probably create the rest of the branch here without recursion.
            newChild = new TrieNode<T>(nextChar).getOrCreate(word, index + 1, value);
        }
        return withChild(newChild);
    }

    /**
     * @param newChild Child to set on the copy. Replaces any existing child stored under the same character.
     * @return A copy of this node (same character, same value, copied children) with the given child set.
     */
    private TrieNode<T> withChild(TrieNode<T> newChild) {
        final Map<Character, TrieNode<T>> newChildren = new HashMap<>(children);
        newChildren.put(newChild.c, newChild);
        final TrieNode<T> copy = new TrieNode<>(c, newChildren);
        // Keep this node's own value, otherwise a word that ends at this node would be lost
        // whenever a longer word passing through this node is added.
        copy.value = value;
        return copy;
    }

    /**
     * Returns a trie containing only the words of this trie that start with the given prefix.
     * <p>
     * The nodes leading to the last character of the prefix are newly created from the characters
     * of the given prefix (as supplied by the caller) and carry no values.
     * The node of the last character of the prefix is the existing node of this trie.
     *
     * @param prefix Prefix to filter by.
     * @return This trie if the prefix is empty or this trie is empty, the empty trie if no node is reachable
     *         by the prefix, otherwise a new trie as described above.
     */
    @Override
    public Trie<T> subTrie(String prefix) {
        if (prefix.isEmpty() || this.isEmpty()) {
            return this;
        }

        // currentNavigationNode is used to navigate down the existing trie.
        // currentCreationNode is used to create a new trie according to the prefix.
        final TrieNode<T> prefixTrie = createRoot();
        TrieNode<T> currentNavigationNode = this;
        TrieNode<T> currentCreationNode = prefixTrie;

        // TODO: Consider a more elegant solution.
        final int lastIndex = prefix.length() - 1;
        for (int i = 0; i < prefix.length(); i++) {
            final char prefixChar = prefix.charAt(i);
            final Opt<TrieNode<T>> child = currentNavigationNode.getChild(prefixChar);
            if (!child.isPresent()) {
                return emptyTrie();
            }
            currentNavigationNode = child.get();

            final TrieNode<T> newChild;
            if (i == lastIndex) {
                // Use the real node for the last character in the word.
                newChild = currentNavigationNode;
            } else {
                // Create a new node for every character except the last, containing only 'prefixChar' as a child.
                newChild = new TrieNode<>(prefixChar);
            }
            currentCreationNode.children.put(prefixChar, newChild);
            currentCreationNode = newChild;
        }
        return prefixTrie;
    }

    /**
     * Returns a trie in which every value is replaced by the result of applying the given function to it.
     * Nodes that end up with neither a value nor children are dropped from the result.
     *
     * @param function Function to apply to each value.
     * @param <A> Type of the mapped values.
     * @return The mapped trie, or the empty trie if nothing remained.
     */
    @Override
    public <A> TrieNode<A> mapValues(Func<T, A> function) {
        if (isEmpty()) {
            return emptyTrie();
        }

        final Opt<TrieNode<A>> newTrie = this.doMap(function);
        return newTrie.getOrElse(TrieNode.<A>emptyTrie());
    }

    /**
     * Maps this node and, recursively, its children.
     *
     * @return The mapped node, or an absent value if neither the node nor any of its descendants mapped to a value.
     */
    private <A> Opt<TrieNode<A>> doMap(Func<T, A> function) {
        final Opt<A> newValue = value.map(function);

        // Map the node's children.
        final Map<Character, TrieNode<A>> newChildren = mapChildren(function);
        if (newChildren.isEmpty() && !newValue.isPresent()) {
            // The node didn't map to a value and neither did its children.
            return Opt.absent();
        }

        // Create a new node.
        final TrieNode<A> newNode = new TrieNode<>(c, newChildren);
        newNode.value = newValue;
        return Opt.of(newNode);
    }

    /**
     * @return The mapped children of this node, keyed by each child's character.
     *         Children that mapped to nothing are left out.
     */
    private <A> Map<Character, TrieNode<A>> mapChildren(Func<T, A> function) {
        if (children.isEmpty()) {
            return Collections.emptyMap();
        }

        final Map<Character, TrieNode<A>> newChildren = new HashMap<>(children.size());
        for (TrieNode<T> child : children.values()) {
            final Opt<TrieNode<A>> newChild = child.doMap(function);
            if (newChild.isPresent()) {
                newChildren.put(child.c, newChild.get());
            }
        }
        return newChildren;
    }

    /**
     * Returns a trie containing only the values accepted by the given predicate.
     *
     * @param filter Predicate that decides which values are kept.
     * @return The filtered trie.
     */
    @Override
    public Trie<T> filter(final Pred<T> filter) {
        // NOTE(review): this relies on Opt.map treating a null result as an absent value - confirm against Opt.
        return mapValues(new Func<T, T>() {
            @Override
            public T apply(T input) {
                return filter.test(input) ? input : null;
            }
        });
    }

    /**
     * Returns a trie containing the words of both this trie and the other trie.
     *
     * @param other Trie to create a union with.
     * @return The other trie if it is this trie or this trie is empty, this trie if the other trie is empty,
     *         otherwise a new trie containing the words of both.
     */
    @Override
    public Trie<T> union(Trie<T> other) {
        if (this == other || this.isEmpty()) {
            return other;
        }
        if (other.isEmpty()) {
            return this;
        }

        if (other instanceof TrieNode) {
            // Other Trie is of the same implementation, we can have an efficient union.
            return trieNodeUnion((TrieNode<T>) other);
        }

        // Other Trie is of a different implementation, create a naive union trie.
        final TrieBuilder<T> builder = new TrieBuilder<>();
        builder.setAll(this.toMap());
        builder.setAll(other.toMap());
        return builder.build();
    }

    /**
     * Creates a node that is the union of this node and the other node.
     * Children that exist in only one of the nodes are re-used as is, children that exist in both
     * (matched case-insensitively, as done by {@link #getChild(char)}) are recursively united.
     * The union node uses this node's character, and this node's value takes precedence over the other's.
     *
     * @param other Node to create a union with. Must have the same character as this node, ignoring case.
     * @return A new node that is the union of both nodes.
     * @throws IllegalArgumentException If the characters of the 2 nodes differ (ignoring case).
     */
    private TrieNode<T> trieNodeUnion(TrieNode<T> other) {
        final char otherCharacter = other.c;
        if (Character.toLowerCase(c) != Character.toLowerCase(otherCharacter)) {
            // TODO: Is this the correct way of handling this?
            throw new IllegalArgumentException("Trying to create a union between incompatible nodes: " + c + " and " + otherCharacter);
        }

        final Map<Character, TrieNode<T>> unionChildren = new HashMap<>(children.size() + other.children.size());

        // First pass: every child of this node, united with its counterpart in other if there is one.
        for (TrieNode<T> child : children.values()) {
            final Opt<TrieNode<T>> otherChild = other.getChild(child.c);
            final TrieNode<T> trieNodeToAdd;
            if (otherChild.isPresent()) {
                // The other node has a child under this character, use a union node.
                trieNodeToAdd = child.trieNodeUnion(otherChild.get());
            } else {
                // The other node has no child under this character, can use the original node.
                trieNodeToAdd = child;
            }
            unionChildren.put(child.c, trieNodeToAdd);
        }

        // Second pass: children of other that were not already covered by the first pass.
        for (TrieNode<T> otherChild : other.children.values()) {
            final char otherChildCharacter = otherChild.c;
            if (unionChildren.containsKey(otherChildCharacter)) {
                // This exact character was already handled by the first pass.
                continue;
            }

            final Opt<TrieNode<T>> thisChild = getChild(otherChildCharacter);
            if (!thisChild.isPresent()) {
                // This node has no child under this character, can use the original node.
                unionChildren.put(otherChildCharacter, otherChild);
                continue;
            }

            // This node has a counterpart that differs only by case (e.g. 'A' vs 'a').
            // If the first pass already united that counterpart with otherChild, adding it again
            // under the other case would duplicate every word below it.
            final Opt<TrieNode<T>> mergedByFirstPass = other.getChild(thisChild.get().c);
            if (mergedByFirstPass.isPresent() && mergedByFirstPass.get() == otherChild) {
                continue;
            }

            // otherChild was not consumed by the first pass (other holds more than one child
            // for this letter), keep its words by uniting it separately.
            unionChildren.put(otherChildCharacter, otherChild.trieNodeUnion(thisChild.get()));
        }

        final TrieNode<T> unionNode = new TrieNode<>(c, unionChildren);
        unionNode.value = value.orElse(other.value);
        return unionNode;
    }

    /**
     * Calls the visitor for every word reachable from this node that has a value.
     * The character of this node itself is not part of the visited words.
     *
     * @param visitor Visitor to call with each word and its value.
     */
    @Override
    public void visitWords(TrieVisitor<T> visitor) {
        if (isEmpty()) {
            return;
        }

        // doVisit does not push or pop characters from the wordBuilder.
        // We call it (and not visit()) because we don't want the root's character to be appended.
        doVisit(visitor, new StringBuilder());
    }

    /**
     * Visits this node and its descendants, with this node's character appended to the word being built.
     */
    private void visit(TrieVisitor<T> visitor, StringBuilder wordBuilder) {
        // Started processing node, push its character to the prefix.
        wordBuilder.append(c);

        // Actually visit the node.
        doVisit(visitor, wordBuilder);

        // Done processing node, pop its character from the prefix.
        wordBuilder.deleteCharAt(wordBuilder.length() - 1);
    }

    /**
     * Visits this node (if it has a value) and then all of its children,
     * without touching the word being built for this node itself.
     */
    private void doVisit(TrieVisitor<T> visitor, StringBuilder wordBuilder) {
        // Visit the node if it has a value.
        if (value.isPresent()) {
            final String word = wordBuilder.toString();
            visitor.visit(word, value.get());
        }

        // Visit all the node's children.
        for (TrieNode<T> child : children.values()) {
            child.visit(visitor, wordBuilder);
        }
    }

    /**
     * @return The words of this trie, as the key set of {@link #toMap()}.
     */
    @Override
    public Collection<String> words() {
        return toMap().keySet();
    }

    /**
     * @return The values of this trie, as the values of {@link #toMap()}.
     */
    @Override
    public Collection<T> values() {
        return toMap().values();
    }

    /**
     * @return The word-value pairs of this trie, as the entry set of {@link #toMap()}.
     */
    @Override
    public Set<Entry<String, T>> entrySet() {
        return toMap().entrySet();
    }

    /**
     * @return A map from each word in this trie to its value. An empty map if this trie is empty.
     */
    @Override
    public Map<String, T> toMap() {
        if (isEmpty()) {
            return Collections.emptyMap();
        }

        final MapTrieVisitor<T> visitor = new MapTrieVisitor<>();
        visitWords(visitor);
        return visitor.getMap();
    }

    /**
     * @return true if this node has a value, meaning a word ends at this node.
     */
    private boolean isWord() {
        return value.isPresent();
    }

    /**
     * @param prefix Characters to navigate by, starting from this node.
     * @return The node reached by following the characters of the prefix, if there is one.
     *         This node itself for an empty prefix.
     */
    private Opt<TrieNode<T>> getNode(String prefix) {
        // Navigate the tree by the letters of the prefix, starting from the root.
        TrieNode<T> currentNode = this;
        for (int i = 0; i < prefix.length(); i++) {
            final char prefixChar = prefix.charAt(i);
            final Opt<TrieNode<T>> child = currentNode.getChild(prefixChar);
            if (!child.isPresent()) {
                return Opt.absent();
            }
            currentNode = child.get();
        }
        return Opt.of(currentNode);
    }

    /**
     * Looks for a child under the lower-case form of the character, then under its upper-case form,
     * and finally under the character exactly as given.
     *
     * @param c Character to look for.
     * @return Child node for character 'c', if one exists. <b>Case insensitive</b>
     */
    Opt<TrieNode<T>> getChild(char c) {
        if (children.isEmpty()) {
            return Opt.absent();
        }

        TrieNode<T> child = children.get(Character.toLowerCase(c));
        if (child == null) {
            child = children.get(Character.toUpperCase(c));
        }
        if (child == null) {
            // Some characters are neither their own lower-case nor their own upper-case form
            // (titlecase letters). A child stored under such a character is only found by an exact lookup.
            child = children.get(c);
        }
        return Opt.ofNullable(child);
    }

    /**
     * Sets the given node as a child of this node. Will overwrite any existing child for the child's character.
     * <p>
     * Resets the cached word count of this node only - nodes that already have this node as a descendant
     * keep whatever count they have cached.
     *
     * @param child TrieNode to set as a child of this node.
     */
    void setChild(TrieNode<T> child) {
        children.put(child.c, child);
        // The amount of reachable words may have changed.
        numWordsCalculated = false;
    }

    /**
     * Sets the value of this node.
     * <p>
     * Resets the cached word count of this node only - nodes that already have this node as a descendant
     * keep whatever count they have cached.
     *
     * @param value The value to set. May be null.
     */
    void setValue(T value) {
        this.value = Opt.ofNullable(value);
        // This node may have become (or stopped being) a word.
        numWordsCalculated = false;
    }

    /**
     * @return This node's character followed by " : " (omitted for a node with the character 0),
     *         followed by the string representation of {@link #toMap()}.
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        if (c != 0) {
            sb.append(c);
            sb.append(" : ");
        }
        sb.append(toMap().toString());
        return sb.toString();
    }

    /** Shared instance returned by {@link #emptyTrie()}. */
    private static final TrieNode<?> EMPTY_TRIE = createRoot();

    /**
     * Create a node representing the root node of a Trie.
     *
     * @param <T> Trie type.
     * @return A root node of a Trie.
     */
    public static <T> TrieNode<T> createRoot() {
        return new TrieNode<>((char) 0);
    }

    /**
     * @param <T> Trie type.
     * @return An empty Trie.
     */
    @SuppressWarnings("unchecked")
    public static <T> TrieNode<T> emptyTrie() {
        return (TrieNode<T>) EMPTY_TRIE;
    }

    /** Tests whether a node is a word, meaning it has a value. */
    private static final Pred<TrieNode<?>> IS_WORD_PREDICATE = new Pred<TrieNode<?>>() {
        @Override
        public boolean test(TrieNode<?> trieNode) {
            return trieNode.isWord();
        }
    };
}