/* Copyright 2013 Jonatan Jönsson * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package se.softhouse.common.collections; import static com.google.common.base.Preconditions.checkNotNull; import java.util.AbstractMap; import java.util.AbstractSet; import java.util.Collections; import java.util.ConcurrentModificationException; import java.util.Iterator; import java.util.Map; import java.util.NoSuchElementException; import java.util.Set; import java.util.TreeMap; import javax.annotation.CheckReturnValue; import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; /** * <pre> * Stores {@link String}s in a <a href="http://en.wikipedia.org/wiki/Trie">trie</a>. * The main purpose when using a structure like this is the methods * {@link #findLongestPrefix(CharSequence)} and {@link #getEntriesWithPrefix(CharSequence)}. * * Neither <code>null</code> keys or <code>null</code> values are allowed because just like the * devil, they are evil. * * If you're iterating over the whole trie more often than you do {@link #getEntriesWithPrefix(CharSequence) simple lookups} * you're probably better off using a {@link TreeMap}. * * TODO(jontejj): Implement SortedMap instead of Map * * @param <V> the type of values stored in the trie * </pre> */ @NotThreadSafe public final class CharacterTrie<V> extends AbstractMap<String, V> { private int size = 0; private final Entry<V> root; private int modCount = 0; /** * An entry represents a node in the tree. */ private static final class Entry<V> implements Map.Entry<String, V> { /** * The char the parent node will use to reference this child with */ private final Character index; /** * If true this node represents a value. * TODO(jontejj): This could have been optimized so that all entries have values. I.e * there's not a level for each character if there's no other string that shares the prefix. */ private boolean isValue; /** * the value of this node */ private V value; /** * The nodes that belongs to this Node. Having them in a {@link TreeMap} ensures that * iterating over them is done in a consistent(sorted) manner. */ private TreeMap<Character, Entry<V>> children; @Nullable private final Entry<V> parent; private Entry(final Character index, @Nullable final Entry<V> parent) { this.index = index; this.parent = parent; } @Override public String getKey() { StringBuilder sb = new StringBuilder(); Entry<V> current = this; while(!current.isRoot()) { sb.append(current.index); current = current.parent; } return sb.reverse().toString(); } @Override public V getValue() { return value; } @Override public V setValue(final V value) { V oldValue = this.value; isValue = true; this.value = value; return oldValue; } @Override public boolean equals(Object obj) { if(!(obj instanceof Map.Entry<?, ?>)) return false; Map.Entry<?, ?> entry = (Map.Entry<?, ?>) obj; return getKey().equals(entry.getKey()) && getValue().equals(entry.getValue()); } @Override public int hashCode() { return getKey().hashCode() ^ getValue().hashCode(); } @Override public String toString() { return getKey() + "=" + getValue(); } private Map.Entry<String, V> findLongestPrefix(final CharSequence prefix) { Entry<V> child = findLastChild(prefix); if(child.isValue) return child; return null; } /** * Clear this entry from being a value * * @return true if this call had any effect */ private boolean unset() { boolean wasValue = isValue; isValue = false; value = null; return wasValue; } private boolean isRoot() { return parent == null; } private boolean hasChildren() { return children != null ? children.size() > 0 : false; } private Entry<V> getChild(final Character c) { return children != null ? children.get(c) : null; } /** * @param keyToFetch the key to find the child/leaf for * @return the leaf in the tree that is reached for the given key, * or null if no such leaf could be found */ private Entry<V> findChild(final CharSequence keyToFetch) { // Start at the root and search the tree for the entry matching the // given key Entry<V> current = this; for(int i = 0, len = keyToFetch.length(); i < len && current != null; i++) { Character c = keyToFetch.charAt(i); current = current.getChild(c); } return current; } /** * @param prefix the key to find the child/leaf for * @return the leaf in the tree that is reached for the given key, * or null if no such leaf could be found */ private Entry<V> findLastChild(final CharSequence prefix) { // Start at the root and search the tree for an entry starting with // key, return the last possible match so that matches with more matching chars will be // prioritized Entry<V> current = this; for(int i = 0, len = prefix.length(); i < len; i++) { Character c = prefix.charAt(i); Entry<V> next = current.getChild(c); if(next == null) return current; current = next; } return current; } /** * @param keyToFetch the key to find the child/leaf for * @return the value for the leaf in the tree that is reached for the * given key, * or null if no such value could be found */ private V get(final CharSequence keyToFetch) { Entry<V> child = findChild(keyToFetch); if(child == null) return null; if(child.isValue) return child.value; return null; } /** * @param c the Character index to remove * @throws NullPointerException if this Entry doesn't have had any * children before */ private void deleteChild(final Character c) { children.remove(c); } /** * Makes sure that a child that represents the given {@code childChar} is found in this * entry. * * @param childChar the character to create/get a child for * @return either the already existing child or a newly created one */ private Entry<V> ensureChild(final Character childChar) { if(children == null) { children = new TreeMap<Character, Entry<V>>(); Entry<V> child = new Entry<V>(childChar, this); children.put(childChar, child); return child; } Entry<V> existing = children.get(childChar); if(existing != null) return existing; Entry<V> child = new Entry<V>(childChar, this); children.put(childChar, child); return child; } /** * Removes all key-value pairs in this trie */ private void clear() { children = new TreeMap<Character, CharacterTrie.Entry<V>>(); unset(); } /** * Finds the successor entry for predecessor, * It starts by looking if it's a value itself, then it checks * the children and if nothing there then it walks back up and checks siblings. * (essentially a pre-order tree traversal) * * @param level the current level we're in (in the current stack) */ private Entry<V> successor(Entry<V> predecessor, CharSequence predecessorKey, int level, boolean isGoingDown) { if(isValue && predecessor != this && isGoingDown) return this; if(hasChildren()) { Map.Entry<Character, Entry<V>> next = null; if(predecessor != null && predecessor.commonDescent(this) && level < predecessorKey.length()) { // Go through each sibling one after the other char charAtLevel = predecessorKey.charAt(level); next = children.higherEntry(charAtLevel); } else { next = children.firstEntry(); } // Visit the next child if(next != null) return next.getValue().successor(predecessor, predecessorKey, level + 1, true); } if(!isRoot()) // Go back up and enter the sibling return parent.successor(predecessor, predecessorKey, level - 1, false); return null; } /** * Returns <code>true</code> if this node is an ancestor for {@code entry}. */ private boolean ancestorFor(Entry<V> entry) { if(isRoot()) return true; Entry<V> entryAncestor = entry.parent; while(entryAncestor != null && this != entryAncestor) { entryAncestor = entryAncestor.parent; } return this == entryAncestor; } private boolean commonDescent(Entry<V> entry) { if(this.ancestorFor(entry)) return true; else if(entry.ancestorFor(this)) return true; return false; } private int size() { int size = isValue ? 1 : 0; if(hasChildren()) { for(Entry<V> child : children.values()) { size += child.size(); } } return size; } } /** * Creates a new, empty, {@link CharacterTrie} */ @CheckReturnValue public static <V> CharacterTrie<V> newTrie() { return new CharacterTrie<V>(); } /** * Creates a new {@link CharacterTrie} with the entries from {@code map} */ @CheckReturnValue public static <V> CharacterTrie<V> newTrie(Map<String, V> map) { CharacterTrie<V> trie = newTrie(); trie.putAll(map); return trie; } private CharacterTrie() { root = createRoot(); } /** * @throws NullPointerException if {@code key} or {@code value} is null */ @Override public V put(final String key, final V value) { checkNotNull(key, "Null key given, CharacterTrie does not support null keys as they are error-prone"); checkNotNull(value, "Null value given, CharacterTrie does not support null values as they are error-prone. " + "Use the Null Object Pattern instead."); // Start at the root and search the tree for the entry to insert the // final character into Entry<V> current = root; for(int i = 0, len = key.length(); i < len; i++) { Character c = key.charAt(i); // Traverses the tree down to the end where we put in our child current = current.ensureChild(c); } V oldValue = current.setValue(value); if(oldValue == null) { size++; modCount++; } return oldValue; } @Override @CheckReturnValue public int size() { return size; } @Override public V remove(final Object keyToRemove) { CharSequence key = (CharSequence) keyToRemove; // Start at the root and search the tree for the entry to delete Entry<V> current = root; for(int i = 0, len = key.length(); i < len; i++) { Character c = key.charAt(i); current = current.getChild(c); if(current == null) return null; } return removeEntry(current); } private V removeEntry(Entry<V> entryToRemove) { V oldValue = entryToRemove.getValue(); if(entryToRemove.unset()) { size--; modCount++; if(entryToRemove.hasChildren()) // We have children so we are important and can't be removed return oldValue; Entry<V> parent = entryToRemove.parent; // Remove ourselves from the parent parent.deleteChild(entryToRemove.index); // Clean up unused entries while(!parent.hasChildren() && !parent.isValue) { Entry<V> grandParent = parent.parent; if(grandParent == null) { break; // we reached root } // Ask the grandParent to remove our parent grandParent.deleteChild(parent.index); // Walk up the tree and remove entries without children parent = grandParent; } return oldValue; } return null; } @Override @CheckReturnValue public boolean containsKey(@Nullable final Object key) { if(key == null) return false; String keyToCheckContainMentFor = (String) key; return root.get(keyToCheckContainMentFor) != null; } @Override @CheckReturnValue public V get(final Object key) { CharSequence keyToFetch = (CharSequence) key; return root.get(keyToFetch); } /** * Returns the entry that shares the longest prefix with {@code key}, or null * if no such entry exists * * @see <a href="http://en.wikipedia.org/wiki/Longest_prefix_match">Longest_prefix_match</a> */ @CheckReturnValue public Map.Entry<String, V> findLongestPrefix(final CharSequence prefix) { return root.findLongestPrefix(prefix); } /** * Returns all entries whose key starts with {@code prefix}. The returned {@link Set} is a view * so removed elements from it are also removed in this structure. */ public Set<Map.Entry<String, V>> getEntriesWithPrefix(final CharSequence prefix) { Entry<V> startingPoint = root.findChild(prefix); if(startingPoint == null) return Collections.emptySet(); return new EntrySet(startingPoint); } /** * Create a simple Entry which parent is null. */ private Entry<V> createRoot() { return new Entry<V>('r', null); } @Override public void clear() { root.clear(); size = 0; modCount++; } @Override public Set<Map.Entry<String, V>> entrySet() { return new EntrySet(root); } private final class EntrySet extends AbstractSet<Map.Entry<String, V>> { private final Entry<V> startingPoint; private EntrySet(Entry<V> startingPoint) { this.startingPoint = startingPoint; } @Override public Iterator<Map.Entry<String, V>> iterator() { return new EntryIterator(startingPoint); } @Override public int size() { return startingPoint.size(); } @Override public void clear() { startingPoint.clear(); } } private final class EntryIterator implements Iterator<Map.Entry<String, V>> { private int expectedModCount = modCount; private Entry<V> next; private Entry<V> lastReturned = null; private EntryIterator(Entry<V> startingPoint) { next = startingPoint; } @Override public boolean hasNext() { if(next == null) return false; // Don't recalculate next if someone calls hasNext twice without calling next if(next == lastReturned || lastReturned == null) { if(lastReturned == null) { next = next.successor(lastReturned, "", 0, true); } else { CharSequence lastKey = lastReturned.getKey(); int lastDepth = lastKey.length(); next = next.successor(lastReturned, lastKey, lastDepth, true); } } return next != null; } @Override public Map.Entry<String, V> next() { verifyUnmodified(); if(!hasNext()) throw new NoSuchElementException(); lastReturned = next; return next; } @Override public void remove() { verifyUnmodified(); if(lastReturned == null) throw new IllegalStateException("You probably forgot to call next before calling remove"); boolean failedToRemove = removeEntry(lastReturned) == null; if(failedToRemove) throw new IllegalStateException("You probably forgot to call next before calling remove"); expectedModCount = modCount; } private void verifyUnmodified() { if(expectedModCount != modCount) throw new ConcurrentModificationException("Trie modified during iteration"); } } }