/*
* Copyright 2009, Richard Eckart de Castilho
* Copyright 2012, Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.io.pdf;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.TreeMap;
/**
 * A trie (prefix tree) keyed by character sequences. Each node keeps its children in a
 * {@link TreeMap}, so children are visited in ascending character order. Zero-length or null keys
 * are not allowed. Null values are allowed.
 *
 * @param <V>
 *            the value type.
 */
public class Trie<V>
// implements Map<CharSequence, V>
{
    /** Number of keys that currently have a value (possibly null) stored. */
    private int _size = 0;

    /**
     * A single node of the trie.
     */
    public class Node
    {
        /** Child nodes, keyed by the next character of the key. */
        final Map<Character, Node> children;
        /** The value stored at this node; only meaningful when {@link #set} is true. */
        public V value;
        /** Depth of this node: 0 for the root, otherwise the length of the key prefix. */
        public final int level;
        /** Whether a value was explicitly stored here; distinguishes a null value from no value. */
        boolean set;

        Node(final int l)
        {
            children = new TreeMap<Character, Node>();
            level = l;
            set = false;
        }
    }

    private Node _root;

    /**
     * Create an empty Trie.
     */
    public Trie()
    {
        clear();
    }

    /**
     * Removes all keys by replacing the root with a fresh node.
     *
     * @see Map#clear()
     */
    public void clear()
    {
        _root = new Node(0);
        _size = 0;
    }

    /**
     * Associates the value with the key, creating intermediate nodes as needed.
     *
     * @param key
     *            the key.
     * @param value
     *            the value (may be null).
     * @return the old value, or null if there was none.
     * @throws NullPointerException
     *             if the key is null.
     * @throws IllegalArgumentException
     *             if the key has zero length.
     * @see java.util.Map#put(java.lang.Object, java.lang.Object)
     */
    public V put(final CharSequence key, final V value)
    {
        if (key == null) {
            throw new NullPointerException("Null keys are illegal");
        }
        if (key.length() == 0) {
            throw new IllegalArgumentException("Zero-length keys are illegal");
        }

        Node last = _root;
        for (int i = 0; i < key.length(); i++) {
            final char k = key.charAt(i);
            Node cur = last.children.get(k);
            if (cur == null) {
                // The level of a node equals the number of characters leading to it.
                cur = new Node(i + 1);
                last.children.put(k, cur);
            }
            last = cur;
        }

        // Only count the key if it did not hold a value before.
        if (!last.set) {
            _size++;
        }
        final V oldval = last.value;
        last.value = value;
        last.set = true;
        return oldval;
    }

    /**
     * Try to match the character sequence given in key against the trie starting at the given
     * offset in the key string. The longest stored key that is a prefix of the remaining
     * characters wins.
     *
     * @param key
     *            the key.
     * @param offset
     *            the offset.
     * @return the node of the longest match, or null if nothing matches, the key is null or the
     *         offset is outside the key.
     */
    public Node getNode(final CharSequence key, final int offset)
    {
        // null key or offset outside range
        if (key == null || offset < 0 || offset >= key.length()) {
            return null;
        }

        Node last = _root;
        Node match = null;
        for (int i = offset; i < key.length(); i++) {
            final Node cur = last.children.get(key.charAt(i));
            if (cur == null) {
                break;
            }
            // Remember the deepest node on the path that actually carries a value.
            if (cur.set) {
                match = cur;
            }
            last = cur;
        }
        return match;
    }

    /**
     * Try to match the character sequence given in key against the trie. This is the same as
     * calling getNode(key, 0, key.length()).
     *
     * @param key
     *            the key.
     * @return the node, or null if the key is null or not stored in the trie.
     */
    public Node getNode(final CharSequence key)
    {
        if (key == null) {
            return null;
        }
        return getNode(key, 0, key.length());
    }

    /**
     * Try to match the character sequence given in key against the trie starting at the given
     * offset in the key string using a specified number of characters.
     *
     * Returns the node even if there is no value set at that point of the Trie!
     *
     * @param key
     *            the key (must not be null).
     * @param offset
     *            the offset.
     * @param length
     *            the length to match.
     * @return the node reached after exactly {@code length} characters, or null if the range is
     *         invalid, empty, or the path does not exist.
     */
    private Node _getNode(final CharSequence key, final int offset, final int length)
    {
        // Offset or length outside range. An empty range never matches. The subtraction form
        // avoids integer overflow of offset + length.
        if (offset < 0 || length <= 0 || offset >= key.length()
                || length > key.length() - offset) {
            return null;
        }

        Node cur = _root;
        final int end = offset + length;
        for (int i = offset; i < end; i++) {
            cur = cur.children.get(key.charAt(i));
            if (cur == null) {
                return null;
            }
        }
        return cur;
    }

    /**
     * Try to match the character sequence given in key against the trie starting at the given
     * offset in the key string using a specified number of characters.
     *
     * @param key
     *            the key.
     * @param offset
     *            the offset.
     * @param length
     *            the length.
     * @return the node, or null if the key is null or no value is stored for exactly that range.
     */
    public Node getNode(final CharSequence key, final int offset, final int length)
    {
        if (key == null) {
            return null;
        }
        final Node match = _getNode(key, offset, length);
        return ((match != null) && match.set) ? match : null;
    }

    /**
     * Checks whether a value (possibly null) is stored under the given key.
     *
     * @param key
     *            the key.
     * @return true if the key is a CharSequence with a value stored in the trie.
     * @see java.util.Map#containsKey(java.lang.Object)
     */
    public boolean containsKey(final Object key)
    {
        if (!(key instanceof CharSequence)) {
            return false;
        }
        // Test for the node rather than the value, because null values are legal.
        return getNode((CharSequence) key) != null;
    }

    /**
     * Checks if the given string is a prefix of a key in the Trie.
     *
     * @param prefix
     *            the prefix.
     * @return if the prefix is in the trie; false for a null or zero-length prefix.
     */
    public boolean containsPrefix(final CharSequence prefix)
    {
        if (prefix == null) {
            return false;
        }
        return containsPrefix(prefix, 0, prefix.length());
    }

    /**
     * Checks if the given character sequence matches against the trie starting at the given offset
     * in the key string using a specified number of characters.
     *
     * @param prefix
     *            the prefix.
     * @param offset
     *            the offset.
     * @param length
     *            the length to match.
     * @return whether the prefix is in the trie.
     */
    public boolean containsPrefix(final CharSequence prefix, final int offset, final int length)
    {
        if (prefix == null) {
            return false;
        }
        return _getNode(prefix, offset, length) != null;
    }

    /**
     * Returns the value stored under the given key.
     *
     * @param key
     *            the key.
     * @return the value, or null if the key is not a CharSequence, is not stored, or maps to null.
     * @see java.util.Map#get(java.lang.Object)
     */
    public V get(final Object key)
    {
        if (!(key instanceof CharSequence)) {
            return null;
        }
        final Node n = getNode((CharSequence) key);
        return (n == null) ? null : n.value;
    }

    /**
     * @return whether the trie holds no keys.
     */
    public boolean isEmpty()
    {
        return _size == 0;
    }

    /**
     * Puts every entry of the given map into the trie.
     *
     * @param t
     *            the entries to add.
     * @see java.util.Map#putAll(java.util.Map)
     */
    public void putAll(final Map<? extends CharSequence, ? extends V> t)
    {
        for (final Map.Entry<? extends CharSequence, ? extends V> e : t.entrySet()) {
            put(e.getKey(), e.getValue());
        }
    }

    /**
     * @return the number of keys stored in the trie.
     */
    public int size()
    {
        return _size;
    }

    /**
     * Collects all stored values into a new list by a depth-first walk.
     *
     * @return the values.
     */
    public Collection<V> values()
    {
        final List<V> vals = new ArrayList<V>(_size);
        values(_root, vals);
        return vals;
    }

    /**
     * Utility method to collect the values below and including the given node.
     *
     * @param cur
     *            the current node.
     * @param vals
     *            the list receiving the values.
     */
    private void values(final Node cur, final List<V> vals)
    {
        if (cur == null) {
            return;
        }
        if (cur.set) {
            vals.add(cur.value);
        }
        for (final Node n : cur.children.values()) {
            values(n, vals);
        }
    }

    /**
     * Collects all stored keys into a new set.
     *
     * @return the keys.
     */
    public Set<String> keys()
    {
        final Set<String> vals = new HashSet<String>(_size);
        final StringBuilder b = new StringBuilder();
        for (final Map.Entry<Character, Node> e : _root.children.entrySet()) {
            b.setLength(0);
            keys(e.getKey(), e.getValue(), b, vals);
        }
        return vals;
    }

    /**
     * @return a new iterator over the stored keys.
     */
    public Iterator<String> keyIterator()
    {
        return new KeyIterator();
    }

    /**
     * Utility method to collect the keys.
     *
     * @param c
     *            the character under which the current node is filed in its parent node.
     * @param n
     *            the current node.
     * @param b
     *            a re-used string buffer in which the keys are manifested one after the other.
     * @param vals
     *            the found key values.
     */
    private void keys(final Character c, final Node n, final StringBuilder b, final Set<String> vals)
    {
        b.append(c);
        if (n.set) {
            vals.add(b.toString());
        }
        for (final Map.Entry<Character, Node> e : n.children.entrySet()) {
            // Cut off whatever a previously visited sibling subtree appended.
            b.setLength(n.level);
            keys(e.getKey(), e.getValue(), b, vals);
        }
    }

    /**
     * Iterator over the keys of the trie. It walks the trie depth-first with an explicit stack of
     * frames and always holds the next key to be returned in a shared buffer. Removal is not
     * supported.
     */
    public class KeyIterator
        implements Iterator<String>
    {
        /** Holds the key that the next call to {@link #next()} returns. */
        private final StringBuilder sb;
        /** Path of frames from the root to the node that produced the key in the buffer. */
        private final Stack<Frame> stack;

        /**
         * Traversal state for one node on the current path.
         */
        private class Frame
        {
            private final Character _c;
            private final Node _n;
            private final Iterator<Character> _i;
            /** True once the node itself was rendered, or if it has nothing to render. */
            private boolean _nodeDone;

            public Frame(final Character c, final Node n)
            {
                _c = c;
                _n = n;
                _i = n.children.keySet().iterator();
                // The root (no character) and nodes without a value are never rendered.
                _nodeDone = _c == null || !_n.set;
            }

            boolean hasNext()
            {
                return _i.hasNext() || !_nodeDone;
            }

            void step()
            {
                // Append this node's character and then cut the buffer to the node's depth. If the
                // buffer still held a longer, previously rendered key, the cut leaves exactly the
                // prefix leading to this node.
                if (_c != null) {
                    sb.append(_c);
                }
                sb.setLength(_n.level);

                if (!_nodeDone) {
                    // Render the node self once
                    _nodeDone = true;
                }
                else {
                    // Render the children
                    final Character c = _i.next();
                    final Frame f = new Frame(c, _n.children.get(c));
                    stack.push(f);
                    f.step();
                }
            }
        }

        {
            sb = new StringBuilder();
            stack = new Stack<Frame>();
            stack.push(new Frame(null, _root));
            // Position on the first key (if any).
            step();
        }

        /**
         * Advances to the next key, leaving the stack empty when there is none.
         */
        private void step()
        {
            // Remove done stuff from the stack.
            while (!stack.isEmpty() && !stack.peek().hasNext()) {
                stack.pop();
            }
            if (!stack.isEmpty()) {
                stack.peek().step(); // Go to the next
            }
        }

        @Override
        public boolean hasNext()
        {
            return !stack.isEmpty();
        }

        /**
         * @return the next key.
         * @throws java.util.NoSuchElementException
         *             if there are no more keys.
         */
        @Override
        public String next()
        {
            if (stack.isEmpty()) {
                throw new java.util.NoSuchElementException();
            }
            final String s = sb.toString();
            step();
            return s;
        }

        @Override
        public void remove()
        {
            throw new UnsupportedOperationException("Remove not supported");
        }
    }
}