/*
* Copyright 2012 Takao Nakaguchi
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.trie4j.patricia;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import org.trie4j.AbstractTrie;
import org.trie4j.Node;
import org.trie4j.Trie;
import org.trie4j.tail.FastTailCharIterator;
import org.trie4j.tail.TailCharIterator;
import org.trie4j.tail.builder.SuffixTrieTailBuilder;
import org.trie4j.tail.builder.TailBuilder;
import org.trie4j.util.Pair;
public class TailPatriciaTrie
extends AbstractTrie
implements Serializable, Trie{
public TailPatriciaTrie() {
this(new SuffixTrieTailBuilder());
}
public TailPatriciaTrie(TailBuilder builder){
this.tailBuilder = builder;
this.tails = builder.getTails();
}
public TailPatriciaTrie(Trie orig, TailBuilder builder){
this.tailBuilder = builder;
this.tails = builder.getTails();
this.root = cloneNode(orig.getRoot());
this.size = orig.size();
this.nodeSize = orig.nodeSize();
trimToSize();
}
private TailPatriciaTrieNode cloneNode(Node node){
char[] letters = node.getLetters();
char fc = letters.length == 0 ? (char)0xffff : letters[0];
int ti = letters.length < 2 ? -1 : tailBuilder.insert(letters, 1, letters.length - 1);
Node[] orgChildren = node.getChildren();
TailPatriciaTrieNode[] children = newNodeArray(orgChildren.length);
for(int i = 0; i < children.length; i++){
children[i] = cloneNode(orgChildren[i]);
}
return new TailPatriciaTrieNode(fc, ti, node.isTerminate(), children);
}
@Override
public int size() {
return size;
}
@Override
public int nodeSize() {
return nodeSize;
}
@Override
public Node getRoot() {
return new TailPatriciaTrieNodeAdapter(root, tails);
}
@Override
public boolean contains(String text) {
TailPatriciaTrieNode node = root;
FastTailCharIterator it = new FastTailCharIterator(tails, -1);
int n = text.length();
for(int i = 0; i < n; i++){
node = node.getChild(text.charAt(i));
if(node == null) return false;
int ti = node.getTailIndex();
if(ti == -1) continue;
it.setIndex(node.getTailIndex());
char c;
while((c = it.getNext()) != '\0'){
i++;
if(i == n) return false;
if(text.charAt(i) != c) return false;
}
}
return node.isTerminate();
}
public TailPatriciaTrieNode getNode(String text) {
TailPatriciaTrieNode node = root;
FastTailCharIterator it = new FastTailCharIterator(tails, -1);
int n = text.length();
for(int i = 0; i < n; i++){
node = node.getChild(text.charAt(i));
if(node == null) return null;
int ti = node.getTailIndex();
if(ti == -1) continue;
it.setIndex(node.getTailIndex());
char c;
while((c = it.getNext()) != '\0'){
i++;
if(i == n) return null;
if(text.charAt(i) != c) return null;
}
}
return node;
}
public CharSequence getTails() {
return tails;
}
@Override
public int findWord(CharSequence chars, int start, int end, StringBuilder word){
TailCharIterator it = new TailCharIterator(tails, -1);
for(int i = start; i < end; i++){
TailPatriciaTrieNode node = root;
for(int j = i; j < end; j++){
node = node.getChild(chars.charAt(j));
if(node == null) break;
boolean matched = true;
it.setIndex(node.getTailIndex());
while(it.hasNext()){
j++;
if(j == end || chars.charAt(j) != it.next()){
matched = false;
break;
}
}
if(matched){
if(node.isTerminate()){
if(word != null) word.append(chars, i, j + 1);
return i;
}
} else{
break;
}
}
}
return -1;
}
@Override
public Iterable<String> commonPrefixSearch(final String query) {
if(query.length() == 0) return new ArrayList<String>(0);
return new Iterable<String>(){
@Override
public Iterator<String> iterator() {
return new Iterator<String>() {
private int cur;
private StringBuilder currentChars = new StringBuilder();
private TailPatriciaTrieNode current = root;
private String next;
{
cur = 0;
findNext();
}
private void findNext(){
next = null;
while(next == null){
if(query.length() <= cur) return;
TailPatriciaTrieNode child = current.getChild(query.charAt(cur));
if(child == null) return;
int rest = query.length() - cur;
char[] letters = child.getLetters(tails);
int len = letters.length;
if(rest < len) return;
for(int i = 1; i < len; i++){
int c = letters[i] - query.charAt(cur + i);
if(c != 0) return;
}
String b = query.substring(cur, cur + len);
if(child.isTerminate()){
next = currentChars + b;
}
cur += len;
currentChars.append(b);
current = child;
}
}
@Override
public boolean hasNext() {
return next != null;
}
@Override
public String next() {
String ret = next;
if(ret == null){
throw new NoSuchElementException();
}
findNext();
return ret;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
};
}
public Iterable<Pair<String, TailPatriciaTrieNode>> commonPrefixSearchWithNode(final String query) {
if(query.length() == 0) return new ArrayList<Pair<String, TailPatriciaTrieNode>>(0);
return new Iterable<Pair<String, TailPatriciaTrieNode>>(){
@Override
public Iterator<Pair<String, TailPatriciaTrieNode>> iterator() {
return new Iterator<Pair<String, TailPatriciaTrieNode>>() {
private int cur;
private StringBuilder currentChars = new StringBuilder();
private TailPatriciaTrieNode current = root;
private Pair<String, TailPatriciaTrieNode> next;
{
cur = 0;
findNext();
}
private void findNext(){
next = null;
while(next == null){
if(query.length() <= cur) return;
TailPatriciaTrieNode child = current.getChild(query.charAt(cur));
if(child == null) return;
int rest = query.length() - cur;
char[] letters = child.getLetters(tails);
int len = letters.length;
if(rest < len) return;
for(int i = 1; i < len; i++){
int c = letters[i] - query.charAt(cur + i);
if(c != 0) return;
}
String b = query.substring(cur, cur + len);
cur += len;
currentChars.append(b);
if(child.isTerminate()){
next = Pair.create(currentChars.toString(), child);
}
current = child;
}
}
@Override
public boolean hasNext() {
return next != null;
}
@Override
public Pair<String, TailPatriciaTrieNode> next() {
Pair<String, TailPatriciaTrieNode> ret = next;
if(ret == null){
throw new NoSuchElementException();
}
findNext();
return ret;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
};
}
@Override
public Iterable<String> predictiveSearch(String prefix) {
char[] queryChars = prefix.toCharArray();
int cur = 0;
TailPatriciaTrieNode node = root;
while(node != null){
char[] letters = node.getLetters(tails);
int n = Math.min(letters.length, queryChars.length - cur);
for(int i = 0; i < n; i++){
if(letters[i] != queryChars[cur + i]){
return Collections.emptyList();
}
}
cur += n;
if(queryChars.length == cur){
List<String> ret = new ArrayList<String>();
prefix += new String(letters, n, letters.length - n);
if(node.isTerminate()) ret.add(prefix);
enumLetters(node, prefix, ret);
return ret;
}
node = node.getChild(queryChars[cur]);
}
return Collections.emptyList();
}
public Iterable<Pair<String, TailPatriciaTrieNode>> predictiveSearchWithNode(String prefix) {
char[] queryChars = prefix.toCharArray();
int cur = 0;
TailPatriciaTrieNode node = root;
while(node != null){
char[] letters = node.getLetters(tails);
int n = Math.min(letters.length, queryChars.length - cur);
for(int i = 0; i < n; i++){
if(letters[i] != queryChars[cur + i]){
return Collections.emptyList();
}
}
cur += n;
if(queryChars.length == cur){
List<Pair<String, TailPatriciaTrieNode>> ret = new ArrayList<Pair<String, TailPatriciaTrieNode>>();
prefix += new String(letters, n, letters.length - n);
if(node.isTerminate()) ret.add(Pair.create(prefix, node));
enumLettersWithNode(node, prefix, ret);
return ret;
}
node = node.getChild(queryChars[cur]);
}
return Collections.emptyList();
}
private void enumLetters(TailPatriciaTrieNode node, String prefix, List<String> letters){
TailPatriciaTrieNode[] children = node.getChildren();
if(children == null) return;
for(TailPatriciaTrieNode child : children){
String text = prefix + new String(child.getLetters(tails));
if(child.isTerminate()) letters.add(text);
enumLetters(child, text, letters);
}
}
private void enumLettersWithNode(TailPatriciaTrieNode node, String prefix, List<Pair<String, TailPatriciaTrieNode>> letters){
TailPatriciaTrieNode[] children = node.getChildren();
if(children == null) return;
for(TailPatriciaTrieNode child : children){
String text = prefix + new String(child.getLetters(tails));
if(child.isTerminate()) letters.add(Pair.create(text, child));
enumLettersWithNode(child, text, letters);
}
}
@Override
public void insert(String text){
if(tailBuilder == null){
throw new UnsupportedOperationException("insert isn't permitted for freezed trie");
}
insert(root, text, 0);
}
protected TailPatriciaTrieNode insert(TailPatriciaTrieNode node, String letters, int offset){
TailCharIterator it = new TailCharIterator(tails, node.getTailIndex());
int count = 0;
boolean matchComplete = true;
int lettersLength = letters.length();
while(it.hasNext() && offset < lettersLength){
if(letters.charAt(offset) != it.next()){
matchComplete = false;
break;
}
offset++;
count++;
}
if(offset == lettersLength){
if(it.hasNext()){
// n: abcde
// l: abc
char c = it.next();
int idx = it.getNextIndex();
if(!it.hasNext()){
idx = -1;
}
TailPatriciaTrieNode newChild = newNode(c, idx, node);
node.setTailIndex(
(count > 0) ? tailBuilder.insert(letters, offset - count, count)
: -1
);
node.setChildren(newNodeArray(newChild));
node.setTerminate(true);
size++;
nodeSize++;
return node;
} else{
// n: abc
// l: abc
if(!node.isTerminate()){
node.setTerminate(true);
size++;
}
return node;
}
} else{
if(!matchComplete){
// n: abcwz
// l: abcde
int firstOffset = offset - count;
char n1Fc = it.current();
int n1Idx = it.getNextIndex();
if(!it.hasNext()){
n1Idx = -1;
}
TailPatriciaTrieNode n1 = newNode(n1Fc, n1Idx, node);
char n2Fc = letters.charAt(offset++);
int n2Idx = (offset < lettersLength) ?
tailBuilder.insert(letters, offset, lettersLength - offset) :
-1;
TailPatriciaTrieNode n2 = newNode(n2Fc, n2Idx, true);
if(count > 0){
node.setTailIndex(tailBuilder.insert(letters, firstOffset, count));
} else{
node.setTailIndex(-1);
}
node.setTerminate(false);
node.setChildren(
(n1.getFirstLetter() < n2.getFirstLetter()) ?
newNodeArray(n1, n2) : newNodeArray(n2, n1));
size++;
nodeSize += 2;
return n2;
} else{
// n: abc
// l: abcde
char fc = letters.charAt(offset++);
// find node
Pair<TailPatriciaTrieNode, Integer> ret = node.findNode(fc);
TailPatriciaTrieNode child = ret.getFirst();
if(child != null){
return insert(child, letters, offset);
} else{
int idx = (offset < lettersLength) ?
tailBuilder.insert(letters, offset, lettersLength - offset) :
-1;
TailPatriciaTrieNode newNode = newNode(fc, idx, true);
node.addChild(ret.getSecond(), newNode);
size++;
nodeSize++;
return newNode;
}
}
}
}
@Override
public void trimToSize() {
if(tails instanceof StringBuilder){
((StringBuilder)tails).trimToSize();
}
}
@Override
public void freeze(){
trimToSize();
tailBuilder = null;
}
public TailBuilder getTailBuilder(){
return tailBuilder;
}
private void writeObject(ObjectOutputStream out)
throws IOException{
trimToSize();
out.defaultWriteObject();
}
protected TailPatriciaTrieNode newNode(){
return new TailPatriciaTrieNode((char)0xffff, -1, false, newNodeArray());
}
protected TailPatriciaTrieNode newNode(char firstChar, int tailIndex, TailPatriciaTrieNode source){
return new TailPatriciaTrieNode(firstChar, tailIndex, source.isTerminate(), source.getChildren());
}
protected TailPatriciaTrieNode newNode(char firstChar, int tailIndex, boolean terminated) {
return new TailPatriciaTrieNode(firstChar, tailIndex, terminated, newNodeArray());
}
protected TailPatriciaTrieNode[] newNodeArray(TailPatriciaTrieNode... nodes){
return nodes;
}
protected TailPatriciaTrieNode[] newNodeArray(int size){
return new TailPatriciaTrieNode[size];
}
private int size;
private int nodeSize;
private TailPatriciaTrieNode root = newNode();
private TailBuilder tailBuilder;
private CharSequence tails;
private static final long serialVersionUID = -2084269385978925271L;
}