/*
* Copyright 2012 Takao Nakaguchi
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.trie4j.patricia;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.trie4j.AbstractTrie;
import org.trie4j.NodeVisitor;
import org.trie4j.Trie;
import org.trie4j.util.Pair;
/**
 * A mutable Patricia trie: every node carries a run of characters (its
 * "letters") instead of a single character, and the children of a node are
 * kept ordered by the first character of their letters.
 *
 * <p>The root node is created through {@link #newNode()} and is the starting
 * point of every lookup and insertion. Subclasses can substitute their own
 * node type by overriding the {@code newNode} / {@code newNodeArray}
 * factory methods.
 *
 * <p>This class performs no synchronization of its own.
 */
public class PatriciaTrie
extends AbstractTrie
implements Serializable, Trie{
    /**
     * Creates an empty trie.
     */
    public PatriciaTrie(){
    }

    /**
     * Creates a trie and inserts the given words in argument order.
     *
     * @param words the words to insert
     */
    public PatriciaTrie(String... words){
        // NOTE: insert(String) is overridable; subclasses that override it
        // are invoked from here before their own constructor body has run.
        for(String s : words){
            insert(s);
        }
    }

    /**
     * Returns the node counter maintained by {@code insert}. It is increased
     * by one for every node created while inserting; the root node created
     * at construction time is not included in the count.
     *
     * @return the number of nodes created by insertions
     */
    @Override
    public int nodeSize() {
        return nodeSize;
    }

    /**
     * Returns the number of distinct words stored. Re-inserting a word that
     * is already present does not change this value.
     *
     * @return the number of stored words
     */
    @Override
    public int size() {
        return size;
    }

    /**
     * Tests whether the exact word has been inserted.
     *
     * @param text the word to look up
     * @return {@code true} if {@code text} ends exactly on a terminating node
     */
    @Override
    public boolean contains(String text) {
        PatriciaTrieNode node = findNode(text);
        return node != null && node.isTerminate();
    }

    /**
     * Returns the terminating node that represents the exact word.
     *
     * @param text the word to look up
     * @return the node for {@code text}, or {@code null} if the word is not
     *         stored (including the case where {@code text} is only a prefix
     *         of stored words)
     */
    public PatriciaTrieNode getNode(String text){
        PatriciaTrieNode node = findNode(text);
        return (node != null && node.isTerminate()) ? node : null;
    }

    /**
     * Finds every stored word that is a prefix of {@code query} (the query
     * itself included, if stored), shortest first.
     *
     * @param query the text whose prefixes are searched
     * @return a new list of matching words; empty if there are none
     */
    @Override
    public Iterable<String> commonPrefixSearch(String query) {
        List<String> ret = new ArrayList<String>();
        for(Match m : commonPrefixMatches(query)){
            ret.add(m.key);
        }
        return ret;
    }

    /**
     * Same search as {@link #commonPrefixSearch(String)}, but each word is
     * paired with the node that terminates it.
     *
     * @param query the text whose prefixes are searched
     * @return a new list of (word, node) pairs; empty if there are none
     */
    public Iterable<Pair<String, PatriciaTrieNode>> commonPrefixSearchWithNode(String query) {
        List<Pair<String, PatriciaTrieNode>> ret = new ArrayList<Pair<String, PatriciaTrieNode>>();
        for(Match m : commonPrefixMatches(query)){
            ret.add(Pair.create(m.key, m.node));
        }
        return ret;
    }

    /**
     * Finds every stored word that starts with {@code prefix}. The word at
     * the node where the prefix ends (if that node terminates) comes first,
     * followed by its descendants in depth-first order, visiting children in
     * the order of each node's children array.
     *
     * @param prefix the leading text every result must start with
     * @return the matching words; an immutable empty list if no stored word
     *         starts with {@code prefix}
     */
    @Override
    public Iterable<String> predictiveSearch(String prefix) {
        Match start = findPrefix(prefix);
        if(start == null){
            return Collections.emptyList();
        }
        List<String> ret = new ArrayList<String>();
        if(start.node.isTerminate()){
            ret.add(start.key);
        }
        enumLetters(start.node, start.key, ret);
        return ret;
    }

    /**
     * Same search as {@link #predictiveSearch(String)}, but each word is
     * paired with the node that terminates it.
     *
     * @param prefix the leading text every result must start with
     * @return the matching (word, node) pairs; an immutable empty list if no
     *         stored word starts with {@code prefix}
     */
    public Iterable<Pair<String, PatriciaTrieNode>> predictiveSearchWithNode(String prefix) {
        Match start = findPrefix(prefix);
        if(start == null){
            return Collections.emptyList();
        }
        List<Pair<String, PatriciaTrieNode>> ret = new ArrayList<Pair<String, PatriciaTrieNode>>();
        if(start.node.isTerminate()){
            ret.add(Pair.create(start.key, start.node));
        }
        enumLettersWithNode(start.node, start.key, ret);
        return ret;
    }

    /**
     * Inserts a word into the trie, starting at the root.
     *
     * @param text the word to insert
     */
    public void insert(String text){
        insert(root, text, 0);
    }

    /**
     * Inserts {@code letters[offset..]} into the subtree rooted at
     * {@code node} and returns the node that terminates the inserted word.
     *
     * <p>At each visited node one of four things happens:
     * <ol>
     * <li>the word diverges inside the node's letters: the node is split
     *     into a common head with two children (old tail, new tail);</li>
     * <li>the word ends exactly at the end of the node: the node is marked
     *     as terminating;</li>
     * <li>the word ends inside the node's letters: the node is cut at that
     *     point, keeps the head as a terminating node, and the tail becomes
     *     its only child;</li>
     * <li>the word continues past the node: descend into the child whose
     *     first letter matches, or add a new leaf at the sorted position.</li>
     * </ol>
     *
     * @param node    the node at which matching starts
     * @param letters the complete word being inserted
     * @param offset  index in {@code letters} of the first character to be
     *                matched against {@code node}
     * @return the terminating node for the word (new or pre-existing)
     */
    protected PatriciaTrieNode insert(PatriciaTrieNode node, String letters, int offset){
        int lettersRest = letters.length() - offset;
        while(true){
            // Read once per iteration; nothing below replaces the node's
            // letters before these values are last used.
            char[] nodeLetters = node.getLetters();
            int thisLettersLength = nodeLetters.length;
            int n = Math.min(lettersRest, thisLettersLength);
            int i = 0;
            while(i < n && letters.charAt(i + offset) == nodeLetters[i]){
                i++;
            }
            if(i != n){
                // Case 1: mismatch inside this node. child1 takes over the
                // old tail together with the old terminate flag and children.
                PatriciaTrieNode child1 = newNode(
                        Arrays.copyOfRange(nodeLetters, i, thisLettersLength)
                        , node);
                PatriciaTrieNode child2 = newNode(
                        letters.substring(i + offset).toCharArray()
                        , true);
                node.setLetters(Arrays.copyOfRange(nodeLetters, 0, i));
                node.setTerminate(false);
                // The first letters differ (that is the mismatch), so a
                // strict comparison is enough to keep the children sorted.
                node.setChildren(
                        (child1.getLetters()[0] < child2.getLetters()[0]) ?
                        newNodeArray(child1, child2) : newNodeArray(child2, child1));
                size++;
                nodeSize += 2;
                return child2;
            }
            if(lettersRest == thisLettersLength){
                // Case 2: the word ends exactly here.
                if(!node.isTerminate()){
                    node.setTerminate(true);
                    size++;
                }
                return node;
            }
            if(lettersRest < thisLettersLength){
                // Case 3: the word is a proper prefix of this node's letters
                // (here i == lettersRest).
                PatriciaTrieNode newChild = newNode(
                        Arrays.copyOfRange(nodeLetters, lettersRest, thisLettersLength)
                        , node);
                node.setLetters(Arrays.copyOfRange(nodeLetters, 0, i));
                node.setTerminate(true);
                node.setChildren(newNodeArray(newChild));
                size++;
                nodeSize++;
                return node;
            }
            // Case 4: this node is fully matched (i == thisLettersLength)
            // and the word goes on.
            PatriciaTrieNode[] children = node.getChildren();
            int index = indexOfChild(children, letters.charAt(i + offset));
            if(index >= 0){
                node = children[index];
                offset += i;
                lettersRest -= i;
                continue;
            }
            PatriciaTrieNode child = newNode(letters.substring(i + offset).toCharArray(), true);
            node.addChild(-(index + 1), child);
            size++;
            nodeSize++;
            return child;
        }
    }

    /**
     * Passes the visitor to the root node's {@code visit} method with an
     * initial nest value of 0.
     *
     * @param visitor the visitor to hand to the root node
     */
    public void visit(NodeVisitor visitor){
        root.visit(visitor, 0);
    }

    /**
     * Returns the root node of this trie.
     *
     * @return the root node
     */
    public PatriciaTrieNode getRoot(){
        return root;
    }

    /**
     * Creates a node through the no-argument node constructor. Used to build
     * the root.
     *
     * @return a new node
     */
    protected PatriciaTrieNode newNode(){
        return new PatriciaTrieNode();
    }

    /**
     * Creates a node with the given letters that takes over the terminate
     * flag and the children array reference of {@code source}. Used when an
     * existing node is split.
     *
     * @param letters the letters of the new node
     * @param source  the node whose terminate flag and children are reused
     * @return a new node
     */
    protected PatriciaTrieNode newNode(char[] letters, PatriciaTrieNode source){
        return new PatriciaTrieNode(letters, source.isTerminate(), source.getChildren());
    }

    /**
     * Creates a node with the given letters and terminate flag.
     *
     * @param letters    the letters of the new node
     * @param terminated whether the new node terminates a word
     * @return a new node
     */
    protected PatriciaTrieNode newNode(char[] letters, boolean terminated) {
        return new PatriciaTrieNode(letters, terminated);
    }

    /**
     * Returns the given nodes as an array (the varargs array itself).
     *
     * @param nodes the nodes to place in the array
     * @return the array holding {@code nodes}
     */
    protected PatriciaTrieNode[] newNodeArray(PatriciaTrieNode... nodes){
        return nodes;
    }

    /** A node reached by a search together with the text that leads to it. */
    private static final class Match{
        final PatriciaTrieNode node;
        final String key;

        Match(PatriciaTrieNode node, String key){
            this.node = node;
            this.key = key;
        }
    }

    /**
     * Walks down from the root consuming {@code text} and returns the node
     * at which the text ends exactly on a node boundary, terminating or not.
     * Returns {@code null} when the text leaves the trie or ends in the
     * middle of a node's letters.
     */
    private PatriciaTrieNode findNode(String text){
        PatriciaTrieNode node = root;
        int n = text.length();
        int pos = 0;
        while(pos < n){
            // getChild() selects by first letter, so letters[0] is consumed here.
            node = node.getChild(text.charAt(pos++));
            if(node == null){
                return null;
            }
            char[] letters = node.getLetters();
            for(int j = 1; j < letters.length; j++, pos++){
                if(pos == n || text.charAt(pos) != letters[j]){
                    return null;
                }
            }
        }
        return node;
    }

    /**
     * Collects, shortest first, every terminating node whose full key is a
     * prefix of {@code query}.
     */
    private List<Match> commonPrefixMatches(String query){
        List<Match> matches = new ArrayList<Match>();
        char[] queryChars = query.toCharArray();
        int cur = 0;
        PatriciaTrieNode node = root;
        while(node != null){
            char[] letters = node.getLetters();
            int end = cur + letters.length;
            // A node only counts if all of its letters fit into the query.
            if(end > queryChars.length
                    || !regionMatches(queryChars, cur, letters, letters.length)){
                break;
            }
            if(node.isTerminate()){
                matches.add(new Match(node, new String(queryChars, 0, end)));
            }
            if(end == queryChars.length){
                break;
            }
            cur = end;
            node = node.getChild(queryChars[cur]);
        }
        return matches;
    }

    /**
     * Locates the node in which {@code prefix} ends. The returned key is the
     * prefix extended by whatever letters of that node lie beyond the end of
     * the prefix, i.e. the full text leading to the node. Returns
     * {@code null} when no path in the trie starts with {@code prefix}.
     */
    private Match findPrefix(String prefix){
        char[] queryChars = prefix.toCharArray();
        int cur = 0;
        PatriciaTrieNode node = root;
        while(node != null){
            char[] letters = node.getLetters();
            int n = Math.min(letters.length, queryChars.length - cur);
            if(!regionMatches(queryChars, cur, letters, n)){
                return null;
            }
            cur += n;
            if(cur == queryChars.length){
                int rest = letters.length - n;
                String key = (rest > 0) ? prefix + new String(letters, n, rest) : prefix;
                return new Match(node, key);
            }
            node = node.getChild(queryChars[cur]);
        }
        return null;
    }

    /**
     * Tests whether {@code text[offset .. offset+count)} equals the first
     * {@code count} characters of {@code letters}.
     */
    private static boolean regionMatches(char[] text, int offset, char[] letters, int count){
        for(int i = 0; i < count; i++){
            if(text[offset + i] != letters[i]){
                return false;
            }
        }
        return true;
    }

    /**
     * Searches {@code children}, which must be sorted by the first character
     * of their letters, for the child starting with {@code first}. Small
     * arrays are scanned linearly, larger ones by binary search.
     *
     * @return the index of the matching child, or {@code -(insertionPoint) - 1}
     *         if there is none (same encoding as {@code Arrays.binarySearch})
     */
    private static int indexOfChild(PatriciaTrieNode[] children, char first){
        int count = children.length;
        if(count <= LINEAR_SCAN_LIMIT){
            for(int i = 0; i < count; i++){
                char head = children[i].getLetters()[0];
                if(first < head){
                    return -(i + 1);
                }
                if(first == head){
                    return i;
                }
            }
            return -(count + 1);
        }
        int lo = 0;
        int hi = count;
        while(lo < hi){
            int mid = (lo + hi) >>> 1;
            char head = children[mid].getLetters()[0];
            if(first == head){
                return mid;
            }
            if(first < head){
                hi = mid;
            } else{
                lo = mid + 1;
            }
        }
        return -(lo + 1);
    }

    /**
     * Appends to {@code letters} every word stored strictly below
     * {@code node}, depth first; {@code prefix} is the text leading to
     * {@code node}.
     */
    private static void enumLetters(PatriciaTrieNode node, String prefix, List<String> letters){
        for(PatriciaTrieNode child : node.getChildren()){
            String text = prefix + new String(child.getLetters());
            if(child.isTerminate()){
                letters.add(text);
            }
            enumLetters(child, text, letters);
        }
    }

    /**
     * Appends to {@code letters} every (word, node) pair stored strictly
     * below {@code node}, depth first; {@code prefix} is the text leading to
     * {@code node}.
     */
    private static void enumLettersWithNode(PatriciaTrieNode node, String prefix, List<Pair<String, PatriciaTrieNode>> letters){
        for(PatriciaTrieNode child : node.getChildren()){
            String text = prefix + new String(child.getLetters());
            if(child.isTerminate()){
                letters.add(Pair.create(text, child));
            }
            enumLettersWithNode(child, text, letters);
        }
    }

    /** Largest children array that is scanned linearly instead of by binary search. */
    private static final int LINEAR_SCAN_LIMIT = 16;

    /** Number of distinct words stored. */
    private int size;
    /** Number of nodes created by insertions (the root is not counted). */
    private int nodeSize;
    /** Root node; created through the overridable {@link #newNode()} factory. */
    private PatriciaTrieNode root = newNode();
    private static final long serialVersionUID = -7611399538600722195L;
}