/*
* Copyright 2008-2011 Grant Ingersoll, Thomas Morton and Drew Farris
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* -------------------
* To purchase or learn more about Taming Text, by Grant Ingersoll, Thomas Morton and Drew Farris, visit
* http://www.manning.com/ingersoll
*/
package com.tamingtext.fuzzy;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
/**
 * A node of a trie (prefix tree) over the lower-case ASCII letters 'a'..'z'.
 *
 * <p>Each node is in one of two shapes:
 * <ul>
 *   <li>a <em>branching</em> node: {@code suffix} is {@code null} and
 *       {@code children} holds one slot per letter;</li>
 *   <li>a <em>suffix</em> node: {@code suffix} holds the remaining letters of
 *       the only word stored below this node and {@code children} is
 *       {@code null}. The node is turned into a branching node the first time
 *       a different word has to pass through it.</li>
 * </ul>
 * Independently of its shape, {@code isWord} records whether the letters on
 * the path from the root to this node form a stored word.
 */
public class TrieNode {
  /** Number of child slots, one per letter 'a'..'z'. */
  private static final int ALPHABET_SIZE = 26;

  /** Upper bound on the initial capacity of the result list in {@link #getWords}. */
  private static final int MAX_INITIAL_CAPACITY = 64;

  //<start id="trie-node1"/>
  private boolean isWord; //<co id="co.trie.word"/>
  private TrieNode[] children;
  private String suffix; //<co id="co.trie.suffix"/>

  /**
   * Creates a node.
   *
   * @param word   whether the path leading to this node is itself a word
   * @param suffix remaining letters of the single word below this node, or
   *               {@code null} to create a branching node with empty child
   *               slots; expected to consist of letters 'a'..'z' only
   */
  public TrieNode(boolean word, String suffix) {
    this.isWord = word;
    if (suffix == null) children = new TrieNode[ALPHABET_SIZE]; //<co id="co.trie.children"/>
    this.suffix = suffix;
  }
  /*
  <calloutlist>
  <callout arearefs="co.trie.word"><para>Does this prefix make a word.</para></callout>
  <callout arearefs="co.trie.suffix"><para>Rest of word if prefix is unique.</para></callout>
  <callout arearefs="co.trie.children"><para>Initialize children for each letter.</para></callout>
  </calloutlist>
  */
  //<end id="trie-node1"/>

  /**
   * Creates a branching node (no suffix).
   *
   * @param word whether the path leading to this node is itself a word
   */
  public TrieNode(boolean word) {
    this(word, null);
  }

  /** @return {@code true} if the path leading to this node is a stored word */
  public boolean isWord() {
    return isWord;
  }

  //<start id="trie-addWord"/>
  /**
   * Adds a word to the trie rooted at this node. The word is lower-cased
   * (locale-independently) before it is stored.
   *
   * @param word the word to add; must not be {@code null}
   * @return {@code true} if the word was added; {@code false} if it was
   *         already present or contains a character outside 'a'..'z' after
   *         lower-casing (such words cannot be indexed and are not stored)
   */
  public boolean addWord(String word) {
    String lower = word.toLowerCase(java.util.Locale.ROOT);
    if (!isIndexable(lower)) {
      return false; // would index outside the 26 child slots
    }
    return addWord(lower, 0);
  }

  /**
   * Recursive worker for {@link #addWord(String)}.
   *
   * @param word  the full lower-case word being inserted
   * @param index number of leading characters of {@code word} already
   *              consumed on the way to this node
   * @return {@code true} if the word was newly added below this node
   */
  private boolean addWord(String word, int index) {
    if (index == word.length()) { //<co id="co.trie.end-of-word"/>
      if (isWord) {
        return false; //<co id="co.trie.duplicate"/>
      }
      else {
        isWord = true; //<co id="co.trie.mark-word"/>
        return true;
      }
    }
    if (suffix != null) { //<co id="co.trie.has-suf"/>
      if (suffix.equals(word.substring(index))) {
        return false; //<co id="co.trie.duplicate-suf"/>
      }
      // A second, different word passes through this node: convert it to a
      // branching node and push the old suffix one level down.
      String tmp = suffix;
      this.suffix = null;
      children = new TrieNode[ALPHABET_SIZE];
      addWord(tmp, 0); //<co id="co.trie.split-suffix"/>
    }
    int ci = word.charAt(index) - (int) 'a';
    TrieNode child = children[ci];
    if (child == null) {
      if (word.length() == index + 1) {
        // Last letter of the word: the child itself marks the word.
        children[ci] = new TrieNode(true, null); //<co id="co.trie.create-word"/>
      }
      else {
        children[ci] = new TrieNode(false, word.substring(index + 1)); //<co id="co.trie.create-suf"/>
      }
      return true;
    }
    return child.addWord(word, index + 1); //<co id="co.trie.recurse"/>
  }
  /*
  <calloutlist>
  <callout arearefs="co.trie.end-of-word"><para>Check if end of the word.</para></callout>
  <callout arearefs="co.trie.duplicate"><para>Existing word; return false.</para></callout>
  <callout arearefs="co.trie.mark-word"><para>Mark prefix as a word.</para></callout>
  <callout arearefs="co.trie.has-suf"><para>Check if this node has a suffix.</para></callout>
  <callout arearefs="co.trie.duplicate-suf"><para>Existing word; return false.</para></callout>
  <callout arearefs="co.trie.split-suffix"><para>Split up the suffix.</para></callout>
  <callout arearefs="co.trie.create-word"><para>Prefix creates a new word.</para></callout>
  <callout arearefs="co.trie.create-suf"><para>Prefix and suffix create a new word.</para></callout>
  <callout arearefs="co.trie.recurse"><para>Recurse on next character.</para></callout>
  </calloutlist>
  */
  //<end id="trie-addWord"/>

  /** Returns {@code true} if every character of {@code s} is in 'a'..'z'. */
  private static boolean isIndexable(String s) {
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      if (c < 'a' || c > 'z') {
        return false;
      }
    }
    return true;
  }

  //<start id="trie-getWords"/>
  /**
   * Returns stored words that start with the given prefix. The prefix is
   * lower-cased (locale-independently) before matching, and words are
   * produced in the order of a depth-first walk that visits a node's own
   * word first and then its children from 'a' to 'z'.
   *
   * @param prefix   the prefix to complete; must not be {@code null}
   * @param numWords maximum number of words to return; values {@code <= 0}
   *                 yield an empty array
   * @return at most {@code numWords} matching words; empty if nothing
   *         matches or the prefix contains a character outside 'a'..'z'
   */
  public String[] getWords(String prefix, int numWords) { //<co id="co.trie.prefix"/>
    if (numWords <= 0) {
      return new String[0];
    }
    String key = prefix.toLowerCase(java.util.Locale.ROOT);
    // Cap the initial capacity so a huge limit does not pre-allocate a huge list.
    List<String> words = new ArrayList<String>(Math.min(numWords, MAX_INITIAL_CAPACITY));
    TrieNode prefixRoot = this;
    for (int i = 0; i < key.length(); i++) {
      if (prefixRoot.suffix == null) {
        int ci = key.charAt(i) - (int) 'a';
        if (ci < 0 || ci >= ALPHABET_SIZE) {
          prefixRoot = null; // character cannot occur in any stored word
          break;
        }
        prefixRoot = prefixRoot.children[ci];
        if (prefixRoot == null) {
          break;
        }
      }
      else { //<co id="co.trie.no-prefix"/>
        // The rest of the prefix must match the start of this node's suffix.
        if (prefixRoot.suffix.startsWith(key.substring(i))) {
          words.add(key.substring(0, i) + prefixRoot.suffix);
        }
        prefixRoot = null;
        break;
      }
    }
    if (prefixRoot != null) {
      prefixRoot.collectWords(words, numWords, key);
    }
    return words.toArray(new String[words.size()]);
  }

  /**
   * Appends the words stored at and below this node to {@code words},
   * stopping once the list holds {@code numWords} entries.
   *
   * @param words    accumulator for the result
   * @param numWords maximum size of {@code words}
   * @param prefix   the letters on the path from the root to this node
   */
  private void collectWords(List<String> words, //<co id="co.trie.collect"/>
      int numWords, String prefix) {
    if (words.size() >= numWords) {
      return;
    }
    if (this.isWord()) {
      words.add(prefix);
      if (words.size() >= numWords) return;
    }
    if (suffix != null) {
      words.add(prefix + suffix);
      return;
    }
    for (int ci = 0; ci < children.length; ci++) {
      if (children[ci] != null) {
        String nextPrefix = prefix + (char) (ci + (int) 'a');
        children[ci].collectWords(words, numWords, nextPrefix);
        if (words.size() >= numWords) return;
      }
    }
  }
  /*
  <calloutlist>
  <callout arearefs="co.trie.prefix"><para>Traverse the tree until the prefix is consumed.</para></callout>
  <callout arearefs="co.trie.no-prefix"><para>Handle the case where the entire prefix has not been split into trie nodes.</para></callout>
  <callout arearefs="co.trie.collect"><para>Collect all the words that are children of the prefix node.</para></callout>
  </calloutlist>
  */
  //<end id="trie-getWords"/>

  /**
   * Describes this node only (not its subtree): the word flag, the suffix,
   * and the letters for which a child exists. A suffix node has no child
   * slots, so its letter list is empty.
   */
  @Override
  public String toString() {
    StringBuilder cs = new StringBuilder();
    if (children != null) {
      for (int ci = 0; ci < children.length; ci++) {
        if (children[ci] != null) {
          cs.append((char) (ci + (int) 'a'));
        }
      }
    }
    return "word="+isWord+" suffix="+suffix+" cs="+cs;
  }

  /**
   * Loads one word per line from the file named by the first argument, then
   * reads prefixes from standard input and prints up to 10 completions for
   * each.
   *
   * @param args {@code args[0]} is the path of the word-list file
   * @throws IOException if the word list or standard input cannot be read
   */
  public static void main(String[] args) throws IOException {
    if (args.length < 1) {
      System.err.println("Usage: TrieNode <word-list-file>");
      return;
    }
    TrieNode node = new TrieNode(false);
    int lc = 0;
    BufferedReader br = new BufferedReader(new FileReader(args[0]));
    try {
      for (String line = br.readLine(); line != null; line = br.readLine()) {
        node.addWord(line);
        lc++;
      }
    }
    finally {
      br.close(); // release the file handle even if reading fails
    }
    System.out.println("Loaded "+lc+" lines");
    // System.in is deliberately left open; it is owned by the JVM.
    BufferedReader br2 = new BufferedReader(new InputStreamReader(System.in));
    for (String line = br2.readLine(); line != null; line = br2.readLine()) {
      String[] words = node.getWords(line, 10);
      System.out.println(java.util.Arrays.asList(words));
    }
  }
}