/*
* Copyright 2012 Takao Nakaguchi
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.trie4j;
import java.io.PrintWriter;
import java.util.concurrent.atomic.AtomicInteger;
import org.trie4j.patricia.TailPatriciaTrie;
import org.trie4j.tail.builder.ConcatTailBuilder;
import org.trie4j.test.LapTimer;
import org.trie4j.test.WikipediaTitles;
public class TestWikipedia {
private static final int maxCount = 20000000;
public static void main2(String[] args) throws Exception{
int base = 137320;
int c = 0;
for(String word : new WikipediaTitles()){
if(c > base) System.out.println(word);
c++;
if(c == (base + 100)) break;
}
}
public static void main(String[] args) throws Exception{
// Trie trie = new org.trie4j.patricia.simple.PatriciaTrie();
// Trie trie = new org.trie4j.patricia.multilayer.MultilayerPatriciaTrie();
Trie trie = new org.trie4j.patricia.TailPatriciaTrie(new ConcatTailBuilder());
LapTimer t = new LapTimer();
{
System.out.println("-- building first trie: " + trie.getClass().getName());
int c = 0;
int charCount = 0;
long sum = 0;
for(String word : new WikipediaTitles()){
t.reset();
trie.insert(word);
sum += t.lapMillis();
charCount += word.length();
c++;
if(c == maxCount) break;
}
System.out.println(String.format(
"-- done in %d millis with %d entries, %d chars"
, sum / 1000000, c, charCount
));
}
{
System.out.println("-- building second trie.");
t.reset();
trie = new org.trie4j.doublearray.DoubleArray(trie, 65536);
// trie = new org.trie4j.doublearray.TailDoubleArray(trie, 65536, new ConcatTailBuilder());
// trie = new org.trie4j.louds.LOUDSTrie(trie, 65536, new ConcatTailBuilder());
// trie = new org.trie4j.louds.LOUDSTrie(trie, 65536, new SuffixTrieTailBuilder());
trie.trimToSize();
System.out.println(String.format(
"-- done in %d millis.", t.lapMillis() / 1000000
));
System.gc();
System.gc();
System.out.println("waiting 10 seconds.");
// Thread.sleep(10000);
}
System.out.println("-- dump trie.");
trie.dump(new PrintWriter(System.out));
return;
/*
System.out.println("-- traversing trie.");
final AtomicInteger cnt = new AtomicInteger();
trie.traverse(new NodeVisitor() {
@Override
public boolean visit(Node node, int nest) {
if(node instanceof InternalCharsNode){
if(((InternalCharsNode)node).getChildren().length == 1){
cnt.incrementAndGet();
}
}
return true;
}
});
System.out.println(cnt + " nodes have 1 child.");
// investigate(trie, charCount);
//*
// dump(trie);
System.out.println("-- pack");
t.lap();
if(trie instanceof MultilayerPatriciaTrie){
MultilayerPatriciaTrie mt = (MultilayerPatriciaTrie)trie;
mt.pack();
System.out.println("-- pack done in " + (t.lap() / 1000000) + " millis.");
// dump(trie);
System.gc();
Thread.sleep(1000);
System.out.println(Runtime.getRuntime().freeMemory() + " bytes free.");
investigate(mt);
}
//*/
}
@SuppressWarnings("unused")
private static void investigate(Trie trie)
throws Exception{
System.out.println("-- dump root children.");
for(Node n : trie.getRoot().getChildren()){
System.out.print(n.getLetters()[0]);
}
System.out.println();
System.out.println("-- count elements.");
final AtomicInteger count = new AtomicInteger();
Algorithms.traverseByDepth(trie.getRoot(), new NodeVisitor() {
public boolean visit(Node node, int nest) {
if(node.isTerminate()) count.incrementAndGet();
return true;
}
});
System.out.println(count.intValue() + " elements.");
//*
System.out.println("-- list elements.");
final AtomicInteger n = new AtomicInteger();
final AtomicInteger l = new AtomicInteger();
final AtomicInteger ln = new AtomicInteger();
final AtomicInteger chars = new AtomicInteger();
Algorithms.traverseByDepth(trie.getRoot(), new NodeVisitor() {
public boolean visit(Node node, int nest) {
if(node.isTerminate()){
l.incrementAndGet();
} else{
n.incrementAndGet();
}
return true;
}
});
System.out.println("node: " + n.intValue());
System.out.println("leaf: " + l.intValue());
System.out.println("label node: " + ln.intValue());
System.out.println("total char count in trie: " + chars.intValue());
System.out.println("verifying trie...");
long lap = System.currentTimeMillis();
int c = 0;
int sum = 0;
for(String word : new WikipediaTitles()){
if(c == maxCount) break;
long d = System.currentTimeMillis();
boolean found = Algorithms.contains(trie.getRoot(), word);//trie.contains(word);
sum += System.currentTimeMillis() - d;
if(!found){
System.out.println("trie not contains [" + word + "]");
break;
}
if(c % 100000 == 0){
System.out.println(c + " elements done.");
}
c++;
}
System.out.println("done in " + (System.currentTimeMillis() - lap) + " millis.");
System.out.println("contains time: " + sum + " millis.");
// System.out.println(trie.getRoot().getChildren().length + "children in root");
if(trie instanceof TailPatriciaTrie){
// ((TailPatriciaTrie) trie).pack();
System.out.println("tail length: " + ((TailPatriciaTrie) trie).getTailBuilder().getTails().length());
}
final Trie t = trie;
new Thread(new Runnable() {
@Override
public void run() {
try {
Thread.sleep(100000);
t.contains("hello");
} catch (InterruptedException e) {
}
}
}).start();
//*/
}
}