/*
 * Copyright 2012 Takao Nakaguchi
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.trie4j.doublearray;

import java.io.PrintWriter;
import java.util.concurrent.atomic.AtomicInteger;

import org.trie4j.Algorithms;
import org.trie4j.Node;
import org.trie4j.NodeVisitor;
import org.trie4j.Trie;
import org.trie4j.patricia.PatriciaTrie;
import org.trie4j.test.LapTimer;
import org.trie4j.test.WikipediaTitles;

/**
 * Manual driver that loads words from {@link WikipediaTitles} into a trie,
 * counts and dumps its nodes, checks that every inserted word can be found
 * again, and prints the results of a few sample searches.
 *
 * <p>The trie under test is currently the {@link PatriciaTrie} itself; the
 * double-array variants are kept below as commented-out alternatives.
 */
public class TestWikipedia {
	/** Upper bound on the number of words that are inserted and verified. */
	private static final int MAX_COUNT = 20000000;

	/**
	 * Runs the whole scenario: build, count nodes, dump, verify, search.
	 *
	 * @param args ignored
	 * @throws Exception if reading the titles, dumping the trie or sleeping fails
	 */
	public static void main(String[] args) throws Exception{
		System.out.println("--- building patricia trie ---");
		Trie trie = new PatriciaTrie();
//		Trie trie = new TailPatriciaTrie(new ConcatTailBuilder());
		int c = 0;
		LapTimer t1 = new LapTimer();
		for(String word : new WikipediaTitles()){
			trie.insert(word);
			c++;
			if(c == MAX_COUNT) break;
		}
		System.out.println("done in " + t1.lapMillis() + " millis.");
		// The separating space was missing here, gluing the count to the text.
		System.out.println(c + " entries in ja wikipedia titles.");

		System.out.println("-- building double array.");
		t1.reset();
//		Trie da = new TailDoubleArray(trie, 65536, new ConcatTailBuilder());
//		Trie da = new DoubleArray(trie, 65536);
		Trie da = trie;
		// Drop the source reference so that only the trie under test stays
		// referenced once one of the alternatives above is enabled.
		trie = null;
		System.out.println("done in " + t1.lapMillis() + " millis.");

		// The anonymous visitor can only capture final locals, hence the
		// AtomicInteger used as a mutable counter.
		final AtomicInteger count = new AtomicInteger();
		Algorithms.traverseByBreadth(da.getRoot(), new NodeVisitor() {
			@Override
			public boolean visit(Node node, int nest) {
				count.incrementAndGet();
				return true;
			}
		});
		System.out.println(count + " nodes in trie.");

		// A PrintWriter built from an OutputStream buffers and does not
		// auto-flush, so flush explicitly or the dump may be lost or show up
		// after later output. Not closed: that would close System.out too.
		PrintWriter dumpWriter = new PrintWriter(System.out);
		da.dump(dumpWriter);
		dumpWriter.flush();

		verify(da);

		System.out.println("---- common prefix search ----");
		System.out.println("-- for 東京国際フォーラム");
		for(String s : da.commonPrefixSearch("東京国際フォーラム")){
			System.out.println(s);
		}
		System.out.println("-- for 大阪城ホール");
		for(String s : da.commonPrefixSearch("大阪城ホール")){
			System.out.println(s);
		}

		System.out.println("---- predictive search ----");
		System.out.println("-- for 大阪城");
		for(String s : da.predictiveSearch("大阪城")){
			System.out.println(s);
		}
		System.out.println("---- done ----");

		// NOTE(review): presumably keeps the process alive and the trie
		// reachable so it can be inspected (e.g. memory usage) - confirm.
		Thread.sleep(10000);
		da.contains("hello");
	}

	/**
	 * Looks up at most {@link #MAX_COUNT} words from {@link WikipediaTitles}
	 * in {@code da}, stopping at the first word that is not found, and prints
	 * the number of words checked together with the measured times.
	 *
	 * @param da the trie to check
	 * @throws Exception if reading the titles fails
	 */
	private static void verify(Trie da) throws Exception{
		System.out.println("verifying double array...");
		int c = 0;
		int sum = 0;
		LapTimer t1 = new LapTimer();
		LapTimer t = new LapTimer();
		for(String word : new WikipediaTitles()){
			if(c == MAX_COUNT) break;
			// Time only the lookup itself, not the iteration over the titles.
			t.reset();
			boolean found = da.contains(word);
			sum += t.lapMillis();
			c++;
			if(!found){
				System.out.println("verification failed.  trie not contains " + c + " th word: [" + word + "]");
				break;
			}
		}
		// The separating space was missing here, gluing the count to the text.
		System.out.println("done " + c + " words in " + t1.lapMillis() + " millis.");
		System.out.println("contains time: " + sum + " millis.");
	}
}