/*
* Copyright 2012 Takao Nakaguchi
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.trie4j;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;
import org.trie4j.patricia.PatriciaTrie;
import org.trie4j.util.CharsetUtil;
/**
 * Manual benchmark: inserts the lines of a gzipped, UTF-8 encoded title list
 * into a {@link PatriciaTrie}, printing progress and timing information to
 * standard output, and then runs a few fixed searches against the trie.
 *
 * <p>The input file name is hard-coded and resolved against the current
 * working directory.
 */
public class TestWikipediaCPS {
    /** Name of the gzipped title list read by {@link #main(String[])}. */
    private static final String TITLES_FILE = "jawiki-20120220-all-titles-in-ns0.gz";

    /** Maximum number of lines inserted before reading stops. */
    private static final int MAX_COUNT = 2000000;

    /** A progress line is printed every time this many lines have been handled. */
    private static final int REPORT_INTERVAL = 100000;

    /**
     * Loads up to {@link #MAX_COUNT} lines from {@link #TITLES_FILE} into a
     * new trie, reports the accumulated insert time and then calls
     * {@link #doSearches(Trie)}.
     *
     * @param args ignored
     * @throws Exception if the file cannot be opened, decompressed or read
     */
    public static void main(String[] args) throws Exception{
        System.out.println("--- recursive patricia trie ---");
        Trie trie = new PatriciaTrie();
        int c = 0;
        // Time spent inside trie.insert() only; reading and decoding are excluded.
        long sum = 0;
        BufferedReader r = new BufferedReader(new InputStreamReader(
                new GZIPInputStream(new FileInputStream(TITLES_FILE))
                , CharsetUtil.newUTF8Decoder()));
        try{
            String word = null;
            long lap = System.currentTimeMillis();
            while((word = r.readLine()) != null){
                long d = System.currentTimeMillis();
                trie.insert(word);
                sum += System.currentTimeMillis() - d;
                if(c % REPORT_INTERVAL == 0){
                    // Progress line: count, free memory, max memory,
                    // wall-clock millis since the previous progress line.
                    d = System.currentTimeMillis() - lap;
                    long free = Runtime.getRuntime().freeMemory();
                    System.out.println(
                            c + "," + free + "," + Runtime.getRuntime().maxMemory() + "," + d
                            );
                    lap = System.currentTimeMillis();
                }
                c++;
                if(c == MAX_COUNT) break;
            }
        } finally{
            // Closing the outermost reader closes the gzip and file streams too;
            // done in finally so the file handle is released even if reading fails.
            r.close();
        }
        System.out.println(c + " entries in ja wikipedia titles.");
        System.out.println("insert time: " + sum + " millis.");
        System.out.println("-- insert done.");
        System.out.println(Runtime.getRuntime().freeMemory() + " bytes free.");
        doSearches(trie);
    }

    /**
     * Runs a fixed set of {@code commonPrefixSearch} and
     * {@code predictiveSearch} queries against the given trie, printing every
     * returned string followed by the total elapsed wall-clock time.
     *
     * <p>The measured time includes the cost of printing the results.
     *
     * @param trie the populated trie to query
     */
    private static void doSearches(Trie trie){
        long start = System.currentTimeMillis();
        System.out.println("---- common prefix search ----");
        System.out.println("-- for 東京国際フォーラム");
        for(String s : trie.commonPrefixSearch("東京国際フォーラム")){
            System.out.println(s);
        }
        System.out.println("-- for 大阪城ホール");
        for(String s : trie.commonPrefixSearch("大阪城ホール")){
            System.out.println(s);
        }
        System.out.println("---- predictive search ----");
        System.out.println("-- for 大阪城");
        for(String s : trie.predictiveSearch("大阪城")){
            System.out.println(s);
        }
        System.out.println("---- predictive search ----");
        // Label matches the query string actually passed below.
        System.out.println("-- for 東京国");
        for(String s : trie.predictiveSearch("東京国")){
            System.out.println(s);
        }
        System.out.println("-- total search millis: " + (System.currentTimeMillis() - start));
    }
}