package org.ansj.demo;
import java.io.BufferedReader;
import java.io.IOException;
import org.ansj.domain.Term;
import org.ansj.splitWord.Analysis;
import org.ansj.splitWord.analysis.BaseAnalysis;
import org.ansj.splitWord.analysis.ToAnalysis;
import org.nlpcn.commons.lang.util.IOUtil;
/**
* 对文件进行分词的例子
*
* @author ansj
*
*/
public class FileDemo {
public static void main(String[] args) throws IOException {
//
// MyStaticValue.isRealName = true;
BufferedReader reader = IOUtil.getReader("/home/ansj/temp/360baikeData/360tag_all.txt", "utf-8");
ToAnalysis.parse("test 123 孙");
Analysis na = new BaseAnalysis(reader);
long start = System.currentTimeMillis();
int allCount = 0;
Term term = null;
while ((term = na.next()) != null) {
if(term.getOffe()%10000==0)
System.out.println(term.getOffe() + "\t" + term.getName());
allCount += term.getName().length();
if (allCount > 30000000) {
break;
}
}
long end = System.currentTimeMillis();
System.out.println(end - start);
System.out.println("共 " + allCount + " 个字符,每秒处理了:" + (allCount * 1000.0 / (end - start)));
}
}