package org.ansj.ansj_lucene_plug; import java.io.IOException; import java.io.StringReader; import java.util.HashMap; import java.util.Map; import org.ansj.library.DicLibrary; import org.ansj.library.SynonymsLibrary; import org.ansj.lucene6.AnsjAnalyzer; import org.ansj.lucene6.AnsjAnalyzer.TYPE; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; public class TestToken { public static void main(String[] args) { SynonymsLibrary.put(SynonymsLibrary.DEFAULT, "../../library/synonyms.dic"); DicLibrary.insert(DicLibrary.DEFAULT, "清华", "n", 2000); DicLibrary.insert(DicLibrary.DEFAULT, "大学", "n", 2000); Map<String, String> map = new HashMap<String, String>(); map.put("type", "dic_ansj"); map.put(SynonymsLibrary.DEFAULT, SynonymsLibrary.DEFAULT); Analyzer ca = new AnsjAnalyzer(map); String content = "我爱北京天安门天安门上太阳升我美丽的清华大学"; try { TokenStream tokenStream = ca.tokenStream(content, new StringReader(content)); while (tokenStream.incrementToken()) { System.out.print(tokenStream.getAttribute(CharTermAttribute.class)); System.out.print("\t") ; System.out.print(tokenStream.getAttribute(OffsetAttribute.class).startOffset()); System.out.print("\t") ; System.out.print(tokenStream.getAttribute(PositionIncrementAttribute.class).getPositionIncrement()); System.out.print("\t") ; System.out.println(tokenStream.getAttribute(TypeAttribute.class).type()); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } ca.close(); } }