package org.ansj.ansj_lucene5_plug; import java.io.IOException; import java.io.StringReader; import org.ansj.library.DicLibrary; import org.ansj.lucene5.AnsjAnalyzer; import org.ansj.lucene5.AnsjAnalyzer.TYPE; import org.ansj.splitWord.analysis.IndexAnalysis; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; public class HeightLightTest { private static Directory directory = new RAMDirectory(); private static Analyzer indexAnalyzer = new AnsjAnalyzer(TYPE.index_ansj); private static Analyzer queryAnalyzer = new AnsjAnalyzer(TYPE.index_ansj); public static void main(String[] args) throws CorruptIndexException, IOException, ParseException { DicLibrary.insert(DicLibrary.DEFAULT, "交通安全", "ansj", 2000); DicLibrary.insert(DicLibrary.DEFAULT, "交通", "ansj", 2000); DicLibrary.insert(DicLibrary.DEFAULT, "安全", "ansj", 2000); String content = "不强行上下车,做到先下后上,候车要排队,按秩序上车;下车后要等车辆开走后再行走,如要穿越马路,一定要确保安全的情况下穿行;交通信号灯的正确使用,什么事交通安全出行交通信号灯的正确使用,什么事交通安全出行"; System.out.println(IndexAnalysis.parse(content)); String query = "text:\"交通安全出行\""; // 建立内存索引对象 index(indexAnalyzer, content); // 查询 search(queryAnalyzer, new QueryParser("text", queryAnalyzer).parse(query)); } private static void search(Analyzer analyzer, Query query) throws IOException { DirectoryReader directoryReader = DirectoryReader.open(directory); // 查询索引 IndexSearcher isearcher = new IndexSearcher(directoryReader); System.out.println(query); TopDocs hits = isearcher.search(query, 5); System.out.println(hits.scoreDocs.length); for (int i = 0; i < hits.scoreDocs.length; i++) { int docId = hits.scoreDocs[i].doc; Document document = isearcher.doc(docId); System.out.println(toHighlighter(analyzer, query, document)); } } /** * 高亮设置 * * @param query * @param doc * @param field * @return */ private static String toHighlighter(Analyzer analyzer, Query query, Document doc) { String field = "text"; try { SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>"); Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); TokenStream tokenStream1 = indexAnalyzer.tokenStream("text", new StringReader(doc.get(field))); String highlighterStr = highlighter.getBestFragment(tokenStream1, doc.get(field)); return highlighterStr == null ? doc.get(field) : highlighterStr; } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; } private static void index(Analyzer analysis, String content) throws CorruptIndexException, IOException { Document doc = new Document(); IndexWriter iwriter = new IndexWriter(directory, new IndexWriterConfig(analysis)); doc.add(new TextField("text", content, Field.Store.YES)); iwriter.addDocument(doc); iwriter.commit(); iwriter.close(); } }