package org.ansj.ansj_lucene5_plug;
import java.io.IOException;
import java.io.StringReader;
import org.ansj.domain.Term;
import org.ansj.library.DicLibrary;
import org.ansj.lucene5.AnsjAnalyzer;
import org.ansj.lucene5.AnsjAnalyzer.TYPE;
import org.ansj.splitWord.analysis.IndexAnalysis;
import org.ansj.util.MyStaticValue;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.junit.Test;
public class IndexAndTest {
@Test
public void test() throws Exception {
DicLibrary.put(DicLibrary.DEFAULT, "../../library/default.dic");
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new AnsjAnalyzer(TYPE.index_ansj));
Directory directory = null;
IndexWriter iwriter = null;
IndexWriterConfig ic = new IndexWriterConfig(analyzer);
String text = "旅游和服务是最好的";
System.out.println(IndexAnalysis.parse(text));
// 建立内存索引对象
directory = new RAMDirectory();
iwriter = new IndexWriter(directory, ic);
addContent(iwriter, text);
iwriter.commit();
iwriter.close();
System.out.println("索引建立完毕");
Analyzer queryAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj);
System.out.println("index ok to search!");
for (Term t : IndexAnalysis.parse(text)) {
System.out.println(t.getName());
search(queryAnalyzer, directory, "\"" + t.getName() + "\"");
}
}
private void search(Analyzer queryAnalyzer, Directory directory, String queryStr) throws CorruptIndexException, IOException, ParseException {
IndexSearcher isearcher;
DirectoryReader directoryReader = DirectoryReader.open(directory);
// 查询索引
isearcher = new IndexSearcher(directoryReader);
QueryParser tq = new QueryParser("text", queryAnalyzer);
Query query = tq.parse(queryStr);
System.out.println(query);
TopDocs hits = isearcher.search(query, 5);
System.out.println(queryStr + ":共找到" + hits.totalHits + "条记录!");
for (int i = 0; i < hits.scoreDocs.length; i++) {
int docId = hits.scoreDocs[i].doc;
Document document = isearcher.doc(docId);
System.out.println(toHighlighter(queryAnalyzer, query, document));
}
}
private void addContent(IndexWriter iwriter, String text) throws CorruptIndexException, IOException {
Document doc = new Document();
IndexableField field = new TextField("text", text, Store.YES);
doc.add(field);
iwriter.addDocument(doc);
}
/**
* 高亮设置
*
* @param query
* @param doc
* @param field
* @return
*/
private String toHighlighter(Analyzer analyzer, Query query, Document doc) {
String field = "text";
try {
SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
TokenStream tokenStream1 = analyzer.tokenStream("text", new StringReader(doc.get(field)));
String highlighterStr = highlighter.getBestFragment(tokenStream1, doc.get(field));
return highlighterStr == null ? doc.get(field) : highlighterStr;
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (InvalidTokenOffsetsException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return null;
}
}