package message.search.engine;
import message.base.pagination.PaginationSupport;
import message.base.pagination.PaginationUtils;
import message.search.SearchBean;
import message.search.SearchInitException;
import message.utils.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.BeanUtils;
import java.io.File;
import java.io.IOException;
import java.util.*;
/**
* 基于lucene实现的索引引擎.
*
* @author sunhao(sunhao.java@gmail.com)
* @version V1.0
* @createTime 13-5-5 上午10:38
*/
public class LuceneSearchEngine extends AbstractSearchEngine {
private static final Logger logger = LoggerFactory.getLogger(LuceneSearchEngine.class);
/**
* 索引存放路径
*/
private String indexPath;
/**
* 分词器
*/
private Analyzer analyzer = new SimpleAnalyzer();
public synchronized void doIndex(List<SearchBean> searchBeans) throws Exception {
this.createOrUpdateIndex(searchBeans, true);
}
public synchronized void deleteIndex(SearchBean bean) throws Exception {
if(bean == null){
logger.warn("Get search bean is empty!");
return;
}
String id = bean.getId();
if(StringUtils.isEmpty(id)){
logger.warn("get id and id value from bean is empty!");
return;
}
String indexType = getIndexType(bean);
Directory indexDir = this.getIndexDir(indexType);
IndexWriter writer = this.getWriter(indexDir);
writer.deleteDocuments(new Term("pkId", id));
writer.commit();
this.destroy(writer);
}
public synchronized void deleteIndexs(List<SearchBean> beans) throws Exception {
if(beans == null){
logger.warn("Get beans is empty!");
return;
}
for(SearchBean bean : beans){
this.deleteIndex(bean);
}
}
public PaginationSupport<SearchBean> doSearch(List<SearchBean> beans, boolean isHighlighter, int start, int num) throws Exception {
beans = mergerSearchBean(beans);
if(beans == null || beans.isEmpty()){
logger.debug("given search beans is empty!");
return PaginationUtils.getNullPagination();
}
IndexSearcher[] searchers = new IndexSearcher[beans.size()];
for(int i = 0; i < beans.size(); i++){
SearchBean bean = beans.get(i);
String indexType = getIndexType(bean);
IndexReader reader = null;
try {
reader = IndexReader.open(this.getIndexDir(indexType));
} catch (Exception e) {
logger.warn("this folder is not a index directory!");
continue;
}
IndexSearcher searcher = reader != null ? new IndexSearcher(reader) : null;
searchers[i] = searcher;
}
//使用MultiSearcher进行多域搜索
MultiSearcher searcher = new MultiSearcher(searchers);
List<String> fieldNames = new ArrayList<String>(); //查询的字段名
List<String> queryValue = new ArrayList<String>(); //待查询字段的值
List<BooleanClause.Occur> flags = new ArrayList<BooleanClause.Occur>();
for(SearchBean bean : beans){
//要进行检索的字段
String[] doSearchFields = bean.getDoSearchFields();
if(doSearchFields == null || doSearchFields.length == 0)
return PaginationUtils.getNullPagination();
//默认字段
if(StringUtils.isNotEmpty(bean.getKeyword())){
for(String field : doSearchFields){
fieldNames.add(field);
queryValue.add(bean.getKeyword());
flags.add(BooleanClause.Occur.SHOULD);
}
}
}
Query query = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queryValue.toArray(new String[]{}), fieldNames.toArray(new String[]{}),
flags.toArray(new BooleanClause.Occur[]{}), analyzer);
logger.debug("make query string is '{}'!", query.toString());
ScoreDoc[] scoreDocs = searcher.search(query, 1000000).scoreDocs;
//查询起始记录位置
int begin = (start == -1 && num == -1) ? 0 : start;
//查询终止记录位置
int end = (start == -1 && num == -1) ? scoreDocs.length : Math.min(begin + num, scoreDocs.length);
//高亮处理
Highlighter highlighter = null;
if(isHighlighter){
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(this.getHtmlPrefix(), this.getHtmlSuffix());
highlighter = new Highlighter(formatter, new QueryScorer(query));
}
List<SearchBean> queryResults = new ArrayList<SearchBean>();
for (int i = begin; i < end; i++) {
int docID = scoreDocs[i].doc;
Document hitDoc = searcher.doc(docID);
String indexType = hitDoc.get("indexType");
SearchBean result = super.getSearchBean(indexType, beans);
if(result == null) continue;
result.setId(hitDoc.get("pkId"));
result.setLink(hitDoc.get("link"));
result.setOwerId(hitDoc.get("owerId"));
result.setOwerName(hitDoc.get("owerName"));
result.setCreateDate(hitDoc.get("createDate"));
result.setIndexType(indexType);
String keyword = StringUtils.EMPTY;
if(isHighlighter && highlighter != null)
keyword = highlighter.getBestFragment(analyzer, "keyword", hitDoc.get("keyword"));
if(StringUtils.isEmpty(keyword))
keyword = hitDoc.get("keyword");
result.setKeyword(keyword);
//要进行检索的字段
String[] doSearchFields = result.getDoSearchFields();
if(doSearchFields == null || doSearchFields.length == 0)
continue;
Map<String, String> extendValues = new HashMap<String, String>();
for(String field : doSearchFields){
String value = hitDoc.get(field);
if(isHighlighter && highlighter != null)
value = highlighter.getBestFragment(analyzer, field, hitDoc.get(field));
if(StringUtils.isEmpty(value))
value = hitDoc.get(field);
extendValues.put(field, value);
}
result.setSearchValues(extendValues);
queryResults.add(result);
}
//关闭链接
searcher.close();
for(IndexSearcher indexSearcher : searchers){
if(indexSearcher != null)
indexSearcher.close();
}
PaginationSupport<SearchBean> paginationSupport = PaginationUtils.makePagination(queryResults, scoreDocs.length, num, start);
return paginationSupport;
}
public synchronized void deleteIndexsByIndexType(Class<? extends SearchBean> clazz) throws Exception {
String indexType = getIndexType(BeanUtils.instantiate(clazz));
this.deleteIndexsByIndexType(indexType);
}
public synchronized void deleteIndexsByIndexType(String indexType) throws Exception {
//传入readOnly的参数,默认是只读的
IndexReader reader = IndexReader.open(this.getIndexDir(indexType), false);
int result = reader.deleteDocuments(new Term("indexType", indexType));
reader.close();
logger.debug("the rows of delete index is '{}'! index type is '{}'!", result, indexType);
}
public synchronized void deleteAllIndexs() throws Exception {
File indexFolder = new File(this.indexPath);
if(indexFolder == null || !indexFolder.isDirectory()){
//不存在或者不是文件夹
logger.debug("indexPath is not a folder! indexPath: '{}'!", indexPath);
return;
}
File[] children = indexFolder.listFiles();
for(File child : children){
if(child == null || !child.isDirectory()) continue;
String indexType = child.getName();
logger.debug("Get indexType is '{}'!", indexType);
this.deleteIndexsByIndexType(indexType);
}
}
public void updateIndex(SearchBean searchBean) throws Exception {
this.updateIndexs(Collections.singletonList(searchBean));
}
public void updateIndexs(List<SearchBean> searchBeans) throws Exception {
this.createOrUpdateIndex(searchBeans, false);
}
/**
* 创建或者更新索引
*
* @param searchBeans 需要创建或者更新的对象
* @param isCreate 是否是创建索引;true创建索引,false更新索引
* @throws Exception
*/
private synchronized void createOrUpdateIndex(List<SearchBean> searchBeans, boolean isCreate) throws Exception {
if(searchBeans == null || searchBeans.isEmpty()){
logger.debug("do no index!");
return;
}
Directory indexDir = null;
IndexWriter writer = null;
for(Iterator<SearchBean> it = searchBeans.iterator(); it.hasNext(); ){
SearchBean sb = it.next();
String indexType = getIndexType(sb);
if(sb == null){
logger.debug("give SearchBean is null!");
return;
}
boolean anotherSearchBean = indexDir != null && !indexType.equals(((FSDirectory) indexDir).getFile().getName());
if(indexDir == null || anotherSearchBean){
indexDir = this.getIndexDir(indexType);
}
if(writer == null || anotherSearchBean){
this.destroy(writer);
writer = this.getWriter(indexDir);
}
Document doc = new Document();
//初始化一些字段
sb.initPublicFields();
String id = sb.getId();
//主键的索引,不作为搜索字段,并且也不进行分词
Field idField = new Field("pkId", id, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(idField);
logger.debug("create id index for '{}', value is '{}'! index is '{}'!", new Object[]{"pkId", id, idField});
String owerId = sb.getOwerId();
if(StringUtils.isEmpty(owerId)){
throw new SearchInitException(10003, "you must give a owerId");
}
Field owerId_ = new Field("owerId", owerId, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(owerId_);
String owerName = sb.getOwerName();
if(StringUtils.isEmpty(owerName)){
throw new SearchInitException(10003, "you must give a owerName");
}
Field owerName_ = new Field("owerName", owerName, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(owerName_);
String link = sb.getLink();
if(StringUtils.isEmpty(link)){
throw new SearchInitException(10003, "you must give a link");
}
Field link_ = new Field("link", link, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(link_);
String keyword = sb.getKeyword();
if(StringUtils.isEmpty(keyword)){
throw new SearchInitException(10003, "you must give a keyword");
}
Field keyword_ = new Field("keyword", keyword, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(keyword_);
String createDate = sb.getCreateDate();
if(StringUtils.isEmpty(createDate)){
throw new SearchInitException(10003, "you must give a createDate");
}
Field createDate_ = new Field("createDate", createDate, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(createDate_);
//索引类型字段
Field indexType_ = new Field("indexType", indexType, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(indexType_);
//进行索引的字段
String[] doIndexFields = sb.getDoIndexFields();
Map<String, String> indexFieldValues = sb.getIndexFieldValues();
if(doIndexFields != null && doIndexFields.length > 0){
for(String field : doIndexFields){
String fieldValue = indexFieldValues.get(field);
if(StringUtils.isEmpty(fieldValue)) {
continue;
}
Field extInfoField = new Field(field, fieldValue, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(extInfoField);
}
}
//进行索引的文件字段
Map<String, File> files = sb.getFileMap();
if(files != null && !files.isEmpty()) {
for(Iterator<Map.Entry<String, File>> i = files.entrySet().iterator(); i.hasNext(); ){
Map.Entry<String, File> e = i.next();
String column = e.getKey();
File file = e.getValue();
if(!Arrays.asList(doIndexFields).contains(column)) continue;
String content = getFileContent(file);
Field extInfoField = new Field(column, content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(extInfoField);
}
}
if(isCreate)
writer.addDocument(doc);
else
writer.updateDocument(new Term("pkId", sb.getId()), doc);
writer.optimize();
}
this.destroy(writer);
logger.debug("create or update index success!");
}
private Directory getIndexDir(String suffix) throws Exception {
File indexDir = new File(indexPath + File.separator + suffix);
// if(indexDir == null || !indexDir.exists()){
// logger.warn("index directory '{}' is not exist!", indexDir.getAbsolutePath());
// return null;
// }
try {
return FSDirectory.open(indexDir);
} catch (IOException e) {
logger.warn("index directory '{}' is not a index directory!", indexDir.getAbsolutePath());
return null;
}
}
private List<SearchBean> mergerSearchBean(List<SearchBean> beans){
List<SearchBean> beans_ = new ArrayList<SearchBean>();
if(beans == null || beans.isEmpty()){
return beans_;
}
for(SearchBean bean : beans){
IndexReader reader = null;
try {
Directory dir = getIndexDir(bean.getIndexType());
reader = getReader(dir);
} catch (Exception e) {
}
if(reader != null)
beans_.add(bean);
}
return beans_;
}
private IndexWriter getWriter(Directory indexDir) throws IOException {
return new IndexWriter(indexDir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
}
private void destroy(IndexWriter writer) throws Exception {
if(writer != null)
writer.close();
}
private IndexReader getReader(Directory dir){
IndexReader reader = null;
try {
reader = IndexReader.open(dir);
} catch (Exception e) {
logger.warn("this folder '{}' is not a index directory!", dir);
}
return reader;
}
public void setIndexPath(String indexPath) {
this.indexPath = indexPath;
}
public void setAnalyzer(Analyzer analyzer) {
this.analyzer = analyzer;
}
}