package com.lucene.test4;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import com.lucene.utils.BbsBean;
import com.lucene.utils.Lucene;
import com.lucene.utils.LuceneType;
import com.lucene.utils.Page;

public class LuceneUtils {

    private String indexDir = "D:\\test\\gm\\index4\\";

    @SuppressWarnings("deprecation")
    private static Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_2);

    /**
     * Create the index. Any existing documents are deleted first, so the
     * index is rebuilt from scratch on every call.
     *
     * @param lbean the beans to index
     * @return true if the index was written successfully
     */
    public boolean createrIndex(List<BbsBean> lbean) {
        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            directory = FSDirectory.open(new File(indexDir));
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
            indexWriter = new IndexWriter(directory, iwc);
            indexWriter.deleteAll(); // rebuild: drop all existing documents
            for (int i = 0; i < lbean.size(); i++) {
                Document doc = new Document();
                BbsBean bean = lbean.get(i);
                // StringField is indexed as a single token: suitable for the type flag.
                doc.add(new StringField(LuceneType.ALL_TYPE, LuceneType.TYPE_TOPIC, Field.Store.YES));
                doc.add(new TextField(LuceneType.ALL_ID, bean.getTopicId(), Field.Store.YES));
                doc.add(new TextField(LuceneType.ALL_TITLE, bean.getTitle(), Field.Store.YES));
                // Strip HTML tags from the reply content before indexing.
                doc.add(new TextField(LuceneType.ALL_CONTENT,
                        bean.getRevertContent().replaceAll("<\\S[^>]+>", "").replaceAll("<p>", ""),
                        Field.Store.YES));
                doc.add(new TextField(LuceneType.ALL_URL, bean.getRevertUrl(), Field.Store.YES));
                indexWriter.addDocument(doc);
            }
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            try {
                if (indexWriter != null) {
                    indexWriter.close();
                }
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return true;
    }

    @SuppressWarnings("deprecation")
    public Page<Map<String, String>> search(String[] queryStr, String[] field, int start, int pageSize)
            throws Exception {
        Page<Map<String, String>> pager = new Page<Map<String, String>>(start, pageSize, 0,
                new ArrayList<Map<String, String>>());
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir)));
        IndexSearcher indexSearch = new IndexSearcher(reader);
        /*
         * Search the title and content fields at the same time and set their
         * weights for ranking: the higher the boost, the higher a match in
         * that field ranks. To keep the later discussion of scores simple,
         * both fields get the same weight here.
         */
        Map<String, Float> boosts = new HashMap<String, Float>();
        boosts.put(LuceneType.ALL_TITLE, 1.0f);
        boosts.put(LuceneType.ALL_CONTENT, 1.0f);
        /*
         * Use MultiFieldQueryParser to search the same keyword across both
         * fields, applying the per-field boosts defined above.
         */
        QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_4_10_2,
                new String[] { LuceneType.ALL_TITLE, LuceneType.ALL_CONTENT }, analyzer, boosts);
        Query query = queryParser.parse(queryStr[0]);
        CachingWrapperFilter filter = null; // cached filter for the optional second-pass search
        if (queryStr.length > 1) { // a second query string triggers the filtered (second-pass) search
            filter = filterIndex(field, queryStr[1]);
        }
        Lucene entity = new Lucene();
        if (start <= 0) {
            start = 1;
        }
        TopDocs result = indexSearch.search(query, filter, start * pageSize);
        int index = (start - 1) * pageSize;
        // Step back until the requested page actually has hits.
        while (index > 0 && index >= result.totalHits) {
            index -= pageSize;
            start--;
        }
        ScoreDoc scoreDoc = null;
        if (index > 0) {
            scoreDoc = result.scoreDocs[index - 1]; // last hit of the previous page
        }
        // Fetch one page of hits, starting after the previous page's last hit.
        TopDocs topDocs = indexSearch.searchAfter(scoreDoc, query, filter, pageSize);
        entity.setTopDocs(topDocs);
        entity.setRowCount(topDocs.totalHits); // total hit count
        entity.setPageSize(pageSize);
        entity.setCurrent(start);
        List<Map<String, String>> list = searchHLResult(indexSearch, query, entity.getTopDocs(), field, field);
        pager = new Page<Map<String, String>>(entity.getCurrent(), pageSize, entity.getRowCount(), list);
        reader.close();
        return pager;
    }

    /**
     * Build the cached first-pass filter used by the second-pass search.
     *
     * @param fields  index fields; only the first one is used as the filter field
     * @param content the filter condition
     * @return a cached filter wrapping the parsed condition
     * @throws ParseException
     */
    @SuppressWarnings("deprecation")
    public CachingWrapperFilter filterIndex(String[] fields, String content) throws ParseException {
        String[] field = new String[1];
        field[0] = fields[0]; // take the field to filter on
        QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_4_10_2, field, analyzer);
        Query query = queryParser.parse(content); // parse the filter condition
        QueryWrapperFilter oldFilter = new QueryWrapperFilter(query); // wrap the query as a filter
        CachingWrapperFilter filter = new CachingWrapperFilter(oldFilter); // cache the filter results
        return filter;
    }

    /**
     * Apply highlighting to the hits and return the result list.
     *
     * @param indexSearch the searcher
     * @param query       the query whose terms are highlighted
     * @param topDocs     the search hits
     * @param reValues    the fields to return
     * @param fields      the fields to highlight
     * @return one map of field name to (possibly highlighted) stored value per hit
     * @throws CorruptIndexException
     * @throws IOException
     * @throws InvalidTokenOffsetsException
     */
    public List<Map<String, String>> searchHLResult(IndexSearcher indexSearch, Query query,
            TopDocs topDocs, String[] reValues, String[] fields)
            throws CorruptIndexException, IOException, InvalidTokenOffsetsException {
        List<Map<String, String>> list = new ArrayList<Map<String, String>>();
        // Wrap matched terms in the configured prefix/suffix markup.
        SimpleHTMLFormatter shf = new SimpleHTMLFormatter(LuceneType.HIGH_LIGHT_PRE, LuceneType.HIGH_LIGHT_SUB);
        Highlighter hl = new Highlighter(shf, new QueryScorer(query)); // scores fragments against the query
        if (topDocs != null) {
            for (int i = 0; i < topDocs.scoreDocs.length; i++) {
                Document doc = indexSearch.doc(topDocs.scoreDocs[i].doc);
                Map<String, String> map = new HashMap<String, String>();
                for (String reValue : reValues) { // collect each requested return field
                    for (String field : fields) {
                        if (reValue.equals(field)) { // this field should be highlighted
                            TokenStream tokenStream = analyzer.tokenStream(reValue,
                                    new StringReader(doc.get(reValue)));
                            String str = hl.getBestFragment(tokenStream, doc.get(reValue));
                            // Fall back to the stored value when no fragment matched.
                            map.put(reValue, str == null ? doc.get(reValue) : str);
                            break; // highlighting done; leave the inner loop
                        } else { // not a highlighted field: return the stored value as-is
                            map.put(reValue, doc.get(reValue));
                        }
                    }
                }
                list.add(map);
            }
        }
        return list;
    }
}
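
/*
 * A minimal usage sketch for LuceneUtils: index one post, then run a
 * first-page search and print the highlighted titles. BbsBean, LuceneType
 * and Page come from com.lucene.utils as imported above; the setter names
 * on BbsBean (setTopicId, setTitle, setRevertContent, setRevertUrl) and
 * the Page.getResult() accessor are assumptions mirroring the getters used
 * in LuceneUtils, not confirmed by the original source.
 */
class LuceneUtilsDemo {

    public static void main(String[] args) throws Exception {
        LuceneUtils utils = new LuceneUtils();

        // Index a single post (assumed setter names).
        BbsBean bean = new BbsBean();
        bean.setTopicId("1001");
        bean.setTitle("Lucene highlight example");
        bean.setRevertContent("<p>How to use the Lucene highlighter</p>");
        bean.setRevertUrl("/topic/1001");
        List<BbsBean> beans = new ArrayList<BbsBean>();
        beans.add(bean);
        utils.createrIndex(beans);

        // queryStr[0] is the keyword; a second element would trigger the
        // cached-filter second pass. Page 1, ten hits per page.
        String[] queryStr = { "lucene" };
        String[] fields = { LuceneType.ALL_TITLE, LuceneType.ALL_CONTENT };
        Page<Map<String, String>> page = utils.search(queryStr, fields, 1, 10);
        for (Map<String, String> row : page.getResult()) { // getResult() is an assumed accessor
            System.out.println(row.get(LuceneType.ALL_TITLE));
        }
    }
}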