package com.blinkcoder.search; import com.jfinal.log.Logger; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.math.NumberUtils; import org.apache.lucene.document.*; import org.apache.lucene.index.*; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.wltea.analyzer.lucene.IKAnalyzer; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map; /** * User: Michael Chen * Email: yidongnan@gmail.com * Date: 14-2-28 * Time: 下午4:34 */ public class SearchHelper { private final static Logger log = Logger.getLogger(SearchHelper.class); private final static IKAnalyzer analyzer = new IKAnalyzer(); private final static int MAX_COUNT = 1000; private String indexPath; public final static String FN_ID = "___id"; public final static String FN_CLASSNAME = "___class"; public static SearchHelper init(String indexPath) throws FileNotFoundException { SearchHelper searchHelper = new SearchHelper(); indexPath = FilenameUtils.normalize(indexPath); File file = new File(indexPath); if (!file.exists() || !file.isDirectory()) { file.mkdirs(); } if (!indexPath.endsWith(File.separator)) { indexPath += File.separator; } searchHelper.indexPath = indexPath; return searchHelper; } private IndexWriter getWriter(Class<? extends Searchable> objClass) throws IOException { Directory dir = FSDirectory.open(new File(indexPath + objClass .getSimpleName())); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_41, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); return new IndexWriter(dir, config); } private IndexSearcher getSearcher(Class<? extends Searchable> objClass) throws IOException { Directory dir = FSDirectory.open(new File(indexPath + objClass .getSimpleName())); return new IndexSearcher(DirectoryReader.open(dir)); } private IndexSearcher getSearchers(List<Class<? extends Searchable>> objClasses) throws IOException { IndexReader[] readers = new IndexReader[objClasses.size()]; int idx = 0; for (Class<? extends Searchable> objClass : objClasses) { FSDirectory dir = FSDirectory.open(new File(indexPath + objClass .getSimpleName())); readers[idx++] = DirectoryReader.open(dir); } return new IndexSearcher(new MultiReader(readers, true)); } /** * 优化索引库 * * @param objClass * @throws IOException */ public void optimize(Class<? extends Searchable> objClass) throws IOException { IndexWriter writer = getWriter(objClass); try { writer.forceMerge(1); writer.commit(); } finally { writer.close(); writer = null; } } /** * 多库搜索 * * @param objClasses * @param query * @param filter * @param sort * @param page * @param count * @return * @throws IOException */ public List<Searchable> find(List<Class<? extends Searchable>> objClasses, Query query, Filter filter, Sort sort, int page, int count) throws IOException { IndexSearcher searcher = getSearchers(objClasses); return find(searcher, query, filter, sort, page, count); } /** * 单库搜索 * * @param objClass * @param query * @param filter * @param sort * @param page * @param count * @return * @throws IOException */ public List<Integer> find(Class<? extends Searchable> objClass, Query query, Filter filter, Sort sort, int page, int count) throws IOException { IndexSearcher searcher = getSearcher(objClass); List<Searchable> results = find(searcher, query, filter, sort, page, count); List<Integer> ids = new ArrayList<>(); for (Searchable obj : results) { if (obj != null) ids.add(obj.getId()); } return ids; } /** * 多库搜索 * * @param objClasses * @param query * @param filter * @return * @throws IOException */ public int count(List<Class<? extends Searchable>> objClasses, Query query, Filter filter) throws IOException { IndexSearcher searcher = getSearchers(objClasses); return count(searcher, query, filter); } /** * 搜索 * * @param beanClass * @param query * @param filter * @return * @throws IOException */ public int count(Class<? extends Searchable> objClass, Query query, Filter filter) throws IOException { IndexSearcher searcher = getSearcher(objClass); return count(searcher, query, filter); } /** * 搜索 * * @param searcher * @param query * @param filter * @param sort * @param page * @param count * @return * @throws IOException */ private List<Searchable> find(IndexSearcher searcher, Query query, Filter filter, Sort sort, int page, int count) throws IOException { try { TopDocs hits; if (filter != null && sort != null) hits = searcher.search(query, filter, MAX_COUNT, sort); else if (filter != null) hits = searcher.search(query, filter, MAX_COUNT); else if (sort != null) hits = searcher.search(query, MAX_COUNT, sort); else hits = searcher.search(query, MAX_COUNT); if (hits == null) return null; List<Searchable> results = new ArrayList<>(); int nBegin = (page - 1) * count; int nEnd = Math.min(nBegin + count, hits.scoreDocs.length); for (int i = nBegin; i < nEnd; i++) { ScoreDoc s_doc = hits.scoreDocs[i]; Document doc = searcher.doc(s_doc.doc); Searchable obj = doc2obj(doc); if (obj != null && !results.contains(obj)) { results.add(obj); } } return results; } catch (IOException e) { log.error("Unabled to find via query: " + query, e); } return null; } /** * 根据查询条件统计搜索结果数 * * @param searcher * @param query * @param filter * @return * @throws IOException */ private int count(IndexSearcher searcher, Query query, Filter filter) throws IOException { try { TotalHitCountCollector thcc = new TotalHitCountCollector(); if (filter != null) searcher.search(query, filter, thcc); else searcher.search(query, thcc); return Math.min(MAX_COUNT, thcc.getTotalHits()); } catch (IOException e) { log.error("Unabled to find via query: " + query, e); return -1; } } /** * 批量添加索引 * * @param docs * @throws IOException */ public int add(List<? extends Searchable> objs) throws IOException { if (objs == null || objs.size() == 0) return 0; int doc_count = 0; IndexWriter writer = getWriter(objs.get(0).getClass()); try { for (Searchable obj : objs) { Document doc = obj2doc(obj); writer.addDocument(doc); doc_count++; } writer.commit(); } finally { writer.close(); writer = null; } return doc_count; } /** * 批量删除索引 * * @param docs * @throws IOException */ public int delete(List<? extends Searchable> objs) throws IOException { if (objs == null || objs.size() == 0) return 0; int doc_count = 0; IndexWriter writer = getWriter(objs.get(0).getClass()); try { for (Searchable obj : objs) { writer.deleteDocuments(new Term("id", String.valueOf(obj.getId()))); doc_count++; } writer.commit(); } finally { writer.close(); writer = null; } return doc_count; } /** * 批量更新索引 * * @param docs * @throws IOException */ public void update(List<? extends Searchable> objs) throws IOException { delete(objs); add(objs); } public static Searchable doc2obj(Document doc) { try { int id = NumberUtils.toInt(doc.get(FN_ID), 0); if (id <= 0) return null; Searchable obj = (Searchable) Class.forName(doc.get(FN_CLASSNAME) ).newInstance(); obj.setId(id); return obj; } catch (Exception e) { log.error("Unabled generate object from document#id=" + doc .toString(), e); return null; } } private static Field obj2field(String field, Object fieldValue, boolean store) { if (fieldValue == null) return null; if (fieldValue instanceof Date) //日期 return new LongField(field, ((Date) fieldValue).getTime(), store ? Field.Store.YES : Field.Store.NO); if (fieldValue instanceof Number) //其他数值 return new StringField(field, String.valueOf(((Number) fieldValue).longValue()), store ? Field.Store.YES : Field .Store.NO); //其他默认当字符串处理 return new StringField(field, (String) fieldValue, store ? Field.Store.YES : Field.Store.NO); } private static Document obj2doc(Searchable obj) { if (obj == null) return null; Document doc = new Document(); doc.add(new IntField(FN_ID, obj.getId(), Field.Store.YES)); doc.add(new StoredField(FN_CLASSNAME, obj.getClass().getName())); //存储字段 List<String> fields = new ArrayList<>(); Map<String, Object> eDatas = obj.storeDatas(); if (eDatas != null) for (String fn : eDatas.keySet()) { if (fields.contains(fn)) continue; Object fv = eDatas.get(fn); if (fv != null) doc.add(obj2field(fn, fv, true)); } //索引字段 eDatas = obj.indexDatas(); if (eDatas != null) for (String fn : eDatas.keySet()) { if (fields.contains(fn)) continue; String fv = eDatas.get(fn).toString(); if (fv != null) { TextField tf = new TextField(fn, fv, Field.Store.NO); tf.setBoost(obj.boost()); doc.add(tf); } } return doc; } public static Query makeQuery(String field, String q, float boost) { if (StringUtils.isBlank(q) || StringUtils.isBlank(field)) return new BooleanQuery(); QueryParser parser = new QueryParser(Version.LUCENE_41, field, analyzer); parser.setDefaultOperator(QueryParser.AND_OPERATOR); try { Query querySinger = parser.parse(q); querySinger.setBoost(boost); return querySinger; } catch (Exception e) { TermQuery queryTerm = new TermQuery(new Term(field, q)); queryTerm.setBoost(boost); return queryTerm; } } }