/**
* 通联支付-研发中心
* @author zhanggh
* 2014-5-20
* version 1.0
* 说明:
*/
package com.mtools.core.plugin.lucene;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Resource;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.springframework.stereotype.Component;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.google.common.collect.Lists;
import com.mtools.core.plugin.BasePlugin;
import com.mtools.core.plugin.entity.PageInfo;
import com.mtools.core.plugin.properties.CoreParams;
/**
* 功能:全文搜索工具
*
* @date 2014-5-20
*/
@Component("lucencePlugin")
public class LucencePlugin extends BasePlugin {
private static Directory directory = null;
private Map<String, Float> scores = new HashMap<String, Float>();
private Analyzer anal = new IKAnalyzer(true);//中文分词
private String indexpth;
private static IndexReader reader=null;
public LucencePlugin() {
super();
}
/**
* 功能:分词 2014-5-27
*/
public String[] parserStr(String orgStr) throws IOException {
log.info("将短语:【"+orgStr+"】 进行分词");
List<String> keywords = Lists.newArrayList();
// 创建分词对象
StringReader reader = new StringReader(orgStr);
// 分词
TokenStream ts = this.anal.tokenStream("", reader);
CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
// String[] keywords=new String[term.length()];
// 遍历分词数据
StringBuilder sb=new StringBuilder();
while (ts.incrementToken()) {
sb.append(term.toString()).append("|");
keywords.add(term.toString());
}
reader.close();
log.info("短语:【"+orgStr+"】 分词结果:"+sb.toString());
final int size = keywords.size();
String[] arr = (String[]) keywords.toArray(new String[size]);
if (arr != null && arr.length == 0)
return null;
else
return arr;
}
/**
* 功能:初始化 2014-5-27
*/
public void initDirectory() {
log.info("初始化全文搜索目录环境FSDirectory");
try {
this.indexpth=this.coreParams.getIndexpth();
directory = FSDirectory.open(new File(this.getIndexpth()));
log.info("初始化全文搜索目录环境FSDirectory完毕");
// directory = new RAMDirectory();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 功能:初始化 2014-5-27
*/
public void initReader() {
log.info("初始化全文搜索目录环境IndexReader");
try {
this.reader = IndexReader.open(directory, false);
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 功能:建立索引 2014-5-27
*/
public void createIndex(List<Document> docs) {
log.info("建立索引");
IndexWriter writer=null;
try {
writer = new IndexWriter(directory, new IndexWriterConfig(
Version.LUCENE_35, this.anal));
writer.deleteAll();
for (Document doc : docs) {
writer.addDocument(doc);
}
writer.commit();
writer.forceMerge(1);
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (writer != null)
writer.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
/**
* 功能:更新索引 2014-5-27
*/
public void update(Document doc, Term term) {
log.info("更新索引" + term.text());
IndexWriter writer = null;
try {
writer = new IndexWriter(directory, new IndexWriterConfig(
Version.LUCENE_35, this.anal));
writer.updateDocument(term, doc);
writer.commit();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (writer != null)
writer.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
/**
* 功能:强制删除索引 2014-5-27
*/
public void deleteIndex(Term term) {
try {
log.info("强制删除索引");
IndexReader reader = IndexReader.open(directory, false);
reader.deleteDocuments(term);
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 功能:单元搜索 2014-5-27
*/
public List<Document> termQuery(Term term, int num) {
List<Document> docs = Lists.newArrayList();
try {
log.info("单条件查询");
IndexReader reader = IndexReader.open(directory);
IndexSearcher searcher = new IndexSearcher(reader);
TermQuery query = new TermQuery(term);
TopDocs tds = searcher.search(query, num);
for (ScoreDoc sd : tds.scoreDocs) {
Document doc = searcher.doc(sd.doc);
docs.add(doc);
log.info("(" + sd.doc + "-" + doc.getBoost() + "-" + sd.score
+ ")");
}
reader.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return docs;
}
/**
* 功能:多条件搜索 2014-5-27
*/
public List<Document> booleanQuery(String title,String[] feilds,String keyword,PageInfo page) {
List<Document> bdocs = Lists.newArrayList();
try {
log.info("多个条件联合查询");
IndexReader reader = IndexReader.open(directory);
IndexSearcher searcher = new IndexSearcher(reader);
String str = QueryParser.escape(keyword);
QueryParser queryParser = new QueryParser(Version.LUCENE_35, title, new IKAnalyzer());
queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
Query baseQuery = queryParser.parse(str);
FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term(title, str), 0.5F);
BooleanQuery bquery1 = new BooleanQuery();
BooleanQuery bquery2 = new BooleanQuery();
for(String key:feilds){
TermQuery termquery = new TermQuery(new Term(key, str));
bquery1.add(termquery, BooleanClause.Occur.SHOULD);
}
bquery1.add(baseQuery, BooleanClause.Occur.SHOULD);
bquery1.add(fuzzyQuery, BooleanClause.Occur.MUST);
bquery2.add(bquery1, BooleanClause.Occur.MUST);
bdocs=pageDeal(searcher, bquery2, page);
reader.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (ParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return bdocs;
}
/**
* 功能:多条件搜索 2014-5-27
*/
public List<Document> multiFieldQuery(String[] feilds,String keyword,PageInfo page) {
List<Document> docs = Lists.newArrayList();
try {
log.info("multiFieldQuery多个条件联合查询");
IndexReader reader = IndexReader.open(directory);
IndexSearcher searcher = new IndexSearcher(reader);
// 构造布尔查询(可根据你的要求随意组合)
QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_35, feilds, this.anal);
qp.setDefaultOperator(QueryParser.AND_OPERATOR);// 设置检索的条件.OR_OPERATOR表示"或"
Query query = qp.parse(keyword);
//分页
docs=pageDeal(searcher, query, page);
if(docs.size()==0){
qp.setDefaultOperator(QueryParser.OR_OPERATOR);// 设置检索的条件.OR_OPERATOR表示"或"
query = qp.parse(keyword);
docs=pageDeal(searcher, query, page);
}
reader.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (ParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return docs;
}
/**
* 删除整个索引库
*
* @return
*/
public boolean deleteAllIndex() {
IndexWriter writer = null;
Analyzer anal = new IKAnalyzer(true);//中文分词
try {
writer = new IndexWriter(directory, new IndexWriterConfig(
Version.LUCENE_35, this.anal));
writer.deleteAll();
writer.commit();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (writer != null)
writer.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
return false;
}
/**
* @return the indexpth
*/
public String getIndexpth() {
return indexpth;
}
/**
* @param indexpth
*/
public void setIndexpth(String indexpth) {
this.indexpth = indexpth;
}
private List<Document> pageDeal(IndexSearcher searcher,Query query,PageInfo page) throws NumberFormatException, IOException{
List<Document> bdocs = Lists.newArrayList();
TopDocs tds = searcher.search(query, Integer.parseInt(page.getPageIndex())*Integer.parseInt(page.getPageSize()));
//取出结束位置的数据
page.setItemCount(tds.totalHits);
int start=(Integer.parseInt(page.getPageIndex())-1)*Integer.parseInt(page.getPageSize());
//分页
for(int i=start;i<tds.scoreDocs.length;i++){
ScoreDoc sd = tds.scoreDocs[i];
Document doc = searcher.doc(sd.doc);
bdocs.add(doc);
log.info("【命中次数:" + sd.doc + "- 权重:" + doc.getBoost() + "- 相似度:" + sd.score+ "】");
}
return bdocs;
}
}