/* ==================================================================
* Created [2009-4-27 下午11:32:55] by Jon.King
* ==================================================================
* TSS
* ==================================================================
* mailTo:jinpujun@hotmail.com
* Copyright (c) Jon.King, 2009-2012
* ==================================================================
*/
package com.jinhe.tss.cms.lucene.executor;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map.Entry;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.dom4j.Element;
import com.jinhe.tss.cms.helper.ArticleHelper;
import com.jinhe.tss.cms.lucene.AnalyzerFactory;
import com.jinhe.tss.cms.lucene.ArticleContent;
import com.jinhe.tss.core.util.FileHelper;
import com.jinhe.tss.core.util.XMLDocUtil;
import com.jinhe.tss.core.util.XmlUtil;
/**
* <p> DefaultIndexExecutor.java </p>
* 默认索引执行器:根据文章的所有字段来创建索引。
*/
public class DefaultIndexExecutor implements IIndexExecutor {
protected Logger log = Logger.getLogger(this.getClass());
// 发布日期、创建日期 字段常用于结果排序
protected final static String FIELD_ISSUEDATE = "issueDate";
protected final static String FIELD_CREATETIME = "createTime";
protected final static String FIELD_CONTENTS = "contents";
public void createIndex(ArticleContent bean, IndexWriter indexWriter) throws CorruptIndexException, IOException {
if ( !bean.checkPubUrl() )
return;
// 从发布的xml文件中获取用以创建索引的内容信息
String publishPath = bean.getPubUrl();
File pubFile = new File(publishPath);
org.dom4j.Document doc;
try{
doc = XMLDocUtil.createDocByAbsolutePath(publishPath);
} catch ( Exception e ){
String fileContent = FileHelper.readFile(pubFile, "UTF-8");
doc = XMLDocUtil.dataXml2Doc(XmlUtil.stripNonValidXMLCharacters(fileContent));
}
if( !"UTF-8".equalsIgnoreCase(doc.getXMLEncoding()) ) {
doc.setXMLEncoding(ArticleHelper.getSystemEncoding());
}
List<?> childNodes = doc.getRootElement().elements();
for (int i = 0; i < childNodes.size(); i++) {
Element element = (Element) childNodes.get(i);
// 除附件外所有字段都作为FIELD
if ( !"Attachments".equals(element.getName()) ) {
bean.getArticleAttributes().put(element.getName(), element.getTextTrim());
}
}
Document indexDoc = createIndexDoc( bean );
indexWriter.addDocument(indexDoc);
}
/**
* 创建索引文本
* @param pubFile 索引文件存放路径
* @param attachsContent 附件内容
* @param map 文章属性Map
* @return
*/
protected Document createIndexDoc( ArticleContent bean ) throws IOException {
Document luceneDoc = new Document();
StringBuffer buffer = new StringBuffer();
for( Entry<String, String> entry : bean.getArticleAttributes().entrySet() ) {
String key = entry.getKey();
String value = entry.getValue();
// 发布日期、创建日期 字段特殊处理,用于结果排序
if(FIELD_ISSUEDATE.equals(key) || FIELD_CREATETIME.equals(key)){
luceneDoc.add(new Field(key, value, Field.Store.YES, Field.Index.UN_TOKENIZED));
}
else {
luceneDoc.add(new Field(key, value, Field.Store.YES, Field.Index.NO));
}
// 将文章标题、关键字、副标题、正文、日期、作者灯信息集合起来加到文章内容里去
buffer.append(value);
}
if(bean.getAttachContent() != null) {
buffer.append(bean.getAttachContent());
}
luceneDoc.add(new Field(FIELD_CONTENTS, buffer.toString(), Field.Store.NO, Field.Index.TOKENIZED));
return luceneDoc;
}
public Query createIndexQuery(String searchStr) throws ParseException {
return createIndexQuery(FIELD_CONTENTS, searchStr);
}
protected Query createIndexQuery(String filedName, String searchStr) throws ParseException {
Analyzer analyzer = AnalyzerFactory.createAnalyzer(searchStr);
Query query1 = new QueryParser(filedName, analyzer).parse(searchStr);
//TermQuery query2 = new TermQuery(new Term(FIELD_ISSUEDATE, searchStr));
TermQuery query3 = new TermQuery(new Term(FIELD_CREATETIME, searchStr));
BooleanQuery booleanQuery = new BooleanQuery();
booleanQuery.add(query1, BooleanClause.Occur.SHOULD);
//booleanQuery.add(query2, BooleanClause.Occur.SHOULD);
booleanQuery.add(query3, BooleanClause.Occur.SHOULD);
return booleanQuery;
}
}