/* ==================================================================
* Created [2009-4-27 下午11:32:55] by Jon.King
* ==================================================================
* TSS
* ==================================================================
* mailTo:jinpujun@hotmail.com
* Copyright (c) Jon.King, 2009-2012
* ==================================================================
*/
package com.jinhe.tss.cms.lucene;
import java.io.File;
import java.io.IOException;
import java.util.Calendar;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import org.apache.lucene.index.IndexWriter;
import com.jinhe.tss.cms.dao.IArticleDao;
import com.jinhe.tss.cms.dao.IChannelDao;
import com.jinhe.tss.cms.entity.Attachment;
import com.jinhe.tss.cms.entity.Channel;
import com.jinhe.tss.cms.entity.TimerStrategy;
import com.jinhe.tss.cms.helper.ArticleHelper;
import com.jinhe.tss.cms.lucene.executor.IIndexExecutor;
import com.jinhe.tss.cms.lucene.executor.IndexExecutorFactory;
import com.jinhe.tss.core.common.progress.Progress;
import com.jinhe.tss.core.exception.BusinessException;
import com.jinhe.tss.core.util.FileHelper;
/**
 * Static helpers used by the CMS Lucene indexing job: collecting the articles
 * that should be indexed for a {@link TimerStrategy}, and writing them into
 * the strategy's index directory.
 */
public class IndexHelper {

    private static final Logger log = Logger.getLogger(IndexHelper.class);

    /** Number of successfully indexed articles between two progress updates. */
    private static final int PROGRESS_BATCH_SIZE = 100;

    /**
     * Collects the published articles of every channel listed in the strategy
     * and pairs each article's publish URL with the text of its attachments.
     *
     * <p>The strategy content is read as a comma-separated list of channel ids;
     * surrounding whitespace is ignored and empty entries are skipped. When the
     * strategy is incremental, articles whose issue date lies before "now minus
     * one day" are left out.
     *
     * @param strategy   the indexing strategy; its content holds the channel ids
     * @param channelDao used to load each channel and the site it belongs to
     * @param articleDao used to load published articles and their attachments
     * @return a mutable set with one {@link ArticleContent} per selected article
     */
    public static Set<ArticleContent> getIndexableArticles4Lucene(TimerStrategy strategy,
            IChannelDao channelDao, IArticleDao articleDao) {

        Set<ArticleContent> articleContentSet = new HashSet<ArticleContent>();

        // The cutoff is the same for every article, so compute it once per call.
        Date incrementCutoff = strategy.isIncrement() ? oneDayAgo() : null;

        String[] channelIds = strategy.getContent().split(",");
        for (String rawChannelId : channelIds) {
            String channelId = rawChannelId.trim();
            if (channelId.length() == 0) {
                continue; // tolerate "1,,2" or an empty content string
            }

            Channel channel = channelDao.getEntity(Long.valueOf(channelId));
            Channel site = channelDao.getSiteByChannel(channel.getId());

            // Per the original author's note, this query leaves out articles whose
            // XML file has not been generated yet as well as expired articles.
            List<?> publishedArticles = articleDao.getPublishedArticleByChannel(channel.getId());
            for (Object row : publishedArticles) {
                Object[] objs = (Object[]) row; // {articleId, pubUrl, issueDate}
                Long articleId = (Long) objs[0];

                if (incrementCutoff != null) {
                    // Incremental run: articles issued before the cutoff are not re-indexed.
                    Date issueDate = (Date) objs[2];
                    if (issueDate.before(incrementCutoff)) {
                        continue;
                    }
                }

                String attachmentText = readAttachmentText(site, articleId, articleDao);
                articleContentSet.add(new ArticleContent((String) objs[1], attachmentText));
            }
        }
        return articleContentSet;
    }

    /**
     * Returns the instant exactly one calendar day before the current time.
     */
    private static Date oneDayAgo() {
        Calendar calendar = Calendar.getInstance();
        calendar.add(Calendar.DAY_OF_MONTH, -1);
        return calendar.getTime();
    }

    /**
     * Concatenates the text returned by {@link AttachmentIndex} for every
     * attachment of the given article.
     */
    private static String readAttachmentText(Channel site, Long articleId, IArticleDao articleDao) {
        Map<String, Attachment> attachments = articleDao.getArticleAttachments(articleId);
        StringBuilder text = new StringBuilder();
        for (Attachment attachment : attachments.values()) {
            File attachmentPath = new File(ArticleHelper.getAttachUploadPath(site, attachment)[0]);
            text.append(AttachmentIndex.getInstance().disposeAttachment(attachmentPath));
        }
        return text.toString();
    }

    /**
     * Indexes the given articles and publishes the result into the strategy's
     * index directory.
     *
     * <p>The index is written into a {@code temp} sub-directory first. Only when
     * writing finished without a fatal error are the old index files removed and
     * the temp files copied over them. The temp files are kept so that the next
     * incremental run can append to them.
     *
     * <p>Articles that were indexed successfully are removed from
     * {@code articleContentSet}; articles whose indexing failed are logged and
     * stay in the set.
     *
     * @param tacticIndex       strategy providing the executor class, the index path
     *                          and the incremental flag
     * @param articleContentSet articles to index; modified as described above
     * @param progress          receives the number of indexed articles in batches
     * @throws BusinessException if the index cannot be written, or if it was written
     *                           but the writer cannot be closed
     */
    public static void createIndex(TimerStrategy tacticIndex, Set<ArticleContent> articleContentSet,
            Progress progress) {

        String indexExecutorClass = tacticIndex.getIndexExecutorClass();
        IIndexExecutor executor = IndexExecutorFactory.create(indexExecutorClass);

        // Make sure the index directory and its temp sub-directory exist.
        String indexPath = tacticIndex.createIndexPath();
        File indexDir = new File(indexPath);
        if (!indexDir.exists()) {
            FileHelper.createDir(indexPath);
        }
        File tempIndexDir = FileHelper.createDir(indexPath + "/temp");

        IndexWriter indexWriter = null;
        int count = 0;
        boolean indexed = false;
        try {
            // Start a fresh index unless this is an incremental run with an existing temp index.
            boolean isRecreateIndex = !tacticIndex.isIncrement()
                    || FileHelper.listFiles(tempIndexDir).isEmpty();
            indexWriter = new IndexWriter(tempIndexDir, AnalyzerFactory.createAnalyzer(), isRecreateIndex);
            indexWriter.setMaxBufferedDocs(10); // flush after every 10 buffered documents

            for (Iterator<ArticleContent> it = articleContentSet.iterator(); it.hasNext();) {
                ArticleContent articleContent = it.next();
                try {
                    executor.createIndex(articleContent, indexWriter);
                } catch (Exception e) {
                    log.error("创建发布路径为:" + articleContent.getPubUrl() + "的文章索引时出错", e);
                    // TODO record the articles whose indexing failed
                    continue;
                }

                count++;
                // Report progress (and optimize the index) once per full batch.
                if (count % PROGRESS_BATCH_SIZE == 0 || progress.isCompleted()) {
                    progress.add(count);
                    count = 0;
                    indexWriter.optimize();
                }

                // Drop the indexed article so its content can be garbage collected.
                it.remove();
            }
            indexed = true;
        } catch (Exception e) {
            throw new BusinessException("读取索引文件出错!", e);
        } finally {
            progress.add(count); // report the articles of the last, partial batch
            closeWriter(indexWriter, indexed);
        }

        replaceIndexFiles(indexDir, tempIndexDir);
    }

    /**
     * Closes the writer if it was opened.
     *
     * <p>A close failure is raised only when {@code failOnError} is true. When
     * indexing has already failed the close failure is just logged, so that it
     * does not replace the exception that is currently propagating.
     */
    private static void closeWriter(IndexWriter indexWriter, boolean failOnError) {
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.close();
        } catch (IOException e) {
            if (failOnError) {
                throw new BusinessException("关闭索引文件错误!", e);
            }
            log.error("关闭索引文件错误!", e);
        }
    }

    /**
     * Deletes the plain files directly inside {@code indexDir} (sub-directories
     * such as the temp directory are kept) and then copies the files of
     * {@code tempIndexDir} into it.
     */
    private static void replaceIndexFiles(File indexDir, File tempIndexDir) {
        List<String> oldFileNames = FileHelper.listFiles(indexDir);
        for (String fileName : oldFileNames) {
            File file = new File(indexDir, fileName);
            if (file.isDirectory()) {
                continue;
            }
            if (!file.delete()) {
                log.warn("Could not delete old index file: " + file.getPath());
            }
        }

        // The temp files are not removed: the next incremental run appends to them.
        FileHelper.copyFilesInDir("", tempIndexDir, indexDir, false);
    }
}