/* ================================================================== * Created [2009-4-27 下午11:32:55] by Jon.King * ================================================================== * TSS * ================================================================== * mailTo:jinpujun@hotmail.com * Copyright (c) Jon.King, 2009-2012 * ================================================================== */ package com.jinhe.tss.cms.lucene; import java.io.File; import java.io.IOException; import java.util.Calendar; import java.util.Date; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.log4j.Logger; import org.apache.lucene.index.IndexWriter; import com.jinhe.tss.cms.dao.IArticleDao; import com.jinhe.tss.cms.dao.IChannelDao; import com.jinhe.tss.cms.entity.Attachment; import com.jinhe.tss.cms.entity.Channel; import com.jinhe.tss.cms.entity.TimerStrategy; import com.jinhe.tss.cms.helper.ArticleHelper; import com.jinhe.tss.cms.lucene.executor.IIndexExecutor; import com.jinhe.tss.cms.lucene.executor.IndexExecutorFactory; import com.jinhe.tss.core.common.progress.Progress; import com.jinhe.tss.core.exception.BusinessException; import com.jinhe.tss.core.util.FileHelper; public class IndexHelper { private static Logger log = Logger.getLogger(IndexHelper.class); /** * 获取索引的所有文章地址列表 * @param channelDao * @param articleDao * @return */ public static Set<ArticleContent> getIndexableArticles4Lucene(TimerStrategy strategy, IChannelDao channelDao, IArticleDao articleDao) { Set<ArticleContent> articleContentSet = new HashSet<ArticleContent>(); String[] channelIds = strategy.getContent().split(","); for ( String channelId : channelIds ) { Channel channel = channelDao.getEntity(new Long(channelId)); Channel site = channelDao.getSiteByChannel(channel.getId()); // 过滤栏目下未生成xml文件的文章(并且过滤掉已经过期的文章) List<?> publishedArticles = articleDao.getPublishedArticleByChannel(channel.getId()); for ( Object temp : publishedArticles ) { Object[] objs = (Object[]) temp; // 数组形式{articleId, pubUrl, issueDate} Long articleId = (Long)objs[0]; // 只为当天发布的文章创建索引 if( strategy.isIncrement() ){ java.util.Calendar calendar = Calendar.getInstance(); calendar.add(Calendar.DAY_OF_MONTH, -1); Date yesterday = calendar.getTime(); // 如果是今天之前发布的文章,则不再为其重建索引。 Date issueDate = (Date) objs[2]; if(issueDate.before(yesterday)){ continue; } } Map<String, Attachment> map = articleDao.getArticleAttachments(articleId); StringBuffer buffer = new StringBuffer(); for( Attachment attachment : map.values() ){ File attachmentPath = new File(ArticleHelper.getAttachUploadPath(site, attachment)[0]); String attachContent = AttachmentIndex.getInstance().disposeAttachment(attachmentPath); buffer.append(attachContent); // 放入附件内容 } articleContentSet.add(new ArticleContent((String)objs[1], buffer.toString())); } } return articleContentSet; } public static void createIndex(TimerStrategy tacticIndex, Set<ArticleContent> articleContentSet, Progress progress) { String indexExecutorClass = tacticIndex.getIndexExecutorClass(); IIndexExecutor executor = IndexExecutorFactory.create(indexExecutorClass); //创建索引文件存放路径 String indexPath = tacticIndex.createIndexPath(); File indexDir = new File(indexPath); if( !indexDir.exists() ) { FileHelper.createDir(indexPath); } File tempIndexDir = FileHelper.createDir(indexPath + "/temp"); // 先把新建的索引文件放在临时文件里,创建成功再覆盖原先的 IndexWriter indexWriter = null; int count = 0; try { // 如果 不是增量创建索引 或者 tempIndexDir目录为空, 则重新创建索引目录 boolean isRecreateIndex = !tacticIndex.isIncrement() || FileHelper.listFiles(tempIndexDir).isEmpty(); indexWriter = new IndexWriter(tempIndexDir, AnalyzerFactory.createAnalyzer(), isRecreateIndex); indexWriter.setMaxBufferedDocs(10); // 设置强制索引document对象后flush for ( Iterator<ArticleContent> it = articleContentSet.iterator(); it.hasNext(); ) { ArticleContent articleContent = it.next(); try{ executor.createIndex(articleContent, indexWriter); } catch(Exception e){ log.error("创建发布路径为:" + articleContent.getPubUrl() + "的文章索引时出错", e); // TODO 将创建索引失败的文章记录下来 continue; } count ++; // 进度条的信息更新,每一百个更新一次 if ((count > 0 && count % 100 == 0) || progress.isCompleted()) { progress.add(count); count = 0; indexWriter.optimize(); } it.remove(); // 把已经创建完索引的文章从articleContentSet中去掉,好让垃圾回收站及节约内存时回收 } } catch (Exception e) { throw new BusinessException("读取索引文件出错!", e); } finally { progress.add(count); // 确保最后一次进度信息更新 try { if(indexWriter != null) { indexWriter.close(); } } catch (IOException e) { throw new BusinessException("关闭索引文件错误!", e); } } // 先删除老的索引文件 List<String> list = FileHelper.listFiles(indexDir); for ( String fileName : list ) { File file = new File(indexDir.getPath() + "/" + fileName); if ( file.isDirectory() ) { continue; } file.delete(); } // 覆盖更新后的索引文件到索引目录。注: 索引文件不从Temp目录下删除,下次更新索引时接着往该索引文件后添加。 FileHelper.copyFilesInDir("", tempIndexDir, indexDir, false); } }