/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package com.xpn.xwiki.plugin.lucene;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.log4j.MDC;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.queryParser.QueryParser;
import com.xpn.xwiki.XWiki;
import com.xpn.xwiki.XWikiContext;
import com.xpn.xwiki.doc.XWikiAttachment;
import com.xpn.xwiki.doc.XWikiDocument;
import com.xpn.xwiki.notify.XWikiActionNotificationInterface;
import com.xpn.xwiki.notify.XWikiDocChangeNotificationInterface;
import com.xpn.xwiki.notify.XWikiNotificationRule;
/**
* @version $Id: $
*/
public class IndexUpdater extends AbstractXWikiRunnable
implements XWikiDocChangeNotificationInterface, XWikiActionNotificationInterface
{
/** Logging helper. */
private static final Log LOG = LogFactory.getLog(IndexUpdater.class);
/** Milliseconds of sleep between checks for changed documents. */
private int indexingInterval = 30000;
private boolean exit = false;
private IndexWriter writer;
private String indexDir;
private XWikiDocumentQueue queue = new XWikiDocumentQueue();
/**
* Soft threshold after which no more documents will be added to the indexing queue. When the
* queue size gets larger than this value, the index rebuilding thread will sleep chuks of
* {@link IndexRebuilder#retryInterval} milliseconds until the queue size will get back bellow
* this threshold. This does not affect normal indexing through wiki updates.
*/
public int maxQueueSize = 1000;
private Analyzer analyzer;
private LucenePlugin plugin;
private IndexSearcher searcher;
private IndexReader reader;
private XWikiContext context;
private XWiki xwiki;
private long activesIndexedDocs = 0;
static List<String> fields = new ArrayList<String>();
public boolean needInitialBuild = false;
public void doExit()
{
exit = true;
}
/**
* Main loop. Polls the queue for documents to be indexed.
*
* @see java.lang.Runnable#run()
*/
public void run()
{
MDC.put("url", "Lucene index updating thread");
// Since this is where a new thread is created this is where we need to initialize the Container
// ThreadLocal variables and not in the init() method. Otherwise we would simply overwrite the
// Container values for the main thread...
try {
initXWikiContainer(this.context);
runMainLoop();
} finally {
// Cleanup Container component (it has ThreadLocal variables)
cleanupXWikiContainer(this.context);
this.xwiki.getStore().cleanUp(this.context);
MDC.remove("url");
}
}
/**
* Main loop. Polls the queue for documents to be indexed.
*/
private void runMainLoop()
{
while (!this.exit) {
if (this.queue.isEmpty()) {
if (LOG.isDebugEnabled()) {
LOG.debug("IndexUpdater: queue empty, nothing to do");
}
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("IndexUpdater: documents in queue, start indexing");
}
Map<String, IndexData> toIndex = new HashMap<String, IndexData>();
List<Integer> toDelete = new ArrayList<Integer>();
activesIndexedDocs = 0;
try {
openSearcher();
while (!this.queue.isEmpty()) {
IndexData data = this.queue.remove();
List<Integer> oldDocs = (data==null) ? null : getOldIndexDocIds(data);
if (oldDocs != null) {
for (Integer id : oldDocs) {
if (LOG.isDebugEnabled()) {
LOG.debug("Adding " + id + " to remove list");
}
if (!toDelete.contains(id)) {
toDelete.add(id);
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("Found " + id
+ " already in list while adding it to remove list");
}
}
}
}
String id = data.getId();
LOG.debug("Adding " + id + " to index list");
if (toIndex.containsKey(id)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Found " + id
+ " already in list while adding it to index list");
}
toIndex.remove(id);
}
++activesIndexedDocs;
toIndex.put(id, data);
}
} catch (Exception e) {
LOG.error("error preparing index queue", e);
} finally {
closeSearcher();
}
// Let's delete
try {
openSearcher();
if (LOG.isInfoEnabled()) {
LOG.info("deleting " + toDelete.size() + " docs from lucene index");
}
int nb = deleteOldDocs(toDelete);
if (LOG.isInfoEnabled()) {
LOG.info("deleted " + nb + " docs from lucene index");
}
} catch (Exception e) {
LOG.error("error deleting previous documents", e);
} finally {
closeSearcher();
}
// Let's index
try {
if (LOG.isInfoEnabled()) {
LOG.info("indexing " + toIndex.size() + " docs to lucene index");
}
XWikiContext context = (XWikiContext) this.context.clone();
context.getWiki().getStore().cleanUp(context);
openWriter(false);
int nb = 0;
for (Map.Entry<String, IndexData> entry : toIndex.entrySet()) {
String id = entry.getKey();
IndexData data = entry.getValue();
try {
XWikiDocument doc =
this.xwiki.getDocument(data.getFullName(), context);
if (data.getLanguage() != null && !data.getLanguage().equals("")) {
doc = doc.getTranslatedDocument(data.getLanguage(), context);
}
addToIndex(data, doc, context);
++nb;
--activesIndexedDocs;
} catch (Exception e) {
LOG.error("error indexing document " + id, e);
}
}
if (LOG.isInfoEnabled()) {
LOG.info("indexed " + nb + " docs to lucene index");
}
writer.flush();
} catch (Exception e) {
LOG.error("error indexing documents", e);
} finally {
this.context.getWiki().getStore().cleanUp(this.context);
closeWriter();
}
plugin.openSearchers();
}
try {
Thread.sleep(indexingInterval);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
private synchronized void closeSearcher()
{
try {
if (this.searcher != null) {
this.searcher.close();
}
if (this.reader != null) {
this.reader.close();
}
} catch (IOException e) {
LOG.error("error closing index searcher", e);
} finally {
this.searcher = null;
this.reader = null;
}
}
/**
* Opens the index reader and searcher used for finding and deleting old versions of indexed
* documents.
*/
private synchronized void openSearcher()
{
try {
this.reader = IndexReader.open(this.indexDir);
this.searcher = new IndexSearcher(this.reader);
} catch (IOException e) {
LOG.error("error opening index searcher", e);
}
}
/**
* Deletes the documents with the given ids from the index.
*/
private int deleteOldDocs(List<Integer> oldDocs)
{
int nb = 0;
for (Integer id : oldDocs) {
if (LOG.isDebugEnabled()) {
LOG.debug("delete doc " + id);
}
try {
this.reader.deleteDocument(id);
nb++;
} catch (IOException e1) {
LOG.error("error deleting doc " + id, e1);
}
}
return nb;
}
private List<Integer> getOldIndexDocIds(IndexData data)
{
List<Integer> retval = new ArrayList<Integer>(3);
Query query = data.buildQuery();
try {
Hits hits = this.searcher.search(query);
for (int i = 0; i < hits.length(); i++) {
retval.add(new Integer(hits.id(i)));
}
} catch (Exception e) {
LOG.error(String.format(
"Error looking for old versions of document [%s] with query [%s]", data, query),
e);
}
return retval;
}
private void openWriter(boolean create)
{
if (writer != null) {
LOG.error("Writer already open and createWriter called");
return;
}
try {
// fix for windows by Daniel Cortes:
FSDirectory f = FSDirectory.getDirectory(indexDir);
writer = new IndexWriter(f, analyzer, create);
// writer = new IndexWriter (indexDir, analyzer, create);
writer.setUseCompoundFile(true);
if (LOG.isDebugEnabled()) {
LOG.debug("successfully opened index writer : " + indexDir);
}
} catch (IOException e) {
LOG.error("IOException when opening Lucene Index for writing at " + indexDir, e);
}
}
private void closeWriter()
{
if (this.writer == null) {
LOG.error("Writer not open and closeWriter called");
return;
}
try {
this.writer.optimize();
} catch (IOException e1) {
LOG.error("Exception caught when optimizing Index", e1);
}
try {
this.writer.close();
} catch (Exception e) {
LOG.error("Exception caught when closing IndexWriter", e);
}
this.writer = null;
if (LOG.isDebugEnabled()) {
LOG.debug("closed writer.");
}
}
private void addToIndex(IndexData data, XWikiDocument doc, XWikiContext context)
throws IOException
{
if (LOG.isDebugEnabled()) {
LOG.debug("addToIndex: " + data);
}
org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
data.addDataToLuceneDocument(luceneDoc, doc, context);
Field fld = null;
// collecting all the fields for using up in search
for (Iterator<Field> it = luceneDoc.getFields().iterator(); it.hasNext();) {
fld = it.next();
if (!fields.contains(fld.name())) {
fields.add(fld.name());
}
}
this.writer.addDocument(luceneDoc);
}
/**
* @param indexDir The indexDir to set.
*/
public void setIndexDir(String indexDir)
{
this.indexDir = indexDir;
}
/**
* @param analyzer The analyzer to set.
*/
public void setAnalyzer(Analyzer analyzer)
{
this.analyzer = analyzer;
}
public synchronized void init(Properties config, LucenePlugin plugin, XWikiContext context)
{
this.xwiki = context.getWiki();
this.context = (XWikiContext) context.clone();
this.context.setDatabase(this.context.getMainXWiki());
this.plugin = plugin;
// take the first configured index dir as the one for writing
// String[] indexDirs =
// StringUtils.split(config.getProperty(LucenePlugin.PROP_INDEX_DIR), "
// ,");
String[] indexDirs = StringUtils.split(plugin.getIndexDirs(), ",");
if (indexDirs != null && indexDirs.length > 0) {
this.indexDir = indexDirs[0];
File f = new File(indexDir);
if (!f.isDirectory()) {
f.mkdirs();
this.needInitialBuild = true;
}
if (!IndexReader.indexExists(f)) {
this.needInitialBuild = true;
}
}
this.indexingInterval =
1000 * Integer
.parseInt(config.getProperty(LucenePlugin.PROP_INDEXING_INTERVAL, "30"));
this.maxQueueSize =
Integer.parseInt(config.getProperty(LucenePlugin.PROP_MAX_QUEUE_SIZE, "1000"));
// Note: There's no need to open the Searcher here (with a call to
// openSearcher()) as each task needing it will open it itself.
}
public void cleanIndex()
{
if (LOG.isInfoEnabled()) {
LOG.info("trying to clear index for rebuilding");
}
while (writer != null) {
if (LOG.isDebugEnabled()) {
LOG.debug("waiting for existing index writer to close");
}
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
synchronized (this) {
openWriter(true);
closeWriter();
}
}
public void add(XWikiDocument document, XWikiContext context)
{
this.queue.add(new DocumentData(document, context));
if (document.hasElement(XWikiDocument.HAS_OBJECTS)) {
addObject(document, context);
}
}
public void addObject(XWikiDocument document, XWikiContext context)
{
this.queue.add(new ObjectData(document, context));
}
public void add(XWikiDocument document, XWikiAttachment attachment, XWikiContext context)
{
if (document != null && attachment != null && context != null) {
this.queue.add(new AttachmentData(document, attachment, context));
} else {
LOG.error("invalid parameters given to add: " + document + ", " + attachment + ", "
+ context);
}
}
public int addAttachmentsOfDocument(XWikiDocument document, XWikiContext context)
{
int retval = 0;
final List<XWikiAttachment> attachmentList = document.getAttachmentList();
retval += attachmentList.size();
for (XWikiAttachment attachment : attachmentList) {
try {
add(document, attachment, context);
} catch (Exception e) {
LOG.error("error retrieving attachment of document " + document.getFullName(), e);
}
}
return retval;
}
/**
* Notification of changes in document content
*
* @see com.xpn.xwiki.notify.XWikiNotificationInterface#notify(com.xpn.xwiki.notify.XWikiNotificationRule,
* com.xpn.xwiki.doc.XWikiDocument,com.xpn.xwiki.doc.XWikiDocument,
* int,com.xpn.xwiki.XWikiContext)
*/
public void notify(XWikiNotificationRule rule, XWikiDocument newDoc, XWikiDocument oldDoc,
int event, XWikiContext context)
{
if (LOG.isDebugEnabled()) {
LOG.debug("notify from XWikiDocChangeNotificationInterface, event=" + event
+ ", newDoc=" + newDoc + " oldDoc=" + oldDoc);
}
try {
add(newDoc, context);
} catch (Exception e) {
LOG.error("error in notify", e);
}
}
/**
* Notification of attachment uploads.
*
* @see com.xpn.xwiki.notify.XWikiActionNotificationInterface#notify(com.xpn.xwiki.notify.XWikiNotificationRule,
* com.xpn.xwiki.doc.XWikiDocument,java.lang.String,com.xpn.xwiki.XWikiContext)
*/
public void notify(XWikiNotificationRule arg0, XWikiDocument doc, String action,
XWikiContext context)
{
if ("upload".equals(action)) {
if (LOG.isDebugEnabled()) {
LOG.debug("upload action notification for doc " + doc.getName());
}
try {
// Retrieve the latest version (with the file just attached)
XWikiDocument basedoc = context.getWiki().getDocument(doc.getFullName(), context);
List<XWikiAttachment> attachments = basedoc.getAttachmentList();
// find out the most recently changed attachment
XWikiAttachment newestAttachment = null;
for (XWikiAttachment attachment : attachments) {
if ((newestAttachment == null)
|| attachment.getDate().after(newestAttachment.getDate())) {
newestAttachment = attachment;
}
}
add(basedoc, newestAttachment, context);
} catch (Exception e) {
LOG.error("error in notify", e);
}
}
}
/**
* @return the number of documents in the queue.
*/
public long getQueueSize()
{
return this.queue.getSize();
}
/**
* @return the number of documents Lucene index writer.
*/
public long getLuceneDocCount()
{
if (writer != null)
return writer.docCount();
else {
try {
openWriter(false);
return writer.docCount();
} catch (Exception e) {
return -1;
} finally {
closeWriter();
}
}
}
/**
* @return the number of documents in the second queue gave to Lucene.
*/
public long getActiveQueueSize()
{
return this.activesIndexedDocs;
}
public boolean isIndexed(String wiki, String pagename) {
try {
openSearcher();
QueryParser qp = new QueryParser(IndexFields.DOCUMENT_FULLNAME, analyzer);
Query query = qp.parse(pagename);
Hits hits = searcher.search(query);
if (LOG.isDebugEnabled()) {
LOG.debug("search for document " + pagename + " in wiki " + wiki + " returned " + hits.length() + " hits");
}
if (hits==null || hits.length()==0) {
if (LOG.isDebugEnabled()) {
LOG.debug("document " + pagename + " in wiki " + wiki + " is not indexed");
}
return false;
}
Iterator it = hits.iterator();
while (it.hasNext()) {
Hit hit = (Hit) it.next();
Document doc = hit.getDocument();
if (wiki.equals(doc.get(IndexFields.DOCUMENT_WIKI))&&pagename.equals(doc.get(IndexFields.DOCUMENT_FULLNAME))) {
if (LOG.isDebugEnabled()) {
LOG.debug("document " + pagename + " in wiki " + wiki + " is already indexed");
}
return true;
}
}
if (LOG.isErrorEnabled()) {
LOG.error("document " + pagename + " in wiki " + wiki + " was not found in search result");
}
return false;
} catch (Exception e) {
if (LOG.isErrorEnabled()) {
LOG.debug("error checking if document " + pagename + " in wiki " + wiki + " is indexed", e);
}
return false;
} finally {
closeSearcher();
}
}
}