/*
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package com.xpn.xwiki.plugin.lucene;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.log4j.MDC;

import com.xpn.xwiki.XWiki;
import com.xpn.xwiki.XWikiContext;
import com.xpn.xwiki.XWikiException;
import com.xpn.xwiki.doc.XWikiDocument;
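/*
 * Minimal usage sketch (hypothetical caller code, e.g. a script or admin action; how the
 * IndexUpdater instance is obtained is an assumption, shown only for illustration):
 *
 *   IndexRebuilder rebuilder = new IndexRebuilder(indexUpdater, context);
 *   int rc = rebuilder.startRebuildIndex(null, true, false, context);
 *   if (rc == LucenePluginApi.REBUILD_IN_PROGRESS) {
 *       // Another rebuild is still running; try again later
 *   }
 */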
/**
 * <p>
 * Handles rebuilding of the whole Lucene Search Index. This involves the following steps:
 * <ul>
 * <li>empty the existing index</li>
 * <li>retrieve the names of all virtual wikis</li>
 * <li>for each document in each virtual wiki:
 * <ul>
 * <li>index the document</li>
 * <li>get and index all translations of the document</li>
 * <li>get and index all attachments of the document</li>
 * <li>get and index all objects of the document</li>
 * </ul>
 * </li>
 * </ul>
 * The rebuild can be triggered using the {@link LucenePluginApi#rebuildIndex()} method. Once a
 * rebuild request is made, a new thread is created, so the requesting script can continue
 * processing while the rebuilding is done in the background. The actual indexing is done by the
 * IndexUpdater thread; this thread only gathers the data and passes it to the IndexUpdater.
 * </p>
 * <p>
 * In summary, this class:
 * <ul>
 * <li>cleans the Lucene search indexes and re-submits all the contents of all the wikis for
 * indexing</li>
 * <li>without clogging the indexing thread (since 1.2)</li>
 * <li>all in a background thread (since 1.2)</li>
 * <li>making sure that only one rebuild is in progress (since 1.2)</li>
 * </ul>
 * </p>
 *
 * @version $Id: $
 */
public class IndexRebuilder extends AbstractXWikiRunnable
{
    /** Logging helper. */
    private static final Log LOG = LogFactory.getLog(IndexRebuilder.class);

    /** The actual object/thread that indexes data. */
    private IndexUpdater indexUpdater;

    /** The XWiki context. */
    private XWikiContext context;

    /** Amount of time (in milliseconds) to sleep while waiting for the indexing queue to empty. */
    private static final int retryInterval = 30000;

    /** Flag indicating that a rebuild is already in progress. */
    private boolean rebuildInProgress = false;

    /** SQL where-clause used to select the documents to reindex, or {@code null} for all. */
    private String sql = null;

    /** Whether to clear the index before rebuilding. */
    private boolean clearIndex = false;

    /** Only index a page if it is not already present in the index. */
    private boolean refresh = false;

    /** Number of documents still to be checked. */
    private long tocheck = 0;

    /** Documents that had to be refreshed. */
    private List<String> torefresh = new ArrayList<String>();

    public IndexRebuilder(IndexUpdater indexUpdater, XWikiContext context)
    {
        this.indexUpdater = indexUpdater;

        if (indexUpdater.needInitialBuild) {
            this.startRebuildIndex(null, false, false, context);
            LOG.info("Launched initial lucene indexing");
        }
    }

    public synchronized int startRebuildIndex(String sql, boolean clearIndex, boolean refresh,
        XWikiContext context)
    {
        if (this.rebuildInProgress) {
            LOG.warn("Cannot launch rebuild because a build is in progress");

            return LucenePluginApi.REBUILD_IN_PROGRESS;
        } else {
            this.rebuildInProgress = true;
            this.context = context;
            this.sql = sql;
            this.clearIndex = clearIndex;
            this.refresh = refresh;

            Thread indexRebuilderThread = new Thread(this, "Lucene Index Rebuilder");
            // The JVM should be allowed to shutdown while this thread is running
            indexRebuilderThread.setDaemon(true);
            // Client requests are more important than indexing
            indexRebuilderThread.setPriority(3);
            // Finally, start the rebuild in the background
            indexRebuilderThread.start();

            // Too bad that now we can't tell how many items are there to be indexed...
            return 0;
        }
    }
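    // Hedged example of the "sql" parameter above: indexWiki() forwards it verbatim to
    // XWikiStoreInterface#searchDocumentsNames(), so it should be a where-clause fragment that
    // the configured store accepts. Assuming the usual HQL mapping where "doc" denotes the
    // document, something like the following would limit the rebuild to a single space (the
    // exact clause syntax depends on the store implementation):
    //
    //   rebuilder.startRebuildIndex(" where doc.web='Main'", false, true, context);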
    public void run()
    {
        MDC.put("url", "Lucene index rebuilder thread");

        LOG.debug("Starting lucene index rebuild");
        XWikiContext context = null;
        try {
            // The context must be cloned, as otherwise setDatabase() might affect the response
            // to the current request.
            // TODO This is not a good way to do this; ideally there would be a method that
            // creates a new context and copies only a few needed objects, as some objects are
            // not supposed to be used in 2 different contexts.
            // TODO This seems to work on a simple run:
            // context = new XWikiContext();
            // context.setWiki(this.context.getWiki());
            // context.setEngineContext(this.context.getEngineContext());
            // context.setMode(this.context.getMode());
            // context.setAction(this.context.getAction());
            // context.put("msg", this.context.get("msg"));
            // context.setMainXWiki(this.context.getMainXWiki());
            // context.setURLFactory(this.context.getURLFactory());
            // context.setLanguage(this.context.getLanguage());
            // context.setDatabase(this.context.getDatabase());
            // context.put("org.xwiki.component.manager.ComponentManager", this.context
            //     .get("org.xwiki.component.manager.ComponentManager"));
            context = (XWikiContext) this.context.clone();
            this.context = null;
            // For example, we definitely don't want to use the same hibernate session...
            context.remove("hibsession");
            context.remove("hibtransaction");
            // This is also causing serious problems, as the same xcontext gets shared between
            // threads and causes the hibernate session to be shared in the end. The vcontext is
            // automatically recreated by the velocity renderer, if it isn't found in the
            // xcontext.
            context.remove("vcontext");

            // Since this is where a new thread is created, this is where we need to initialize
            // the Container ThreadLocal variables, and not in the init() method. Otherwise we
            // would simply overwrite the Container values for the main thread...
            initXWikiContainer(context);

            // The original request and response should not be used outside the actual request
            // processing thread, as they will be cleaned later by the container.
            context.setRequest(null);
            context.setResponse(null);

            rebuildIndex(context);
        } catch (Exception e) {
            LOG.error("Error in lucene rebuild thread", e);
        } finally {
            this.rebuildInProgress = false;
            // Cleanup the Container component (it has ThreadLocal variables)
            cleanupXWikiContainer(context);
            if (context != null) {
                context.getWiki().getStore().cleanUp(context);
            }
            MDC.remove("url");
        }

        LOG.debug("Lucene index rebuild done");
    }

    /**
     * First empties the index (if requested), then fetches all documents, their translations and
     * their attachments for re-addition to the index.
     *
     * @param context the XWiki context used to access the wikis
     * @return the total number of documents and attachments successfully added to the indexer
     *         queue; wikis where an error occurred contribute nothing to the total
     */
    private int rebuildIndex(XWikiContext context)
    {
        // Only clear the index if explicitly requested
        if (this.clearIndex) {
            this.indexUpdater.cleanIndex();
        }

        int retval = 0;
        Collection<String> wikiServers;
        XWiki xwiki = context.getWiki();

        if (xwiki.isVirtualMode()) {
            wikiServers = findWikiServers(context);

            if (LOG.isDebugEnabled()) {
                LOG.debug("found " + wikiServers.size() + " virtual wikis:");
                for (String wikiName : wikiServers) {
                    LOG.debug(wikiName);
                }
            }
        } else {
            // No virtual wiki configuration, just index the wiki the context belongs to
            wikiServers = new ArrayList<String>();
            wikiServers.add(context.getDatabase());
        }

        // Iterate over all found virtual wikis
        for (String wikiName : wikiServers) {
            int wikiResult = indexWiki(wikiName, context);
            if (wikiResult > 0) {
                retval += wikiResult;
            }
        }

        return retval;
    }

    /**
     * @return the number of documents that still need to be checked by the current rebuild
     */
    public long getPreIndexQueueSize()
    {
        return this.tocheck;
    }

    /**
     * @return the names ("wiki:Space.Doc") of the documents re-queued in refresh mode
     */
    public List<String> getRefreshedDocuments()
    {
        return this.torefresh;
    }
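    // Hedged monitoring sketch: the two getters above can be polled from another thread (for
    // instance a hypothetical admin page) while a rebuild is running:
    //
    //   long remaining = rebuilder.getPreIndexQueueSize();          // documents left to examine
    //   List<String> refreshed = rebuilder.getRefreshedDocuments(); // "wiki:Space.Doc" names
    //                                                               // re-queued in refresh mode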
    /**
     * Adds the content of a given wiki to the indexUpdater's queue.
     *
     * @param wikiName name of the wiki to index
     * @param context the XWiki context
     * @return the number of index entries added to the queue, or a negative number on errors
     */
    protected int indexWiki(String wikiName, XWikiContext context)
    {
        LOG.info("Reading content of wiki " + wikiName);

        // Number of index entries processed
        int retval = 0;
        XWiki xwiki = context.getWiki();
        String database = context.getDatabase();

        if (this.refresh) {
            this.torefresh.clear();
        }

        try {
            context.setDatabase(wikiName);

            Collection<String> docNames = null;
            try {
                docNames = xwiki.getStore().searchDocumentsNames(
                    (this.sql != null) ? this.sql : "", context);
            } catch (XWikiException ex) {
                LOG.warn(String.format(
                    "Error getting document names for wiki [%s]. Internal error is: %s",
                    wikiName, ex.getMessage()));

                return -1;
            }

            if (docNames == null) {
                this.tocheck = 0;

                return 0;
            }

            // Number of documents that still need to be checked
            this.tocheck = docNames.size();

            for (String docName : docNames) {
                this.tocheck--;

                if (this.refresh) {
                    // Check if the page is already present in the index
                    if (this.indexUpdater.isIndexed(wikiName, docName)) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("bypassing document " + wikiName + ":" + docName);
                        }

                        continue;
                    } else {
                        this.torefresh.add(wikiName + ":" + docName);
                    }
                }

                if (LOG.isDebugEnabled()) {
                    LOG.debug("indexing document " + wikiName + ":" + docName);
                }

                XWikiDocument document;
                try {
                    document = xwiki.getDocument(docName, context);
                } catch (XWikiException e2) {
                    LOG.error("error fetching document " + wikiName + ":" + docName, e2);

                    continue;
                }

                if (document != null) {
                    // In order not to load the whole database in memory, we're limiting the
                    // number of documents that are in the processing queue at a moment. We could
                    // use a Bounded Queue in the index updater, but that would generate
                    // exceptions in the rest of the platform, as the index rebuilder could fill
                    // the queue, and then a user trying to save a document would cause an
                    // exception. Thus, it is better to limit the index rebuilder thread only,
                    // and not the index updater.
                    while (this.indexUpdater.getQueueSize() > this.indexUpdater.maxQueueSize) {
                        try {
                            // Don't leave any database connections open while sleeping.
                            // This shouldn't be needed, but we never know what bugs might be
                            // there.
                            context.getWiki().getStore().cleanUp(context);
                            Thread.sleep(retryInterval);
                        } catch (InterruptedException e) {
                            return -2;
                        }
                    }

                    this.indexUpdater.add(document, context);
                    retval++;
                    retval += addTranslationsOfDocument(document, context);
                    retval += this.indexUpdater.addAttachmentsOfDocument(document, context);
                    retval += addObjectsOfDocument(document, context);
                } else if (LOG.isInfoEnabled()) {
                    LOG.info("XWiki delivered null for document name " + wikiName + ":"
                        + docName);
                }
            }
        } finally {
            context.setDatabase(database);
        }

        return retval;
    }

    /**
     * Queues the objects of a document for indexing, so that the content (the values of the
     * title/category/content/extract properties) of XWiki.ArticleClass objects gets indexed.
     */
    private int addObjectsOfDocument(XWikiDocument document, XWikiContext wikiContext)
    {
        int retval = 0;

        if (document.hasElement(XWikiDocument.HAS_OBJECTS)) {
            retval += document.getxWikiObjects().size();
            this.indexUpdater.addObject(document, wikiContext);
        }

        return retval;
    }

    protected int addTranslationsOfDocument(XWikiDocument document, XWikiContext wikiContext)
    {
        int retval = 0;

        List<String> translations;
        try {
            translations = document.getTranslationList(wikiContext);
        } catch (XWikiException e) {
            LOG.error("error getting list of translations from document "
                + document.getFullName(), e);

            return 0;
        }

        for (String lang : translations) {
            try {
                this.indexUpdater.add(document.getTranslatedDocument(lang, wikiContext),
                    wikiContext);
                retval++;
            } catch (XWikiException e1) {
                LOG.error("Error getting translated document for document "
                    + document.getFullName() + " and language " + lang, e1);
            }
        }

        return retval;
    }

    private Collection<String> findWikiServers(XWikiContext context)
    {
        List<String> retval = Collections.emptyList();

        try {
            // Copy the returned list, since it might be unmodifiable and the main wiki may need
            // to be appended below
            retval =
                new ArrayList<String>(context.getWiki().getVirtualWikisDatabaseNames(context));

            if (!retval.contains(context.getMainXWiki())) {
                retval.add(context.getMainXWiki());
            }
        } catch (Exception e) {
            LOG.error("Error getting list of wiki servers!", e);
        }

        return retval;
    }
}