/* * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * This is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this software; if not, write to the Free * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA, or see the FSF site: http://www.fsf.org. */ package com.xpn.xwiki.plugin.lucene; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Properties; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MultiSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; import com.xpn.xwiki.XWikiContext; import com.xpn.xwiki.api.Api; import com.xpn.xwiki.doc.XWikiAttachment; import com.xpn.xwiki.doc.XWikiDocument; import com.xpn.xwiki.notify.DocChangeRule; import com.xpn.xwiki.notify.XWikiActionRule; import com.xpn.xwiki.plugin.XWikiDefaultPlugin; import com.xpn.xwiki.plugin.XWikiPluginInterface; /** * A plugin offering support for advanced searches using Lucene, a high performance, open source * search engine. It uses an {@link IndexUpdater} to monitor and submit wiki pages for indexing to * the Lucene engine, and offers simple methods for searching documents, with the possiblity to sort * by one or several document fields (besides the default sort by relevance), filter by one or * several languages, and search in one, several or all virtual wikis. * * @version $Id: $ */ public class LucenePlugin extends XWikiDefaultPlugin implements XWikiPluginInterface { public static final String DOCTYPE_WIKIPAGE = "wikipage"; public static final String DOCTYPE_OBJECTS = "objects"; public static final String DOCTYPE_ATTACHMENT = "attachment"; public static final String PROP_INDEX_DIR = "xwiki.plugins.lucene.indexdir"; public static final String PROP_ANALYZER = "xwiki.plugins.lucene.analyzer"; public static final String PROP_INDEXING_INTERVAL = "xwiki.plugins.lucene.indexinterval"; public static final String PROP_MAX_QUEUE_SIZE = "xwiki.plugins.lucene.maxQueueSize"; private static final String DEFAULT_ANALYZER = "org.apache.lucene.analysis.standard.StandardAnalyzer"; private static final Log LOG = LogFactory.getLog(LucenePlugin.class); /** * The Lucene text analyzer, can be configured in <tt>xwiki.cfg</tt> using the key * {@link #PROP_ANALYZER} (<tt>xwiki.plugins.lucene.analyzer</tt>). */ private Analyzer analyzer; /** * Lucene index updater. Listens for changes and indexes wiki documents in a separate thread. */ private IndexUpdater indexUpdater; /** The thread running the index updater. */ private Thread indexUpdaterThread; protected Properties config; /** * List of Lucene indexes used for searching. By default there is only one such index for all * the wiki. One searches is created for each entry in {@link #indexDirs}. */ private Searcher[] searchers; /** * Comma separated list of directories holding Lucene index data. The first such directory is * used by the internal indexer. Can be configured in <tt>xwiki.cfg</tt> using the key * {@link #PROP_INDEX_DIR} (<tt>xwiki.plugins.lucene.indexdir</tt>). If no directory is * configured, then a subdirectory <tt>lucene</tt> in the application's work directory is * used. */ private String indexDirs; private IndexRebuilder indexRebuilder; public DocChangeRule docChangeRule = null; public XWikiActionRule xwikiActionRule = null; public LucenePlugin(String name, String className, XWikiContext context) { super(name, className, context); } /** * {@inheritDoc} * * @see java.lang.Object#finalize() */ protected void finalize() throws Throwable { LOG.error("Lucene plugin will exit !"); if (indexUpdater != null) { indexUpdater.doExit(); } super.finalize(); } public synchronized int rebuildIndex(XWikiContext context) { return indexRebuilder.startRebuildIndex(null, true, false, context); } public synchronized int rebuildIndex(boolean clearIndex, boolean refresh, XWikiContext context) { return indexRebuilder.startRebuildIndex(null, clearIndex, refresh, context); } public synchronized int reindexFromQuery(String sql, boolean clearIndex, boolean refresh, XWikiContext context) { return indexRebuilder.startRebuildIndex(sql, clearIndex, refresh, context); } /** * Allows to search special named lucene indexes without having to configure them in * <tt>xwiki.cfg</tt>. Slower than * {@link #getSearchResults(String, String, String, String, XWikiContext)} since new index * searcher instances are created for every query. * * @param query The base query, using the query engine supported by Lucene. * @param myIndexDirs Comma separated list of directories containing the lucene indexes to * search. * @param languages Comma separated list of language codes to search in, may be <tt>null</tt> * or empty to search all languages. * @param context The context of the request. * @return The list of search results. * @throws Exception If the index directories cannot be read, or the query is invalid. */ public SearchResults getSearchResultsFromIndexes(String query, String myIndexDirs, String languages, XWikiContext context) throws Exception { Searcher[] mySearchers = createSearchers(myIndexDirs); SearchResults retval = search(query, (String) null, null, languages, mySearchers, context); closeSearchers(mySearchers); return retval; } /** * Allows to search special named lucene indexes without having to configure them in xwiki.cfg. * Slower than {@link #getSearchResults}since new index searcher instances are created for * every query. * * @param query The base query, using the query engine supported by Lucene. * @param sortFields A list of fields to sort results by. For each field, if the name starts * with '-', then that field (excluding the -) is used for reverse sorting. If * <tt>null</tt> or empty, sort by hit score. * @param myIndexDirs Comma separated list of directories containing the lucene indexes to * search. * @param languages Comma separated list of language codes to search in, may be <tt>null</tt> * or empty to search all languages. * @param context The context of the request. * @return The list of search results. * @throws Exception If the index directories cannot be read, or the query is invalid. */ public SearchResults getSearchResultsFromIndexes(String query, String[] sortFields, String myIndexDirs, String languages, XWikiContext context) throws Exception { Searcher[] mySearchers = createSearchers(myIndexDirs); SearchResults retval = search(query, sortFields, null, languages, mySearchers, context); closeSearchers(mySearchers); return retval; } /** * Allows to search special named lucene indexes without having to configure them in * <tt>xwiki.cfg</tt>. Slower than * {@link #getSearchResults(String, String, String, String, XWikiContext)} since new index * searcher instances are created for every query. * * @param query The base query, using the query engine supported by Lucene. * @param sortField The name of a field to sort results by. If the name starts with '-', then * the field (excluding the -) is used for reverse sorting. If <tt>null</tt> or * empty, sort by hit score. * @param myIndexDirs Comma separated list of directories containing the lucene indexes to * search. * @param languages Comma separated list of language codes to search in, may be <tt>null</tt> * or empty to search all languages. * @param context The context of the request. * @return The list of search results. * @throws Exception If the index directories cannot be read, or the query is invalid. */ public SearchResults getSearchResultsFromIndexes(String query, String sortField, String myIndexDirs, String languages, XWikiContext context) throws Exception { Searcher[] mySearchers = createSearchers(myIndexDirs); SearchResults retval = search(query, sortField, null, languages, mySearchers, context); closeSearchers(mySearchers); return retval; } /** * Searches all Indexes configured in <tt>xwiki.cfg</tt> (property * <code>xwiki.plugins.lucene.indexdir</code>). * * @param query The base query, using the query engine supported by Lucene. * @param sortField The name of a field to sort results by. If the name starts with '-', then * the field (excluding the -) is used for reverse sorting. If <tt>null</tt> or * empty, sort by hit score. * @param virtualWikiNames Comma separated list of virtual wiki names to search in, may be * <tt>null</tt> to search all virtual wikis. * @param languages Comma separated list of language codes to search in, may be <tt>null</tt> * or empty to search all languages. * @return The list of search results. * @param context The context of the request. * @throws Exception If the index directories cannot be read, or the query is invalid. */ public SearchResults getSearchResults(String query, String sortField, String virtualWikiNames, String languages, XWikiContext context) throws Exception { // TODO Why is this here? This is slow, as it closes and opens indexes for each query. // openSearchers(); return search(query, sortField, virtualWikiNames, languages, this.searchers, context); } /** * Searches all Indexes configured in <tt>xwiki.cfg</tt> (property * <code>xwiki.plugins.lucene.indexdir</code>). * * @param query The base query, using the query engine supported by Lucene. * @param sortField The name of a field to sort results by. If the name starts with '-', then * the field (excluding the -) is used for reverse sorting. If <tt>null</tt> or * empty, sort by hit score. * @param virtualWikiNames Comma separated list of virtual wiki names to search in, may be * <tt>null</tt> to search all virtual wikis. * @param languages Comma separated list of language codes to search in, may be <tt>null</tt> * or empty to search all languages. * @return The list of search results. * @param context The context of the request. * @throws Exception If the index directories cannot be read, or the query is invalid. */ public SearchResults getSearchResults(String query, String[] sortField, String virtualWikiNames, String languages, XWikiContext context) throws Exception { return search(query, sortField, virtualWikiNames, languages, this.searchers, context); } /** * Creates and submits a query to the Lucene engine. * * @param query The base query, using the query engine supported by Lucene. * @param sortField The name of a field to sort results by. If the name starts with '-', then * the field (excluding the -) is used for reverse sorting. If <tt>null</tt> or * empty, sort by hit score. * @param virtualWikiNames Comma separated list of virtual wiki names to search in, may be * <tt>null</tt> to search all virtual wikis. * @param languages Comma separated list of language codes to search in, may be <tt>null</tt> * or empty to search all languages. * @param indexes List of Lucene indexes (searchers) to search. * @param context The context of the request. * @return The list of search results. * @throws IOException If the Lucene searchers encounter a problem reading the indexes. * @throws ParseException If the query is not valid. */ private SearchResults search(String query, String sortField, String virtualWikiNames, String languages, Searcher[] indexes, XWikiContext context) throws IOException, ParseException { SortField sort = getSortField(sortField); // Perform the actual search return search(query, (sort != null) ? new Sort(sort) : null, virtualWikiNames, languages, indexes, context); } /** * Creates and submits a query to the Lucene engine. * * @param query The base query, using the query engine supported by Lucene. * @param sortFields A list of fields to sort results by. For each field, if the name starts * with '-', then that field (excluding the -) is used for reverse sorting. If * <tt>null</tt> or empty, sort by hit score. * @param virtualWikiNames Comma separated list of virtual wiki names to search in, may be * <tt>null</tt> to search all virtual wikis. * @param languages Comma separated list of language codes to search in, may be <tt>null</tt> * or empty to search all languages. * @param indexes List of Lucene indexes (searchers) to search. * @param context The context of the request. * @return The list of search results. * @throws IOException If the Lucene searchers encounter a problem reading the indexes. * @throws ParseException If the query is not valid. */ private SearchResults search(String query, String[] sortFields, String virtualWikiNames, String languages, Searcher[] indexes, XWikiContext context) throws IOException, ParseException { // Turn the sorting field names into SortField objects. SortField[] sorts = null; if (sortFields != null && sortFields.length > 0) { sorts = new SortField[sortFields.length]; for (int i = 0; i < sortFields.length; ++i) { sorts[i] = getSortField(sortFields[i]); } // Remove any null values from the list. int prevLength = -1; while (prevLength != sorts.length) { prevLength = sorts.length; sorts = (SortField[]) ArrayUtils.removeElement(sorts, null); } } // Perform the actual search return search(query, (sorts != null) ? new Sort(sorts) : null, virtualWikiNames, languages, indexes, context); } /** * Creates and submits a query to the Lucene engine. * * @param query The base query, using the query engine supported by Lucene. * @param sort A Lucene sort object, can contain one or more sort criterias. If <tt>null</tt>, * sort by hit score. * @param virtualWikiNames Comma separated list of virtual wiki names to search in, may be * <tt>null</tt> to search all virtual wikis. * @param languages Comma separated list of language codes to search in, may be <tt>null</tt> * or empty to search all languages. * @param indexes List of Lucene indexes (searchers) to search. * @param context The context of the request. * @return The list of search results. * @throws IOException If the Lucene searchers encounter a problem reading the indexes. * @throws ParseException If the query is not valid. */ private SearchResults search(String query, Sort sort, String virtualWikiNames, String languages, Searcher[] indexes, XWikiContext context) throws IOException, ParseException { MultiSearcher searcher = new MultiSearcher(indexes); // Enhance the base query with wiki names and languages. Query q = buildQuery(query, virtualWikiNames, languages); // Perform the actual search Hits hits = (sort == null) ? searcher.search(q) : searcher.search(q, sort); final int hitcount = hits.length(); if (LOG.isDebugEnabled()) { LOG.debug("query " + q + " returned " + hitcount + " hits"); } // Transform the raw Lucene search results into XWiki-aware results return new SearchResults(hits, new com.xpn.xwiki.api.XWiki(context.getWiki(), context), context); } /** * Create a {@link SortField} corresponding to the field name. If the field name starts with * '-', then the field (excluding the leading -) will be used for reverse sorting. * * @param sortField The name of the field to sort by. If <tt>null</tt>, return a * <tt>null</tt> SortField. If starts with '-', then return a SortField that does a * reverse sort on the field. * @return A SortFiled that sorts on the given field, or <tt>null</tt>. */ private SortField getSortField(String sortField) { SortField sort = null; if (!StringUtils.isEmpty(sortField)) { if (sortField.startsWith("-")) { sort = new SortField(sortField.substring(1), true); } else { sort = new SortField(sortField); } } return sort; } /** * @param query * @param virtualWikiNames comma separated list of virtual wiki names * @param languages comma separated list of language codes to search in, may be null to search * all languages */ private Query buildQuery(String query, String virtualWikiNames, String languages) throws ParseException { // build a query like this: <user query string> AND <wikiNamesQuery> AND // <languageQuery> BooleanQuery bQuery = new BooleanQuery(); Query parsedQuery = null; // for object search if (query.startsWith("PROP ")) { String property = query.substring(0, query.indexOf(":")); query = query.substring(query.indexOf(":") + 1, query.length()); QueryParser qp = new QueryParser(property, analyzer); parsedQuery = qp.parse(query); bQuery.add(parsedQuery, BooleanClause.Occur.MUST); } else if (query.startsWith("MULTI ")) { // for fulltext search List<String> fieldList = IndexUpdater.fields; String[] fields = fieldList.toArray(new String[fieldList.size()]); BooleanClause.Occur[] flags = new BooleanClause.Occur[fields.length]; for (int i = 0; i < flags.length; i++) { flags[i] = BooleanClause.Occur.SHOULD; } parsedQuery = MultiFieldQueryParser.parse(query, fields, flags, analyzer); bQuery.add(parsedQuery, BooleanClause.Occur.MUST); } else { QueryParser qp = new QueryParser("ft", analyzer); parsedQuery = qp.parse(query); bQuery.add(parsedQuery, BooleanClause.Occur.MUST); } if (virtualWikiNames != null && virtualWikiNames.length() > 0) { bQuery.add(buildOredTermQuery(virtualWikiNames, IndexFields.DOCUMENT_WIKI), BooleanClause.Occur.MUST); } if (languages != null && languages.length() > 0) { bQuery.add(buildOredTermQuery(languages, IndexFields.DOCUMENT_LANGUAGE), BooleanClause.Occur.SHOULD); } return bQuery; } /** * @param values comma separated list of values to look for * @return A query returning documents matching one of the given values in the given field */ private Query buildOredTermQuery(final String values, final String fieldname) { String[] valueArray = values.split("\\,"); if (valueArray.length > 1) { // build a query like this: <valueArray[0]> OR <valueArray[1]> OR ... BooleanQuery orQuery = new BooleanQuery(); for (int i = 0; i < valueArray.length; i++) { orQuery.add(new TermQuery(new Term(fieldname, valueArray[i].trim())), BooleanClause.Occur.SHOULD); } return orQuery; } // exactly one value, no OR'ed Terms necessary return new TermQuery(new Term(fieldname, valueArray[0])); } public synchronized void init(XWikiContext context) { super.init(context); if (LOG.isDebugEnabled()) { LOG.debug("lucene plugin: in init"); } config = context.getWiki().getConfig(); try { analyzer = (Analyzer) Class.forName(config.getProperty(PROP_ANALYZER, DEFAULT_ANALYZER)) .newInstance(); } catch (Exception e) { LOG.error("error instantiating analyzer : ", e); LOG.warn("using default analyzer class: " + DEFAULT_ANALYZER); try { analyzer = (Analyzer) Class.forName(DEFAULT_ANALYZER).newInstance(); } catch (Exception e1) { throw new RuntimeException("instantiation of default analyzer " + DEFAULT_ANALYZER + " failed", e1); } } this.indexDirs = config.getProperty(PROP_INDEX_DIR); if (indexDirs == null || indexDirs.equals("")) { File workDir = context.getWiki().getWorkSubdirectory("lucene", context); indexDirs = workDir.getAbsolutePath(); } indexUpdater = new IndexUpdater(); indexUpdater.setAnalyzer(analyzer); indexUpdater.init(config, this, context); indexUpdaterThread = new Thread(indexUpdater, "Lucene Index Updater"); indexUpdaterThread.start(); indexRebuilder = new IndexRebuilder(indexUpdater, context); docChangeRule = new DocChangeRule(indexUpdater); xwikiActionRule = new XWikiActionRule(indexUpdater); openSearchers(); context.getWiki().getNotificationManager().addGeneralRule(docChangeRule); context.getWiki().getNotificationManager().addGeneralRule(xwikiActionRule); LOG.info("lucene plugin initialized."); } public void flushCache(XWikiContext context) { context.getWiki().getNotificationManager().removeGeneralRule(xwikiActionRule); context.getWiki().getNotificationManager().removeGeneralRule(docChangeRule); indexRebuilder = null; indexUpdaterThread.stop(); try { closeSearchers(this.searchers); } catch (IOException e) { LOG.warn("cannot close searchers"); } indexUpdater = null; analyzer = null; init(context); } public String getName() { return "lucene"; } public Api getPluginApi(XWikiPluginInterface plugin, XWikiContext context) { return new LucenePluginApi((LucenePlugin) plugin, context); } /** * Creates an array of Searchers for a number of lucene indexes. * * @param indexDirs Comma separated list of Lucene index directories to create searchers for. * @return Array of searchers */ public Searcher[] createSearchers(String indexDirs) throws Exception { String[] dirs = StringUtils.split(indexDirs, ","); List<IndexSearcher> searchersList = new ArrayList<IndexSearcher>(); for (int i = 0; i < dirs.length; i++) { try { if (!IndexReader.indexExists(dirs[i])) { // If there's no index there, create an empty one; otherwise the reader // constructor will throw an exception and fail to initialize new IndexWriter(dirs[i], analyzer).close(); } searchersList.add(new IndexSearcher(dirs[i], true)); } catch (IOException e) { LOG.error("cannot open index " + dirs[i], e); } } return searchersList.toArray(new Searcher[searchersList.size()]); } /** * Opens the searchers for the configured index Dirs after closing any already existing ones. */ protected synchronized void openSearchers() { try { closeSearchers(this.searchers); this.searchers = createSearchers(indexDirs); } catch (Exception e1) { LOG.error("error opening searchers for index dirs " + config.getProperty(PROP_INDEX_DIR), e1); throw new RuntimeException("error opening searchers for index dirs " + config.getProperty(PROP_INDEX_DIR), e1); } } /** * @throws IOException */ protected static void closeSearchers(Searcher[] searchers) throws IOException { if (searchers != null) { for (int i = 0; i < searchers.length; i++) { if (searchers[i] != null) { searchers[i].close(); } } } } public String getIndexDirs() { return indexDirs; } public long getQueueSize() { return indexUpdater.getQueueSize(); } public void queueDocument(XWikiDocument doc, XWikiContext context) { indexUpdater.add(doc, context); } public void queueAttachment(XWikiDocument doc, XWikiAttachment attach, XWikiContext context) { indexUpdater.add(doc, attach, context); } public void queueAttachment(XWikiDocument doc, XWikiContext context) { indexUpdater.addAttachmentsOfDocument(doc, context); } /** * @return the number of documents Lucene index writer. */ public long getLuceneDocCount() { return indexUpdater.getLuceneDocCount(); } /** * @return the number of documents in the second queue gave to Lucene. */ public long getActiveQueueSize() { return indexUpdater.getActiveQueueSize(); } public long getPreIndexQueueSize() { return (indexRebuilder==null) ? 0 : indexRebuilder.getPreIndexQueueSize(); } public List getRefreshedDocuments() { return (indexRebuilder==null) ? new ArrayList() : indexRebuilder.getRefreshedDocuments(); } }