ArchiveSearcher.java example

Explorer
Openfire-master
- dbutil
  - src
    - main
      - java
        org
        jivesoftware
        database
        AbstractConnection.java
        CallableStatementWrapper.java
        PreparedStatementWrapper.java
        ProfiledConnection.java
        ProfiledConnectionEntry.java
        StatementWrapper.java
- src
/*
 * Copyright (C) 2008 Jive Software. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.jivesoftware.openfire.archive;

import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.AbstractCollection;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Hit;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeFilter;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.jivesoftware.database.CachedPreparedStatement;
import org.jivesoftware.database.DbConnectionManager;
import org.picocontainer.Startable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xmpp.packet.JID;

/**
 * Searches archived conversations. If conversation archiving is not enabled,
 * this class does nothing. Searches may or may not include keyword searching. When
 * keywords are used, the search is executed against the Lucene index. When keywords
 * are not used, the search is database driven (e.g., "get all conversations between
 * two users over the past year").
 *
 * @see ArchiveIndexer
 * @author Matt Tucker
 */
public class ArchiveSearcher implements Startable {

	private static final Logger Log = LoggerFactory.getLogger(ArchiveSearch.class);

    private ConversationManager conversationManager;
    private ArchiveIndexer archiveIndexer;

    /**
     * Constructs a new archive searcher.
     *
     * @param conversationManager a ConversationManager instance.
     * @param archiveIndexer a ArchiveIndexer used to search through the search index.
     */
    public ArchiveSearcher(ConversationManager conversationManager, ArchiveIndexer archiveIndexer) {
        this.conversationManager = conversationManager;
        this.archiveIndexer = archiveIndexer;
    }

    public void start() {

    }

    public void stop() {
        conversationManager = null;
        archiveIndexer = null;
    }

    /**
     * Searches the archive using the specified search. The {@link ArchiveSearch} class
     * is used to encapsulate all information about a search.
     *
     * @param search the search.
     * @return a Collection of conversations that match the search query.
     */
    public Collection<Conversation> search(ArchiveSearch search) {
        // If the search has a query string it will be driven by Lucene. Otherwise
        if (search.getQueryString() != null) {
            return luceneSearch(search);
        }
        else {
            return databaseSearch(search);
        }
    }

    /**
     * Searches the Lucene index for all archived conversations using the specified search.
     *
     * @param search the search.
     * @return the collection of conversations that match the search.
     */
    private Collection<Conversation> luceneSearch(ArchiveSearch search) {
        try {
            IndexSearcher searcher = archiveIndexer.getSearcher();

            final StandardAnalyzer analyzer = new StandardAnalyzer();

            // Create the query based on the search terms.
            Query query = new QueryParser("text", analyzer).parse(search.getQueryString());

            // See if the user wants to sort on something other than relevance. If so, we need
            // to tell Lucene to do sorting. Default to a null sort so that it has no
            // effect if sorting hasn't been selected.
            Sort sort = null;
            if (search.getSortField() != ArchiveSearch.SortField.relevance) {
                if (search.getSortField() == ArchiveSearch.SortField.date) {
                    sort =  new Sort("date", search.getSortOrder() == ArchiveSearch.SortOrder.descending);
                }
            }

            // See if we need to filter on date. Default to a null filter so that it has
            // no effect if date filtering hasn't been selected.
            Filter filter = null;
            if (search.getDateRangeMin() != null || search.getDateRangeMax() != null) {
                String min = null;
                if (search.getDateRangeMin() != null) {
                    min = DateTools.dateToString(search.getDateRangeMin(), DateTools.Resolution.DAY);
                }
                String max = null;
                if (search.getDateRangeMax() != null) {
                    max = DateTools.dateToString(search.getDateRangeMax(), DateTools.Resolution.DAY);
                }
                // ENT-271: don't include upper or lower bound if these elements are null
                filter = new RangeFilter("date", min, max, min != null, max != null );
            }

            // See if we need to match external conversations. This will only be true
            // when less than two conversation participants are specified and external
            // wildcard matching is enabled.
            Collection<JID> participants = search.getParticipants();
            if (search.getParticipants().size() < 2 && search.isExternalWildcardMode()) {
                TermQuery externalQuery = new TermQuery(new Term("external", "true"));
                // Add this query to the existing query.
                BooleanQuery booleanQuery = new BooleanQuery();
                booleanQuery.add(query, BooleanClause.Occur.MUST);
                booleanQuery.add(externalQuery, BooleanClause.Occur.MUST);
                query = booleanQuery;
            }

            // See if we need to restrict the search to certain users.
            if (!participants.isEmpty()) {
                if (participants.size() == 1) {
                    String jid = participants.iterator().next().toBareJID();
                    Query participantQuery = new QueryParser("jid", analyzer).parse(jid);

                    // Add this query to the existing query.
                    BooleanQuery booleanQuery = new BooleanQuery();
                    booleanQuery.add(query, BooleanClause.Occur.MUST);
                    booleanQuery.add(participantQuery, BooleanClause.Occur.MUST);
                    query = booleanQuery;
                }
                // Otherwise there are two participants.
                else {
                    Iterator<JID> iter = participants.iterator();
                    String participant1 = iter.next().toBareJID();
                    String participant2 = iter.next().toBareJID();
                    BooleanQuery participantQuery = new BooleanQuery();
                    participantQuery.add(new QueryParser("jid", analyzer).parse(participant1),
                            BooleanClause.Occur.MUST);
                    participantQuery.add(new QueryParser("jid", analyzer).parse(participant2),
                            BooleanClause.Occur.MUST);
                    // Add this query to the existing query.
                    BooleanQuery booleanQuery = new BooleanQuery();
                    booleanQuery.add(query, BooleanClause.Occur.MUST);
                    booleanQuery.add(participantQuery, BooleanClause.Occur.MUST);
                    query = booleanQuery;
                }
            }

            Hits hits = searcher.search(query, filter, sort);

            int startIndex = search.getStartIndex();
            int endIndex = startIndex + search.getNumResults() - 1;

            // The end index can't be after the end of the results.
            if (endIndex > hits.length() - 1) {

               // endIndex = hits.length() - 1;
                // TODO: We need to determine if this is necessary.
            }

            // If the start index is positioned after the end, return an empty list.
            if (((endIndex - startIndex) + 1) <= 0) {
                return Collections.emptyList();
            }
            // Otherwise return the results.
            else {
                return new LuceneQueryResults(hits, startIndex, endIndex);
            }
        }
        catch (ParseException pe) {
            Log.error(pe.getMessage(), pe);
            return Collections.emptySet();
        }
        catch (IOException ioe) {
            Log.error(ioe.getMessage(), ioe);
            return Collections.emptySet();
        }
    }

    /**
     * Searches the database for all archived conversations using the specified search.
     *
     * @param search the search.
     * @return the collection of conversations that match the search.
     */
    private Collection<Conversation> databaseSearch(ArchiveSearch search) {
        CachedPreparedStatement cachedPstmt = new CachedPreparedStatement();

        // Build the SQL
        StringBuilder query = new StringBuilder(160);
        query.append("SELECT DISTINCT ofConversation.conversationID");

        Collection<JID> participants = search.getParticipants();
        boolean filterParticipants = !participants.isEmpty();
        boolean filterDate = search.getDateRangeMin() != null || search.getDateRangeMax() != null;
        boolean filterTimestamp = search.getIncludeTimestamp() != null;
        boolean filterRoom = search.getRoom() != null;

        // SELECT -- need to add value that we sort on. We always sort on date since that's
        // the only valid current option for non-keyword searches.
        query.append(", ofConversation.startDate");

        // FROM -- values (in addition to jiveThread)
        query.append(" FROM ofConversation");
        if (filterParticipants) {
            for (int i=0; i < participants.size(); i++) {
                query.append(", ofConParticipant participant").append(i);
            }
        }

        // WHERE BLOCK
        boolean whereSet = false;
        // See if we need to match against external conversations.
        if (search.isExternalWildcardMode() && search.getParticipants().size() != 2) {
            query.append(" WHERE isExternal=?");
            cachedPstmt.addInt(1);
            whereSet = true;
        }
        // Participants
        if (filterParticipants) {
            Iterator<JID> iter = participants.iterator();
            for (int i=0; i < participants.size(); i++) {
                if (!whereSet) {
                    query.append(" WHERE");
                    whereSet = true;
                }
                else {
                    query.append(" AND");
                }
                query.append(" ofConversation.conversationID=participant").append(i).append(".conversationID");
                query.append(" AND ");
                query.append("participant").append(i).append(".bareJID=?");
                String partJID = iter.next().toString();
                cachedPstmt.addString(partJID);
            }
        }

        // Creation date range
        if (filterDate) {
            if (search.getDateRangeMin() != null) {
                if (!whereSet) {
                    query.append(" WHERE");
                    whereSet = true;
                }
                else {
                    query.append(" AND");
                }
                query.append(" ofConversation.startDate >= ?");
                cachedPstmt.addLong(search.getDateRangeMin().getTime());
            }
            if (search.getDateRangeMax() != null) {
                if (!whereSet) {
                    query.append(" WHERE");
                    whereSet = true;
                }
                else {
                    query.append(" AND");
                }
                query.append(" ofConversation.startDate <= ?");
                cachedPstmt.addLong(search.getDateRangeMax().getTime());
            }
        }

        // Check if conversations have to happen at a given point in time
        if (filterTimestamp) {
            if (!whereSet) {
                query.append(" WHERE");
                whereSet = true;
            }
            else {
                query.append(" AND");
            }
            query.append(" ofConversation.startDate <= ?");
            cachedPstmt.addLong(search.getIncludeTimestamp().getTime());

            query.append(" AND");
            query.append(" ofConversation.lastActivity >= ?");
            cachedPstmt.addLong(search.getIncludeTimestamp().getTime());
        }

        // Filter by room
        if (filterRoom) {
            if (!whereSet) {
                query.append(" WHERE");
                whereSet = true;
            }
            else {
                query.append(" AND");
            }
            query.append(" ofConversation.room = ?");
            cachedPstmt.addString(search.getRoom().toString());
        }

        // ORDER BY
        query.append(" ORDER BY ofConversation.startDate");
        if (search.getSortOrder() == ArchiveSearch.SortOrder.descending) {
            query.append(" DESC");
        }
        else {
            query.append(" ASC");
        }

        int startIndex = search.getStartIndex();
        int numResults = search.getNumResults();
        if (numResults != ArchiveSearch.NULL_INT) {
            // MySQL optimization: use the LIMIT command to tell the database how many
            // rows we need returned. The syntax is LIMIT [offset],[rows]
            if (DbConnectionManager.getDatabaseType() == DbConnectionManager.DatabaseType.mysql) {
                query.append(" LIMIT ").append(startIndex).append(",").append(numResults);
            }
            // PostgreSQL optimization: use the LIMIT command to tell the database how many
            // rows we need returned. The syntax is LIMIT [rows] OFFSET [offset]
            else if (DbConnectionManager.getDatabaseType() == DbConnectionManager.DatabaseType.postgresql) {
                query.append(" LIMIT ").append(numResults).append(" OFFSET ").append(startIndex);
            }
        }

        // Set the database query string.
        cachedPstmt.setSQL(query.toString());

        List<Long> conversationIDs = new ArrayList<Long>();

        // Get all matching conversations from the database.
        Connection con = null;
        PreparedStatement pstmt = null;
        try {
            con = DbConnectionManager.getConnection();
            pstmt = DbConnectionManager.createScrollablePreparedStatement(con, cachedPstmt.getSQL());
            cachedPstmt.setParams(pstmt);
            // Set the maximum number of rows to end at the end of this block.
            // A MySQL optimization using the LIMIT command is part of the SQL.
            // Therefore, we can skip this call on MySQL.
            if (DbConnectionManager.getDatabaseType() != DbConnectionManager.DatabaseType.mysql
                && DbConnectionManager.getDatabaseType() != DbConnectionManager.DatabaseType.postgresql)
            {
                DbConnectionManager.setMaxRows(pstmt, startIndex+numResults);
            }
            ResultSet rs = pstmt.executeQuery();
            // Position the cursor right before the first row that we're insterested in.
            // A MySQL and Postgres optimization using the LIMIT command is part of the SQL.
            // Therefore, we can skip this call on MySQL or Postgres.
            if (DbConnectionManager.getDatabaseType() != DbConnectionManager.DatabaseType.mysql
                && DbConnectionManager.getDatabaseType() != DbConnectionManager.DatabaseType.postgresql)
            {
                DbConnectionManager.scrollResultSet(rs, startIndex);
            }
            // Keep reading results until the result set is exhausted or
            // we come to the end of the block.
            int count = 0;
            while (rs.next() && count < numResults) {
                conversationIDs.add(rs.getLong(1));
                count++;
            }
            rs.close();
        }
        catch (SQLException sqle) {
            Log.error(sqle.getMessage(), sqle);
        }
        finally {
            DbConnectionManager.closeConnection(pstmt, con);
        }
        return new DatabaseQueryResults(conversationIDs);
    }

    /**
     * Returns Hits from a database search against archived conversations as a Collection
     * of Conversation objects.
     */
    private class DatabaseQueryResults extends AbstractCollection<Conversation> {

        private List<Long> conversationIDs;

        /**
         * Constructs a new query results object.
         *
         * @param conversationIDs the list of conversation IDs.
         */
        public DatabaseQueryResults(List<Long> conversationIDs) {
            this.conversationIDs = conversationIDs;
        }

        @Override
		public Iterator<Conversation> iterator() {
            final Iterator<Long> convIterator = conversationIDs.iterator();
            return new Iterator<Conversation>() {

                private Conversation nextElement = null;

                public boolean hasNext() {
                    if (nextElement == null) {
                        nextElement = getNextElement();
                        if (nextElement == null) {
                            return false;
                        }
                    }
                    return true;
                }

                public Conversation next() {
                	Conversation element;
                    if (nextElement != null) {
                        element = nextElement;
                        nextElement = null;
                    }
                    else {
                        element = getNextElement();
                        if (element == null) {
                            throw new NoSuchElementException();
                        }
                    }
                    return element;
                }

                public void remove() {
                    throw new UnsupportedOperationException();
                }

                private Conversation getNextElement() {
                    if (!convIterator.hasNext()) {
                        return null;
                    }
                    while (convIterator.hasNext()) {
                        try {
                            long conversationID = convIterator.next();
                            return new Conversation(conversationManager, conversationID);
                        }
                        catch (Exception e) {
                            Log.error(e.getMessage(), e);
                        }
                    }
                    return null;
                }
            };
        }

        @Override
		public int size() {
            return conversationIDs.size();
        }
    }

    /**
     * Returns Hits from a Lucene search against archived conversations as a Collection
     * of Conversation objects.
     */
    private class LuceneQueryResults extends AbstractCollection<Conversation> {

        private Hits hits;
        private int index;
        private int endIndex;

        /**
         * Constructs a new query results object.
         *
         * @param hits the search hits.
         * @param startIndex the starting index that results should be returned from.
         * @param endIndex the ending index that results should be returned to.
         */
        public LuceneQueryResults(Hits hits, int startIndex, int endIndex) {
            this.hits = hits;
            this.index = startIndex;
            this.endIndex = endIndex;
        }

        @Override
		public Iterator<Conversation> iterator() {
            final Iterator<Hit> hitsIterator = hits.iterator();
            // Advance the iterator until we hit the index.
            for (int i=0; i<index; i++) {
                hitsIterator.next();
            }
            return new Iterator<Conversation>() {

                private Conversation nextElement = null;

                public boolean hasNext() {
                    if (nextElement == null) {
                        nextElement = getNextElement();
                        if (nextElement == null) {
                            return false;
                        }
                    }
                    return true;
                }

                public Conversation next() {
                	Conversation element;
                    if (nextElement != null) {
                        element = nextElement;
                        nextElement = null;
                    }
                    else {
                        element = getNextElement();
                        if (element == null) {
                            throw new NoSuchElementException();
                        }
                    }
                    return element;
                }

                public void remove() {
                    throw new UnsupportedOperationException();
                }

                private Conversation getNextElement() {
                    if (!hitsIterator.hasNext()) {
                        return null;
                    }
                    // If we've reached the end index, stop iterating.
                    else if (index >= endIndex) {
                        return null;
                    }
                    while (hitsIterator.hasNext()) {
                        try {
                            Hit hit = hitsIterator.next();
                            // Advance the index.
                            index++;

                            long conversationID = Long.parseLong(hit.get("conversationID"));
                            return new Conversation(conversationManager, conversationID);
                        }
                        catch (Exception e) {
                            Log.error(e.getMessage(), e);
                        }
                    }
                    return null;
                }
            };
        }

        @Override
		public int size() {
            return hits.length();
        }
    }
}