/* * Copyright (C) 2008 Jive Software. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.jivesoftware.openfire.archive; import java.io.IOException; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.AbstractCollection; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.DateTools; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Hit; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.RangeFilter; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TermQuery; import org.jivesoftware.database.CachedPreparedStatement; import org.jivesoftware.database.DbConnectionManager; import org.picocontainer.Startable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xmpp.packet.JID; /** * Searches archived conversations. If conversation archiving is not enabled, * this class does nothing. Searches may or may not include keyword searching. When * keywords are used, the search is executed against the Lucene index. When keywords * are not used, the search is database driven (e.g., "get all conversations between * two users over the past year"). * * @see ArchiveIndexer * @author Matt Tucker */ public class ArchiveSearcher implements Startable { private static final Logger Log = LoggerFactory.getLogger(ArchiveSearch.class); private ConversationManager conversationManager; private ArchiveIndexer archiveIndexer; /** * Constructs a new archive searcher. * * @param conversationManager a ConversationManager instance. * @param archiveIndexer a ArchiveIndexer used to search through the search index. */ public ArchiveSearcher(ConversationManager conversationManager, ArchiveIndexer archiveIndexer) { this.conversationManager = conversationManager; this.archiveIndexer = archiveIndexer; } public void start() { } public void stop() { conversationManager = null; archiveIndexer = null; } /** * Searches the archive using the specified search. The {@link ArchiveSearch} class * is used to encapsulate all information about a search. * * @param search the search. * @return a Collection of conversations that match the search query. */ public Collection<Conversation> search(ArchiveSearch search) { // If the search has a query string it will be driven by Lucene. Otherwise if (search.getQueryString() != null) { return luceneSearch(search); } else { return databaseSearch(search); } } /** * Searches the Lucene index for all archived conversations using the specified search. * * @param search the search. * @return the collection of conversations that match the search. */ private Collection<Conversation> luceneSearch(ArchiveSearch search) { try { IndexSearcher searcher = archiveIndexer.getSearcher(); final StandardAnalyzer analyzer = new StandardAnalyzer(); // Create the query based on the search terms. Query query = new QueryParser("text", analyzer).parse(search.getQueryString()); // See if the user wants to sort on something other than relevance. If so, we need // to tell Lucene to do sorting. Default to a null sort so that it has no // effect if sorting hasn't been selected. Sort sort = null; if (search.getSortField() != ArchiveSearch.SortField.relevance) { if (search.getSortField() == ArchiveSearch.SortField.date) { sort = new Sort("date", search.getSortOrder() == ArchiveSearch.SortOrder.descending); } } // See if we need to filter on date. Default to a null filter so that it has // no effect if date filtering hasn't been selected. Filter filter = null; if (search.getDateRangeMin() != null || search.getDateRangeMax() != null) { String min = null; if (search.getDateRangeMin() != null) { min = DateTools.dateToString(search.getDateRangeMin(), DateTools.Resolution.DAY); } String max = null; if (search.getDateRangeMax() != null) { max = DateTools.dateToString(search.getDateRangeMax(), DateTools.Resolution.DAY); } // ENT-271: don't include upper or lower bound if these elements are null filter = new RangeFilter("date", min, max, min != null, max != null ); } // See if we need to match external conversations. This will only be true // when less than two conversation participants are specified and external // wildcard matching is enabled. Collection<JID> participants = search.getParticipants(); if (search.getParticipants().size() < 2 && search.isExternalWildcardMode()) { TermQuery externalQuery = new TermQuery(new Term("external", "true")); // Add this query to the existing query. BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.add(query, BooleanClause.Occur.MUST); booleanQuery.add(externalQuery, BooleanClause.Occur.MUST); query = booleanQuery; } // See if we need to restrict the search to certain users. if (!participants.isEmpty()) { if (participants.size() == 1) { String jid = participants.iterator().next().toBareJID(); Query participantQuery = new QueryParser("jid", analyzer).parse(jid); // Add this query to the existing query. BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.add(query, BooleanClause.Occur.MUST); booleanQuery.add(participantQuery, BooleanClause.Occur.MUST); query = booleanQuery; } // Otherwise there are two participants. else { Iterator<JID> iter = participants.iterator(); String participant1 = iter.next().toBareJID(); String participant2 = iter.next().toBareJID(); BooleanQuery participantQuery = new BooleanQuery(); participantQuery.add(new QueryParser("jid", analyzer).parse(participant1), BooleanClause.Occur.MUST); participantQuery.add(new QueryParser("jid", analyzer).parse(participant2), BooleanClause.Occur.MUST); // Add this query to the existing query. BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.add(query, BooleanClause.Occur.MUST); booleanQuery.add(participantQuery, BooleanClause.Occur.MUST); query = booleanQuery; } } Hits hits = searcher.search(query, filter, sort); int startIndex = search.getStartIndex(); int endIndex = startIndex + search.getNumResults() - 1; // The end index can't be after the end of the results. if (endIndex > hits.length() - 1) { // endIndex = hits.length() - 1; // TODO: We need to determine if this is necessary. } // If the start index is positioned after the end, return an empty list. if (((endIndex - startIndex) + 1) <= 0) { return Collections.emptyList(); } // Otherwise return the results. else { return new LuceneQueryResults(hits, startIndex, endIndex); } } catch (ParseException pe) { Log.error(pe.getMessage(), pe); return Collections.emptySet(); } catch (IOException ioe) { Log.error(ioe.getMessage(), ioe); return Collections.emptySet(); } } /** * Searches the database for all archived conversations using the specified search. * * @param search the search. * @return the collection of conversations that match the search. */ private Collection<Conversation> databaseSearch(ArchiveSearch search) { CachedPreparedStatement cachedPstmt = new CachedPreparedStatement(); // Build the SQL StringBuilder query = new StringBuilder(160); query.append("SELECT DISTINCT ofConversation.conversationID"); Collection<JID> participants = search.getParticipants(); boolean filterParticipants = !participants.isEmpty(); boolean filterDate = search.getDateRangeMin() != null || search.getDateRangeMax() != null; boolean filterTimestamp = search.getIncludeTimestamp() != null; boolean filterRoom = search.getRoom() != null; // SELECT -- need to add value that we sort on. We always sort on date since that's // the only valid current option for non-keyword searches. query.append(", ofConversation.startDate"); // FROM -- values (in addition to jiveThread) query.append(" FROM ofConversation"); if (filterParticipants) { for (int i=0; i < participants.size(); i++) { query.append(", ofConParticipant participant").append(i); } } // WHERE BLOCK boolean whereSet = false; // See if we need to match against external conversations. if (search.isExternalWildcardMode() && search.getParticipants().size() != 2) { query.append(" WHERE isExternal=?"); cachedPstmt.addInt(1); whereSet = true; } // Participants if (filterParticipants) { Iterator<JID> iter = participants.iterator(); for (int i=0; i < participants.size(); i++) { if (!whereSet) { query.append(" WHERE"); whereSet = true; } else { query.append(" AND"); } query.append(" ofConversation.conversationID=participant").append(i).append(".conversationID"); query.append(" AND "); query.append("participant").append(i).append(".bareJID=?"); String partJID = iter.next().toString(); cachedPstmt.addString(partJID); } } // Creation date range if (filterDate) { if (search.getDateRangeMin() != null) { if (!whereSet) { query.append(" WHERE"); whereSet = true; } else { query.append(" AND"); } query.append(" ofConversation.startDate >= ?"); cachedPstmt.addLong(search.getDateRangeMin().getTime()); } if (search.getDateRangeMax() != null) { if (!whereSet) { query.append(" WHERE"); whereSet = true; } else { query.append(" AND"); } query.append(" ofConversation.startDate <= ?"); cachedPstmt.addLong(search.getDateRangeMax().getTime()); } } // Check if conversations have to happen at a given point in time if (filterTimestamp) { if (!whereSet) { query.append(" WHERE"); whereSet = true; } else { query.append(" AND"); } query.append(" ofConversation.startDate <= ?"); cachedPstmt.addLong(search.getIncludeTimestamp().getTime()); query.append(" AND"); query.append(" ofConversation.lastActivity >= ?"); cachedPstmt.addLong(search.getIncludeTimestamp().getTime()); } // Filter by room if (filterRoom) { if (!whereSet) { query.append(" WHERE"); whereSet = true; } else { query.append(" AND"); } query.append(" ofConversation.room = ?"); cachedPstmt.addString(search.getRoom().toString()); } // ORDER BY query.append(" ORDER BY ofConversation.startDate"); if (search.getSortOrder() == ArchiveSearch.SortOrder.descending) { query.append(" DESC"); } else { query.append(" ASC"); } int startIndex = search.getStartIndex(); int numResults = search.getNumResults(); if (numResults != ArchiveSearch.NULL_INT) { // MySQL optimization: use the LIMIT command to tell the database how many // rows we need returned. The syntax is LIMIT [offset],[rows] if (DbConnectionManager.getDatabaseType() == DbConnectionManager.DatabaseType.mysql) { query.append(" LIMIT ").append(startIndex).append(",").append(numResults); } // PostgreSQL optimization: use the LIMIT command to tell the database how many // rows we need returned. The syntax is LIMIT [rows] OFFSET [offset] else if (DbConnectionManager.getDatabaseType() == DbConnectionManager.DatabaseType.postgresql) { query.append(" LIMIT ").append(numResults).append(" OFFSET ").append(startIndex); } } // Set the database query string. cachedPstmt.setSQL(query.toString()); List<Long> conversationIDs = new ArrayList<Long>(); // Get all matching conversations from the database. Connection con = null; PreparedStatement pstmt = null; try { con = DbConnectionManager.getConnection(); pstmt = DbConnectionManager.createScrollablePreparedStatement(con, cachedPstmt.getSQL()); cachedPstmt.setParams(pstmt); // Set the maximum number of rows to end at the end of this block. // A MySQL optimization using the LIMIT command is part of the SQL. // Therefore, we can skip this call on MySQL. if (DbConnectionManager.getDatabaseType() != DbConnectionManager.DatabaseType.mysql && DbConnectionManager.getDatabaseType() != DbConnectionManager.DatabaseType.postgresql) { DbConnectionManager.setMaxRows(pstmt, startIndex+numResults); } ResultSet rs = pstmt.executeQuery(); // Position the cursor right before the first row that we're insterested in. // A MySQL and Postgres optimization using the LIMIT command is part of the SQL. // Therefore, we can skip this call on MySQL or Postgres. if (DbConnectionManager.getDatabaseType() != DbConnectionManager.DatabaseType.mysql && DbConnectionManager.getDatabaseType() != DbConnectionManager.DatabaseType.postgresql) { DbConnectionManager.scrollResultSet(rs, startIndex); } // Keep reading results until the result set is exhausted or // we come to the end of the block. int count = 0; while (rs.next() && count < numResults) { conversationIDs.add(rs.getLong(1)); count++; } rs.close(); } catch (SQLException sqle) { Log.error(sqle.getMessage(), sqle); } finally { DbConnectionManager.closeConnection(pstmt, con); } return new DatabaseQueryResults(conversationIDs); } /** * Returns Hits from a database search against archived conversations as a Collection * of Conversation objects. */ private class DatabaseQueryResults extends AbstractCollection<Conversation> { private List<Long> conversationIDs; /** * Constructs a new query results object. * * @param conversationIDs the list of conversation IDs. */ public DatabaseQueryResults(List<Long> conversationIDs) { this.conversationIDs = conversationIDs; } @Override public Iterator<Conversation> iterator() { final Iterator<Long> convIterator = conversationIDs.iterator(); return new Iterator<Conversation>() { private Conversation nextElement = null; public boolean hasNext() { if (nextElement == null) { nextElement = getNextElement(); if (nextElement == null) { return false; } } return true; } public Conversation next() { Conversation element; if (nextElement != null) { element = nextElement; nextElement = null; } else { element = getNextElement(); if (element == null) { throw new NoSuchElementException(); } } return element; } public void remove() { throw new UnsupportedOperationException(); } private Conversation getNextElement() { if (!convIterator.hasNext()) { return null; } while (convIterator.hasNext()) { try { long conversationID = convIterator.next(); return new Conversation(conversationManager, conversationID); } catch (Exception e) { Log.error(e.getMessage(), e); } } return null; } }; } @Override public int size() { return conversationIDs.size(); } } /** * Returns Hits from a Lucene search against archived conversations as a Collection * of Conversation objects. */ private class LuceneQueryResults extends AbstractCollection<Conversation> { private Hits hits; private int index; private int endIndex; /** * Constructs a new query results object. * * @param hits the search hits. * @param startIndex the starting index that results should be returned from. * @param endIndex the ending index that results should be returned to. */ public LuceneQueryResults(Hits hits, int startIndex, int endIndex) { this.hits = hits; this.index = startIndex; this.endIndex = endIndex; } @Override public Iterator<Conversation> iterator() { final Iterator<Hit> hitsIterator = hits.iterator(); // Advance the iterator until we hit the index. for (int i=0; i<index; i++) { hitsIterator.next(); } return new Iterator<Conversation>() { private Conversation nextElement = null; public boolean hasNext() { if (nextElement == null) { nextElement = getNextElement(); if (nextElement == null) { return false; } } return true; } public Conversation next() { Conversation element; if (nextElement != null) { element = nextElement; nextElement = null; } else { element = getNextElement(); if (element == null) { throw new NoSuchElementException(); } } return element; } public void remove() { throw new UnsupportedOperationException(); } private Conversation getNextElement() { if (!hitsIterator.hasNext()) { return null; } // If we've reached the end index, stop iterating. else if (index >= endIndex) { return null; } while (hitsIterator.hasNext()) { try { Hit hit = hitsIterator.next(); // Advance the index. index++; long conversationID = Long.parseLong(hit.get("conversationID")); return new Conversation(conversationManager, conversationID); } catch (Exception e) { Log.error(e.getMessage(), e); } } return null; } }; } @Override public int size() { return hits.length(); } } }